diff --git a/src/webui/app.py b/src/webui/app.py index 9878cb6..0b2f565 100644 --- a/src/webui/app.py +++ b/src/webui/app.py @@ -5,935 +5,81 @@ Run with: python src/webui/app.py from __future__ import annotations +import logging +import os import sys +import time from pathlib import Path # Ensure project src is on path _project_root = Path(__file__).resolve().parent.parent.parent sys.path.insert(0, str(_project_root / "src")) -import csv -import io -import json -import time -import functools -from collections import defaultdict +from flask import Flask, g, render_template, request -from flask import Flask, render_template, request, jsonify, abort, g, Response, redirect, url_for +from webui.auth import init_auth +from webui.analytics import init_analytics +from webui.blueprints import register_blueprints +from webui.data import get_db -from webui.auth import admin_required, init_auth -from webui.analytics import init_analytics, get_analytics_data -from webui.obsidian_export import build_obsidian_vault -from webui.data import ( - get_db, - get_overview_stats, - get_category_counts, - get_drafts_page, - get_draft_detail, - get_rating_distributions, - get_timeline_data, - get_ideas_by_type, - get_all_gaps, - get_gap_detail, - get_generated_drafts, - read_generated_draft, - get_top_authors, - get_org_data, - get_category_radar_data, - get_score_histogram, - get_coauthor_network, - get_cross_org_data, - get_landscape_tsne, - get_similarity_graph, - get_timeline_animation_data, - get_idea_clusters, - get_monitor_status, - get_author_network_full, - get_citation_graph, - get_comparison_data, - get_ask_search, - get_ask_synthesize, - get_category_summary, - global_search, - get_architecture, - get_source_comparison, - get_false_positive_profile, - get_citation_influence, - get_bcp_analysis, - get_trends_data, - get_complexity_data, - get_idea_analysis, - get_all_proposals, - get_proposal_detail, - get_proposals_for_gap, -) - -app = Flask( - __name__, - 
template_folder=str(Path(__file__).parent / "templates"), - static_folder=str(Path(__file__).parent / "static"), - static_url_path="/static", -) -import os -app.config["SECRET_KEY"] = os.environ.get("FLASK_SECRET_KEY", os.urandom(24).hex()) -# Auth is initialized at startup — see __main__ block and create_app() -# Default: production mode (admin disabled) -init_auth(app, dev=False) - -# Analytics (GDPR-compliant, no cookies) -_analytics_db = str(_project_root / "data" / "analytics.db") -init_analytics(app, db_path=_analytics_db) - - -# --- Rate limiting for Claude-calling endpoints --- - -_rate_limit_store: dict[str, list[float]] = defaultdict(list) -_RATE_LIMIT_MAX = 10 # max requests -_RATE_LIMIT_WINDOW = 60 # per 60 seconds - - -def rate_limit(f): - """Simple in-memory rate limiter: max 10 requests per minute per IP.""" - @functools.wraps(f) - def wrapper(*args, **kwargs): - ip = request.remote_addr or "unknown" - now = time.time() - # Prune timestamps outside the sliding window - timestamps = _rate_limit_store[ip] - _rate_limit_store[ip] = [t for t in timestamps if now - t < _RATE_LIMIT_WINDOW] - if len(_rate_limit_store[ip]) >= _RATE_LIMIT_MAX: - return jsonify({"error": "Rate limit exceeded. 
Try again later."}), 429 - _rate_limit_store[ip].append(now) - return f(*args, **kwargs) - return wrapper - - -# --- Database lifecycle (per-request to avoid SQLite threading issues) --- - - -def db(): - if "db" not in g: - g.db = get_db() - return g.db - - -@app.teardown_appcontext -def close_db(exception=None): - database = g.pop("db", None) - if database is not None: - database.close() - - -# --- Routes --- - - -@app.route("/") -def overview(): - stats = get_overview_stats(db()) - categories = get_category_counts(db()) - timeline = get_timeline_data(db()) - scores = get_score_histogram(db()) - radar = get_category_radar_data(db()) - return render_template( - "overview.html", - stats=stats, - categories=categories, - timeline=timeline, - scores=scores, - radar=radar, - ) - - -@app.route("/drafts") -def drafts(): - page = request.args.get("page", 1, type=int) - search = request.args.get("q", "") - category = request.args.get("cat", "") - source = request.args.get("source", "") - min_score = request.args.get("min_score", 0.0, type=float) - sort = request.args.get("sort", "score") - sort_dir = request.args.get("dir", "desc") - - result = get_drafts_page( - db(), - page=page, - search=search, - category=category, - min_score=min_score, - sort=sort, - sort_dir=sort_dir, - source=source, - ) - categories = get_category_counts(db()) - cat_summary = get_category_summary(db(), category) if category else None - return render_template( - "drafts.html", - result=result, - categories=categories, - cat_summary=cat_summary, - search=search, - current_cat=category, - current_source=source, - min_score=min_score, - sort=sort, - sort_dir=sort_dir, - ) - - -@app.route("/drafts/") -def draft_detail(name: str): - database = db() - detail = get_draft_detail(database, name) - if not detail: - abort(404) - # Build set of draft ref IDs that exist in our DB for internal linking - ref_draft_ids = [r["id"] for r in detail.get("refs", []) if r["type"] == "draft"] - known_drafts = set() - if 
ref_draft_ids: - placeholders = ",".join("?" * len(ref_draft_ids)) - rows = database.conn.execute( - f"SELECT name FROM drafts WHERE name IN ({placeholders})", ref_draft_ids - ).fetchall() - known_drafts = {r["name"] for r in rows} - return render_template("draft_detail.html", draft=detail, known_drafts=known_drafts) - - -@app.route("/ideas") -def ideas(): - data = get_ideas_by_type(db()) - return render_template("ideas.html", data=data) - - -@app.route("/gaps") -@admin_required -def gaps(): - gap_list = get_all_gaps(db()) - generated = get_generated_drafts() - return render_template("gaps.html", gaps=gap_list, generated_drafts=generated) - - -@app.route("/gaps/demo") -@admin_required -def gaps_demo(): - """Show a pre-generated example draft so users can see output without API calls.""" - generated = get_generated_drafts() - # Default to the first generated draft, or allow selection via query param - selected = request.args.get("file", "") - draft_text = None - draft_info = None - if selected: - draft_text = read_generated_draft(selected) - for gd in generated: - if gd["filename"] == selected: - draft_info = gd - break - elif generated: - draft_info = generated[0] - draft_text = read_generated_draft(draft_info["filename"]) - return render_template( - "gap_demo.html", - generated_drafts=generated, - draft_text=draft_text, - draft_info=draft_info, - selected=selected, - ) - - -@app.route("/gaps/") -@admin_required -def gap_detail(gap_id: int): - gap = get_gap_detail(db(), gap_id) - if not gap: - abort(404) - generated = get_generated_drafts() - gap_proposals = get_proposals_for_gap(db(), gap_id) - return render_template("gap_detail.html", gap=gap, generated_drafts=generated, proposals=gap_proposals) - - -@app.route("/gaps//generate", methods=["POST"]) -@admin_required -def gap_generate(gap_id: int): - """Trigger draft generation for a gap. 
Returns JSON with the generated text.""" - gap = get_gap_detail(db(), gap_id) - if not gap: - return jsonify({"error": "Gap not found"}), 404 - - try: - from ietf_analyzer.config import Config - from ietf_analyzer.analyzer import Analyzer - from ietf_analyzer.draftgen import DraftGenerator - - cfg = Config.load() - database = db() - analyzer = Analyzer(cfg, database) - generator = DraftGenerator(cfg, database, analyzer) - - # Generate into a file named after the gap - slug = gap["topic"].lower().replace(" ", "-")[:40] - output_path = str(Path(_project_root) / "data" / "reports" / "generated-drafts" / f"draft-gap-{gap_id}-{slug}.txt") - path = generator.generate(gap["topic"], output_path=output_path) - draft_text = Path(path).read_text(errors="replace") - - return jsonify({ - "success": True, - "text": draft_text, - "filename": Path(path).name, - "path": path, - }) - except Exception as e: - return jsonify({"error": str(e)}), 500 - - -@app.route("/ratings") -def ratings(): - distributions = get_rating_distributions(db()) - radar = get_category_radar_data(db()) - return render_template( - "ratings.html", - dist=distributions, - radar=radar, - ) - - -@app.route("/landscape") -@admin_required -def landscape(): - distributions = get_rating_distributions(db()) - tsne_data = get_landscape_tsne(db()) - return render_template( - "landscape.html", - dist=distributions, - tsne_data=tsne_data, - ) - - -@app.route("/timeline") -def timeline_animation(): - data = get_timeline_animation_data(db()) - return render_template("timeline.html", animation=data) - - -@app.route("/idea-clusters") -def idea_clusters(): - data = get_idea_clusters(db()) - return render_template("idea_clusters.html", clusters=data) - - -@app.route("/architecture") -def architecture(): - data = get_architecture(db()) - return render_template("architecture.html", arch=data) - - -@app.route("/api/architecture") -def api_architecture(): - return jsonify(get_architecture(db())) - - -@app.route("/similarity") 
-@admin_required -def similarity(): - network = get_similarity_graph(db()) - return render_template("similarity.html", network=network) - - -@app.route("/authors") -def authors(): - top = get_top_authors(db(), limit=50) - orgs = get_org_data(db(), limit=20) - network = get_author_network_full(db()) - cross_org = get_cross_org_data(db(), limit=20) - return render_template( - "authors.html", - authors=top, - orgs=orgs, - orgs_data=orgs, - network=network, - cross_org=cross_org, - ) - - -@app.route("/citations") -def citations(): - from webui.auth import is_admin as check_admin - graph = get_citation_graph(db()) - influence = get_citation_influence(db()) if check_admin() else None - bcp = get_bcp_analysis(db()) if check_admin() else None - return render_template("citations.html", graph=graph, influence=influence, bcp=bcp) - - -@app.route("/monitor") -@admin_required -def monitor_page(): - status = get_monitor_status(db()) - return render_template("monitor.html", status=status) - - -@app.route("/admin/analytics") -@admin_required -def analytics_dashboard(): - data = get_analytics_data(_analytics_db) - return render_template("analytics.html", data=data) - - -@app.route("/about") -def about(): - from ietf_analyzer.config import Config - cfg = Config.load() - stats = get_overview_stats(db()) - return render_template("about.html", stats=stats, search_keywords=cfg.search_keywords, - fetch_since=cfg.fetch_since) - - -@app.route("/impressum") -def impressum(): - return render_template("impressum.html") - - -@app.route("/datenschutz") -def datenschutz(): - return render_template("datenschutz.html") - - -@app.route("/search") -def search(): - q = request.args.get("q", "").strip() - results = global_search(db(), q) if q else {"drafts": [], "ideas": [], "authors": [], "gaps": []} - total = sum(len(v) for v in results.values()) - return render_template("search_results.html", query=q, results=results, total=total) - - -@app.route("/ask") -def ask_page(): - question = 
request.args.get("q", "") - result = None - if question: - top_k = request.args.get("top", 5, type=int) - # Search only (free) — returns sources + cached answer if available - result = get_ask_search(db(), question, top_k=top_k) - return render_template("ask.html", question=question, result=result) - - -@app.route("/api/ask/synthesize", methods=["POST"]) -@admin_required -@rate_limit -def api_ask_synthesize(): - """Synthesize an answer via Claude (costs tokens, cached permanently). Returns JSON.""" - data = request.get_json(force=True, silent=True) - if not data or "question" not in data: - return jsonify({"error": "Missing 'question' in request body"}), 400 - question = data["question"] - top_k = data.get("top_k", 5) - result = get_ask_synthesize(db(), question, top_k=top_k, cheap=True) - return jsonify(result) - - -@app.route("/api/ask", methods=["POST"]) -def api_ask(): - """Search only (free). Returns JSON with sources + cached answer if available.""" - data = request.get_json(force=True, silent=True) - if not data or "question" not in data: - return jsonify({"error": "Missing 'question' in request body"}), 400 - question = data["question"] - top_k = data.get("top_k", 5) - result = get_ask_search(db(), question, top_k=top_k) - return jsonify(result) - - -@app.route("/compare") -@admin_required -def compare_page(): - draft_names = request.args.get("drafts", "") - names = [n.strip() for n in draft_names.split(",") if n.strip()] if draft_names else [] - data = None - if len(names) >= 2: - data = get_comparison_data(db(), names) - return render_template("comparison.html", names=names, data=data) - - -@app.route("/api/compare", methods=["POST"]) -@admin_required -@rate_limit -def api_compare(): - """Run Claude comparison for drafts. 
Returns JSON with comparison text."""
-    req_data = request.get_json(force=True, silent=True)
-    if not req_data or "drafts" not in req_data:
-        return jsonify({"error": "Missing 'drafts' in request body"}), 400
-
-    names = req_data["drafts"]
-    if len(names) < 2:
-        return jsonify({"error": "Need at least 2 drafts to compare"}), 400
-
-    try:
-        from ietf_analyzer.config import Config
-        from ietf_analyzer.analyzer import Analyzer
-
-        cfg = Config.load()
-        database = db()
-        analyzer = Analyzer(cfg, database)
-        result = analyzer.compare_drafts(names)
-        return jsonify(result)
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
-
-
-# --- API endpoints for AJAX (used by client-side charts) ---
-
-
-def _to_csv_response(rows: list[dict], filename: str = "export.csv") -> Response:
-    """Convert a list of dicts to a CSV download response."""
-    if not rows:
-        return Response("", mimetype="text/csv",
-                        headers={"Content-Disposition": f"attachment; filename={filename}"})
-    si = io.StringIO()
-    writer = csv.DictWriter(si, fieldnames=rows[0].keys())
-    writer.writeheader()
-    for row in rows:
-        # Flatten any list/dict values to JSON strings
-        flat = {}
-        for k, v in row.items():
-            if isinstance(v, (list, dict)):
-                flat[k] = json.dumps(v)
-            else:
-                flat[k] = v
-        writer.writerow(flat)
-    return Response(si.getvalue(), mimetype="text/csv",
-                    headers={"Content-Disposition": f"attachment; filename={filename}"})
-
-
-def _results_to_csv(results: dict) -> Response:
-    """Convert global search results (multi-category) to a single CSV."""
-    rows = []
-    for category, items in results.items():
-        for item in items:
-            row = {"_category": category}
-            row.update(item)
-            rows.append(row)
-    return _to_csv_response(rows, "search_results.csv")
-
-
-@app.route("/api/drafts")
-def api_drafts():
-    page = request.args.get("page", 1, type=int)
-    search = request.args.get("q", "")
-    category = request.args.get("cat", "")
-    source = request.args.get("source", "")
-    min_score = 
request.args.get("min_score", 0.0, type=float) - sort = request.args.get("sort", "score") - sort_dir = request.args.get("dir", "desc") - data = get_drafts_page(db(), page=page, search=search, category=category, - min_score=min_score, sort=sort, sort_dir=sort_dir, - source=source) - if request.args.get("format") == "csv": - return _to_csv_response(data.get("drafts", []), "drafts.csv") - return jsonify(data) - - -@app.route("/api/stats") -def api_stats(): - return jsonify(get_overview_stats(db())) - - -@app.route("/api/authors/network") -def api_author_network(): - return jsonify(get_author_network_full(db())) - - -@app.route("/api/citations") -def api_citations(): - min_refs = request.args.get("min_refs", 2, type=int) - return jsonify(get_citation_graph(db(), min_refs=min_refs)) - - -@app.route("/api/search") -def api_search(): - q = request.args.get("q", "").strip() - results = global_search(db(), q) if q else {"drafts": [], "ideas": [], "authors": [], "gaps": []} - if request.args.get("format") == "csv": - return _results_to_csv(results) - return jsonify(results) - - -@app.route("/api/ideas") -def api_ideas(): - data = get_ideas_by_type(db()) - if request.args.get("format") == "csv": - return _to_csv_response(data.get("ideas", []), "ideas.csv") - return jsonify(data) - - -@app.route("/api/gaps") -@admin_required -def api_gaps(): - data = get_all_gaps(db()) - if request.args.get("format") == "csv": - return _to_csv_response(data, "gaps.csv") - return jsonify(data) - - -@app.route("/api/gaps/") -@admin_required -def api_gap_detail(gap_id: int): - gap = get_gap_detail(db(), gap_id) - if not gap: - return jsonify({"error": "Gap not found"}), 404 - return jsonify(gap) - - -@app.route("/api/ratings") -def api_ratings(): - data = get_rating_distributions(db()) - if request.args.get("format") == "csv": - # Transpose columnar data to rows - rows = [] - for i in range(len(data.get("names", []))): - rows.append({ - "name": data["names"][i], - "score": data["scores"][i], - 
"novelty": data["novelty"][i], - "maturity": data["maturity"][i], - "overlap": data["overlap"][i], - "momentum": data["momentum"][i], - "relevance": data["relevance"][i], - "category": data["categories"][i], - }) - return _to_csv_response(rows, "ratings.csv") - return jsonify(data) - - -@app.route("/api/timeline") -def api_timeline(): - data = get_timeline_data(db()) - return jsonify(data) - - -@app.route("/api/landscape") -@admin_required -def api_landscape(): - data = get_landscape_tsne(db()) - if request.args.get("format") == "csv": - return _to_csv_response(data, "landscape.csv") - return jsonify(data) - - -@app.route("/api/similarity") -@admin_required -def api_similarity(): - data = get_similarity_graph(db()) - return jsonify(data) - - -@app.route("/api/idea-clusters") -def api_idea_clusters(): - data = get_idea_clusters(db()) - return jsonify(data) - - -@app.route("/api/monitor") -@admin_required -def api_monitor(): - data = get_monitor_status(db()) - return jsonify(data) - - -@app.route("/api/drafts/") -def api_draft_detail(name: str): - detail = get_draft_detail(db(), name) - if not detail: - return jsonify({"error": "Draft not found"}), 404 - return jsonify(detail) - - -@app.route("/api/categories") -def api_categories(): - data = get_category_counts(db()) - if request.args.get("format") == "csv": - rows = [{"category": k, "count": v} for k, v in data.items()] - return _to_csv_response(rows, "categories.csv") - return jsonify(data) - - -@app.route("/api/drafts//annotate", methods=["POST"]) -@admin_required -def api_annotate(name: str): - """Add or update annotation for a draft.""" - import json as _json - database = db() - draft = database.get_draft(name) - if not draft: - return jsonify({"error": "Draft not found"}), 404 - - data = request.get_json(force=True, silent=True) - if not data: - return jsonify({"error": "Invalid JSON body"}), 400 - - note = data.get("note") - tags = data.get("tags") - add_tag = data.get("add_tag") - remove_tag = 
data.get("remove_tag") - - # Handle add/remove tag operations - if add_tag or remove_tag: - existing = database.get_annotation(name) - current_tags = existing["tags"] if existing else [] - if add_tag and add_tag not in current_tags: - current_tags.append(add_tag) - if remove_tag and remove_tag in current_tags: - current_tags.remove(remove_tag) - tags = current_tags - - database.upsert_annotation(name, note=note, tags=tags) - annotation = database.get_annotation(name) - return jsonify({"success": True, "annotation": annotation}) - - -@app.route("/export/obsidian") -@admin_required -def export_obsidian(): - """Download the entire research corpus as an Obsidian vault (ZIP).""" - data = build_obsidian_vault(db()) - return Response( - data, - mimetype="application/zip", - headers={"Content-Disposition": "attachment; filename=IETF-AI-Agent-Drafts.zip"}, - ) +# --- App factory --- def create_app(dev: bool = False) -> Flask: - """Re-initialize auth mode. Call before run() if needed.""" - init_auth(app, dev=dev) - return app + """Create and configure the Flask application.""" + application = Flask( + __name__, + template_folder=str(Path(__file__).parent / "templates"), + static_folder=str(Path(__file__).parent / "static"), + static_url_path="/static", + ) + application.config["SECRET_KEY"] = os.environ.get("FLASK_SECRET_KEY", os.urandom(24).hex()) + + # Auth + init_auth(application, dev=dev) + + # Analytics (GDPR-compliant, no cookies) + analytics_db = str(_project_root / "data" / "analytics.db") + init_analytics(application, db_path=analytics_db) + + # Register blueprints + register_blueprints(application) + + # Database lifecycle (per-request) + @application.teardown_appcontext + def close_db(exception=None): + database = g.pop("db", None) + if database is not None: + database.close() + + # Error handlers + @application.errorhandler(404) + def not_found(e): + return render_template("errors/404.html"), 404 + + @application.errorhandler(500) + def server_error(e): + return 
render_template("errors/500.html"), 500 + + # Request timing + @application.before_request + def start_timer(): + g.start_time = time.time() + + @application.after_request + def log_request(response): + if hasattr(g, "start_time"): + duration = (time.time() - g.start_time) * 1000 + logger = logging.getLogger("webui") + logger.info("%s %s %s %.1fms", request.method, request.path, + response.status_code, duration) + return response + + return application -# ── Sources & False Positives ──────────────────────────────────────────── - - -@app.route("/sources") -@admin_required -def sources_page(): - data = get_source_comparison(db()) - return render_template("sources.html", data=data) - - -@app.route("/false-positives") -@admin_required -def false_positives_page(): - data = get_false_positive_profile(db()) - return render_template("false_positives.html", data=data) - - -@app.route("/api/sources") -@admin_required -def api_sources(): - data = get_source_comparison(db()) - return jsonify(data) - - -@app.route("/api/false-positives") -@admin_required -def api_false_positives(): - data = get_false_positive_profile(db()) - return jsonify(data) - - -# ── Citation Influence & BCP ───────────────────────────────────────────── - - -@app.route("/api/citations/influence") -@admin_required -def api_citation_influence(): - return jsonify(get_citation_influence(db())) - - -@app.route("/api/citations/bcp") -@admin_required -def api_bcp_analysis(): - return jsonify(get_bcp_analysis(db())) - - -# ── Idea Analysis ──────────────────────────────────────────────────────── - - -@app.route("/idea-analysis") -@admin_required -def idea_analysis(): - data = get_idea_analysis(db()) - return render_template("idea_analysis.html", data=data) - - -@app.route("/api/idea-analysis") -@admin_required -def api_idea_analysis(): - data = get_idea_analysis(db()) - return jsonify(data) - - -# ── Trends & Complexity ────────────────────────────────────────────────── - - -@app.route("/trends") -@admin_required 
-def trends(): - data = get_trends_data(db()) - return render_template("trends_analysis.html", data=data) - - -@app.route("/complexity") -@admin_required -def complexity(): - data = get_complexity_data(db()) - return render_template("complexity.html", data=data) - - -@app.route("/api/trends") -@admin_required -def api_trends(): - return jsonify(get_trends_data(db())) - - -@app.route("/api/complexity") -@admin_required -def api_complexity(): - return jsonify(get_complexity_data(db())) - - -# ── Proposals (dev-only) ──────────────────────────────────────────────── - - -@app.route("/proposals") -@admin_required -def proposals(): - proposal_list = get_all_proposals(db()) - gap_list = get_all_gaps(db()) - return render_template("proposals.html", proposals=proposal_list, gaps=gap_list) - - -@app.route("/proposals/new", methods=["GET", "POST"]) -@admin_required -def proposal_new(): - if request.method == "POST": - data = request.form - slug = data.get("slug", "").strip() - if not slug: - import re - slug = re.sub(r'[^a-z0-9]+', '-', data["title"].lower()).strip('-') - gap_ids = [int(g) for g in request.form.getlist("gap_ids") if g] - proposal = { - "title": data["title"], - "slug": slug, - "status": data.get("status", "idea"), - "description": data.get("description", ""), - "content_md": data.get("content_md", ""), - "source_paper": data.get("source_paper", ""), - "source_url": data.get("source_url", ""), - "intended_wg": data.get("intended_wg", ""), - "draft_name": data.get("draft_name", ""), - "gap_ids": gap_ids, - } - pid = db().upsert_proposal(proposal) - return redirect(url_for("proposal_detail", proposal_id=pid)) - gap_list = get_all_gaps(db()) - return render_template("proposal_edit.html", proposal=None, gaps=gap_list) - - -@app.route("/proposals/") -@admin_required -def proposal_detail(proposal_id): - proposal = get_proposal_detail(db(), proposal_id) - if not proposal: - abort(404) - return render_template("proposal_detail.html", proposal=proposal) - - 
-@app.route("/proposals//edit", methods=["GET", "POST"]) -@admin_required -def proposal_edit(proposal_id): - if request.method == "POST": - data = request.form - slug = data.get("slug", "").strip() - if not slug: - import re - slug = re.sub(r'[^a-z0-9]+', '-', data["title"].lower()).strip('-') - gap_ids = [int(g) for g in request.form.getlist("gap_ids") if g] - proposal = { - "id": proposal_id, - "title": data["title"], - "slug": slug, - "status": data.get("status", "idea"), - "description": data.get("description", ""), - "content_md": data.get("content_md", ""), - "source_paper": data.get("source_paper", ""), - "source_url": data.get("source_url", ""), - "intended_wg": data.get("intended_wg", ""), - "draft_name": data.get("draft_name", ""), - "gap_ids": gap_ids, - } - db().upsert_proposal(proposal) - return redirect(url_for("proposal_detail", proposal_id=proposal_id)) - proposal = get_proposal_detail(db(), proposal_id) - if not proposal: - abort(404) - gap_list = get_all_gaps(db()) - return render_template("proposal_edit.html", proposal=proposal, gaps=gap_list) - - -@app.route("/proposals//delete", methods=["POST"]) -@admin_required -def proposal_delete(proposal_id): - db().delete_proposal(proposal_id) - return redirect(url_for("proposals")) - - -@app.route("/api/proposals") -@admin_required -def api_proposals(): - data = get_all_proposals(db()) - return jsonify(data) - - -@app.route("/api/proposals/") -@admin_required -def api_proposal_detail(proposal_id): - p = get_proposal_detail(db(), proposal_id) - if not p: - return jsonify({"error": "Proposal not found"}), 404 - return jsonify(p) - - -@app.route("/proposals/intake", methods=["GET", "POST"]) -@admin_required -def proposal_intake(): - """Paste text/URLs → Claude generates proposals automatically.""" - if request.method == "POST": - raw_input = request.form.get("input_text", "").strip() - if not raw_input: - return jsonify({"error": "No input provided"}), 400 - - try: - from ietf_analyzer.config import Config 
- from ietf_analyzer.proposal_intake import ProposalIntake - - cfg = Config.load() - intake = ProposalIntake(cfg, db()) - proposals, usage = intake.process(raw_input, cheap=True) - - return jsonify({ - "success": True, - "count": len(proposals), - "proposals": [ - {"id": p.get("id"), "title": p.get("title"), "slug": p.get("slug"), - "gap_ids": p.get("gap_ids", []), "description": p.get("description", ""), - "content_md": p.get("content_md", ""), - "intended_wg": p.get("intended_wg", ""), "draft_name": p.get("draft_name", ""), - "source_paper": p.get("source_paper", ""), "source_url": p.get("source_url", "")} - for p in proposals - ], - "usage": usage, - }) - except Exception as e: - return jsonify({"error": str(e)}), 500 - - return render_template("proposal_intake.html") +# Module-level app instance for backward compatibility (import from webui.app import app) +app = create_app(dev=False) if __name__ == "__main__": @@ -946,7 +92,7 @@ if __name__ == "__main__": parser.add_argument("--port", type=int, default=5000) args = parser.parse_args() - init_auth(app, dev=args.dev) + app = create_app(dev=args.dev) mode = "\033[33mDEV\033[0m (admin enabled)" if args.dev else "\033[32mPRODUCTION\033[0m (admin disabled)" print(f"Starting IETF Draft Analyzer — {mode}") diff --git a/src/webui/blueprints/__init__.py b/src/webui/blueprints/__init__.py new file mode 100644 index 0000000..f7d42fb --- /dev/null +++ b/src/webui/blueprints/__init__.py @@ -0,0 +1,15 @@ +"""Flask blueprints for the IETF Draft Analyzer web UI.""" +from __future__ import annotations + +from flask import Flask + +from webui.blueprints.pages import pages_bp +from webui.blueprints.api import api_bp +from webui.blueprints.admin import admin_bp + + +def register_blueprints(app: Flask) -> None: + """Register all blueprints with the Flask app.""" + app.register_blueprint(pages_bp) + app.register_blueprint(api_bp) + app.register_blueprint(admin_bp) diff --git a/src/webui/blueprints/admin.py 
b/src/webui/blueprints/admin.py new file mode 100644 index 0000000..de02bf7 --- /dev/null +++ b/src/webui/blueprints/admin.py @@ -0,0 +1,562 @@ +"""Admin-only routes (require @admin_required).""" +from __future__ import annotations + +import functools +import time +from collections import defaultdict +from pathlib import Path + +from flask import Blueprint, render_template, request, jsonify, abort, g, Response, redirect, url_for + +from webui.auth import admin_required +from webui.analytics import get_analytics_data +from webui.obsidian_export import build_obsidian_vault +from webui.data import ( + get_db, + get_overview_stats, + get_rating_distributions, + get_all_gaps, + get_gap_detail, + get_generated_drafts, + read_generated_draft, + get_monitor_status, + get_landscape_tsne, + get_similarity_graph, + get_comparison_data, + get_ask_search, + get_ask_synthesize, + get_source_comparison, + get_false_positive_profile, + get_citation_influence, + get_bcp_analysis, + get_idea_analysis, + get_trends_data, + get_complexity_data, + get_all_proposals, + get_proposal_detail, + get_proposals_for_gap, +) + +admin_bp = Blueprint("admin", __name__) + +_project_root = Path(__file__).resolve().parent.parent.parent.parent + +# --- Rate limiting for Claude-calling endpoints --- + +_rate_limit_store: dict[str, list[float]] = defaultdict(list) +_RATE_LIMIT_MAX = 10 # max requests +_RATE_LIMIT_WINDOW = 60 # per 60 seconds + + +def rate_limit(f): + """Simple in-memory rate limiter: max 10 requests per minute per IP.""" + @functools.wraps(f) + def wrapper(*args, **kwargs): + ip = request.remote_addr or "unknown" + now = time.time() + timestamps = _rate_limit_store[ip] + _rate_limit_store[ip] = [t for t in timestamps if now - t < _RATE_LIMIT_WINDOW] + if len(_rate_limit_store[ip]) >= _RATE_LIMIT_MAX: + return jsonify({"error": "Rate limit exceeded. 
Try again later."}), 429 + _rate_limit_store[ip].append(now) + return f(*args, **kwargs) + return wrapper + + +def db(): + if "db" not in g: + g.db = get_db() + return g.db + + +# ── Gap pages ──────────────────────────────────────────────────────────── + +@admin_bp.route("/gaps") +@admin_required +def gaps(): + gap_list = get_all_gaps(db()) + generated = get_generated_drafts() + return render_template("gaps.html", gaps=gap_list, generated_drafts=generated) + + +@admin_bp.route("/gaps/demo") +@admin_required +def gaps_demo(): + """Show a pre-generated example draft so users can see output without API calls.""" + generated = get_generated_drafts() + selected = request.args.get("file", "") + draft_text = None + draft_info = None + if selected: + draft_text = read_generated_draft(selected) + for gd in generated: + if gd["filename"] == selected: + draft_info = gd + break + elif generated: + draft_info = generated[0] + draft_text = read_generated_draft(draft_info["filename"]) + return render_template( + "gap_demo.html", + generated_drafts=generated, + draft_text=draft_text, + draft_info=draft_info, + selected=selected, + ) + + +@admin_bp.route("/gaps/") +@admin_required +def gap_detail(gap_id: int): + gap = get_gap_detail(db(), gap_id) + if not gap: + abort(404) + generated = get_generated_drafts() + gap_proposals = get_proposals_for_gap(db(), gap_id) + return render_template("gap_detail.html", gap=gap, generated_drafts=generated, proposals=gap_proposals) + + +@admin_bp.route("/gaps//generate", methods=["POST"]) +@admin_required +def gap_generate(gap_id: int): + """Trigger draft generation for a gap. 
Returns JSON with the generated text.""" + gap = get_gap_detail(db(), gap_id) + if not gap: + return jsonify({"error": "Gap not found"}), 404 + + try: + from ietf_analyzer.config import Config + from ietf_analyzer.analyzer import Analyzer + from ietf_analyzer.draftgen import DraftGenerator + + cfg = Config.load() + database = db() + analyzer = Analyzer(cfg, database) + generator = DraftGenerator(cfg, database, analyzer) + + slug = gap["topic"].lower().replace(" ", "-")[:40] + output_path = str(Path(_project_root) / "data" / "reports" / "generated-drafts" / f"draft-gap-{gap_id}-{slug}.txt") + path = generator.generate(gap["topic"], output_path=output_path) + draft_text = Path(path).read_text(errors="replace") + + return jsonify({ + "success": True, + "text": draft_text, + "filename": Path(path).name, + "path": path, + }) + except Exception as e: + return jsonify({"error": str(e)}), 500 + + +@admin_bp.route("/api/gaps") +@admin_required +def api_gaps(): + from webui.blueprints.api import _to_csv_response + data = get_all_gaps(db()) + if request.args.get("format") == "csv": + return _to_csv_response(data, "gaps.csv") + return jsonify(data) + + +@admin_bp.route("/api/gaps/<int:gap_id>") +@admin_required +def api_gap_detail(gap_id: int): + gap = get_gap_detail(db(), gap_id) + if not gap: + return jsonify({"error": "Gap not found"}), 404 + return jsonify(gap) + + +# ── Monitor ────────────────────────────────────────────────────────────── + +@admin_bp.route("/monitor") +@admin_required +def monitor_page(): + status = get_monitor_status(db()) + return render_template("monitor.html", status=status) + + +@admin_bp.route("/api/monitor") +@admin_required +def api_monitor(): + data = get_monitor_status(db()) + return jsonify(data) + + +# ── Analytics ──────────────────────────────────────────────────────────── + +@admin_bp.route("/admin/analytics") +@admin_required +def analytics_dashboard(): + analytics_db = str(_project_root / "data" / "analytics.db") + data = 
get_analytics_data(analytics_db) + return render_template("analytics.html", data=data) + + +# ── Landscape & Similarity ─────────────────────────────────────────────── + +@admin_bp.route("/landscape") +@admin_required +def landscape(): + distributions = get_rating_distributions(db()) + tsne_data = get_landscape_tsne(db()) + return render_template( + "landscape.html", + dist=distributions, + tsne_data=tsne_data, + ) + + +@admin_bp.route("/api/landscape") +@admin_required +def api_landscape(): + from webui.blueprints.api import _to_csv_response + data = get_landscape_tsne(db()) + if request.args.get("format") == "csv": + return _to_csv_response(data, "landscape.csv") + return jsonify(data) + + +@admin_bp.route("/similarity") +@admin_required +def similarity(): + network = get_similarity_graph(db()) + return render_template("similarity.html", network=network) + + +@admin_bp.route("/api/similarity") +@admin_required +def api_similarity(): + data = get_similarity_graph(db()) + return jsonify(data) + + +# ── Compare ────────────────────────────────────────────────────────────── + +@admin_bp.route("/compare") +@admin_required +def compare_page(): + draft_names = request.args.get("drafts", "") + names = [n.strip() for n in draft_names.split(",") if n.strip()] if draft_names else [] + data = None + if len(names) >= 2: + data = get_comparison_data(db(), names) + return render_template("comparison.html", names=names, data=data) + + +@admin_bp.route("/api/compare", methods=["POST"]) +@admin_required +@rate_limit +def api_compare(): + """Run Claude comparison for drafts. 
Returns JSON with comparison text.""" + req_data = request.get_json(force=True, silent=True) + if not req_data or "drafts" not in req_data: + return jsonify({"error": "Missing 'drafts' in request body"}), 400 + + names = req_data["drafts"] + if len(names) < 2: + return jsonify({"error": "Need at least 2 drafts to compare"}), 400 + + try: + from ietf_analyzer.config import Config + from ietf_analyzer.analyzer import Analyzer + + cfg = Config.load() + database = db() + analyzer = Analyzer(cfg, database) + result = analyzer.compare_drafts(names) + return jsonify(result) + except Exception as e: + return jsonify({"error": str(e)}), 500 + + +# ── Annotations ────────────────────────────────────────────────────────── + +@admin_bp.route("/api/drafts/<name>/annotate", methods=["POST"]) +@admin_required +def api_annotate(name: str): + """Add or update annotation for a draft.""" + import json as _json + database = db() + draft = database.get_draft(name) + if not draft: + return jsonify({"error": "Draft not found"}), 404 + + data = request.get_json(force=True, silent=True) + if not data: + return jsonify({"error": "Invalid JSON body"}), 400 + + note = data.get("note") + tags = data.get("tags") + add_tag = data.get("add_tag") + remove_tag = data.get("remove_tag") + + if add_tag or remove_tag: + existing = database.get_annotation(name) + current_tags = existing["tags"] if existing else [] + if add_tag and add_tag not in current_tags: + current_tags.append(add_tag) + if remove_tag and remove_tag in current_tags: + current_tags.remove(remove_tag) + tags = current_tags + + database.upsert_annotation(name, note=note, tags=tags) + annotation = database.get_annotation(name) + return jsonify({"success": True, "annotation": annotation}) + + +# ── Ask/Synthesize (Claude-powered) ────────────────────────────────────── + +@admin_bp.route("/api/ask/synthesize", methods=["POST"]) +@admin_required +@rate_limit +def api_ask_synthesize(): + """Synthesize an answer via Claude (costs tokens, cached 
permanently). Returns JSON.""" + data = request.get_json(force=True, silent=True) + if not data or "question" not in data: + return jsonify({"error": "Missing 'question' in request body"}), 400 + question = data["question"] + top_k = data.get("top_k", 5) + result = get_ask_synthesize(db(), question, top_k=top_k, cheap=True) + return jsonify(result) + + +# ── Sources & False Positives ──────────────────────────────────────────── + +@admin_bp.route("/sources") +@admin_required +def sources_page(): + data = get_source_comparison(db()) + return render_template("sources.html", data=data) + + +@admin_bp.route("/false-positives") +@admin_required +def false_positives_page(): + data = get_false_positive_profile(db()) + return render_template("false_positives.html", data=data) + + +@admin_bp.route("/api/sources") +@admin_required +def api_sources(): + data = get_source_comparison(db()) + return jsonify(data) + + +@admin_bp.route("/api/false-positives") +@admin_required +def api_false_positives(): + data = get_false_positive_profile(db()) + return jsonify(data) + + +# ── Citation Influence & BCP ───────────────────────────────────────────── + +@admin_bp.route("/api/citations/influence") +@admin_required +def api_citation_influence(): + return jsonify(get_citation_influence(db())) + + +@admin_bp.route("/api/citations/bcp") +@admin_required +def api_bcp_analysis(): + return jsonify(get_bcp_analysis(db())) + + +# ── Idea Analysis ──────────────────────────────────────────────────────── + +@admin_bp.route("/idea-analysis") +@admin_required +def idea_analysis(): + data = get_idea_analysis(db()) + return render_template("idea_analysis.html", data=data) + + +@admin_bp.route("/api/idea-analysis") +@admin_required +def api_idea_analysis(): + data = get_idea_analysis(db()) + return jsonify(data) + + +# ── Trends & Complexity ────────────────────────────────────────────────── + +@admin_bp.route("/trends") +@admin_required +def trends(): + data = get_trends_data(db()) + return 
render_template("trends_analysis.html", data=data) + + +@admin_bp.route("/complexity") +@admin_required +def complexity(): + data = get_complexity_data(db()) + return render_template("complexity.html", data=data) + + +@admin_bp.route("/api/trends") +@admin_required +def api_trends(): + return jsonify(get_trends_data(db())) + + +@admin_bp.route("/api/complexity") +@admin_required +def api_complexity(): + return jsonify(get_complexity_data(db())) + + +# ── Proposals ──────────────────────────────────────────────────────────── + +@admin_bp.route("/proposals") +@admin_required +def proposals(): + proposal_list = get_all_proposals(db()) + gap_list = get_all_gaps(db()) + return render_template("proposals.html", proposals=proposal_list, gaps=gap_list) + + +@admin_bp.route("/proposals/new", methods=["GET", "POST"]) +@admin_required +def proposal_new(): + if request.method == "POST": + data = request.form + slug = data.get("slug", "").strip() + if not slug: + import re + slug = re.sub(r'[^a-z0-9]+', '-', data["title"].lower()).strip('-') + gap_ids = [int(g_val) for g_val in request.form.getlist("gap_ids") if g_val] + proposal = { + "title": data["title"], + "slug": slug, + "status": data.get("status", "idea"), + "description": data.get("description", ""), + "content_md": data.get("content_md", ""), + "source_paper": data.get("source_paper", ""), + "source_url": data.get("source_url", ""), + "intended_wg": data.get("intended_wg", ""), + "draft_name": data.get("draft_name", ""), + "gap_ids": gap_ids, + } + pid = db().upsert_proposal(proposal) + return redirect(url_for("admin.proposal_detail", proposal_id=pid)) + gap_list = get_all_gaps(db()) + return render_template("proposal_edit.html", proposal=None, gaps=gap_list) + + +@admin_bp.route("/proposals/<int:proposal_id>") +@admin_required +def proposal_detail(proposal_id): + proposal = get_proposal_detail(db(), proposal_id) + if not proposal: + abort(404) + return render_template("proposal_detail.html", proposal=proposal) + + 
+@admin_bp.route("/proposals/<int:proposal_id>/edit", methods=["GET", "POST"]) +@admin_required +def proposal_edit(proposal_id): + if request.method == "POST": + data = request.form + slug = data.get("slug", "").strip() + if not slug: + import re + slug = re.sub(r'[^a-z0-9]+', '-', data["title"].lower()).strip('-') + gap_ids = [int(g_val) for g_val in request.form.getlist("gap_ids") if g_val] + proposal = { + "id": proposal_id, + "title": data["title"], + "slug": slug, + "status": data.get("status", "idea"), + "description": data.get("description", ""), + "content_md": data.get("content_md", ""), + "source_paper": data.get("source_paper", ""), + "source_url": data.get("source_url", ""), + "intended_wg": data.get("intended_wg", ""), + "draft_name": data.get("draft_name", ""), + "gap_ids": gap_ids, + } + db().upsert_proposal(proposal) + return redirect(url_for("admin.proposal_detail", proposal_id=proposal_id)) + proposal = get_proposal_detail(db(), proposal_id) + if not proposal: + abort(404) + gap_list = get_all_gaps(db()) + return render_template("proposal_edit.html", proposal=proposal, gaps=gap_list) + + +@admin_bp.route("/proposals/<int:proposal_id>/delete", methods=["POST"]) +@admin_required +def proposal_delete(proposal_id): + db().delete_proposal(proposal_id) + return redirect(url_for("admin.proposals")) + + +@admin_bp.route("/api/proposals") +@admin_required +def api_proposals(): + data = get_all_proposals(db()) + return jsonify(data) + + +@admin_bp.route("/api/proposals/<int:proposal_id>") +@admin_required +def api_proposal_detail(proposal_id): + p = get_proposal_detail(db(), proposal_id) + if not p: + return jsonify({"error": "Proposal not found"}), 404 + return jsonify(p) + + +@admin_bp.route("/proposals/intake", methods=["GET", "POST"]) +@admin_required +def proposal_intake(): + """Paste text/URLs -> Claude generates proposals automatically.""" + if request.method == "POST": + raw_input = request.form.get("input_text", "").strip() + if not raw_input: + return jsonify({"error": "No input provided"}), 400 + 
+ try: + from ietf_analyzer.config import Config + from ietf_analyzer.proposal_intake import ProposalIntake + + cfg = Config.load() + intake = ProposalIntake(cfg, db()) + proposals_result, usage = intake.process(raw_input, cheap=True) + + return jsonify({ + "success": True, + "count": len(proposals_result), + "proposals": [ + {"id": p.get("id"), "title": p.get("title"), "slug": p.get("slug"), + "gap_ids": p.get("gap_ids", []), "description": p.get("description", ""), + "content_md": p.get("content_md", ""), + "intended_wg": p.get("intended_wg", ""), "draft_name": p.get("draft_name", ""), + "source_paper": p.get("source_paper", ""), "source_url": p.get("source_url", "")} + for p in proposals_result + ], + "usage": usage, + }) + except Exception as e: + return jsonify({"error": str(e)}), 500 + + return render_template("proposal_intake.html") + + +# ── Obsidian Export ────────────────────────────────────────────────────── + +@admin_bp.route("/export/obsidian") +@admin_required +def export_obsidian(): + """Download the entire research corpus as an Obsidian vault (ZIP).""" + data = build_obsidian_vault(db()) + return Response( + data, + mimetype="application/zip", + headers={"Content-Disposition": "attachment; filename=IETF-AI-Agent-Drafts.zip"}, + ) diff --git a/src/webui/blueprints/api.py b/src/webui/blueprints/api.py new file mode 100644 index 0000000..96f51d8 --- /dev/null +++ b/src/webui/blueprints/api.py @@ -0,0 +1,180 @@ +"""Public API endpoints (JSON responses).""" +from __future__ import annotations + +import csv +import io +import json + +from flask import Blueprint, request, jsonify, g, Response + +from webui.data import ( + get_db, + get_overview_stats, + get_drafts_page, + get_draft_detail, + get_rating_distributions, + get_timeline_data, + get_ideas_by_type, + get_category_counts, + get_author_network_full, + get_citation_graph, + get_idea_clusters, + global_search, + get_architecture, + get_ask_search, +) + +api_bp = Blueprint("api", __name__) + + +def 
db(): + if "db" not in g: + g.db = get_db() + return g.db + + +def _to_csv_response(rows: list[dict], filename: str = "export.csv") -> Response: + """Convert a list of dicts to a CSV download response.""" + if not rows: + return Response("", mimetype="text/csv", + headers={"Content-Disposition": f"attachment; filename={filename}"}) + si = io.StringIO() + writer = csv.DictWriter(si, fieldnames=rows[0].keys()) + writer.writeheader() + for row in rows: + flat = {} + for k, v in row.items(): + if isinstance(v, (list, dict)): + flat[k] = json.dumps(v) + else: + flat[k] = v + writer.writerow(flat) + return Response(si.getvalue(), mimetype="text/csv", + headers={"Content-Disposition": f"attachment; filename={filename}"}) + + +def _results_to_csv(results: dict) -> Response: + """Convert global search results (multi-category) to a single CSV.""" + rows = [] + for category, items in results.items(): + for item in items: + row = {"_category": category} + row.update(item) + rows.append(row) + return _to_csv_response(rows, "search_results.csv") + + +@api_bp.route("/api/drafts") +def api_drafts(): + page = request.args.get("page", 1, type=int) + search = request.args.get("q", "") + category = request.args.get("cat", "") + source = request.args.get("source", "") + min_score = request.args.get("min_score", 0.0, type=float) + sort = request.args.get("sort", "score") + sort_dir = request.args.get("dir", "desc") + data = get_drafts_page(db(), page=page, search=search, category=category, + min_score=min_score, sort=sort, sort_dir=sort_dir, + source=source) + if request.args.get("format") == "csv": + return _to_csv_response(data.get("drafts", []), "drafts.csv") + return jsonify(data) + + +@api_bp.route("/api/stats") +def api_stats(): + return jsonify(get_overview_stats(db())) + + +@api_bp.route("/api/authors/network") +def api_author_network(): + return jsonify(get_author_network_full(db())) + + +@api_bp.route("/api/citations") +def api_citations(): + min_refs = 
request.args.get("min_refs", 2, type=int) + return jsonify(get_citation_graph(db(), min_refs=min_refs)) + + +@api_bp.route("/api/search") +def api_search(): + q = request.args.get("q", "").strip() + results = global_search(db(), q) if q else {"drafts": [], "ideas": [], "authors": [], "gaps": []} + if request.args.get("format") == "csv": + return _results_to_csv(results) + return jsonify(results) + + +@api_bp.route("/api/ideas") +def api_ideas(): + data = get_ideas_by_type(db()) + if request.args.get("format") == "csv": + return _to_csv_response(data.get("ideas", []), "ideas.csv") + return jsonify(data) + + +@api_bp.route("/api/ratings") +def api_ratings(): + data = get_rating_distributions(db()) + if request.args.get("format") == "csv": + rows = [] + for i in range(len(data.get("names", []))): + rows.append({ + "name": data["names"][i], + "score": data["scores"][i], + "novelty": data["novelty"][i], + "maturity": data["maturity"][i], + "overlap": data["overlap"][i], + "momentum": data["momentum"][i], + "relevance": data["relevance"][i], + "category": data["categories"][i], + }) + return _to_csv_response(rows, "ratings.csv") + return jsonify(data) + + +@api_bp.route("/api/timeline") +def api_timeline(): + data = get_timeline_data(db()) + return jsonify(data) + + +@api_bp.route("/api/idea-clusters") +def api_idea_clusters(): + data = get_idea_clusters(db()) + return jsonify(data) + + +@api_bp.route("/api/categories") +def api_categories(): + data = get_category_counts(db()) + if request.args.get("format") == "csv": + rows = [{"category": k, "count": v} for k, v in data.items()] + return _to_csv_response(rows, "categories.csv") + return jsonify(data) + + +@api_bp.route("/api/drafts/<name>") +def api_draft_detail(name: str): + detail = get_draft_detail(db(), name) + if not detail: + return jsonify({"error": "Draft not found"}), 404 + return jsonify(detail) + + +@api_bp.route("/api/architecture") +def api_architecture(): + return jsonify(get_architecture(db())) + + 
+@api_bp.route("/api/ask", methods=["POST"]) +def api_ask(): + """Search only (free). Returns JSON with sources + cached answer if available.""" + data = request.get_json(force=True, silent=True) + if not data or "question" not in data: + return jsonify({"error": "Missing 'question' in request body"}), 400 + question = data["question"] + top_k = data.get("top_k", 5) + result = get_ask_search(db(), question, top_k=top_k) + return jsonify(result) diff --git a/src/webui/blueprints/pages.py b/src/webui/blueprints/pages.py new file mode 100644 index 0000000..45636d2 --- /dev/null +++ b/src/webui/blueprints/pages.py @@ -0,0 +1,206 @@ +"""Public page routes (no admin required).""" +from __future__ import annotations + +from flask import Blueprint, render_template, request, abort, g + +from webui.data import ( + get_db, + get_overview_stats, + get_category_counts, + get_drafts_page, + get_draft_detail, + get_rating_distributions, + get_timeline_data, + get_timeline_animation_data, + get_ideas_by_type, + get_top_authors, + get_org_data, + get_category_radar_data, + get_score_histogram, + get_author_network_full, + get_cross_org_data, + get_citation_graph, + get_idea_clusters, + get_category_summary, + global_search, + get_architecture, + get_ask_search, + get_citation_influence, + get_bcp_analysis, +) + +pages_bp = Blueprint("pages", __name__) + + +def db(): + if "db" not in g: + g.db = get_db() + return g.db + + +@pages_bp.route("/") +def overview(): + stats = get_overview_stats(db()) + categories = get_category_counts(db()) + timeline = get_timeline_data(db()) + scores = get_score_histogram(db()) + radar = get_category_radar_data(db()) + return render_template( + "overview.html", + stats=stats, + categories=categories, + timeline=timeline, + scores=scores, + radar=radar, + ) + + +@pages_bp.route("/drafts") +def drafts(): + page = request.args.get("page", 1, type=int) + search = request.args.get("q", "") + category = request.args.get("cat", "") + source = 
request.args.get("source", "") + min_score = request.args.get("min_score", 0.0, type=float) + sort = request.args.get("sort", "score") + sort_dir = request.args.get("dir", "desc") + + result = get_drafts_page( + db(), + page=page, + search=search, + category=category, + min_score=min_score, + sort=sort, + sort_dir=sort_dir, + source=source, + ) + categories = get_category_counts(db()) + cat_summary = get_category_summary(db(), category) if category else None + return render_template( + "drafts.html", + result=result, + categories=categories, + cat_summary=cat_summary, + search=search, + current_cat=category, + current_source=source, + min_score=min_score, + sort=sort, + sort_dir=sort_dir, + ) + + +@pages_bp.route("/drafts/<name>") +def draft_detail(name: str): + database = db() + detail = get_draft_detail(database, name) + if not detail: + abort(404) + # Build set of draft ref IDs that exist in our DB for internal linking + ref_draft_ids = [r["id"] for r in detail.get("refs", []) if r["type"] == "draft"] + known_drafts = set() + if ref_draft_ids: + placeholders = ",".join("?" 
* len(ref_draft_ids)) + rows = database.conn.execute( + f"SELECT name FROM drafts WHERE name IN ({placeholders})", ref_draft_ids + ).fetchall() + known_drafts = {r["name"] for r in rows} + return render_template("draft_detail.html", draft=detail, known_drafts=known_drafts) + + +@pages_bp.route("/ideas") +def ideas(): + data = get_ideas_by_type(db()) + return render_template("ideas.html", data=data) + + +@pages_bp.route("/ratings") +def ratings(): + distributions = get_rating_distributions(db()) + radar = get_category_radar_data(db()) + return render_template( + "ratings.html", + dist=distributions, + radar=radar, + ) + + +@pages_bp.route("/timeline") +def timeline_animation(): + data = get_timeline_animation_data(db()) + return render_template("timeline.html", animation=data) + + +@pages_bp.route("/idea-clusters") +def idea_clusters(): + data = get_idea_clusters(db()) + return render_template("idea_clusters.html", clusters=data) + + +@pages_bp.route("/architecture") +def architecture(): + data = get_architecture(db()) + return render_template("architecture.html", arch=data) + + +@pages_bp.route("/authors") +def authors(): + top = get_top_authors(db(), limit=50) + orgs = get_org_data(db(), limit=20) + network = get_author_network_full(db()) + cross_org = get_cross_org_data(db(), limit=20) + return render_template( + "authors.html", + authors=top, + orgs=orgs, + orgs_data=orgs, + network=network, + cross_org=cross_org, + ) + + +@pages_bp.route("/citations") +def citations(): + from webui.auth import is_admin as check_admin + graph = get_citation_graph(db()) + influence = get_citation_influence(db()) if check_admin() else None + bcp = get_bcp_analysis(db()) if check_admin() else None + return render_template("citations.html", graph=graph, influence=influence, bcp=bcp) + + +@pages_bp.route("/about") +def about(): + from ietf_analyzer.config import Config + cfg = Config.load() + stats = get_overview_stats(db()) + return render_template("about.html", stats=stats, 
search_keywords=cfg.search_keywords, + fetch_since=cfg.fetch_since) + + +@pages_bp.route("/impressum") +def impressum(): + return render_template("impressum.html") + + +@pages_bp.route("/datenschutz") +def datenschutz(): + return render_template("datenschutz.html") + + +@pages_bp.route("/search") +def search(): + q = request.args.get("q", "").strip() + results = global_search(db(), q) if q else {"drafts": [], "ideas": [], "authors": [], "gaps": []} + total = sum(len(v) for v in results.values()) + return render_template("search_results.html", query=q, results=results, total=total) + + +@pages_bp.route("/ask") +def ask_page(): + question = request.args.get("q", "") + result = None + if question: + top_k = request.args.get("top", 5, type=int) + result = get_ask_search(db(), question, top_k=top_k) + return render_template("ask.html", question=question, result=result) diff --git a/src/webui/data.py b/src/webui/data.py deleted file mode 100644 index 6a55eea..0000000 --- a/src/webui/data.py +++ /dev/null @@ -1,4254 +0,0 @@ -"""Data access layer for the web dashboard. - -Thin wrapper around ietf_analyzer.db.Database that returns plain dicts -ready for JSON serialization or Jinja2 template rendering. 
-""" - -from __future__ import annotations - -import json -import re -import sys -import time -from collections import Counter, defaultdict -from functools import lru_cache -from pathlib import Path -from typing import TypedDict - -import numpy as np -from sklearn.cluster import AgglomerativeClustering -from sklearn.manifold import TSNE -from sklearn.preprocessing import normalize as sk_normalize - - -# --------------------------------------------------------------------------- -# TypedDicts for common return shapes -# --------------------------------------------------------------------------- - -class OverviewStats(TypedDict): - """High-level dashboard statistics from :func:`get_overview_stats`.""" - total_drafts: int - rated_count: int - author_count: int - idea_count: int - gap_count: int - input_tokens: int - output_tokens: int - false_positive_count: int - - -class DraftListItem(TypedDict): - """Single draft in the paginated listing from :func:`get_drafts_page`.""" - name: str - title: str - date: str | None - url: str - pages: int - group: str - source: str - score: float - novelty: float - maturity: float - overlap: float - momentum: float - relevance: float - categories: list[str] - summary: str - readiness: float - - -class DraftsPage(TypedDict): - """Paginated draft listing from :func:`get_drafts_page`.""" - drafts: list[DraftListItem] - total: int - page: int - per_page: int - pages: int - - -class AuthorInfo(TypedDict): - """Author entry from :func:`get_top_authors`.""" - name: str - affiliation: str - draft_count: int - drafts: list[str] - - -class AuthorNetworkNode(TypedDict): - """Node in the author network graph.""" - id: str - name: str - org: str - draft_count: int - avg_score: float - drafts: list[str] - - -class AuthorNetworkEdge(TypedDict): - """Edge in the author network graph.""" - source: str - target: str - weight: int - - -class AuthorCluster(TypedDict): - """Cluster in the author network.""" - id: int - members: list[str] - org_mix: 
dict[str, int] - size: int - drafts: list[dict[str, str]] - draft_count: int - - -class AuthorNetwork(TypedDict): - """Full author network from :func:`get_author_network_full`.""" - nodes: list[AuthorNetworkNode] - edges: list[AuthorNetworkEdge] - clusters: list[AuthorCluster] - - -class SimilarityGraphStats(TypedDict): - """Stats sub-dict in similarity graph.""" - node_count: int - edge_count: int - avg_similarity: float - - -class SimilarityGraph(TypedDict): - """Draft similarity network from :func:`get_similarity_graph`.""" - nodes: list[dict] - edges: list[dict] - stats: SimilarityGraphStats - - -class TimelineData(TypedDict): - """Monthly category counts from :func:`get_timeline_data`.""" - months: list[str] - series: dict[str, list[int]] - categories: list[str] - - -class MonitorCost(TypedDict): - """Cost sub-dict in monitor status.""" - input_tokens: int - output_tokens: int - estimated_usd: float - - -class MonitorPipeline(TypedDict): - """Pipeline sub-dict in monitor status.""" - total_drafts: int - rated: int - embedded: int - with_ideas: int - idea_total: int - gap_count: int - - -class MonitorStatus(TypedDict): - """Monitor status from :func:`get_monitor_status`.""" - last_run: dict | None - runs: list[dict] - unprocessed: dict[str, int] - total_runs: int - pipeline: MonitorPipeline - cost: MonitorCost - - -class SearchResults(TypedDict): - """Global search results from :func:`global_search`.""" - drafts: list[dict] - ideas: list[dict] - authors: list[dict] - gaps: list[dict] - - -class CitationGraphStats(TypedDict): - """Stats sub-dict in citation graph.""" - node_count: int - edge_count: int - rfc_count: int - draft_count: int - - -class CitationGraph(TypedDict): - """Citation network from :func:`get_citation_graph`.""" - nodes: list[dict] - edges: list[dict] - stats: CitationGraphStats - -# Add project root to path so we can import ietf_analyzer -_project_root = Path(__file__).resolve().parent.parent.parent -if str(_project_root) not in sys.path: - 
sys.path.insert(0, str(_project_root / "src")) - -from ietf_analyzer.config import Config -from ietf_analyzer.db import Database -from ietf_analyzer.readiness import compute_readiness, compute_readiness_batch -from ietf_analyzer.search import HybridSearch - -def _extract_month(time_str: str | None) -> str: - """Normalize a date string to YYYY-MM format.""" - if not time_str: - return "unknown" - if len(time_str) >= 7 and time_str[4] == '-': - return time_str[:7] # Already YYYY-MM-DD - if len(time_str) >= 6 and time_str[:4].isdigit(): - return time_str[:4] + '-' + time_str[4:6] # YYYYMMDD → YYYY-MM - return time_str[:7] - - -# Simple TTL cache for expensive computations (t-SNE, clustering, similarity) -_cache: dict[str, tuple[float, object]] = {} -_CACHE_TTL = 300 # 5 minutes - - -def _cached(key: str, fn, ttl: float = _CACHE_TTL): - """Return cached result or compute and cache it.""" - now = time.monotonic() - if key in _cache: - ts, val = _cache[key] - if now - ts < ttl: - return val - val = fn() - _cache[key] = (now, val) - return val - - -def get_db() -> Database: - """Get a Database instance using default config.""" - config = Config.load() - return Database(config) - - -def get_overview_stats(db: Database) -> OverviewStats: - """Return high-level stats for the dashboard home page. - - Excludes drafts flagged as false positives from rated counts. 
- """ - total_drafts = db.count_drafts(include_false_positives=False) - rated_pairs = db.drafts_with_ratings(limit=1000) # already excludes FPs - rated_count = len(rated_pairs) - author_count = db.author_count() - idea_count = db.idea_count() - gaps = db.all_gaps() - input_tok, output_tok = db.total_tokens_used() - - # Count false positives separately for transparency - total_all = db.count_drafts(include_false_positives=True) - false_positive_count = total_all - total_drafts - - return { - "total_drafts": total_drafts, - "rated_count": rated_count, - "author_count": author_count, - "idea_count": idea_count, - "gap_count": len(gaps), - "input_tokens": input_tok, - "output_tokens": output_tok, - "false_positive_count": false_positive_count, - } - - -def get_category_counts(db: Database) -> dict[str, int]: - """Return {category: draft_count} for all categories.""" - return db.category_counts() - - -def get_category_summary(db: Database, category: str) -> dict | None: - """Build a data-driven summary for a category. 
Returns None if category not found.""" - pairs = db.drafts_with_ratings(limit=2000) - all_authors = db.top_authors(limit=500) - - # Filter to drafts in this category - cat_pairs = [(d, r) for d, r in pairs if category in r.categories] - if not cat_pairs: - return None - - # Author lookup: draft_name -> [author names] - author_drafts_map: dict[str, list[str]] = defaultdict(list) - for name, aff, cnt, drafts in all_authors: - for dn in drafts: - author_drafts_map[dn].append(name) - - # Dimension averages - n = len(cat_pairs) - avg = lambda vals: round(sum(vals) / len(vals), 1) if vals else 0 - novelty_vals = [r.novelty for _, r in cat_pairs] - maturity_vals = [r.maturity for _, r in cat_pairs] - overlap_vals = [r.overlap for _, r in cat_pairs] - momentum_vals = [r.momentum for _, r in cat_pairs] - relevance_vals = [r.relevance for _, r in cat_pairs] - scores = [r.composite_score for _, r in cat_pairs] - - # Top drafts - sorted_pairs = sorted(cat_pairs, key=lambda p: p[1].composite_score, reverse=True) - top_3 = [(d.name, d.title, round(r.composite_score, 1)) for d, r in sorted_pairs[:3]] - - # Top authors in this category - author_counter: Counter = Counter() - org_counter: Counter = Counter() - author_aff: dict[str, str] = {} - for name, aff, cnt, drafts in all_authors: - author_aff[name] = aff or "" - for d, r in cat_pairs: - for a in author_drafts_map.get(d.name, []): - author_counter[a] += 1 - if author_aff.get(a): - org_counter[author_aff[a]] += 1 - top_authors = author_counter.most_common(5) - top_orgs = org_counter.most_common(5) - - # Strongest and weakest dimensions - dim_avgs = { - "Novelty": avg(novelty_vals), - "Maturity": avg(maturity_vals), - "Overlap": avg(overlap_vals), - "Momentum": avg(momentum_vals), - "Relevance": avg(relevance_vals), - } - strongest = max(dim_avgs, key=dim_avgs.get) - weakest = min(dim_avgs, key=dim_avgs.get) - - # Activity trend: how many are recent (last 6 months)? 
- recent = sum(1 for d, _ in cat_pairs if d.time and d.time >= "2025-09") - total_all = len(pairs) - - # Build text summary - lines = [] - lines.append(f"**{n} drafts** ({n * 100 // total_all}% of all rated drafts) " - f"with an average composite score of **{avg(scores):.1f}/5.0**.") - - # Dimension profile - lines.append(f"Strongest dimension: **{strongest}** ({dim_avgs[strongest]}), " - f"weakest: **{weakest}** ({dim_avgs[weakest]}).") - - # Maturity vs novelty insight - if dim_avgs["Maturity"] < 2.5 and dim_avgs["Novelty"] >= 3.0: - lines.append("This category has **high novelty but low maturity** — many early-stage proposals with fresh ideas that haven't been fully developed yet.") - elif dim_avgs["Maturity"] >= 3.0 and dim_avgs["Novelty"] < 2.5: - lines.append("This category is **mature but less novel** — established approaches being refined rather than introducing fundamentally new concepts.") - elif dim_avgs["Maturity"] >= 3.0 and dim_avgs["Novelty"] >= 3.0: - lines.append("This category shows **both high novelty and maturity** — well-developed proposals with genuinely new contributions.") - - # Overlap insight - if dim_avgs["Overlap"] >= 3.5: - lines.append(f"High overlap ({dim_avgs['Overlap']}) suggests **significant duplication** — multiple drafts cover similar ground, which may indicate convergence or fragmentation.") - elif dim_avgs["Overlap"] <= 2.0: - lines.append(f"Low overlap ({dim_avgs['Overlap']}) indicates **diverse approaches** — drafts in this category tackle distinct problems with little redundancy.") - - # Activity - if recent > 0: - lines.append(f"**{recent} draft{'s' if recent != 1 else ''}** submitted in the last 6 months, " - f"suggesting {'active' if recent >= 3 else 'moderate'} development.") - - return { - "text": " ".join(lines), - "count": n, - "avg_score": avg(scores), - "dimensions": dim_avgs, - "top_drafts": top_3, - "top_authors": top_authors, - "top_orgs": top_orgs, - "strongest": strongest, - "weakest": weakest, - } - - -def 
get_drafts_page( - db: Database, - page: int = 1, - per_page: int = 50, - search: str = "", - category: str = "", - min_score: float = 0.0, - sort: str = "score", - sort_dir: str = "desc", - source: str = "", -) -> DraftsPage: - """Return a paginated, filtered list of drafts with ratings. - - Returns dict with keys: drafts, total, page, per_page, pages. - """ - pairs = db.drafts_with_ratings(limit=1000) - - # Build author lookup for search (draft_name -> "author1 author2 ...") - author_text_by_draft: dict[str, str] = {} - if search: - rows = db.conn.execute( - """SELECT da.draft_name, GROUP_CONCAT(a.name, ' ') as names - FROM draft_authors da JOIN authors a ON da.person_id = a.person_id - GROUP BY da.draft_name""" - ).fetchall() - for r in rows: - author_text_by_draft[r[0]] = r[1] or "" - - # Filter - filtered = [] - for draft, rating in pairs: - if min_score > 0 and rating.composite_score < min_score: - continue - if category and category not in rating.categories: - continue - if source and draft.source != source: - continue - if search: - author_names = author_text_by_draft.get(draft.name, "") - haystack = f"{draft.name} {draft.title} {rating.summary} {author_names}".lower() - if not all(w in haystack for w in search.lower().split()): - continue - filtered.append((draft, rating)) - - # Sort - sort_keys = { - "score": lambda p: p[1].composite_score, - "name": lambda p: p[0].name, - "date": lambda p: p[0].time or "", - "novelty": lambda p: p[1].novelty, - "maturity": lambda p: p[1].maturity, - "relevance": lambda p: p[1].relevance, - "overlap": lambda p: p[1].overlap, - "momentum": lambda p: p[1].momentum, - "readiness": lambda p: (1.0 if p[0].name.startswith("draft-ietf-") else 0.0) * 0.25 + - min(int(p[0].rev or "0") / 5.0, 1.0) * 0.15 + - ((p[1].momentum - 1) / 4.0) * 0.15, - } - key_fn = sort_keys.get(sort, sort_keys["score"]) - reverse = sort_dir == "desc" - filtered.sort(key=key_fn, reverse=reverse) - - total = len(filtered) - pages = max(1, (total + per_page 
- 1) // per_page) - page = max(1, min(page, pages)) - start = (page - 1) * per_page - page_items = filtered[start : start + per_page] - - # Pre-compute readiness in batch (~6 queries total instead of ~200) - - readiness_cache = compute_readiness_batch(db, [d.name for d, _ in page_items]) - - drafts = [] - for draft, rating in page_items: - r_score = readiness_cache.get(draft.name, {}).get("score", 0) - drafts.append({ - "name": draft.name, - "title": draft.title, - "date": draft.date, - "url": draft.source_url if draft.source != "ietf" else draft.datatracker_url, - "pages": draft.pages or 0, - "group": draft.group or "individual", - "source": draft.source or "ietf", - "score": round(rating.composite_score, 2), - "novelty": rating.novelty, - "maturity": rating.maturity, - "overlap": rating.overlap, - "momentum": rating.momentum, - "relevance": rating.relevance, - "categories": rating.categories, - "summary": rating.summary, - "readiness": r_score, - }) - - return { - "drafts": drafts, - "total": total, - "page": page, - "per_page": per_page, - "pages": pages, - } - - -def get_draft_detail(db: Database, name: str) -> dict | None: - """Return full detail for a single draft.""" - draft = db.get_draft(name) - if not draft: - return None - - rating = db.get_rating(name) - authors = db.get_authors_for_draft(name) - ideas = db.get_ideas_for_draft(name) - refs = db.get_refs_for_draft(name) - - result = { - "name": draft.name, - "title": draft.title, - "rev": draft.rev, - "abstract": draft.abstract, - "date": draft.date, - "time": draft.time, - "url": draft.datatracker_url, - "text_url": draft.text_url, - "pages": draft.pages, - "words": draft.words, - "group": draft.group or "individual", - "categories": draft.categories, - "tags": draft.tags, - "authors": [ - {"name": a.name, "affiliation": a.affiliation, "person_id": a.person_id} - for a in authors - ], - "ideas": ideas, - "refs": [{"type": t, "id": rid} for t, rid in refs], - } - - if rating: - result["rating"] = { - 
"score": round(rating.composite_score, 2), - "novelty": rating.novelty, - "maturity": rating.maturity, - "overlap": rating.overlap, - "momentum": rating.momentum, - "relevance": rating.relevance, - "summary": rating.summary, - "novelty_note": rating.novelty_note, - "maturity_note": rating.maturity_note, - "overlap_note": rating.overlap_note, - "momentum_note": rating.momentum_note, - "relevance_note": rating.relevance_note, - "categories": rating.categories, - } - - # Readiness score - - result["readiness"] = compute_readiness(db, name) - - # Annotation - annotation = db.get_annotation(name) - result["annotation"] = annotation - - return result - - -def get_rating_distributions(db: Database) -> dict: - """Return arrays for each rating dimension, suitable for Plotly.""" - pairs = db.drafts_with_ratings(limit=1000) - dims = { - "novelty": [], - "maturity": [], - "overlap": [], - "momentum": [], - "relevance": [], - "scores": [], - "categories": [], - "names": [], - "sources": [], - } - for draft, rating in pairs: - dims["novelty"].append(rating.novelty) - dims["maturity"].append(rating.maturity) - dims["overlap"].append(rating.overlap) - dims["momentum"].append(rating.momentum) - dims["relevance"].append(rating.relevance) - dims["scores"].append(round(rating.composite_score, 2)) - dims["categories"].append(rating.categories[0] if rating.categories else "Other") - dims["names"].append(draft.name) - dims["sources"].append(getattr(draft, "source", "ietf") or "ietf") - return dims - - -def get_timeline_data(db: Database) -> TimelineData: - """Return monthly counts by category for timeline chart.""" - pairs = db.drafts_with_ratings(limit=1000) - all_drafts = db.list_drafts(limit=1000, order_by="time ASC") - rating_map = {d.name: r for d, r in pairs} - - month_cat: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int)) - for d in all_drafts: - month = _extract_month(d.time) - r = rating_map.get(d.name) - if r: - cat = r.categories[0] if r.categories else "Other" 
- month_cat[month][cat] += 1 - - months = sorted(month_cat.keys()) - cat_totals: Counter = Counter() - for mc in month_cat.values(): - for c, cnt in mc.items(): - cat_totals[c] += cnt - top_cats = [c for c, _ in cat_totals.most_common(10)] - - series = {} - for cat in top_cats: - series[cat] = [month_cat[m].get(cat, 0) for m in months] - - return {"months": months, "series": series, "categories": top_cats} - - -def get_ideas_by_type(db: Database) -> dict: - """Return ideas grouped by type with counts.""" - all_ideas = db.all_ideas() - type_counts = Counter(i.get("type", "other") or "other" for i in all_ideas) - return { - "total": len(all_ideas), - "by_type": dict(type_counts.most_common()), - "ideas": all_ideas, - } - - -def get_all_gaps(db: Database) -> list[dict]: - """Return all gap analysis results, sorted by severity (critical first).""" - _sev_order = {"critical": 0, "high": 1, "medium": 2, "low": 3} - gaps = db.all_gaps() - gaps.sort(key=lambda g: _sev_order.get(g.get("severity", "low"), 99)) - return gaps - - -def get_gap_detail(db: Database, gap_id: int) -> dict | None: - """Return a single gap by ID, or None if not found.""" - gaps = db.all_gaps() - for g in gaps: - if g["id"] == gap_id: - return g - return None - - -def get_generated_drafts() -> list[dict]: - """Return list of pre-generated draft files in data/reports/generated-drafts/.""" - drafts_dir = _project_root / "data" / "reports" / "generated-drafts" - if not drafts_dir.exists(): - return [] - results = [] - for f in sorted(drafts_dir.glob("draft-*.txt")): - # Extract title from first non-empty content line after header - title = f.stem - text = f.read_text(errors="replace") - for line in text.splitlines(): - stripped = line.strip() - if stripped and not stripped.startswith("Internet-Draft") and \ - not stripped.startswith("Intended status") and \ - not stripped.startswith("Expires:") and stripped != "": - title = stripped - break - results.append({ - "filename": f.name, - "stem": f.stem, - 
"title": title, - "size": f.stat().st_size, - "path": str(f), - }) - return results - - -def read_generated_draft(filename: str) -> str | None: - """Read a generated draft file by filename. Returns text or None.""" - drafts_dir = _project_root / "data" / "reports" / "generated-drafts" - path = drafts_dir / filename - if not path.exists() or not path.is_file(): - return None - # Safety: ensure we're not reading outside the directory - if not str(path.resolve()).startswith(str(drafts_dir.resolve())): - return None - return path.read_text(errors="replace") - - -def get_top_authors(db: Database, limit: int = 30) -> list[AuthorInfo]: - """Return top authors by draft count.""" - rows = db.top_authors(limit=limit) - return [ - {"name": name, "affiliation": aff, "draft_count": cnt, "drafts": drafts} - for name, aff, cnt, drafts in rows - ] - - -def get_org_data(db: Database, limit: int = 20) -> list[dict]: - """Return organization contribution data.""" - rows = db.top_orgs(limit=limit) - return [ - {"org": org, "author_count": authors, "draft_count": drafts} - for org, authors, drafts in rows - ] - - -def get_category_radar_data(db: Database) -> dict: - """Return average rating profiles per category for radar chart.""" - pairs = db.drafts_with_ratings(limit=1000) - cat_ratings: dict[str, list] = defaultdict(list) - for _, r in pairs: - for c in r.categories: - cat_ratings[c].append(r) - - top_cats = sorted(cat_ratings.keys(), key=lambda c: len(cat_ratings[c]), reverse=True)[:8] - result = {} - for cat in top_cats: - ratings = cat_ratings[cat] - n = len(ratings) - result[cat] = { - "count": n, - "novelty": round(sum(r.novelty for r in ratings) / n, 2), - "maturity": round(sum(r.maturity for r in ratings) / n, 2), - "relevance": round(sum(r.relevance for r in ratings) / n, 2), - "momentum": round(sum(r.momentum for r in ratings) / n, 2), - "low_overlap": round(sum(6 - r.overlap for r in ratings) / n, 2), - } - return result - - -def get_score_histogram(db: Database) -> 
list[float]: - """Return list of composite scores for histogram.""" - pairs = db.drafts_with_ratings(limit=1000) - return [round(r.composite_score, 2) for _, r in pairs] - - -def get_coauthor_network(db: Database, min_shared: int = 1) -> dict: - """Return co-authorship network data for force-directed graph. - - Returns {nodes: [{id, name, org, draft_count}], edges: [{source, target, weight}]} - """ - pairs = db.coauthor_pairs() - top = db.top_authors(limit=100) - - # Build node set from authors who have co-authorships - author_info = {name: {"org": aff, "draft_count": cnt} for name, aff, cnt, _ in top} - node_set = set() - edges = [] - for a, b, shared in pairs: - if shared >= min_shared: - node_set.add(a) - node_set.add(b) - edges.append({"source": a, "target": b, "weight": shared}) - - nodes = [] - for name in node_set: - info = author_info.get(name, {"org": "", "draft_count": 1}) - nodes.append({ - "id": name, - "name": name, - "org": info["org"], - "draft_count": info["draft_count"], - }) - - return {"nodes": nodes, "edges": edges} - - -def get_similarity_graph(db: Database, threshold: float = 0.75) -> SimilarityGraph: - """Return draft similarity network (cached).""" - return _cached(f"similarity_{threshold}", lambda: _compute_similarity_graph(db, threshold)) - - -def _compute_similarity_graph(db: Database, threshold: float = 0.75) -> SimilarityGraph: - """Return draft similarity network for force-directed graph. 
- - Returns {nodes: [{name, title, category, score}], - edges: [{source, target, similarity}], - stats: {node_count, edge_count, avg_similarity}} - """ - - - embeddings = db.all_embeddings() - if len(embeddings) < 2: - return {"nodes": [], "edges": [], "stats": {"node_count": 0, "edge_count": 0, "avg_similarity": 0}} - - pairs = db.drafts_with_ratings(limit=1000) - rating_map = {d.name: r for d, r in pairs} - draft_map = {d.name: d for d, _ in pairs} - - # Filter to drafts with both embeddings and ratings - names = [n for n in embeddings if n in rating_map] - if len(names) < 2: - return {"nodes": [], "edges": [], "stats": {"node_count": 0, "edge_count": 0, "avg_similarity": 0}} - - matrix = np.array([embeddings[n] for n in names]) - - # L2-normalize and compute cosine similarity - norms = np.linalg.norm(matrix, axis=1, keepdims=True) - norms[norms == 0] = 1.0 - normalized = matrix / norms - sim_matrix = normalized @ normalized.T - - # Find pairs above threshold (upper triangle only) - edges = [] - node_set = set() - for i in range(len(names)): - for j in range(i + 1, len(names)): - sim = float(sim_matrix[i, j]) - if sim >= threshold: - edges.append({"source": names[i], "target": names[j], "similarity": round(sim, 4)}) - node_set.add(names[i]) - node_set.add(names[j]) - - # Build nodes from connected drafts only - nodes = [] - for name in names: - if name not in node_set: - continue - r = rating_map[name] - d = draft_map.get(name) - nodes.append({ - "name": name, - "title": d.title if d else name, - "category": r.categories[0] if r.categories else "Other", - "score": round(r.composite_score, 2), - }) - - avg_sim = round(sum(e["similarity"] for e in edges) / max(len(edges), 1), 4) - - return { - "nodes": nodes, - "edges": edges, - "stats": {"node_count": len(nodes), "edge_count": len(edges), "avg_similarity": avg_sim}, - } - - -def get_cross_org_data(db: Database, limit: int = 20) -> list[dict]: - """Return cross-org collaboration pairs.""" - rows = 
db.cross_org_collaborations(limit=limit) - return [ - {"org_a": a, "org_b": b, "shared_drafts": cnt} - for a, b, cnt in rows - ] - - -def get_author_network_full(db: Database) -> AuthorNetwork: - """Return author network (cached for 5 min).""" - return _cached("author_network", lambda: _compute_author_network_full(db)) - - -def _compute_author_network_full(db: Database) -> AuthorNetwork: - """Return enriched co-authorship network with avg scores and cluster info. - - Returns { - nodes: [{id, name, org, draft_count, avg_score, drafts: [name,...]}], - edges: [{source, target, weight}], - clusters: [{id, members: [name,...], org_mix: {org: count}, size}], - } - """ - pairs = db.coauthor_pairs() - top = db.top_authors(limit=500) - - # Build rating lookup for avg scores - rated = db.drafts_with_ratings(limit=2000) - draft_score = {d.name: r.composite_score for d, r in rated} - - # Author info map - author_info = {} - for name, aff, cnt, drafts in top: - scores = [draft_score[dn] for dn in drafts if dn in draft_score] - avg = round(sum(scores) / len(scores), 2) if scores else 0 - author_info[name] = { - "org": aff, "draft_count": cnt, "drafts": drafts, "avg_score": avg - } - - # Build node set: authors with meaningful collaboration (2+ shared drafts) - node_set = set() - edges = [] - for a, b, shared in pairs: - if shared >= 2: - node_set.add(a) - node_set.add(b) - edges.append({"source": a, "target": b, "weight": shared}) - - # Also include authors with 3+ drafts even if no co-authorships - for name, info in author_info.items(): - if info["draft_count"] >= 3: - node_set.add(name) - - nodes = [] - for name in node_set: - info = author_info.get(name, {"org": "", "draft_count": 1, "drafts": [], "avg_score": 0}) - nodes.append({ - "id": name, - "name": name, - "org": info["org"], - "draft_count": info["draft_count"], - "avg_score": info["avg_score"], - "drafts": info["drafts"][:8], # cap for JSON size - }) - - # Cluster detection via connected components (BFS) - adjacency: 
dict[str, set[str]] = defaultdict(set) - for e in edges: - adjacency[e["source"]].add(e["target"]) - adjacency[e["target"]].add(e["source"]) - - visited: set[str] = set() - clusters = [] - - # Batch-load all drafts referenced by authors (avoid N+1 in cluster loop) - _all_dn = set() - for _ai in author_info.values(): - _all_dn.update(_ai.get("drafts", [])) - _all_drafts_map = db.get_drafts_by_names(list(_all_dn)) - - for node in sorted(node_set): - if node in visited: - continue - component: list[str] = [] - queue = [node] - while queue: - current = queue.pop(0) - if current in visited: - continue - visited.add(current) - component.append(current) - for neighbor in adjacency.get(current, []): - if neighbor not in visited: - queue.append(neighbor) - - if len(component) >= 2: - org_mix: dict[str, int] = Counter() - member_orgs: dict[str, str] = {} - cluster_drafts: dict[str, str] = {} # name -> title - for m in component: - org = author_info.get(m, {}).get("org", "") - if org: - org_mix[org] += 1 - member_orgs[m] = org - for dn in author_info.get(m, {}).get("drafts", []): - if dn not in cluster_drafts: - d = _all_drafts_map.get(dn) - cluster_drafts[dn] = d.title[:80] if d else dn - clusters.append({ - "id": len(clusters), - "members": component, - "member_orgs": member_orgs, - "org_mix": dict(org_mix.most_common()), - "size": len(component), - "drafts": [{"name": n, "title": t} for n, t in list(cluster_drafts.items())], - "draft_count": len(cluster_drafts), - }) - - clusters.sort(key=lambda c: c["size"], reverse=True) - - # Generate meaningful names for clusters - for cl in clusters: - cl["name"] = _author_cluster_name(cl) - - return {"nodes": nodes, "edges": edges, "clusters": clusters} - - -def _normalize_org(name: str) -> str: - """Shorten verbose org names for display.""" - # Remove common suffixes - for suffix in (", Inc.", " Inc.", ", Ltd.", " Ltd.", " Co.", " Technologies", - " Corporation", " Corp.", " Limited", " GmbH", " AG", - " Europe Ltd", " Research", " 
Systems"): - name = name.replace(suffix, "") - return name.strip().rstrip(",").rstrip("&").rstrip() - - -def _author_cluster_name(cluster: dict) -> str: - """Derive a meaningful name for an author cluster from orgs and draft titles.""" - # Org part: top 1-2 orgs, normalized - raw_orgs = list(cluster.get("org_mix", {}).keys()) - orgs = [] - seen_short: set[str] = set() - for o in raw_orgs: - short = _normalize_org(o) - if short.lower() not in seen_short: - seen_short.add(short.lower()) - orgs.append(short) - if len(orgs) >= 2: - org_label = f"{orgs[0]} + {orgs[1]}" - elif orgs: - org_label = orgs[0] - else: - # Fall back to first member's last name - members = cluster.get("members", []) - org_label = members[0].split()[-1] if members else "Unknown" - - # Topic part: extract common keywords from draft titles - stopwords = { - "a", "an", "the", "of", "for", "in", "to", "and", "on", "with", - "using", "based", "draft", "internet", "ietf", "protocol", "framework", - "requirements", "architecture", "considerations", "use", "cases", "via", - "towards", "over", "from", "into", "between", "specification", "extension", - "extensions", "mechanisms", "mechanism", "version", "new", "general", - } - word_counts: Counter = Counter() - for d in cluster.get("drafts", []): - title = d.get("title", "") - words = re.findall(r"[A-Za-z]{3,}", title) - for w in words: - wl = w.lower() - if wl not in stopwords: - word_counts[wl] += 1 - - # Pick top keyword(s) that appear in multiple drafts - top_words = [w for w, c in word_counts.most_common(3) if c >= 2] - if not top_words: - top_words = [w for w, _ in word_counts.most_common(1)] - - if top_words: - topic = " ".join(w.capitalize() for w in top_words[:2]) - name = f"{org_label} — {topic}" - else: - name = org_label - # Truncate if too long for display - return name if len(name) <= 50 else name[:47] + "…" - - -def get_idea_clusters(db: Database) -> dict: - """Cluster ideas (cached for 5 min).""" - return _cached("idea_clusters", lambda: 
_compute_idea_clusters(db)) - - -def _compute_idea_clusters(db: Database) -> dict: - """Cluster ideas by embedding similarity, return clusters + t-SNE scatter. - - Uses Ward linkage on L2-normalized embeddings (approximates cosine) with - a target of ~30 clusters for readable groupings. Enriches each cluster - with WG info and category breakdown. - """ - - - embeddings = db.all_idea_embeddings() - if not embeddings: - return {"clusters": [], "scatter": [], "stats": {"total": 0, "clustered": 0, "num_clusters": 0}, "empty": True} - - # Exclude ideas from false-positive drafts - fp_names = db.false_positive_names() - - # Fetch ideas with IDs for metadata lookup - rows = db.conn.execute("SELECT id, title, description, idea_type, draft_name FROM ideas").fetchall() - idea_map = {r["id"]: {"title": r["title"], "description": r["description"], - "type": r["idea_type"], "draft_name": r["draft_name"]} - for r in rows if r["draft_name"] not in fp_names} - - # Remove FP ideas from embeddings too - embeddings = {k: v for k, v in embeddings.items() if k in idea_map} - - # Draft -> WG and category lookup - draft_rows = db.conn.execute('SELECT name, "group", title FROM drafts').fetchall() - draft_wg = {r["name"]: r["group"] or "none" for r in draft_rows} - draft_title_map = {r["name"]: r["title"] for r in draft_rows} - rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings WHERE COALESCE(false_positive, 0) = 0").fetchall() - draft_cats: dict[str, list[str]] = {} - for r in rating_rows: - try: - draft_cats[r["draft_name"]] = json.loads(r["categories"]) if r["categories"] else [] - except (json.JSONDecodeError, TypeError): - draft_cats[r["draft_name"]] = [] - - # Build matrix from embeddings that have matching ideas - idea_ids = [iid for iid in embeddings if iid in idea_map] - if len(idea_ids) < 5: - return {"clusters": [], "scatter": [], "stats": {"total": len(idea_ids), "clustered": 0, "num_clusters": 0}, "empty": True} - - matrix = np.array([embeddings[iid] for 
iid in idea_ids]) - matrix_norm = sk_normalize(matrix) - - # Ward clustering on normalized vectors — target ~30 clusters scaled by dataset size - n_target = max(10, min(40, len(idea_ids) // 12)) - try: - clustering = AgglomerativeClustering(n_clusters=n_target, linkage='ward') - labels = clustering.fit_predict(matrix_norm) - except Exception: - return {"clusters": [], "scatter": [], "stats": {"total": len(idea_ids), "clustered": 0, "num_clusters": 0}, "empty": True} - - # Build cluster data - cluster_ideas_map: dict[int, list] = defaultdict(list) - for idx, iid in enumerate(idea_ids): - cluster_ideas_map[labels[idx]].append(iid) - - stop = {"a", "an", "the", "of", "for", "in", "to", "and", "or", "with", - "on", "by", "is", "as", "at", "from", "that", "this", "it", - "based", "using", "protocol", "mechanism", "framework", "system", - "network", "agent", "agents"} - clusters = [] - for cid in sorted(cluster_ideas_map.keys()): - members = cluster_ideas_map[cid] - ideas_in_cluster = [idea_map[iid] for iid in members if iid in idea_map] - if len(ideas_in_cluster) < 2: - continue - - # Theme: most common significant words in titles - words = Counter() - for idea in ideas_in_cluster: - for w in idea["title"].lower().split(): - w_clean = w.strip("()[].,;:-\"'") - if len(w_clean) > 2 and w_clean not in stop: - words[w_clean] += 1 - top_words = [w for w, _ in words.most_common(4)] - theme = " ".join(top_words).title() if top_words else f"Cluster {cid}" - - drafts = list({idea["draft_name"] for idea in ideas_in_cluster}) - - # Enrich: WG breakdown - wg_counts: dict[str, int] = Counter() - cat_counts: dict[str, int] = Counter() - for dname in drafts: - wg = draft_wg.get(dname, "none") - wg_counts[wg] += 1 - for cat in draft_cats.get(dname, []): - cat_counts[cat] += 1 - - wg_list = [{"wg": wg, "count": cnt} for wg, cnt in wg_counts.most_common(5)] - cat_list = [{"cat": cat, "count": cnt} for cat, cnt in cat_counts.most_common(3)] - cross_wg = len([w for w in wg_counts if w != 
"none"]) >= 2 - - clusters.append({ - "id": len(clusters), - "theme": theme, - "size": len(ideas_in_cluster), - "ideas": ideas_in_cluster[:20], - "drafts": drafts, - "wgs": wg_list, - "categories": cat_list, - "cross_wg": cross_wg, - "wg_count": len(wg_counts), - }) - - clusters.sort(key=lambda c: c["size"], reverse=True) - - # Build mapping: original cluster label -> sorted index - # Each cluster remembers which original label it came from via its member ids - old_label_to_new: dict[int, int] = {} - for new_idx, c in enumerate(clusters): - c["id"] = new_idx - # Find original label for any member of this cluster - for old_cid, members in cluster_ideas_map.items(): - if members and members[0] in [iid for iid in members if iid in idea_map]: - member_titles = {idea_map[m]["title"] for m in members if m in idea_map} - c_titles = {idea["title"] for idea in c["ideas"]} - if member_titles == c_titles or (member_titles & c_titles and len(members) == c["size"]): - old_label_to_new[old_cid] = new_idx - break - - # Fallback: build from idea_id -> label mapping - iid_to_new: dict[int, int] = {} - for old_cid, members in cluster_ideas_map.items(): - new_idx = old_label_to_new.get(old_cid, old_cid) - for iid in members: - iid_to_new[iid] = new_idx - - # t-SNE for scatter - scatter = [] - try: - perp = min(30, len(idea_ids) - 1) - tsne = TSNE(n_components=2, perplexity=perp, random_state=42, max_iter=500) - coords = tsne.fit_transform(matrix_norm) - - for idx, iid in enumerate(idea_ids): - info = idea_map.get(iid, {}) - scatter.append({ - "x": round(float(coords[idx, 0]), 3), - "y": round(float(coords[idx, 1]), 3), - "cluster_id": iid_to_new.get(iid, int(labels[idx])), - "title": info.get("title", ""), - "draft_name": info.get("draft_name", ""), - "wg": draft_wg.get(info.get("draft_name", ""), ""), - }) - except Exception: - pass - - # --- Cross-cluster links --- - # Find pairs of clusters whose ideas are semantically related - # Use centroid similarity + best idea-pair links - 
links = [] - if len(clusters) >= 2: - # Build cluster centroids from normalized embeddings - cluster_centroids = {} - cluster_member_indices: dict[int, list[int]] = defaultdict(list) - for idx, iid in enumerate(idea_ids): - cid = iid_to_new.get(iid, int(labels[idx])) - cluster_member_indices[cid].append(idx) - - for cid, indices in cluster_member_indices.items(): - if indices: - centroid = matrix_norm[indices].mean(axis=0) - norm = np.linalg.norm(centroid) - if norm > 0: - cluster_centroids[cid] = centroid / norm - - # Compute pairwise centroid similarity for all cluster pairs - cids_sorted = sorted(cluster_centroids.keys()) - for ci_idx, ci in enumerate(cids_sorted): - for cj in cids_sorted[ci_idx + 1:]: - sim = float(np.dot(cluster_centroids[ci], cluster_centroids[cj])) - if sim < 0.45: - continue - - # Find the best idea pair across these two clusters - best_sim = 0.0 - best_pair = (None, None) - # Sample up to 20 ideas per cluster to keep it fast - ci_members = cluster_member_indices[ci][:20] - cj_members = cluster_member_indices[cj][:20] - for mi in ci_members: - for mj in cj_members: - pair_sim = float(np.dot(matrix_norm[mi], matrix_norm[mj])) - if pair_sim > best_sim: - best_sim = pair_sim - best_pair = (idea_ids[mi], idea_ids[mj]) - - if best_sim < 0.5: - continue - - # Get theme names - ci_theme = next((c["theme"] for c in clusters if c["id"] == ci), f"Cluster {ci}") - cj_theme = next((c["theme"] for c in clusters if c["id"] == cj), f"Cluster {cj}") - - idea_a = idea_map.get(best_pair[0], {}) - idea_b = idea_map.get(best_pair[1], {}) - - links.append({ - "source": ci, - "target": cj, - "source_theme": ci_theme, - "target_theme": cj_theme, - "similarity": round(sim, 3), - "best_pair_sim": round(best_sim, 3), - "idea_a": idea_a.get("title", ""), - "idea_a_draft": idea_a.get("draft_name", ""), - "idea_b": idea_b.get("title", ""), - "idea_b_draft": idea_b.get("draft_name", ""), - }) - - links.sort(key=lambda l: l["best_pair_sim"], reverse=True) - links = 
links[:50] # cap at top 50 links - - total = len(idea_ids) - clustered = sum(c["size"] for c in clusters) - return { - "clusters": clusters, - "scatter": scatter, - "links": links, - "stats": {"total": total, "clustered": clustered, "num_clusters": len(clusters)}, - "empty": False, - } - - -def get_timeline_animation_data(db: Database) -> dict: - """Timeline animation (cached for 5 min).""" - return _cached("timeline_animation", lambda: _compute_timeline_animation_data(db)) - - -def _compute_timeline_animation_data(db: Database) -> dict: - """Compute t-SNE on all drafts, return points with month info + category_monthly. - - t-SNE is computed once on ALL drafts so coordinates are stable across - animation frames. Each point carries a ``month`` field (YYYY-MM) so the - front-end can build cumulative animation frames. - """ - - - embeddings = db.all_embeddings() - if len(embeddings) < 5: - return {"points": [], "months": [], "category_monthly": {}} - - pairs = db.drafts_with_ratings(limit=1000) - rating_map = {d.name: r for d, r in pairs} - draft_map = {d.name: d for d, _ in pairs} - - # Filter to drafts that have both embeddings and ratings - names = [n for n in embeddings if n in rating_map] - if len(names) < 5: - return {"points": [], "months": [], "category_monthly": {}} - - matrix = np.array([embeddings[n] for n in names]) - - try: - tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1), - random_state=42, max_iter=500) - coords = tsne.fit_transform(matrix) - except Exception: - return {"points": [], "months": [], "category_monthly": {}} - - # Build points with month - points = [] - month_set: set[str] = set() - category_monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int)) - - for i, name in enumerate(names): - r = rating_map[name] - d = draft_map.get(name) - month = _extract_month(d.time if d else None) - cat = r.categories[0] if r.categories else "Other" - month_set.add(month) - category_monthly[month][cat] += 1 - points.append({ 
- "name": name, - "title": d.title if d else name, - "x": round(float(coords[i, 0]), 3), - "y": round(float(coords[i, 1]), 3), - "category": cat, - "score": round(r.composite_score, 2), - "month": month, - }) - - months = sorted(month_set) - # Convert defaultdict to plain dict for JSON - cat_monthly_plain = {m: dict(cats) for m, cats in category_monthly.items()} - - return { - "points": points, - "months": months, - "category_monthly": cat_monthly_plain, - } - - -def get_monitor_status(db: Database) -> MonitorStatus: - """Return monitoring status data for dashboard.""" - runs = db.get_monitor_runs(limit=20) - last = runs[0] if runs else None - total_drafts = db.count_drafts() - rated_count = len(db.drafts_with_ratings(limit=10000)) - unrated = len(db.unrated_drafts(limit=9999)) - unembedded = len(db.drafts_without_embeddings(limit=9999)) - embedded_count = total_drafts - unembedded - no_ideas = len(db.drafts_without_ideas(limit=9999)) - ideas_count = total_drafts - no_ideas - idea_total = db.idea_count() - gap_count = len(db.all_gaps()) - input_tok, output_tok = db.total_tokens_used() - - # Estimate cost (Sonnet pricing: $3/M input, $15/M output) - est_cost = (input_tok * 3.0 / 1_000_000) + (output_tok * 15.0 / 1_000_000) - - return { - "last_run": last, - "runs": runs, - "unprocessed": {"unrated": unrated, "unembedded": unembedded, "no_ideas": no_ideas}, - "total_runs": len(runs), - "pipeline": { - "total_drafts": total_drafts, - "rated": rated_count, - "embedded": embedded_count, - "with_ideas": ideas_count, - "idea_total": idea_total, - "gap_count": gap_count, - }, - "cost": { - "input_tokens": input_tok, - "output_tokens": output_tok, - "estimated_usd": round(est_cost, 2), - }, - } - - -def get_citation_graph(db: Database, min_refs: int = 2) -> CitationGraph: - """Return citation graph (cached for 5 min).""" - return _cached(f"citation_graph_{min_refs}", lambda: _compute_citation_graph(db, min_refs)) - - -def _compute_citation_graph(db: Database, min_refs: int 
= 2) -> CitationGraph: - """Return citation network data for force-directed graph. - - Returns {nodes: [{id, type, title, influence, ...}], - edges: [{source, target}], - stats: {node_count, edge_count, ...}} - """ - # Get all references - rows = db.conn.execute( - "SELECT draft_name, ref_type, ref_id FROM draft_refs" - ).fetchall() - - # Count in-degree for each referenced item - in_degree: dict[str, int] = Counter() - edges_raw = [] - for r in rows: - ref_key = f"{r['ref_type']}:{r['ref_id']}" - in_degree[ref_key] += 1 - edges_raw.append((r["draft_name"], ref_key)) - - # Also count drafts as source nodes - draft_out: dict[str, int] = Counter() - for draft_name, _ in edges_raw: - draft_out[draft_name] += 1 - - # Get draft titles for labeling - draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() - draft_titles = {r["name"]: r["title"] for r in draft_rows} - - # Get rating categories for draft coloring - rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() - draft_cats = {} - for r in rating_rows: - try: - cats = json.loads(r["categories"]) if r["categories"] else [] - draft_cats[r["draft_name"]] = cats[0] if cats else "Other" - except Exception: - draft_cats[r["draft_name"]] = "Other" - - # Filter: keep RFCs with min_refs+ references and all drafts that reference them - top_refs = {k: v for k, v in in_degree.items() if v >= min_refs} - - # Build node set - node_set = set() - filtered_edges = [] - for draft_name, ref_key in edges_raw: - if ref_key in top_refs: - node_set.add(draft_name) - node_set.add(ref_key) - filtered_edges.append({"source": draft_name, "target": ref_key}) - - # Limit to ~200 nodes max for readability - if len(node_set) > 250: - # Keep only refs with higher in-degree - sorted_refs = sorted(top_refs.items(), key=lambda x: x[1], reverse=True) - keep_refs = set(k for k, _ in sorted_refs[:80]) - node_set = set() - filtered_edges = [] - for draft_name, ref_key in edges_raw: - if ref_key in 
keep_refs: - node_set.add(draft_name) - node_set.add(ref_key) - filtered_edges.append({"source": draft_name, "target": ref_key}) - - # Build nodes - nodes = [] - for nid in node_set: - if ":" in nid and not nid.startswith("draft-"): - # It's a reference node (rfc:1234, bcp:14, etc.) - ref_type, ref_id = nid.split(":", 1) - influence = in_degree.get(nid, 0) - if ref_type == "rfc": - try: - title = f"RFC {int(ref_id)}" - except ValueError: - title = f"RFC {ref_id}" - else: - title = f"{ref_type.upper()} {ref_id}" - nodes.append({ - "id": nid, - "type": ref_type, - "title": title, - "influence": influence, - "ref_id": ref_id, - }) - else: - # It's a draft node - influence = in_degree.get(nid, 0) + draft_out.get(nid, 0) - nodes.append({ - "id": nid, - "type": "draft", - "title": draft_titles.get(nid, nid), - "influence": draft_out.get(nid, 0), - "category": draft_cats.get(nid, "Other"), - }) - - # Stats - rfc_count = sum(1 for n in nodes if n["type"] == "rfc") - draft_count = sum(1 for n in nodes if n["type"] == "draft") - - return { - "nodes": nodes, - "edges": filtered_edges, - "stats": { - "node_count": len(nodes), - "edge_count": len(filtered_edges), - "rfc_count": rfc_count, - "draft_count": draft_count, - }, - } - - -def global_search(db: Database, query: str) -> SearchResults: - """Search across drafts (FTS5), ideas, authors, and gaps. - - Returns {drafts: [...], ideas: [...], authors: [...], gaps: [...]}. - """ - results: dict = {"drafts": [], "ideas": [], "authors": [], "gaps": []} - if not query or not query.strip(): - return results - - q = query.strip() - - # 1. 
Drafts via FTS5 - try: - fts_query = re.sub(r'[^\w\s]', '', q) - fts_query = re.sub(r'\b(NEAR|OR|AND|NOT)\b', '', fts_query, flags=re.IGNORECASE) - fts_query = re.sub(r'\s+', ' ', fts_query).strip() - if not fts_query: - raise ValueError("empty query after sanitization") - rows = db.conn.execute( - """SELECT d.name, d.title, d.abstract, d.time, d."group" - FROM drafts d - JOIN drafts_fts f ON d.rowid = f.rowid - WHERE drafts_fts MATCH ? - ORDER BY rank - LIMIT 50""", - (fts_query,), - ).fetchall() - for r in rows: - results["drafts"].append({ - "name": r["name"], - "title": r["title"], - "abstract": (r["abstract"] or "")[:200], - "date": r["time"], - "group": r["group"] or "individual", - }) - except Exception: - # FTS5 match can fail on certain query syntax; fall back to LIKE - like = f"%{q}%" - rows = db.conn.execute( - """SELECT name, title, abstract, time, "group" FROM drafts - WHERE title LIKE ? OR name LIKE ? OR abstract LIKE ? - LIMIT 50""", - (like, like, like), - ).fetchall() - for r in rows: - results["drafts"].append({ - "name": r["name"], - "title": r["title"], - "abstract": (r["abstract"] or "")[:200], - "date": r["time"], - "group": r["group"] or "individual", - }) - - # 2. Ideas via LIKE - like = f"%{q}%" - rows = db.conn.execute( - """SELECT id, title, description, idea_type, draft_name FROM ideas - WHERE (title LIKE ? OR description LIKE ?) - AND draft_name NOT IN (SELECT draft_name FROM ratings WHERE false_positive = 1) - ORDER BY id LIMIT 50""", - (like, like), - ).fetchall() - for r in rows: - results["ideas"].append({ - "id": r["id"], - "title": r["title"], - "description": (r["description"] or "")[:200], - "type": r["idea_type"], - "draft_name": r["draft_name"], - }) - - # 3. Authors via LIKE - results["authors"] = db.search_authors(q, limit=50) - - # 4. 
Gaps via LIKE - results["gaps"] = db.search_gaps(q, limit=50) - - return results - - -def get_landscape_tsne(db: Database) -> list[dict]: - """Compute t-SNE (cached for 5 min).""" - return _cached("landscape_tsne", lambda: _compute_landscape_tsne(db)) - - -def _compute_landscape_tsne(db: Database) -> list[dict]: - """Compute t-SNE from embeddings, return [{name, title, x, y, category, score}].""" - - - embeddings = db.all_embeddings() - if len(embeddings) < 5: - return [] - - pairs = db.drafts_with_ratings(limit=1000) - rating_map = {d.name: r for d, r in pairs} - draft_map = {d.name: d for d, _ in pairs} - - # Filter to drafts that have both embeddings and ratings - names = [n for n in embeddings if n in rating_map] - if len(names) < 5: - return [] - - matrix = np.array([embeddings[n] for n in names]) - - try: - tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1), - random_state=42, max_iter=500) - coords = tsne.fit_transform(matrix) - except Exception: - return [] - - result = [] - for i, name in enumerate(names): - r = rating_map[name] - d = draft_map.get(name) - result.append({ - "name": name, - "title": d.title if d else name, - "x": round(float(coords[i, 0]), 3), - "y": round(float(coords[i, 1]), 3), - "category": r.categories[0] if r.categories else "Other", - "score": round(r.composite_score, 2), - }) - return result - - -def get_comparison_data(db: Database, names: list[str]) -> dict | None: - """Get comparison data for a list of drafts. 
- - Returns { - drafts: [{name, title, abstract, rating, ideas, refs, ...}], - shared_ideas: [{title, drafts: [name,...]}], - unique_ideas: {name: [{title, description}]}, - shared_refs: [{type, id, drafts: [name,...]}], - unique_refs: {name: [{type, id}]}, - similarities: [{a, b, similarity}], - comparison_text: str | None, - } - """ - - - drafts_data = [] - all_ideas: dict[str, list[dict]] = {} - all_refs: dict[str, list[tuple[str, str]]] = {} - - for name in names: - detail = get_draft_detail(db, name) - if not detail: - continue - drafts_data.append(detail) - all_ideas[name] = detail.get("ideas", []) - all_refs[name] = [(r["type"], r["id"]) for r in detail.get("refs", [])] - - if len(drafts_data) < 2: - return None - - # Find shared vs unique ideas (by title similarity) - idea_title_drafts: dict[str, list[str]] = {} - for name, ideas in all_ideas.items(): - for idea in ideas: - title_lower = idea["title"].lower().strip() - if title_lower not in idea_title_drafts: - idea_title_drafts[title_lower] = [] - idea_title_drafts[title_lower].append(name) - - shared_ideas = [ - {"title": title, "drafts": draft_list} - for title, draft_list in idea_title_drafts.items() - if len(set(draft_list)) > 1 - ] - unique_ideas: dict[str, list[dict]] = {} - for name, ideas in all_ideas.items(): - unique = [] - for idea in ideas: - title_lower = idea["title"].lower().strip() - if len(set(idea_title_drafts.get(title_lower, []))) <= 1: - unique.append({"title": idea["title"], "description": idea.get("description", "")}) - unique_ideas[name] = unique - - # Find shared vs unique references - ref_drafts: dict[tuple[str, str], list[str]] = {} - for name, refs in all_refs.items(): - for ref in refs: - if ref not in ref_drafts: - ref_drafts[ref] = [] - ref_drafts[ref].append(name) - - shared_refs = [ - {"type": ref[0], "id": ref[1], "drafts": draft_list} - for ref, draft_list in ref_drafts.items() - if len(set(draft_list)) > 1 - ] - unique_refs: dict[str, list[dict]] = {} - for name, refs in 
all_refs.items(): - unique = [] - for ref in refs: - if len(set(ref_drafts.get(ref, []))) <= 1: - unique.append({"type": ref[0], "id": ref[1]}) - unique_refs[name] = unique - - # Pairwise embedding similarities - embeddings = db.all_embeddings() - similarities = [] - valid_names = [d["name"] for d in drafts_data] - for i in range(len(valid_names)): - for j in range(i + 1, len(valid_names)): - a, b = valid_names[i], valid_names[j] - if a in embeddings and b in embeddings: - vec_a = embeddings[a] - vec_b = embeddings[b] - dot = np.dot(vec_a, vec_b) - norm = np.linalg.norm(vec_a) * np.linalg.norm(vec_b) - sim = float(dot / norm) if norm > 0 else 0.0 - similarities.append({"a": a, "b": b, "similarity": round(sim, 4)}) - - return { - "drafts": drafts_data, - "shared_ideas": shared_ideas, - "unique_ideas": unique_ideas, - "shared_refs": shared_refs, - "unique_refs": unique_refs, - "similarities": similarities, - "comparison_text": None, - } - - -# --------------------------------------------------------------------------- -# Architecture Designer — System-of-Systems view -# --------------------------------------------------------------------------- - -# Architectural layers (bottom-up stack) -_ARCH_LAYERS = [ - {"id": "transport", "label": "Transport & Networking", "order": 0, - "keywords": {"transport", "network", "routing", "tunnel", "packet", "flow", "traffic", "qos", "sdwan", "mpls", "bgp", "ospf", "segment", "srv6", "quic", "http", "grpc", "mqtt", "yang", "snmp", "netconf", "restconf"}}, - {"id": "identity", "label": "Identity & Trust", "order": 1, - "keywords": {"identity", "auth", "authentication", "authorization", "credential", "certificate", "trust", "attestation", "oauth", "token", "signing", "verification", "verifiable", "did", "vc", "pki", "spiffe", "acl"}}, - {"id": "discovery", "label": "Discovery & Registration", "order": 2, - "keywords": {"discovery", "registration", "registry", "catalog", "advertisement", "announce", "capability", "service", "lookup", 
"resolution", "dns", "directory"}}, - {"id": "communication", "label": "Agent Communication", "order": 3, - "keywords": {"a2a", "agent", "communication", "message", "messaging", "protocol", "exchange", "negotiation", "handshake", "session", "dialogue", "interaction", "mcp", "interop"}}, - {"id": "coordination", "label": "Task & Coordination", "order": 4, - "keywords": {"task", "delegation", "orchestration", "workflow", "planning", "coordination", "consensus", "collaboration", "multi-agent", "swarm", "composition", "scheduling"}}, - {"id": "intelligence", "label": "AI & Inference", "order": 5, - "keywords": {"model", "inference", "learning", "training", "ml", "neural", "llm", "embedding", "reasoning", "decision", "prediction", "classification", "generative", "rag", "fine-tuning"}}, - {"id": "safety", "label": "Safety & Governance", "order": 6, - "keywords": {"safety", "ethical", "governance", "policy", "audit", "explainability", "transparency", "accountability", "bias", "fairness", "compliance", "regulation", "risk", "shutdown", "alignment", "adversarial", "privacy", "consent"}}, - {"id": "application", "label": "Application Domains", "order": 7, - "keywords": {"healthcare", "autonomous", "vehicle", "robotics", "iot", "digital twin", "supply chain", "finance", "manufacturing", "energy", "smart", "edge", "cloud", "sensing"}}, -] - -_LAYER_KEYWORDS = {l["id"]: l["keywords"] for l in _ARCH_LAYERS} - - -def _classify_to_layer(text: str) -> str: - """Classify a piece of text to the best-matching architectural layer.""" - text_lower = text.lower() - words = set(re.findall(r"[a-z][a-z0-9-]+", text_lower)) - scores: dict[str, int] = {} - for layer_id, kws in _LAYER_KEYWORDS.items(): - scores[layer_id] = len(words & kws) - # Also check for multi-word keywords as substrings - for kw in kws: - if len(kw) > 4 and kw in text_lower: - scores[layer_id] += 1 - best = max(scores, key=lambda k: scores[k]) - return best if scores[best] > 0 else "communication" # default - - -def 
get_architecture(db: Database) -> dict: - """Build system-of-systems architecture from idea clusters, gaps, and source coverage.""" - return _cached("architecture", lambda: _compute_architecture(db), ttl=600) - - -def _compute_architecture(db: Database) -> dict: - """Compute the architecture view. - - Returns: - { - "components": [...], # architectural building blocks - "dependencies": [...], # edges between components - "gaps": [...], # gaps mapped to layers - "layers": [...], # layer definitions - "source_coverage": {...}, # per-layer source coverage - "stats": {...} - } - """ - # --- Gather raw data --- - cluster_data = get_idea_clusters(db) - clusters = cluster_data.get("clusters", []) - links = cluster_data.get("links", []) - all_gaps = db.all_gaps() - - # Source coverage: count drafts per source per layer - draft_rows = db.conn.execute( - "SELECT d.name, d.title, d.abstract, d.source, r.categories " - "FROM drafts d LEFT JOIN ratings r ON d.name = r.draft_name " - "WHERE COALESCE(r.false_positive, 0) = 0" - ).fetchall() - - # Build components from idea clusters - components = [] - cluster_to_component: dict[int, int] = {} # cluster_id -> component index - - for cl in clusters: - if cl["size"] < 3: - continue # skip tiny clusters - - # Determine layer from cluster theme + idea titles - text_blob = cl.get("theme", "") - for idea in cl.get("ideas", [])[:10]: - text_blob += " " + idea.get("title", "") + " " + idea.get("description", "") - layer = _classify_to_layer(text_blob) - - # Source coverage for this component's drafts - draft_names = set(cl.get("drafts", [])) - sources: Counter = Counter() - comp_drafts: list[dict] = [] - for dr in draft_rows: - if dr["name"] in draft_names: - sources[dr["source"] or "ietf"] += 1 - comp_drafts.append({"name": dr["name"], "title": (dr["title"] or dr["name"])[:80], "source": dr["source"] or "ietf"}) - - # Idea type breakdown - type_counts: Counter = Counter() - for idea in cl.get("ideas", []): - t = idea.get("type", "") - if 
t: - type_counts[t] += 1 - - # Maturity: rough proxy from idea count and source diversity - maturity = min(5, 1 + len(sources) + (1 if cl["size"] >= 10 else 0) + (1 if cl.get("cross_wg") else 0)) - - comp = { - "id": len(components), - "cluster_id": cl["id"], - "name": cl.get("theme", f"Component {cl['id']}"), - "layer": layer, - "size": cl["size"], - "draft_count": len(draft_names), - "drafts": comp_drafts[:20], - "sources": dict(sources.most_common()), - "type_breakdown": dict(type_counts.most_common(5)), - "maturity": maturity, - "wgs": cl.get("wgs", [])[:3], - "top_ideas": [{"title": i["title"], "type": i.get("type", ""), "draft_name": i.get("draft_name", "")} - for i in cl.get("ideas", [])[:5]], - "categories": cl.get("categories", []), - } - cluster_to_component[cl["id"]] = comp["id"] - components.append(comp) - - # Build dependencies from cross-cluster links - dependencies = [] - for link in links: - src_comp = cluster_to_component.get(link["source"]) - tgt_comp = cluster_to_component.get(link["target"]) - if src_comp is not None and tgt_comp is not None and src_comp != tgt_comp: - dependencies.append({ - "source": src_comp, - "target": tgt_comp, - "similarity": link.get("best_pair_sim", link.get("similarity", 0)), - "idea_a": link.get("idea_a", ""), - "idea_b": link.get("idea_b", ""), - }) - - # Map gaps to layers - gap_items = [] - for gap in all_gaps: - text = gap["topic"] + " " + gap.get("description", "") + " " + gap.get("category", "") - layer = _classify_to_layer(text) - gap_items.append({ - "id": gap["id"], - "topic": gap["topic"], - "description": gap["description"], - "evidence": gap.get("evidence", ""), - "severity": gap.get("severity", "medium"), - "category": gap.get("category", ""), - "layer": layer, - }) - - # Source coverage per layer - source_coverage: dict[str, dict[str, int]] = {l["id"]: Counter() for l in _ARCH_LAYERS} - for dr in draft_rows: - text = (dr["title"] or "") + " " + (dr["abstract"] or "")[:200] - layer = 
_classify_to_layer(text) - source_coverage[layer][dr["source"] or "ietf"] += 1 - # Convert Counters to dicts - source_coverage = {k: dict(v) for k, v in source_coverage.items()} - - # Layer summary stats - layer_info = [] - for l in _ARCH_LAYERS: - lid = l["id"] - comp_count = sum(1 for c in components if c["layer"] == lid) - idea_count = sum(c["size"] for c in components if c["layer"] == lid) - gap_count = sum(1 for g in gap_items if g["layer"] == lid) - layer_info.append({ - "id": l["id"], - "label": l["label"], - "order": l["order"], - "component_count": comp_count, - "idea_count": idea_count, - "gap_count": gap_count, - "coverage": source_coverage.get(lid, {}), - "total_drafts": sum(source_coverage.get(lid, {}).values()), - }) - - return { - "components": components, - "dependencies": dependencies, - "gaps": gap_items, - "layers": layer_info, - "stats": { - "total_components": len(components), - "total_dependencies": len(dependencies), - "total_gaps": len(gap_items), - "layers_with_gaps": len(set(g["layer"] for g in gap_items)), - }, - } - - -def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict: - """Search-only (free) — returns sources + cached answer if available.""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.search_only(question, top_k=top_k) - - -def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict: - """Run Claude synthesis (costs tokens, result is cached permanently).""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.ask(question, top_k=top_k, cheap=cheap) - - -# ── New Analysis Functions ────────────────────────────────────────────── - -def get_idea_analysis(db: Database) -> dict: - """Return comprehensive idea analysis data for the idea-analysis page. 
- - Includes novelty distribution, type breakdown with avg novelty, - top novel ideas, ideas-per-draft distribution, cross-tab of type x source, - shared ideas across drafts, and idea novelty vs draft rating correlation. - """ - from collections import Counter, defaultdict - from difflib import SequenceMatcher - - # Fetch raw data - all_ideas = db.conn.execute( - """SELECT i.id, i.draft_name, i.title, i.description, i.idea_type, - i.novelty_score - FROM ideas i ORDER BY i.novelty_score DESC NULLS LAST""" - ).fetchall() - all_ideas = [dict(r) for r in all_ideas] - - # Draft ratings lookup - ratings_rows = db.conn.execute( - """SELECT d.name, d.title as draft_title, d.source, - r.novelty AS r_novelty, r.maturity, r.overlap, r.momentum, r.relevance - FROM drafts d LEFT JOIN ratings r ON d.name = r.draft_name""" - ).fetchall() - draft_info = {} - for r in ratings_rows: - row = dict(r) - # Compute composite score (average of 5 dimensions) - dims = [row.get("r_novelty"), row.get("maturity"), row.get("overlap"), - row.get("momentum"), row.get("relevance")] - valid = [d for d in dims if d is not None] - row["composite_score"] = sum(valid) / len(valid) if valid else None - draft_info[row["name"]] = row - - total = len(all_ideas) - scored = [i for i in all_ideas if i.get("novelty_score") is not None] - unscored = total - len(scored) - avg_novelty = sum(i["novelty_score"] for i in scored) / len(scored) if scored else 0 - - # Embedding coverage - embed_count = db.conn.execute("SELECT COUNT(*) FROM idea_embeddings").fetchone()[0] - - # --- Novelty score distribution (histogram) --- - novelty_dist = Counter(i["novelty_score"] for i in scored) - novelty_histogram = { - "labels": [1, 2, 3, 4, 5], - "values": [novelty_dist.get(s, 0) for s in [1, 2, 3, 4, 5]], - } - - # --- Ideas by type with counts and avg novelty --- - type_data = defaultdict(lambda: {"count": 0, "novelty_sum": 0, "novelty_n": 0}) - for idea in all_ideas: - t = idea.get("idea_type") or "other" - 
type_data[t]["count"] += 1 - if idea.get("novelty_score") is not None: - type_data[t]["novelty_sum"] += idea["novelty_score"] - type_data[t]["novelty_n"] += 1 - - by_type = [] - for t, d in sorted(type_data.items(), key=lambda x: x[1]["count"], reverse=True): - avg = d["novelty_sum"] / d["novelty_n"] if d["novelty_n"] > 0 else 0 - by_type.append({"type": t, "count": d["count"], "avg_novelty": round(avg, 2)}) - - type_names = [t["type"] for t in by_type] - - # --- Top 20 most novel ideas (score 4-5) --- - top_novel = [] - for idea in all_ideas: - if idea.get("novelty_score") and idea["novelty_score"] >= 4: - di = draft_info.get(idea["draft_name"], {}) - top_novel.append({ - "title": idea["title"], - "description": idea["description"], - "type": idea.get("idea_type", "other"), - "novelty_score": idea["novelty_score"], - "draft_name": idea["draft_name"], - "draft_title": di.get("draft_title", ""), - "draft_score": di.get("composite_score"), - }) - top_novel.sort(key=lambda x: (x["novelty_score"], x.get("draft_score") or 0), reverse=True) - top_novel = top_novel[:20] - - # --- Ideas per draft distribution --- - ideas_per_draft = Counter(i["draft_name"] for i in all_ideas) - ipd_dist = Counter(ideas_per_draft.values()) - ideas_per_draft_hist = { - "labels": sorted(ipd_dist.keys()), - "values": [ipd_dist[k] for k in sorted(ipd_dist.keys())], - } - # Also top drafts by idea count - top_idea_drafts = [] - for name, count in ideas_per_draft.most_common(10): - di = draft_info.get(name, {}) - top_idea_drafts.append({ - "name": name, - "draft_title": di.get("draft_title", ""), - "idea_count": count, - "score": di.get("composite_score"), - }) - - # --- Cross-tabulation: idea_type x source --- - type_source = defaultdict(lambda: defaultdict(int)) - for idea in all_ideas: - t = idea.get("idea_type") or "other" - di = draft_info.get(idea["draft_name"], {}) - source = di.get("source", "ietf") or "ietf" - type_source[t][source] += 1 - - sources = sorted(set( - di.get("source", 
"ietf") or "ietf" for di in draft_info.values() - )) - cross_tab = [] - for t in type_names: - row = {"type": t} - for s in sources: - row[s] = type_source[t].get(s, 0) - cross_tab.append(row) - - # --- Shared ideas across drafts --- - idea_groups: list[dict] = [] - for idea in all_ideas: - title_lower = idea["title"].lower().strip() - matched = False - for group in idea_groups: - ratio = SequenceMatcher(None, title_lower, group["canonical"]).ratio() - if ratio >= 0.75: - group["ideas"].append(idea) - group["drafts"].add(idea["draft_name"]) - matched = True - break - if not matched: - idea_groups.append({ - "canonical": title_lower, - "title": idea["title"], - "ideas": [idea], - "drafts": {idea["draft_name"]}, - }) - - shared_ideas = [] - for g in sorted(idea_groups, key=lambda x: len(x["drafts"]), reverse=True): - if len(g["drafts"]) < 2: - break - shared_ideas.append({ - "title": g["title"], - "appearances": len(g["drafts"]), - "drafts": sorted(g["drafts"])[:8], - "types": list(set(i.get("idea_type", "other") for i in g["ideas"])), - }) - - # --- Scatter: draft avg idea novelty vs draft relevance --- - draft_idea_novelty = defaultdict(list) - for idea in scored: - draft_idea_novelty[idea["draft_name"]].append(idea["novelty_score"]) - - scatter_data = [] - for name, scores in draft_idea_novelty.items(): - di = draft_info.get(name, {}) - if di.get("relevance") is not None and di.get("composite_score") is not None: - scatter_data.append({ - "name": name, - "avg_idea_novelty": round(sum(scores) / len(scores), 2), - "relevance": di["relevance"], - "score": di["composite_score"], - "idea_count": len(scores), - "source": di.get("source", "ietf") or "ietf", - }) - - # --- Sunburst data: type -> novelty band --- - sunburst_labels = [] - sunburst_parents = [] - sunburst_values = [] - # Root - sunburst_labels.append("All Ideas") - sunburst_parents.append("") - sunburst_values.append(total) - - novelty_bands = {"High (4-5)": lambda s: s is not None and s >= 4, - "Medium 
(3)": lambda s: s is not None and s == 3, - "Low (1-2)": lambda s: s is not None and s <= 2, - "Unscored": lambda s: s is None} - - for t_info in by_type: - t = t_info["type"] - sunburst_labels.append(t) - sunburst_parents.append("All Ideas") - sunburst_values.append(t_info["count"]) - # Sub-bands - type_ideas = [i for i in all_ideas if (i.get("idea_type") or "other") == t] - for band, fn in novelty_bands.items(): - cnt = sum(1 for i in type_ideas if fn(i.get("novelty_score"))) - if cnt > 0: - sunburst_labels.append(f"{t} - {band}") - sunburst_parents.append(t) - sunburst_values.append(cnt) - - return { - "total": total, - "scored": len(scored), - "unscored": unscored, - "avg_novelty": round(avg_novelty, 2), - "embed_count": embed_count, - "embed_pct": round(embed_count / total * 100, 1) if total > 0 else 0, - "type_count": len(by_type), - "novelty_histogram": novelty_histogram, - "by_type": by_type, - "top_novel": top_novel, - "ideas_per_draft_hist": ideas_per_draft_hist, - "top_idea_drafts": top_idea_drafts, - "cross_tab": cross_tab, - "sources": sources, - "shared_ideas": shared_ideas, - "scatter_data": scatter_data, - "sunburst": { - "labels": sunburst_labels, - "parents": sunburst_parents, - "values": sunburst_values, - }, - } - - - - -def get_source_comparison(db: Database) -> dict: - """Cross-source comparison: ratings, categories, counts by standards body.""" - pairs_all = db.drafts_with_ratings(limit=2000) - # Also include false positives for completeness of source counts - pairs_fp = db.drafts_with_ratings(limit=2000, include_false_positives=True) - - # Build per-source data - source_stats: dict[str, dict] = {} - source_categories: dict[str, Counter] = defaultdict(Counter) - source_ratings: dict[str, dict[str, list]] = defaultdict(lambda: { - "novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": [], "scores": [], - }) - # Collect author counts per source - all_authors_by_source: dict[str, set] = defaultdict(set) - - for draft, rating 
in pairs_all: - src = getattr(draft, "source", "ietf") or "ietf" - source_ratings[src]["novelty"].append(rating.novelty) - source_ratings[src]["maturity"].append(rating.maturity) - source_ratings[src]["overlap"].append(rating.overlap) - source_ratings[src]["momentum"].append(rating.momentum) - source_ratings[src]["relevance"].append(rating.relevance) - source_ratings[src]["scores"].append(round(rating.composite_score, 2)) - for cat in rating.categories: - source_categories[src][cat] += 1 - - # Get all drafts (including unrated) for draft counts - all_drafts = db.list_drafts(limit=5000) - source_draft_counts: Counter = Counter() - for d in all_drafts: - src = getattr(d, "source", "ietf") or "ietf" - source_draft_counts[src] += 1 - - # Author counts by source - try: - rows = db.conn.execute( - """SELECT d.source, COUNT(DISTINCT da.person_id) as author_count - FROM drafts d - JOIN draft_authors da ON d.name = da.draft_name - GROUP BY d.source""" - ).fetchall() - for r in rows: - src = r["source"] or "ietf" - all_authors_by_source[src] = r["author_count"] - except Exception: - pass - - # Idea counts by source - source_idea_counts: Counter = Counter() - try: - rows = db.conn.execute( - """SELECT d.source, COUNT(*) as idea_count - FROM ideas i - JOIN drafts d ON i.draft_name = d.name - GROUP BY d.source""" - ).fetchall() - for r in rows: - src = r["source"] or "ietf" - source_idea_counts[src] = r["idea_count"] - except Exception: - pass - - # Build summary table - all_sources = sorted(set(source_draft_counts.keys()) | set(source_ratings.keys())) - summary = [] - for src in all_sources: - rats = source_ratings.get(src, {"scores": []}) - cats = source_categories.get(src, Counter()) - top_cat = cats.most_common(1)[0][0] if cats else "N/A" - avg_score = round(sum(rats["scores"]) / len(rats["scores"]), 2) if rats["scores"] else 0.0 - summary.append({ - "source": src, - "drafts": source_draft_counts.get(src, 0), - "rated": len(rats["scores"]), - "authors": 
all_authors_by_source.get(src, 0), - "ideas": source_idea_counts.get(src, 0), - "avg_score": avg_score, - "top_category": top_cat, - }) - - # Radar data: average of each dimension per source - radar = {} - for src, rats in source_ratings.items(): - if not rats["scores"]: - continue - n = len(rats["scores"]) - radar[src] = { - "novelty": round(sum(rats["novelty"]) / n, 2), - "maturity": round(sum(rats["maturity"]) / n, 2), - "overlap": round(sum(rats["overlap"]) / n, 2), - "momentum": round(sum(rats["momentum"]) / n, 2), - "relevance": round(sum(rats["relevance"]) / n, 2), - "count": n, - } - - # Category distribution by source (for stacked bar / heatmap) - all_cats = sorted({cat for cats in source_categories.values() for cat in cats}) - heatmap = { - "sources": list(source_categories.keys()), - "categories": all_cats, - "values": [], - } - for src in heatmap["sources"]: - row = [source_categories[src].get(cat, 0) for cat in all_cats] - heatmap["values"].append(row) - - # Unique/shared categories analysis - source_cat_sets = {src: set(cats.keys()) for src, cats in source_categories.items()} - unique_cats = {} - for src, cats in source_cat_sets.items(): - others = set() - for s2, c2 in source_cat_sets.items(): - if s2 != src: - others |= c2 - unique_cats[src] = sorted(cats - others) - - shared_cats = set() - for src, cats in source_cat_sets.items(): - for s2, c2 in source_cat_sets.items(): - if s2 != src: - shared_cats |= (cats & c2) - shared_cats = sorted(shared_cats) - - return { - "summary": summary, - "radar": radar, - "heatmap": heatmap, - "unique_categories": unique_cats, - "shared_categories": shared_cats, - } - - -def get_false_positive_profile(db: Database) -> dict: - """Profile drafts flagged as false positives.""" - # Get false positives - fp_rows = db.false_positive_drafts_raw() - - # Get non-FP rated drafts for comparison - nonfp_rows = db.non_false_positive_ratings_raw() - - total_rated = db.rated_count() - total_drafts = 
db.count_drafts(include_false_positives=True) - - # Build FP list - fp_list = [] - fp_categories: Counter = Counter() - fp_sources: Counter = Counter() - fp_dims = {"novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": []} - - for row in fp_rows: - cats = json.loads(row["r_categories"]) if row["r_categories"] else [] - src = row["source"] or "ietf" - fp_list.append({ - "name": row["name"], - "title": row["title"], - "source": src, - "categories": cats, - "relevance": row["relevance"], - "novelty": row["novelty"], - "maturity": row["maturity"], - "overlap": row["overlap"], - "momentum": row["momentum"], - "summary": row["summary"] or "", - }) - for cat in cats: - fp_categories[cat] += 1 - fp_sources[src] += 1 - fp_dims["novelty"].append(row["novelty"]) - fp_dims["maturity"].append(row["maturity"]) - fp_dims["overlap"].append(row["overlap"]) - fp_dims["momentum"].append(row["momentum"]) - fp_dims["relevance"].append(row["relevance"]) - - # Non-FP dimensions for comparison - nonfp_dims = {"novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": []} - nonfp_categories: Counter = Counter() - for row in nonfp_rows: - nonfp_dims["novelty"].append(row["novelty"]) - nonfp_dims["maturity"].append(row["maturity"]) - nonfp_dims["overlap"].append(row["overlap"]) - nonfp_dims["momentum"].append(row["momentum"]) - nonfp_dims["relevance"].append(row["relevance"]) - cats = json.loads(row["r_categories"]) if row["r_categories"] else [] - for cat in cats: - nonfp_categories[cat] += 1 - - # Top terms from FP abstracts - from collections import Counter as _Counter - stop_words = { - "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", - "of", "with", "by", "from", "is", "it", "that", "this", "are", "was", - "be", "as", "can", "may", "will", "not", "has", "have", "been", "which", - "their", "its", "also", "such", "these", "would", "should", "could", - "more", "other", "than", "into", "about", "between", "over", "after", - "all", 
"one", "two", "new", "they", "we", "our", "each", "some", "any", - "there", "what", "when", "how", "where", "who", "does", "do", "did", - "no", "if", "so", "up", "out", "only", "used", "using", "use", "based", - "through", "both", "well", "within", "must", "while", "had", "were", - } - word_counter: Counter = Counter() - for row in fp_rows: - abstract = (row["abstract"] or "").lower() - title = (row["title"] or "").lower() - text = abstract + " " + title - words = re.findall(r'[a-z]{3,}', text) - for w in words: - if w not in stop_words: - word_counter[w] += 1 - top_terms = word_counter.most_common(30) - - return { - "count": len(fp_list), - "total_rated": total_rated, - "total_drafts": total_drafts, - "pct_of_total": round(100 * len(fp_list) / total_drafts, 1) if total_drafts else 0, - "pct_of_rated": round(100 * len(fp_list) / total_rated, 1) if total_rated else 0, - "fp_list": fp_list, - "fp_categories": dict(fp_categories.most_common()), - "fp_sources": dict(fp_sources.most_common()), - "fp_dims": fp_dims, - "nonfp_dims": nonfp_dims, - "top_terms": top_terms, - "nonfp_categories": dict(nonfp_categories.most_common(20)), - } - - -def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict: - """Search-only (free) — returns sources + cached answer if available.""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.search_only(question, top_k=top_k) - - -def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict: - """Run Claude synthesis (costs tokens, result is cached permanently).""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.ask(question, top_k=top_k, cheap=cheap) - - -def get_citation_influence(db: Database) -> dict: - """Return citation influence analysis data (cached for 5 min).""" - return _cached("citation_influence", lambda: _compute_citation_influence(db)) - - -def _compute_citation_influence(db: Database) -> dict: - """Compute citation 
influence metrics from the draft_refs table. - - Returns dict with: - - top_cited_rfcs: top 20 most-cited RFCs with citation counts and citing drafts - - top_citing_drafts: top 20 drafts that cite the most references - - citations_by_category: average citations per category - - stats: total citations, unique RFCs, avg refs per draft - - draft_network: draft-to-draft citation edges for visualization - """ - # Get all references - rows = db.conn.execute( - "SELECT draft_name, ref_type, ref_id FROM draft_refs" - ).fetchall() - - # Get draft titles and categories - draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() - draft_titles = {r["name"]: r["title"] for r in draft_rows} - - rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() - draft_cats: dict[str, str] = {} - for r in rating_rows: - try: - cats = json.loads(r["categories"]) if r["categories"] else [] - draft_cats[r["draft_name"]] = cats[0] if cats else "Other" - except Exception: - draft_cats[r["draft_name"]] = "Other" - - # Well-known RFC names - rfc_names = { - "2119": "Key words (MUST/SHALL/MAY)", "8174": "Key words update", - "8259": "JSON", "7519": "JWT", "6749": "OAuth 2.0", - "7540": "HTTP/2", "9110": "HTTP Semantics", "7525": "TLS Recommendations", - "8446": "TLS 1.3", "3986": "URIs", "7230": "HTTP/1.1 Syntax", - "7231": "HTTP/1.1 Semantics", "8288": "Web Linking", "6125": "TLS Server Identity", - "7515": "JWS", "7516": "JWE", "7517": "JWK", "7518": "JWA", - "9449": "DPoP", "6750": "OAuth Bearer", "8725": "JWT Best Practices", - "9396": "Rich Authorization Requests", "9101": "JAR", - "8414": "OAuth Server Metadata", "7591": "Dynamic Client Registration", - "8705": "mTLS for OAuth", "9068": "JWT Access Tokens", - "6819": "OAuth Threat Model", "9200": "ACE-OAuth", "9052": "COSE", - "8392": "CWT", "7252": "CoAP", - } - - # In-degree: how many times each RFC is cited - rfc_citations: dict[str, list[str]] = defaultdict(list) - draft_out_count: dict[str, 
int] = Counter() - draft_to_draft_edges = [] - total_citations = 0 - - for r in rows: - draft_name = r["draft_name"] - ref_type = r["ref_type"] - ref_id = r["ref_id"] - total_citations += 1 - draft_out_count[draft_name] += 1 - - if ref_type == "rfc": - rfc_citations[ref_id].append(draft_name) - elif ref_type == "draft": - draft_to_draft_edges.append({ - "source": draft_name, - "target": ref_id, - "source_title": draft_titles.get(draft_name, draft_name), - "target_title": draft_titles.get(ref_id, ref_id), - }) - - # Top 20 most-cited RFCs - rfc_sorted = sorted(rfc_citations.items(), key=lambda x: len(x[1]), reverse=True) - top_cited_rfcs = [] - for ref_id, citing_drafts in rfc_sorted[:20]: - top_cited_rfcs.append({ - "rfc_id": ref_id, - "name": rfc_names.get(ref_id, ""), - "count": len(citing_drafts), - "drafts": citing_drafts[:10], # Limit to first 10 for display - "total_drafts": len(citing_drafts), - }) - - # Top 20 most-citing drafts (out-degree) - draft_sorted = sorted(draft_out_count.items(), key=lambda x: x[1], reverse=True) - top_citing_drafts = [] - for draft_name, count in draft_sorted[:20]: - top_citing_drafts.append({ - "name": draft_name, - "title": draft_titles.get(draft_name, draft_name), - "count": count, - "category": draft_cats.get(draft_name, "Other"), - }) - - # Citation density by category - cat_totals: dict[str, int] = Counter() - cat_counts: dict[str, int] = Counter() - for draft_name, count in draft_out_count.items(): - cat = draft_cats.get(draft_name, "Other") - cat_totals[cat] += count - cat_counts[cat] += 1 - - citations_by_category = [] - for cat in sorted(cat_totals.keys()): - avg = cat_totals[cat] / cat_counts[cat] if cat_counts[cat] > 0 else 0 - citations_by_category.append({ - "category": cat, - "total_citations": cat_totals[cat], - "draft_count": cat_counts[cat], - "avg_citations": round(avg, 1), - }) - citations_by_category.sort(key=lambda x: x["avg_citations"], reverse=True) - - # PageRank-style influence: drafts that cite 
highly-cited RFCs - # Simple approximation: sum of (1 / citation_count) for each RFC cited - rfc_influence = {rid: len(drafts) for rid, drafts in rfc_citations.items()} - draft_pagerank: dict[str, float] = Counter() - for r in rows: - if r["ref_type"] == "rfc" and r["ref_id"] in rfc_influence: - # Higher score for citing highly-cited RFCs - draft_pagerank[r["draft_name"]] += rfc_influence[r["ref_id"]] - - pagerank_sorted = sorted(draft_pagerank.items(), key=lambda x: x[1], reverse=True) - top_pagerank = [] - for draft_name, score in pagerank_sorted[:20]: - top_pagerank.append({ - "name": draft_name, - "title": draft_titles.get(draft_name, draft_name), - "score": round(score, 1), - "category": draft_cats.get(draft_name, "Other"), - "out_degree": draft_out_count.get(draft_name, 0), - }) - - # Stats - unique_rfcs = len(rfc_citations) - drafts_with_refs = len(draft_out_count) - avg_refs = total_citations / drafts_with_refs if drafts_with_refs > 0 else 0 - - return { - "top_cited_rfcs": top_cited_rfcs, - "top_citing_drafts": top_citing_drafts, - "top_pagerank": top_pagerank, - "citations_by_category": citations_by_category, - "draft_network": draft_to_draft_edges[:200], # Limit for perf - "stats": { - "total_citations": total_citations, - "unique_rfcs": unique_rfcs, - "drafts_with_refs": drafts_with_refs, - "avg_refs_per_draft": round(avg_refs, 1), - }, - } - - -def get_bcp_analysis(db: Database) -> dict: - """Return BCP dependency analysis data (cached for 5 min).""" - return _cached("bcp_analysis", lambda: _compute_bcp_analysis(db)) - - -def _compute_bcp_analysis(db: Database) -> dict: - """Compute BCP dependency analysis. 
- - Returns dict with: - - bcps: all BCPs with citation counts and citing drafts - - co_citation: which BCPs tend to be co-cited - - by_category: BCP citation patterns by category - - coverage: what % of drafts cite at least one BCP - """ - # Get all BCP references - bcp_rows = db.conn.execute( - "SELECT draft_name, ref_id FROM draft_refs WHERE ref_type = 'bcp'" - ).fetchall() - - # Get draft titles and categories - draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() - draft_titles = {r["name"]: r["title"] for r in draft_rows} - total_drafts = len(draft_titles) - - rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() - draft_cats: dict[str, str] = {} - for r in rating_rows: - try: - cats = json.loads(r["categories"]) if r["categories"] else [] - draft_cats[r["draft_name"]] = cats[0] if cats else "Other" - except Exception: - draft_cats[r["draft_name"]] = "Other" - - # BCP citation counts - bcp_citations: dict[str, list[str]] = defaultdict(list) - draft_bcps: dict[str, list[str]] = defaultdict(list) - - for r in bcp_rows: - bcp_citations[r["ref_id"]].append(r["draft_name"]) - draft_bcps[r["draft_name"]].append(r["ref_id"]) - - # All BCPs with counts - bcps = [] - for bcp_id, citing_drafts in sorted(bcp_citations.items(), - key=lambda x: len(x[1]), reverse=True): - bcps.append({ - "bcp_id": bcp_id, - "count": len(citing_drafts), - "drafts": citing_drafts[:10], - "total_drafts": len(citing_drafts), - }) - - # Co-citation matrix: which BCPs appear together in the same draft - bcp_ids = sorted(bcp_citations.keys()) - co_citation = [] - for i, bcp_a in enumerate(bcp_ids): - drafts_a = set(bcp_citations[bcp_a]) - for j, bcp_b in enumerate(bcp_ids): - if j <= i: - continue - drafts_b = set(bcp_citations[bcp_b]) - shared = len(drafts_a & drafts_b) - if shared > 0: - co_citation.append({ - "bcp_a": bcp_a, - "bcp_b": bcp_b, - "count": shared, - }) - - # Heatmap data: full matrix for all BCPs (top 20 by citation count) - 
top_bcp_ids = [b["bcp_id"] for b in bcps[:20]] - heatmap_matrix = [] - for bcp_a in top_bcp_ids: - row = [] - drafts_a = set(bcp_citations.get(bcp_a, [])) - for bcp_b in top_bcp_ids: - drafts_b = set(bcp_citations.get(bcp_b, [])) - shared = len(drafts_a & drafts_b) - row.append(shared) - heatmap_matrix.append(row) - - # BCP citations by category - cat_bcp_count: dict[str, Counter] = defaultdict(Counter) - for draft_name, bcp_list in draft_bcps.items(): - cat = draft_cats.get(draft_name, "Other") - for bcp_id in bcp_list: - cat_bcp_count[cat][bcp_id] += 1 - - by_category = [] - for cat in sorted(cat_bcp_count.keys()): - top_bcps = cat_bcp_count[cat].most_common(5) - by_category.append({ - "category": cat, - "total_bcp_refs": sum(cat_bcp_count[cat].values()), - "unique_bcps": len(cat_bcp_count[cat]), - "top_bcps": [{"bcp_id": bid, "count": c} for bid, c in top_bcps], - }) - by_category.sort(key=lambda x: x["total_bcp_refs"], reverse=True) - - # Coverage - drafts_with_bcp = len(draft_bcps) - coverage_pct = (drafts_with_bcp / total_drafts * 100) if total_drafts > 0 else 0 - - return { - "bcps": bcps, - "co_citation": co_citation, - "heatmap_labels": top_bcp_ids, - "heatmap_matrix": heatmap_matrix, - "by_category": by_category, - "coverage": { - "total_drafts": total_drafts, - "drafts_with_bcp": drafts_with_bcp, - "coverage_pct": round(coverage_pct, 1), - "unique_bcps": len(bcp_citations), - "total_bcp_refs": len(bcp_rows), - }, - } - - -def global_search(db: Database, query: str) -> SearchResults: - """Search across drafts (FTS5), ideas, authors, and gaps. - - Returns {drafts: [...], ideas: [...], authors: [...], gaps: [...]}. - """ - results: dict = {"drafts": [], "ideas": [], "authors": [], "gaps": []} - if not query or not query.strip(): - return results - - q = query.strip() - - # 1. 
Drafts via FTS5 - try: - fts_query = re.sub(r'[^\w\s]', '', q) - fts_query = re.sub(r'\b(NEAR|OR|AND|NOT)\b', '', fts_query, flags=re.IGNORECASE) - fts_query = re.sub(r'\s+', ' ', fts_query).strip() - if not fts_query: - raise ValueError("empty query after sanitization") - rows = db.conn.execute( - """SELECT d.name, d.title, d.abstract, d.time, d."group" - FROM drafts d - JOIN drafts_fts f ON d.rowid = f.rowid - WHERE drafts_fts MATCH ? - ORDER BY rank - LIMIT 50""", - (fts_query,), - ).fetchall() - for r in rows: - results["drafts"].append({ - "name": r["name"], - "title": r["title"], - "abstract": (r["abstract"] or "")[:200], - "date": r["time"], - "group": r["group"] or "individual", - }) - except Exception: - # FTS5 match can fail on certain query syntax; fall back to LIKE - like = f"%{q}%" - rows = db.conn.execute( - """SELECT name, title, abstract, time, "group" FROM drafts - WHERE title LIKE ? OR name LIKE ? OR abstract LIKE ? - LIMIT 50""", - (like, like, like), - ).fetchall() - for r in rows: - results["drafts"].append({ - "name": r["name"], - "title": r["title"], - "abstract": (r["abstract"] or "")[:200], - "date": r["time"], - "group": r["group"] or "individual", - }) - - # 2. Ideas via LIKE - like = f"%{q}%" - rows = db.conn.execute( - """SELECT id, title, description, idea_type, draft_name FROM ideas - WHERE (title LIKE ? OR description LIKE ?) - AND draft_name NOT IN (SELECT draft_name FROM ratings WHERE false_positive = 1) - ORDER BY id LIMIT 50""", - (like, like), - ).fetchall() - for r in rows: - results["ideas"].append({ - "id": r["id"], - "title": r["title"], - "description": (r["description"] or "")[:200], - "type": r["idea_type"], - "draft_name": r["draft_name"], - }) - - # 3. Authors via LIKE - results["authors"] = db.search_authors(q, limit=50) - - # 4. 
Gaps via LIKE - results["gaps"] = db.search_gaps(q, limit=50) - - return results - - -def get_landscape_tsne(db: Database) -> list[dict]: - """Compute t-SNE (cached for 5 min).""" - return _cached("landscape_tsne", lambda: _compute_landscape_tsne(db)) - - -def _compute_landscape_tsne(db: Database) -> list[dict]: - """Compute t-SNE from embeddings, return [{name, title, x, y, category, score}].""" - - - embeddings = db.all_embeddings() - if len(embeddings) < 5: - return [] - - pairs = db.drafts_with_ratings(limit=1000) - rating_map = {d.name: r for d, r in pairs} - draft_map = {d.name: d for d, _ in pairs} - - # Filter to drafts that have both embeddings and ratings - names = [n for n in embeddings if n in rating_map] - if len(names) < 5: - return [] - - matrix = np.array([embeddings[n] for n in names]) - - try: - tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1), - random_state=42, max_iter=500) - coords = tsne.fit_transform(matrix) - except Exception: - return [] - - result = [] - for i, name in enumerate(names): - r = rating_map[name] - d = draft_map.get(name) - result.append({ - "name": name, - "title": d.title if d else name, - "x": round(float(coords[i, 0]), 3), - "y": round(float(coords[i, 1]), 3), - "category": r.categories[0] if r.categories else "Other", - "score": round(r.composite_score, 2), - }) - return result - - -def get_comparison_data(db: Database, names: list[str]) -> dict | None: - """Get comparison data for a list of drafts. 
- - Returns { - drafts: [{name, title, abstract, rating, ideas, refs, ...}], - shared_ideas: [{title, drafts: [name,...]}], - unique_ideas: {name: [{title, description}]}, - shared_refs: [{type, id, drafts: [name,...]}], - unique_refs: {name: [{type, id}]}, - similarities: [{a, b, similarity}], - comparison_text: str | None, - } - """ - - - drafts_data = [] - all_ideas: dict[str, list[dict]] = {} - all_refs: dict[str, list[tuple[str, str]]] = {} - - for name in names: - detail = get_draft_detail(db, name) - if not detail: - continue - drafts_data.append(detail) - all_ideas[name] = detail.get("ideas", []) - all_refs[name] = [(r["type"], r["id"]) for r in detail.get("refs", [])] - - if len(drafts_data) < 2: - return None - - # Find shared vs unique ideas (by title similarity) - idea_title_drafts: dict[str, list[str]] = {} - for name, ideas in all_ideas.items(): - for idea in ideas: - title_lower = idea["title"].lower().strip() - if title_lower not in idea_title_drafts: - idea_title_drafts[title_lower] = [] - idea_title_drafts[title_lower].append(name) - - shared_ideas = [ - {"title": title, "drafts": draft_list} - for title, draft_list in idea_title_drafts.items() - if len(set(draft_list)) > 1 - ] - unique_ideas: dict[str, list[dict]] = {} - for name, ideas in all_ideas.items(): - unique = [] - for idea in ideas: - title_lower = idea["title"].lower().strip() - if len(set(idea_title_drafts.get(title_lower, []))) <= 1: - unique.append({"title": idea["title"], "description": idea.get("description", "")}) - unique_ideas[name] = unique - - # Find shared vs unique references - ref_drafts: dict[tuple[str, str], list[str]] = {} - for name, refs in all_refs.items(): - for ref in refs: - if ref not in ref_drafts: - ref_drafts[ref] = [] - ref_drafts[ref].append(name) - - shared_refs = [ - {"type": ref[0], "id": ref[1], "drafts": draft_list} - for ref, draft_list in ref_drafts.items() - if len(set(draft_list)) > 1 - ] - unique_refs: dict[str, list[dict]] = {} - for name, refs in 
all_refs.items(): - unique = [] - for ref in refs: - if len(set(ref_drafts.get(ref, []))) <= 1: - unique.append({"type": ref[0], "id": ref[1]}) - unique_refs[name] = unique - - # Pairwise embedding similarities - embeddings = db.all_embeddings() - similarities = [] - valid_names = [d["name"] for d in drafts_data] - for i in range(len(valid_names)): - for j in range(i + 1, len(valid_names)): - a, b = valid_names[i], valid_names[j] - if a in embeddings and b in embeddings: - vec_a = embeddings[a] - vec_b = embeddings[b] - dot = np.dot(vec_a, vec_b) - norm = np.linalg.norm(vec_a) * np.linalg.norm(vec_b) - sim = float(dot / norm) if norm > 0 else 0.0 - similarities.append({"a": a, "b": b, "similarity": round(sim, 4)}) - - return { - "drafts": drafts_data, - "shared_ideas": shared_ideas, - "unique_ideas": unique_ideas, - "shared_refs": shared_refs, - "unique_refs": unique_refs, - "similarities": similarities, - "comparison_text": None, - } - - -def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict: - """Search-only (free) — returns sources + cached answer if available.""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.search_only(question, top_k=top_k) - - -def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict: - """Run Claude synthesis (costs tokens, result is cached permanently).""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.ask(question, top_k=top_k, cheap=cheap) - - -SAFETY_CATEGORIES = {"AI safety/alignment", "Agent identity/auth", "Policy/governance"} -CAPABILITY_CATEGORIES = {"A2A protocols", "Agent discovery/reg", "Autonomous netops", - "Data formats/interop", "Human-agent interaction", "Model serving/inference"} - - -def get_trends_data(db: Database) -> dict: - """Return temporal evolution data for the /trends page. - - Returns dict with: - - monthly_submissions: [{month, source, count}, ...] 
- - monthly_ratings: [{month, novelty, maturity, overlap, momentum, relevance}, ...] - - monthly_categories: [{month, category, count}, ...] - - safety_ratio: [{month, safety, capability, ratio}, ...] - - cumulative_ideas: [{month, total}, ...] - - monthly_new_authors: [{month, count}, ...] - - stats: {fastest_growing, newest_active} - - monthly_table: [{month, total, sources: {}, avg_score}, ...] - """ - conn = db.conn - - # 1. Monthly submissions by source - rows = conn.execute(""" - SELECT substr(time, 1, 7) AS month, source, COUNT(*) AS cnt - FROM drafts - WHERE time IS NOT NULL AND time != '' - GROUP BY month, source - ORDER BY month - """).fetchall() - monthly_submissions = [{"month": r["month"], "source": r["source"], "count": r["cnt"]} for r in rows] - - # 2. Monthly average ratings (all 5 dimensions) - rows = conn.execute(""" - SELECT substr(d.time, 1, 7) AS month, - AVG(r.novelty) AS novelty, AVG(r.maturity) AS maturity, - AVG(r.overlap) AS overlap, AVG(r.momentum) AS momentum, - AVG(r.relevance) AS relevance, - COUNT(*) AS cnt - FROM drafts d - JOIN ratings r ON d.name = r.draft_name - WHERE d.time IS NOT NULL AND d.time != '' AND r.false_positive = 0 - GROUP BY month - ORDER BY month - """).fetchall() - monthly_ratings = [{ - "month": r["month"], - "novelty": round(r["novelty"], 2), - "maturity": round(r["maturity"], 2), - "overlap": round(r["overlap"], 2), - "momentum": round(r["momentum"], 2), - "relevance": round(r["relevance"], 2), - "count": r["cnt"], - } for r in rows] - - # 3. 
Monthly category distribution - rows = conn.execute(""" - SELECT substr(d.time, 1, 7) AS month, r.categories - FROM drafts d - JOIN ratings r ON d.name = r.draft_name - WHERE d.time IS NOT NULL AND d.time != '' AND r.false_positive = 0 - """).fetchall() - cat_monthly: dict[str, Counter] = defaultdict(Counter) - all_cats: Counter = Counter() - for r in rows: - month = r["month"] - try: - cats = json.loads(r["categories"]) if r["categories"] else [] - except (json.JSONDecodeError, TypeError): - cats = [] - for c in cats: - cat_monthly[month][c] += 1 - all_cats[c] += 1 - - # Top 8 categories - top_cats = [c for c, _ in all_cats.most_common(8)] - months_sorted = sorted(cat_monthly.keys()) - monthly_categories = [] - for month in months_sorted: - for cat in top_cats: - monthly_categories.append({ - "month": month, - "category": cat, - "count": cat_monthly[month].get(cat, 0), - }) - - # 4. Safety ratio over time - safety_ratio = [] - for month in months_sorted: - safety = sum(cat_monthly[month].get(c, 0) for c in SAFETY_CATEGORIES) - capability = sum(cat_monthly[month].get(c, 0) for c in CAPABILITY_CATEGORIES) - ratio = round(safety / capability, 2) if capability > 0 else 0 - safety_ratio.append({ - "month": month, - "safety": safety, - "capability": capability, - "ratio": ratio, - }) - - # 5. Cumulative idea count over time - rows = conn.execute(""" - SELECT substr(d.time, 1, 7) AS month, COUNT(i.id) AS cnt - FROM ideas i - JOIN drafts d ON i.draft_name = d.name - WHERE d.time IS NOT NULL AND d.time != '' - GROUP BY month - ORDER BY month - """).fetchall() - cumulative = 0 - cumulative_ideas = [] - for r in rows: - cumulative += r["cnt"] - cumulative_ideas.append({"month": r["month"], "total": cumulative}) - - # 6. 
Monthly new author count (first-time contributors) - rows = conn.execute(""" - SELECT da.person_id, MIN(substr(d.time, 1, 7)) AS first_month - FROM draft_authors da - JOIN drafts d ON da.draft_name = d.name - WHERE d.time IS NOT NULL AND d.time != '' - GROUP BY da.person_id - """).fetchall() - new_author_monthly: Counter = Counter() - for r in rows: - if r["first_month"]: - new_author_monthly[r["first_month"]] += 1 - monthly_new_authors = [ - {"month": m, "count": new_author_monthly.get(m, 0)} - for m in months_sorted - ] - - # 7. Stats: fastest growing category, newest active category - fastest_growing = "" - newest_active = "" - if len(months_sorted) >= 4: - mid = len(months_sorted) // 2 - early_months = months_sorted[:mid] - late_months = months_sorted[mid:] - best_growth = -999 - for cat in top_cats: - early = sum(cat_monthly[m].get(cat, 0) for m in early_months) - late = sum(cat_monthly[m].get(cat, 0) for m in late_months) - if early > 0: - growth = (late - early) / early - elif late > 0: - growth = float("inf") - else: - growth = 0 - if growth > best_growth: - best_growth = growth - fastest_growing = cat - - # Newest active: category with latest first appearance - cat_first_month: dict[str, str] = {} - for month in months_sorted: - for cat in all_cats: - if cat not in cat_first_month and cat_monthly[month].get(cat, 0) > 0: - cat_first_month[cat] = month - if cat_first_month: - newest_active = max(cat_first_month, key=lambda c: cat_first_month[c]) - - # 8. Monthly breakdown table - monthly_table = [] - for month in months_sorted: - # Get per-source counts - sources: dict[str, int] = {} - total = 0 - for s in monthly_submissions: - if s["month"] == month: - sources[s["source"]] = s["count"] - total += s["count"] - # Get avg score - avg_row = conn.execute(""" - SELECT AVG((r.novelty + r.maturity + r.overlap + r.momentum + r.relevance) / 5.0) AS avg_score - FROM drafts d JOIN ratings r ON d.name = r.draft_name - WHERE substr(d.time, 1, 7) = ? 
AND r.false_positive = 0 - """, (month,)).fetchone() - avg_score = round(avg_row["avg_score"], 2) if avg_row and avg_row["avg_score"] else 0 - monthly_table.append({ - "month": month, - "total": total, - "sources": sources, - "avg_score": avg_score, - }) - - return { - "monthly_submissions": monthly_submissions, - "monthly_ratings": monthly_ratings, - "monthly_categories": monthly_categories, - "safety_ratio": safety_ratio, - "cumulative_ideas": cumulative_ideas, - "monthly_new_authors": monthly_new_authors, - "top_categories": top_cats, - "months": months_sorted, - "stats": { - "fastest_growing": fastest_growing, - "newest_active": newest_active, - }, - "monthly_table": monthly_table, - } - - -# --------------------------------------------------------------------------- -# Draft Complexity Matrix -# --------------------------------------------------------------------------- - - -def get_complexity_data(db: Database) -> dict: - """Return draft complexity analysis data for the /complexity page. - - For each rated draft, compute structural complexity metrics and - correlate with rating dimensions. - - Returns dict with: - - drafts: [{name, title, pages, author_count, citation_count, idea_count, - category_count, novelty, maturity, overlap, momentum, relevance, - score, composite_complexity}, ...] - - correlations: {metric: {dimension: r_value}} - - top_complex: top 10 most complex drafts - - top_efficient: top 10 high-rating low-complexity drafts - - stats: {avg_pages, avg_authors, avg_citations, pages_coverage_pct} - - category_complexity: [{category, avg_pages, avg_authors, avg_citations, count}, ...] - - source_complexity: [{source, avg_pages, avg_authors, avg_citations, count}, ...] 
- """ - conn = db.conn - - # Build per-draft complexity data - rows = conn.execute(""" - SELECT d.name, d.title, d.pages, d.source, - r.novelty, r.maturity, r.overlap, r.momentum, r.relevance, - r.categories, - (r.novelty + r.maturity + r.overlap + r.momentum + r.relevance) / 5.0 AS score - FROM drafts d - JOIN ratings r ON d.name = r.draft_name - WHERE r.false_positive = 0 - """).fetchall() - - # Author counts - author_counts = db.draft_author_count_map() - - # Citation counts (outgoing refs) - citation_counts = {} - for row in conn.execute(""" - SELECT draft_name, COUNT(*) AS cnt FROM draft_refs GROUP BY draft_name - """).fetchall(): - citation_counts[row["draft_name"]] = row["cnt"] - - # Idea counts - idea_counts = {} - for row in conn.execute(""" - SELECT draft_name, COUNT(*) AS cnt FROM ideas GROUP BY draft_name - """).fetchall(): - idea_counts[row["draft_name"]] = row["cnt"] - - drafts_data = [] - total_with_pages = 0 - total_drafts = 0 - for r in rows: - total_drafts += 1 - pages = r["pages"] - if pages is not None: - total_with_pages += 1 - try: - cats = json.loads(r["categories"]) if r["categories"] else [] - except (json.JSONDecodeError, TypeError): - cats = [] - ac = author_counts.get(r["name"], 0) - cc = citation_counts.get(r["name"], 0) - ic = idea_counts.get(r["name"], 0) - cat_count = len(cats) - # Composite complexity: normalize each metric to 0-1 scale and average - # (raw values stored; composite calculated after we know max values) - drafts_data.append({ - "name": r["name"], - "title": r["title"], - "pages": pages, - "source": r["source"] or "ietf", - "author_count": ac, - "citation_count": cc, - "idea_count": ic, - "category_count": cat_count, - "categories": cats, - "novelty": r["novelty"], - "maturity": r["maturity"], - "overlap": r["overlap"], - "momentum": r["momentum"], - "relevance": r["relevance"], - "score": round(r["score"], 2), - }) - - # Compute composite complexity score (normalized 0-1 each, then averaged) - max_pages = 
max((d["pages"] for d in drafts_data if d["pages"] is not None), default=1) or 1 - max_authors = max((d["author_count"] for d in drafts_data), default=1) or 1 - max_citations = max((d["citation_count"] for d in drafts_data), default=1) or 1 - max_ideas = max((d["idea_count"] for d in drafts_data), default=1) or 1 - - for d in drafts_data: - p = (d["pages"] / max_pages) if d["pages"] is not None else 0.3 # default to median-ish - a = d["author_count"] / max_authors - c = d["citation_count"] / max_citations - i = d["idea_count"] / max_ideas - d["composite_complexity"] = round((p + a + c + i) / 4, 3) - - # Correlation matrix: complexity metrics vs rating dimensions - metrics = ["pages", "author_count", "citation_count", "idea_count", "category_count"] - dimensions = ["novelty", "maturity", "overlap", "momentum", "relevance"] - - def _pearson(xs: list[float], ys: list[float]) -> float: - """Compute Pearson correlation coefficient.""" - n = len(xs) - if n < 3: - return 0.0 - mean_x = sum(xs) / n - mean_y = sum(ys) / n - cov = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)) - std_x = (sum((x - mean_x) ** 2 for x in xs)) ** 0.5 - std_y = (sum((y - mean_y) ** 2 for y in ys)) ** 0.5 - if std_x == 0 or std_y == 0: - return 0.0 - return round(cov / (std_x * std_y), 3) - - correlations: dict[str, dict[str, float]] = {} - for metric in metrics: - correlations[metric] = {} - for dim in dimensions: - if metric == "pages": - # Filter to drafts with pages data - pairs = [(d[metric], d[dim]) for d in drafts_data if d[metric] is not None] - else: - pairs = [(d[metric], d[dim]) for d in drafts_data] - if len(pairs) >= 3: - xs, ys = zip(*pairs) - correlations[metric][dim] = _pearson(list(xs), list(ys)) - else: - correlations[metric][dim] = 0.0 - - # Top 10 most complex - sorted_by_complexity = sorted(drafts_data, key=lambda d: d["composite_complexity"], reverse=True) - top_complex = sorted_by_complexity[:10] - - # Top 10 efficient: high score but low complexity - # Efficiency 
= score / (composite_complexity + 0.1) (avoid div by zero) - for d in drafts_data: - d["efficiency"] = round(d["score"] / (d["composite_complexity"] + 0.1), 2) - sorted_by_efficiency = sorted(drafts_data, key=lambda d: d["efficiency"], reverse=True) - top_efficient = sorted_by_efficiency[:10] - - # Stats - pages_vals = [d["pages"] for d in drafts_data if d["pages"] is not None] - avg_pages = round(sum(pages_vals) / len(pages_vals), 1) if pages_vals else 0 - avg_authors = round(sum(d["author_count"] for d in drafts_data) / len(drafts_data), 1) if drafts_data else 0 - avg_citations = round(sum(d["citation_count"] for d in drafts_data) / len(drafts_data), 1) if drafts_data else 0 - pages_coverage = round(total_with_pages / total_drafts * 100, 1) if total_drafts else 0 - - # Category complexity averages - cat_data: dict[str, list[dict]] = defaultdict(list) - for d in drafts_data: - for cat in d.get("categories", []): - cat_data[cat].append(d) - - category_complexity = [] - for cat, ds in sorted(cat_data.items(), key=lambda x: -len(x[1])): - p_vals = [d["pages"] for d in ds if d["pages"] is not None] - category_complexity.append({ - "category": cat, - "avg_pages": round(sum(p_vals) / len(p_vals), 1) if p_vals else 0, - "avg_authors": round(sum(d["author_count"] for d in ds) / len(ds), 1), - "avg_citations": round(sum(d["citation_count"] for d in ds) / len(ds), 1), - "avg_score": round(sum(d["score"] for d in ds) / len(ds), 2), - "count": len(ds), - }) - - # Source complexity - source_data: dict[str, list[dict]] = defaultdict(list) - for d in drafts_data: - source_data[d["source"]].append(d) - - source_complexity = [] - for src, ds in sorted(source_data.items(), key=lambda x: -len(x[1])): - p_vals = [d["pages"] for d in ds if d["pages"] is not None] - source_complexity.append({ - "source": src, - "avg_pages": round(sum(p_vals) / len(p_vals), 1) if p_vals else 0, - "avg_authors": round(sum(d["author_count"] for d in ds) / len(ds), 1), - "avg_citations": 
round(sum(d["citation_count"] for d in ds) / len(ds), 1), - "avg_score": round(sum(d["score"] for d in ds) / len(ds), 2), - "count": len(ds), - }) - - return { - "drafts": drafts_data, - "correlations": correlations, - "metrics": metrics, - "dimensions": dimensions, - "top_complex": top_complex, - "top_efficient": top_efficient, - "stats": { - "avg_pages": avg_pages, - "avg_authors": avg_authors, - "avg_citations": avg_citations, - "pages_coverage_pct": pages_coverage, - "total_drafts": total_drafts, - }, - "category_complexity": category_complexity, - "source_complexity": source_complexity, - } - - -# ── Additional Analysis Functions ──────────────────────────────────── - -def get_idea_analysis(db: Database) -> dict: - """Return comprehensive idea analysis data for the idea-analysis page. - - Includes novelty distribution, type breakdown with avg novelty, - top novel ideas, ideas-per-draft distribution, cross-tab of type x source, - shared ideas across drafts, and idea novelty vs draft rating correlation. 
- """ - from collections import Counter, defaultdict - from difflib import SequenceMatcher - - # Fetch raw data - all_ideas = db.conn.execute( - """SELECT i.id, i.draft_name, i.title, i.description, i.idea_type, - i.novelty_score - FROM ideas i ORDER BY i.novelty_score DESC NULLS LAST""" - ).fetchall() - all_ideas = [dict(r) for r in all_ideas] - - # Draft ratings lookup - ratings_rows = db.conn.execute( - """SELECT d.name, d.title as draft_title, d.source, - r.novelty AS r_novelty, r.maturity, r.overlap, r.momentum, r.relevance - FROM drafts d LEFT JOIN ratings r ON d.name = r.draft_name""" - ).fetchall() - draft_info = {} - for r in ratings_rows: - row = dict(r) - # Compute composite score (average of 5 dimensions) - dims = [row.get("r_novelty"), row.get("maturity"), row.get("overlap"), - row.get("momentum"), row.get("relevance")] - valid = [d for d in dims if d is not None] - row["composite_score"] = sum(valid) / len(valid) if valid else None - draft_info[row["name"]] = row - - total = len(all_ideas) - scored = [i for i in all_ideas if i.get("novelty_score") is not None] - unscored = total - len(scored) - avg_novelty = sum(i["novelty_score"] for i in scored) / len(scored) if scored else 0 - - # Embedding coverage - embed_count = db.conn.execute("SELECT COUNT(*) FROM idea_embeddings").fetchone()[0] - - # --- Novelty score distribution (histogram) --- - novelty_dist = Counter(i["novelty_score"] for i in scored) - novelty_histogram = { - "labels": [1, 2, 3, 4, 5], - "values": [novelty_dist.get(s, 0) for s in [1, 2, 3, 4, 5]], - } - - # --- Ideas by type with counts and avg novelty --- - type_data = defaultdict(lambda: {"count": 0, "novelty_sum": 0, "novelty_n": 0}) - for idea in all_ideas: - t = idea.get("idea_type") or "other" - type_data[t]["count"] += 1 - if idea.get("novelty_score") is not None: - type_data[t]["novelty_sum"] += idea["novelty_score"] - type_data[t]["novelty_n"] += 1 - - by_type = [] - for t, d in sorted(type_data.items(), key=lambda x: 
x[1]["count"], reverse=True): - avg = d["novelty_sum"] / d["novelty_n"] if d["novelty_n"] > 0 else 0 - by_type.append({"type": t, "count": d["count"], "avg_novelty": round(avg, 2)}) - - type_names = [t["type"] for t in by_type] - - # --- Top 20 most novel ideas (score 4-5) --- - top_novel = [] - for idea in all_ideas: - if idea.get("novelty_score") and idea["novelty_score"] >= 4: - di = draft_info.get(idea["draft_name"], {}) - top_novel.append({ - "title": idea["title"], - "description": idea["description"], - "type": idea.get("idea_type", "other"), - "novelty_score": idea["novelty_score"], - "draft_name": idea["draft_name"], - "draft_title": di.get("draft_title", ""), - "draft_score": di.get("composite_score"), - }) - top_novel.sort(key=lambda x: (x["novelty_score"], x.get("draft_score") or 0), reverse=True) - top_novel = top_novel[:20] - - # --- Ideas per draft distribution --- - ideas_per_draft = Counter(i["draft_name"] for i in all_ideas) - ipd_dist = Counter(ideas_per_draft.values()) - ideas_per_draft_hist = { - "labels": sorted(ipd_dist.keys()), - "values": [ipd_dist[k] for k in sorted(ipd_dist.keys())], - } - # Also top drafts by idea count - top_idea_drafts = [] - for name, count in ideas_per_draft.most_common(10): - di = draft_info.get(name, {}) - top_idea_drafts.append({ - "name": name, - "draft_title": di.get("draft_title", ""), - "idea_count": count, - "score": di.get("composite_score"), - }) - - # --- Cross-tabulation: idea_type x source --- - type_source = defaultdict(lambda: defaultdict(int)) - for idea in all_ideas: - t = idea.get("idea_type") or "other" - di = draft_info.get(idea["draft_name"], {}) - source = di.get("source", "ietf") or "ietf" - type_source[t][source] += 1 - - sources = sorted(set( - di.get("source", "ietf") or "ietf" for di in draft_info.values() - )) - cross_tab = [] - for t in type_names: - row = {"type": t} - for s in sources: - row[s] = type_source[t].get(s, 0) - cross_tab.append(row) - - # --- Shared ideas across drafts --- - 
idea_groups: list[dict] = [] - for idea in all_ideas: - title_lower = idea["title"].lower().strip() - matched = False - for group in idea_groups: - ratio = SequenceMatcher(None, title_lower, group["canonical"]).ratio() - if ratio >= 0.75: - group["ideas"].append(idea) - group["drafts"].add(idea["draft_name"]) - matched = True - break - if not matched: - idea_groups.append({ - "canonical": title_lower, - "title": idea["title"], - "ideas": [idea], - "drafts": {idea["draft_name"]}, - }) - - shared_ideas = [] - for g in sorted(idea_groups, key=lambda x: len(x["drafts"]), reverse=True): - if len(g["drafts"]) < 2: - break - shared_ideas.append({ - "title": g["title"], - "appearances": len(g["drafts"]), - "drafts": sorted(g["drafts"])[:8], - "types": list(set(i.get("idea_type", "other") for i in g["ideas"])), - }) - - # --- Scatter: draft avg idea novelty vs draft relevance --- - draft_idea_novelty = defaultdict(list) - for idea in scored: - draft_idea_novelty[idea["draft_name"]].append(idea["novelty_score"]) - - scatter_data = [] - for name, scores in draft_idea_novelty.items(): - di = draft_info.get(name, {}) - if di.get("relevance") is not None and di.get("composite_score") is not None: - scatter_data.append({ - "name": name, - "avg_idea_novelty": round(sum(scores) / len(scores), 2), - "relevance": di["relevance"], - "score": di["composite_score"], - "idea_count": len(scores), - "source": di.get("source", "ietf") or "ietf", - }) - - # --- Sunburst data: type -> novelty band --- - sunburst_labels = [] - sunburst_parents = [] - sunburst_values = [] - # Root - sunburst_labels.append("All Ideas") - sunburst_parents.append("") - sunburst_values.append(total) - - novelty_bands = {"High (4-5)": lambda s: s is not None and s >= 4, - "Medium (3)": lambda s: s is not None and s == 3, - "Low (1-2)": lambda s: s is not None and s <= 2, - "Unscored": lambda s: s is None} - - for t_info in by_type: - t = t_info["type"] - sunburst_labels.append(t) - sunburst_parents.append("All 
Ideas") - sunburst_values.append(t_info["count"]) - # Sub-bands - type_ideas = [i for i in all_ideas if (i.get("idea_type") or "other") == t] - for band, fn in novelty_bands.items(): - cnt = sum(1 for i in type_ideas if fn(i.get("novelty_score"))) - if cnt > 0: - sunburst_labels.append(f"{t} - {band}") - sunburst_parents.append(t) - sunburst_values.append(cnt) - - return { - "total": total, - "scored": len(scored), - "unscored": unscored, - "avg_novelty": round(avg_novelty, 2), - "embed_count": embed_count, - "embed_pct": round(embed_count / total * 100, 1) if total > 0 else 0, - "type_count": len(by_type), - "novelty_histogram": novelty_histogram, - "by_type": by_type, - "top_novel": top_novel, - "ideas_per_draft_hist": ideas_per_draft_hist, - "top_idea_drafts": top_idea_drafts, - "cross_tab": cross_tab, - "sources": sources, - "shared_ideas": shared_ideas, - "scatter_data": scatter_data, - "sunburst": { - "labels": sunburst_labels, - "parents": sunburst_parents, - "values": sunburst_values, - }, - } - - - - -def get_source_comparison(db: Database) -> dict: - """Cross-source comparison: ratings, categories, counts by standards body.""" - pairs_all = db.drafts_with_ratings(limit=2000) - # Also include false positives for completeness of source counts - pairs_fp = db.drafts_with_ratings(limit=2000, include_false_positives=True) - - # Build per-source data - source_stats: dict[str, dict] = {} - source_categories: dict[str, Counter] = defaultdict(Counter) - source_ratings: dict[str, dict[str, list]] = defaultdict(lambda: { - "novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": [], "scores": [], - }) - # Collect author counts per source - all_authors_by_source: dict[str, set] = defaultdict(set) - - for draft, rating in pairs_all: - src = getattr(draft, "source", "ietf") or "ietf" - source_ratings[src]["novelty"].append(rating.novelty) - source_ratings[src]["maturity"].append(rating.maturity) - source_ratings[src]["overlap"].append(rating.overlap) 
- source_ratings[src]["momentum"].append(rating.momentum) - source_ratings[src]["relevance"].append(rating.relevance) - source_ratings[src]["scores"].append(round(rating.composite_score, 2)) - for cat in rating.categories: - source_categories[src][cat] += 1 - - # Get all drafts (including unrated) for draft counts - all_drafts = db.list_drafts(limit=5000) - source_draft_counts: Counter = Counter() - for d in all_drafts: - src = getattr(d, "source", "ietf") or "ietf" - source_draft_counts[src] += 1 - - # Author counts by source - try: - rows = db.conn.execute( - """SELECT d.source, COUNT(DISTINCT da.person_id) as author_count - FROM drafts d - JOIN draft_authors da ON d.name = da.draft_name - GROUP BY d.source""" - ).fetchall() - for r in rows: - src = r["source"] or "ietf" - all_authors_by_source[src] = r["author_count"] - except Exception: - pass - - # Idea counts by source - source_idea_counts: Counter = Counter() - try: - rows = db.conn.execute( - """SELECT d.source, COUNT(*) as idea_count - FROM ideas i - JOIN drafts d ON i.draft_name = d.name - GROUP BY d.source""" - ).fetchall() - for r in rows: - src = r["source"] or "ietf" - source_idea_counts[src] = r["idea_count"] - except Exception: - pass - - # Build summary table - all_sources = sorted(set(source_draft_counts.keys()) | set(source_ratings.keys())) - summary = [] - for src in all_sources: - rats = source_ratings.get(src, {"scores": []}) - cats = source_categories.get(src, Counter()) - top_cat = cats.most_common(1)[0][0] if cats else "N/A" - avg_score = round(sum(rats["scores"]) / len(rats["scores"]), 2) if rats["scores"] else 0.0 - summary.append({ - "source": src, - "drafts": source_draft_counts.get(src, 0), - "rated": len(rats["scores"]), - "authors": all_authors_by_source.get(src, 0), - "ideas": source_idea_counts.get(src, 0), - "avg_score": avg_score, - "top_category": top_cat, - }) - - # Radar data: average of each dimension per source - radar = {} - for src, rats in source_ratings.items(): - if not 
rats["scores"]: - continue - n = len(rats["scores"]) - radar[src] = { - "novelty": round(sum(rats["novelty"]) / n, 2), - "maturity": round(sum(rats["maturity"]) / n, 2), - "overlap": round(sum(rats["overlap"]) / n, 2), - "momentum": round(sum(rats["momentum"]) / n, 2), - "relevance": round(sum(rats["relevance"]) / n, 2), - "count": n, - } - - # Category distribution by source (for stacked bar / heatmap) - all_cats = sorted({cat for cats in source_categories.values() for cat in cats}) - heatmap = { - "sources": list(source_categories.keys()), - "categories": all_cats, - "values": [], - } - for src in heatmap["sources"]: - row = [source_categories[src].get(cat, 0) for cat in all_cats] - heatmap["values"].append(row) - - # Unique/shared categories analysis - source_cat_sets = {src: set(cats.keys()) for src, cats in source_categories.items()} - unique_cats = {} - for src, cats in source_cat_sets.items(): - others = set() - for s2, c2 in source_cat_sets.items(): - if s2 != src: - others |= c2 - unique_cats[src] = sorted(cats - others) - - shared_cats = set() - for src, cats in source_cat_sets.items(): - for s2, c2 in source_cat_sets.items(): - if s2 != src: - shared_cats |= (cats & c2) - shared_cats = sorted(shared_cats) - - return { - "summary": summary, - "radar": radar, - "heatmap": heatmap, - "unique_categories": unique_cats, - "shared_categories": shared_cats, - } - - -def get_false_positive_profile(db: Database) -> dict: - """Profile drafts flagged as false positives.""" - # Get false positives - fp_rows = db.false_positive_drafts_raw() - - # Get non-FP rated drafts for comparison - nonfp_rows = db.non_false_positive_ratings_raw() - - total_rated = db.rated_count() - total_drafts = db.count_drafts(include_false_positives=True) - - # Build FP list - fp_list = [] - fp_categories: Counter = Counter() - fp_sources: Counter = Counter() - fp_dims = {"novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": []} - - for row in fp_rows: - cats = 
json.loads(row["r_categories"]) if row["r_categories"] else [] - src = row["source"] or "ietf" - fp_list.append({ - "name": row["name"], - "title": row["title"], - "source": src, - "categories": cats, - "relevance": row["relevance"], - "novelty": row["novelty"], - "maturity": row["maturity"], - "overlap": row["overlap"], - "momentum": row["momentum"], - "summary": row["summary"] or "", - }) - for cat in cats: - fp_categories[cat] += 1 - fp_sources[src] += 1 - fp_dims["novelty"].append(row["novelty"]) - fp_dims["maturity"].append(row["maturity"]) - fp_dims["overlap"].append(row["overlap"]) - fp_dims["momentum"].append(row["momentum"]) - fp_dims["relevance"].append(row["relevance"]) - - # Non-FP dimensions for comparison - nonfp_dims = {"novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": []} - nonfp_categories: Counter = Counter() - for row in nonfp_rows: - nonfp_dims["novelty"].append(row["novelty"]) - nonfp_dims["maturity"].append(row["maturity"]) - nonfp_dims["overlap"].append(row["overlap"]) - nonfp_dims["momentum"].append(row["momentum"]) - nonfp_dims["relevance"].append(row["relevance"]) - cats = json.loads(row["r_categories"]) if row["r_categories"] else [] - for cat in cats: - nonfp_categories[cat] += 1 - - # Top terms from FP abstracts - from collections import Counter as _Counter - stop_words = { - "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", - "of", "with", "by", "from", "is", "it", "that", "this", "are", "was", - "be", "as", "can", "may", "will", "not", "has", "have", "been", "which", - "their", "its", "also", "such", "these", "would", "should", "could", - "more", "other", "than", "into", "about", "between", "over", "after", - "all", "one", "two", "new", "they", "we", "our", "each", "some", "any", - "there", "what", "when", "how", "where", "who", "does", "do", "did", - "no", "if", "so", "up", "out", "only", "used", "using", "use", "based", - "through", "both", "well", "within", "must", "while", "had", "were", 
- } - word_counter: Counter = Counter() - for row in fp_rows: - abstract = (row["abstract"] or "").lower() - title = (row["title"] or "").lower() - text = abstract + " " + title - words = re.findall(r'[a-z]{3,}', text) - for w in words: - if w not in stop_words: - word_counter[w] += 1 - top_terms = word_counter.most_common(30) - - return { - "count": len(fp_list), - "total_rated": total_rated, - "total_drafts": total_drafts, - "pct_of_total": round(100 * len(fp_list) / total_drafts, 1) if total_drafts else 0, - "pct_of_rated": round(100 * len(fp_list) / total_rated, 1) if total_rated else 0, - "fp_list": fp_list, - "fp_categories": dict(fp_categories.most_common()), - "fp_sources": dict(fp_sources.most_common()), - "fp_dims": fp_dims, - "nonfp_dims": nonfp_dims, - "top_terms": top_terms, - "nonfp_categories": dict(nonfp_categories.most_common(20)), - } - - -def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict: - """Search-only (free) — returns sources + cached answer if available.""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.search_only(question, top_k=top_k) - - -def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict: - """Run Claude synthesis (costs tokens, result is cached permanently).""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.ask(question, top_k=top_k, cheap=cheap) - - -def get_citation_influence(db: Database) -> dict: - """Return citation influence analysis data (cached for 5 min).""" - return _cached("citation_influence", lambda: _compute_citation_influence(db)) - - -def _compute_citation_influence(db: Database) -> dict: - """Compute citation influence metrics from the draft_refs table. 
- - Returns dict with: - - top_cited_rfcs: top 20 most-cited RFCs with citation counts and citing drafts - - top_citing_drafts: top 20 drafts that cite the most references - - citations_by_category: average citations per category - - stats: total citations, unique RFCs, avg refs per draft - - draft_network: draft-to-draft citation edges for visualization - """ - # Get all references - rows = db.conn.execute( - "SELECT draft_name, ref_type, ref_id FROM draft_refs" - ).fetchall() - - # Get draft titles and categories - draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() - draft_titles = {r["name"]: r["title"] for r in draft_rows} - - rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() - draft_cats: dict[str, str] = {} - for r in rating_rows: - try: - cats = json.loads(r["categories"]) if r["categories"] else [] - draft_cats[r["draft_name"]] = cats[0] if cats else "Other" - except Exception: - draft_cats[r["draft_name"]] = "Other" - - # Well-known RFC names - rfc_names = { - "2119": "Key words (MUST/SHALL/MAY)", "8174": "Key words update", - "8259": "JSON", "7519": "JWT", "6749": "OAuth 2.0", - "7540": "HTTP/2", "9110": "HTTP Semantics", "7525": "TLS Recommendations", - "8446": "TLS 1.3", "3986": "URIs", "7230": "HTTP/1.1 Syntax", - "7231": "HTTP/1.1 Semantics", "8288": "Web Linking", "6125": "TLS Server Identity", - "7515": "JWS", "7516": "JWE", "7517": "JWK", "7518": "JWA", - "9449": "DPoP", "6750": "OAuth Bearer", "8725": "JWT Best Practices", - "9396": "Rich Authorization Requests", "9101": "JAR", - "8414": "OAuth Server Metadata", "7591": "Dynamic Client Registration", - "8705": "mTLS for OAuth", "9068": "JWT Access Tokens", - "6819": "OAuth Threat Model", "9200": "ACE-OAuth", "9052": "COSE", - "8392": "CWT", "7252": "CoAP", - } - - # In-degree: how many times each RFC is cited - rfc_citations: dict[str, list[str]] = defaultdict(list) - draft_out_count: dict[str, int] = Counter() - draft_to_draft_edges = [] 
- total_citations = 0 - - for r in rows: - draft_name = r["draft_name"] - ref_type = r["ref_type"] - ref_id = r["ref_id"] - total_citations += 1 - draft_out_count[draft_name] += 1 - - if ref_type == "rfc": - rfc_citations[ref_id].append(draft_name) - elif ref_type == "draft": - draft_to_draft_edges.append({ - "source": draft_name, - "target": ref_id, - "source_title": draft_titles.get(draft_name, draft_name), - "target_title": draft_titles.get(ref_id, ref_id), - }) - - # Top 20 most-cited RFCs - rfc_sorted = sorted(rfc_citations.items(), key=lambda x: len(x[1]), reverse=True) - top_cited_rfcs = [] - for ref_id, citing_drafts in rfc_sorted[:20]: - top_cited_rfcs.append({ - "rfc_id": ref_id, - "name": rfc_names.get(ref_id, ""), - "count": len(citing_drafts), - "drafts": citing_drafts[:10], # Limit to first 10 for display - "total_drafts": len(citing_drafts), - }) - - # Top 20 most-citing drafts (out-degree) - draft_sorted = sorted(draft_out_count.items(), key=lambda x: x[1], reverse=True) - top_citing_drafts = [] - for draft_name, count in draft_sorted[:20]: - top_citing_drafts.append({ - "name": draft_name, - "title": draft_titles.get(draft_name, draft_name), - "count": count, - "category": draft_cats.get(draft_name, "Other"), - }) - - # Citation density by category - cat_totals: dict[str, int] = Counter() - cat_counts: dict[str, int] = Counter() - for draft_name, count in draft_out_count.items(): - cat = draft_cats.get(draft_name, "Other") - cat_totals[cat] += count - cat_counts[cat] += 1 - - citations_by_category = [] - for cat in sorted(cat_totals.keys()): - avg = cat_totals[cat] / cat_counts[cat] if cat_counts[cat] > 0 else 0 - citations_by_category.append({ - "category": cat, - "total_citations": cat_totals[cat], - "draft_count": cat_counts[cat], - "avg_citations": round(avg, 1), - }) - citations_by_category.sort(key=lambda x: x["avg_citations"], reverse=True) - - # PageRank-style influence: drafts that cite highly-cited RFCs - # Simple approximation: sum of (1 
/ citation_count) for each RFC cited - rfc_influence = {rid: len(drafts) for rid, drafts in rfc_citations.items()} - draft_pagerank: dict[str, float] = Counter() - for r in rows: - if r["ref_type"] == "rfc" and r["ref_id"] in rfc_influence: - # Higher score for citing highly-cited RFCs - draft_pagerank[r["draft_name"]] += rfc_influence[r["ref_id"]] - - pagerank_sorted = sorted(draft_pagerank.items(), key=lambda x: x[1], reverse=True) - top_pagerank = [] - for draft_name, score in pagerank_sorted[:20]: - top_pagerank.append({ - "name": draft_name, - "title": draft_titles.get(draft_name, draft_name), - "score": round(score, 1), - "category": draft_cats.get(draft_name, "Other"), - "out_degree": draft_out_count.get(draft_name, 0), - }) - - # Stats - unique_rfcs = len(rfc_citations) - drafts_with_refs = len(draft_out_count) - avg_refs = total_citations / drafts_with_refs if drafts_with_refs > 0 else 0 - - return { - "top_cited_rfcs": top_cited_rfcs, - "top_citing_drafts": top_citing_drafts, - "top_pagerank": top_pagerank, - "citations_by_category": citations_by_category, - "draft_network": draft_to_draft_edges[:200], # Limit for perf - "stats": { - "total_citations": total_citations, - "unique_rfcs": unique_rfcs, - "drafts_with_refs": drafts_with_refs, - "avg_refs_per_draft": round(avg_refs, 1), - }, - } - - -def get_bcp_analysis(db: Database) -> dict: - """Return BCP dependency analysis data (cached for 5 min).""" - return _cached("bcp_analysis", lambda: _compute_bcp_analysis(db)) - - -def _compute_bcp_analysis(db: Database) -> dict: - """Compute BCP dependency analysis. 
- - Returns dict with: - - bcps: all BCPs with citation counts and citing drafts - - co_citation: which BCPs tend to be co-cited - - by_category: BCP citation patterns by category - - coverage: what % of drafts cite at least one BCP - """ - # Get all BCP references - bcp_rows = db.conn.execute( - "SELECT draft_name, ref_id FROM draft_refs WHERE ref_type = 'bcp'" - ).fetchall() - - # Get draft titles and categories - draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() - draft_titles = {r["name"]: r["title"] for r in draft_rows} - total_drafts = len(draft_titles) - - rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() - draft_cats: dict[str, str] = {} - for r in rating_rows: - try: - cats = json.loads(r["categories"]) if r["categories"] else [] - draft_cats[r["draft_name"]] = cats[0] if cats else "Other" - except Exception: - draft_cats[r["draft_name"]] = "Other" - - # BCP citation counts - bcp_citations: dict[str, list[str]] = defaultdict(list) - draft_bcps: dict[str, list[str]] = defaultdict(list) - - for r in bcp_rows: - bcp_citations[r["ref_id"]].append(r["draft_name"]) - draft_bcps[r["draft_name"]].append(r["ref_id"]) - - # All BCPs with counts - bcps = [] - for bcp_id, citing_drafts in sorted(bcp_citations.items(), - key=lambda x: len(x[1]), reverse=True): - bcps.append({ - "bcp_id": bcp_id, - "count": len(citing_drafts), - "drafts": citing_drafts[:10], - "total_drafts": len(citing_drafts), - }) - - # Co-citation matrix: which BCPs appear together in the same draft - bcp_ids = sorted(bcp_citations.keys()) - co_citation = [] - for i, bcp_a in enumerate(bcp_ids): - drafts_a = set(bcp_citations[bcp_a]) - for j, bcp_b in enumerate(bcp_ids): - if j <= i: - continue - drafts_b = set(bcp_citations[bcp_b]) - shared = len(drafts_a & drafts_b) - if shared > 0: - co_citation.append({ - "bcp_a": bcp_a, - "bcp_b": bcp_b, - "count": shared, - }) - - # Heatmap data: full matrix for all BCPs (top 20 by citation count) - 
top_bcp_ids = [b["bcp_id"] for b in bcps[:20]] - heatmap_matrix = [] - for bcp_a in top_bcp_ids: - row = [] - drafts_a = set(bcp_citations.get(bcp_a, [])) - for bcp_b in top_bcp_ids: - drafts_b = set(bcp_citations.get(bcp_b, [])) - shared = len(drafts_a & drafts_b) - row.append(shared) - heatmap_matrix.append(row) - - # BCP citations by category - cat_bcp_count: dict[str, Counter] = defaultdict(Counter) - for draft_name, bcp_list in draft_bcps.items(): - cat = draft_cats.get(draft_name, "Other") - for bcp_id in bcp_list: - cat_bcp_count[cat][bcp_id] += 1 - - by_category = [] - for cat in sorted(cat_bcp_count.keys()): - top_bcps = cat_bcp_count[cat].most_common(5) - by_category.append({ - "category": cat, - "total_bcp_refs": sum(cat_bcp_count[cat].values()), - "unique_bcps": len(cat_bcp_count[cat]), - "top_bcps": [{"bcp_id": bid, "count": c} for bid, c in top_bcps], - }) - by_category.sort(key=lambda x: x["total_bcp_refs"], reverse=True) - - # Coverage - drafts_with_bcp = len(draft_bcps) - coverage_pct = (drafts_with_bcp / total_drafts * 100) if total_drafts > 0 else 0 - - return { - "bcps": bcps, - "co_citation": co_citation, - "heatmap_labels": top_bcp_ids, - "heatmap_matrix": heatmap_matrix, - "by_category": by_category, - "coverage": { - "total_drafts": total_drafts, - "drafts_with_bcp": drafts_with_bcp, - "coverage_pct": round(coverage_pct, 1), - "unique_bcps": len(bcp_citations), - "total_bcp_refs": len(bcp_rows), - }, - } - - -def global_search(db: Database, query: str) -> SearchResults: - """Search across drafts (FTS5), ideas, authors, and gaps. - - Returns {drafts: [...], ideas: [...], authors: [...], gaps: [...]}. - """ - results: dict = {"drafts": [], "ideas": [], "authors": [], "gaps": []} - if not query or not query.strip(): - return results - - q = query.strip() - - # 1. 
Drafts via FTS5 - try: - fts_query = re.sub(r'[^\w\s]', '', q) - fts_query = re.sub(r'\b(NEAR|OR|AND|NOT)\b', '', fts_query, flags=re.IGNORECASE) - fts_query = re.sub(r'\s+', ' ', fts_query).strip() - if not fts_query: - raise ValueError("empty query after sanitization") - rows = db.conn.execute( - """SELECT d.name, d.title, d.abstract, d.time, d."group" - FROM drafts d - JOIN drafts_fts f ON d.rowid = f.rowid - WHERE drafts_fts MATCH ? - ORDER BY rank - LIMIT 50""", - (fts_query,), - ).fetchall() - for r in rows: - results["drafts"].append({ - "name": r["name"], - "title": r["title"], - "abstract": (r["abstract"] or "")[:200], - "date": r["time"], - "group": r["group"] or "individual", - }) - except Exception: - # FTS5 match can fail on certain query syntax; fall back to LIKE - like = f"%{q}%" - rows = db.conn.execute( - """SELECT name, title, abstract, time, "group" FROM drafts - WHERE title LIKE ? OR name LIKE ? OR abstract LIKE ? - LIMIT 50""", - (like, like, like), - ).fetchall() - for r in rows: - results["drafts"].append({ - "name": r["name"], - "title": r["title"], - "abstract": (r["abstract"] or "")[:200], - "date": r["time"], - "group": r["group"] or "individual", - }) - - # 2. Ideas via LIKE - like = f"%{q}%" - rows = db.conn.execute( - """SELECT id, title, description, idea_type, draft_name FROM ideas - WHERE title LIKE ? OR description LIKE ? - ORDER BY id LIMIT 50""", - (like, like), - ).fetchall() - for r in rows: - results["ideas"].append({ - "id": r["id"], - "title": r["title"], - "description": (r["description"] or "")[:200], - "type": r["idea_type"], - "draft_name": r["draft_name"], - }) - - # 3. Authors via LIKE - results["authors"] = db.search_authors(q, limit=50) - - # 4. 
Gaps via LIKE - results["gaps"] = db.search_gaps(q, limit=50) - - return results - - -def get_landscape_tsne(db: Database) -> list[dict]: - """Compute t-SNE (cached for 5 min).""" - return _cached("landscape_tsne", lambda: _compute_landscape_tsne(db)) - - -def _compute_landscape_tsne(db: Database) -> list[dict]: - """Compute t-SNE from embeddings, return [{name, title, x, y, category, score}].""" - - - embeddings = db.all_embeddings() - if len(embeddings) < 5: - return [] - - pairs = db.drafts_with_ratings(limit=1000) - rating_map = {d.name: r for d, r in pairs} - draft_map = {d.name: d for d, _ in pairs} - - # Filter to drafts that have both embeddings and ratings - names = [n for n in embeddings if n in rating_map] - if len(names) < 5: - return [] - - matrix = np.array([embeddings[n] for n in names]) - - try: - tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1), - random_state=42, max_iter=500) - coords = tsne.fit_transform(matrix) - except Exception: - return [] - - result = [] - for i, name in enumerate(names): - r = rating_map[name] - d = draft_map.get(name) - result.append({ - "name": name, - "title": d.title if d else name, - "x": round(float(coords[i, 0]), 3), - "y": round(float(coords[i, 1]), 3), - "category": r.categories[0] if r.categories else "Other", - "score": round(r.composite_score, 2), - }) - return result - - -def get_comparison_data(db: Database, names: list[str]) -> dict | None: - """Get comparison data for a list of drafts. 
- - Returns { - drafts: [{name, title, abstract, rating, ideas, refs, ...}], - shared_ideas: [{title, drafts: [name,...]}], - unique_ideas: {name: [{title, description}]}, - shared_refs: [{type, id, drafts: [name,...]}], - unique_refs: {name: [{type, id}]}, - similarities: [{a, b, similarity}], - comparison_text: str | None, - } - """ - - - drafts_data = [] - all_ideas: dict[str, list[dict]] = {} - all_refs: dict[str, list[tuple[str, str]]] = {} - - for name in names: - detail = get_draft_detail(db, name) - if not detail: - continue - drafts_data.append(detail) - all_ideas[name] = detail.get("ideas", []) - all_refs[name] = [(r["type"], r["id"]) for r in detail.get("refs", [])] - - if len(drafts_data) < 2: - return None - - # Find shared vs unique ideas (by title similarity) - idea_title_drafts: dict[str, list[str]] = {} - for name, ideas in all_ideas.items(): - for idea in ideas: - title_lower = idea["title"].lower().strip() - if title_lower not in idea_title_drafts: - idea_title_drafts[title_lower] = [] - idea_title_drafts[title_lower].append(name) - - shared_ideas = [ - {"title": title, "drafts": draft_list} - for title, draft_list in idea_title_drafts.items() - if len(set(draft_list)) > 1 - ] - unique_ideas: dict[str, list[dict]] = {} - for name, ideas in all_ideas.items(): - unique = [] - for idea in ideas: - title_lower = idea["title"].lower().strip() - if len(set(idea_title_drafts.get(title_lower, []))) <= 1: - unique.append({"title": idea["title"], "description": idea.get("description", "")}) - unique_ideas[name] = unique - - # Find shared vs unique references - ref_drafts: dict[tuple[str, str], list[str]] = {} - for name, refs in all_refs.items(): - for ref in refs: - if ref not in ref_drafts: - ref_drafts[ref] = [] - ref_drafts[ref].append(name) - - shared_refs = [ - {"type": ref[0], "id": ref[1], "drafts": draft_list} - for ref, draft_list in ref_drafts.items() - if len(set(draft_list)) > 1 - ] - unique_refs: dict[str, list[dict]] = {} - for name, refs in 
all_refs.items(): - unique = [] - for ref in refs: - if len(set(ref_drafts.get(ref, []))) <= 1: - unique.append({"type": ref[0], "id": ref[1]}) - unique_refs[name] = unique - - # Pairwise embedding similarities - embeddings = db.all_embeddings() - similarities = [] - valid_names = [d["name"] for d in drafts_data] - for i in range(len(valid_names)): - for j in range(i + 1, len(valid_names)): - a, b = valid_names[i], valid_names[j] - if a in embeddings and b in embeddings: - vec_a = embeddings[a] - vec_b = embeddings[b] - dot = np.dot(vec_a, vec_b) - norm = np.linalg.norm(vec_a) * np.linalg.norm(vec_b) - sim = float(dot / norm) if norm > 0 else 0.0 - similarities.append({"a": a, "b": b, "similarity": round(sim, 4)}) - - return { - "drafts": drafts_data, - "shared_ideas": shared_ideas, - "unique_ideas": unique_ideas, - "shared_refs": shared_refs, - "unique_refs": unique_refs, - "similarities": similarities, - "comparison_text": None, - } - - -def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict: - """Search-only (free) — returns sources + cached answer if available.""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.search_only(question, top_k=top_k) - - -def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict: - """Run Claude synthesis (costs tokens, result is cached permanently).""" - config = Config.load() - searcher = HybridSearch(config, db) - return searcher.ask(question, top_k=top_k, cheap=cheap) - - -# --- Proposals --- - -def get_all_proposals(db: Database) -> list[dict]: - """Return all proposals with linked gap info.""" - proposals = db.all_proposals() - gaps = {g["id"]: g for g in db.all_gaps()} - for p in proposals: - p["gaps"] = [gaps[gid] for gid in p.get("gap_ids", []) if gid in gaps] - return proposals - - -def get_proposal_detail(db: Database, proposal_id: int) -> dict | None: - """Return a single proposal with full gap details.""" - p = 
db.get_proposal(proposal_id) - if not p: - return None - gaps = {g["id"]: g for g in db.all_gaps()} - p["gaps"] = [gaps[gid] for gid in p.get("gap_ids", []) if gid in gaps] - return p - - -def get_proposals_for_gap(db: Database, gap_id: int) -> list[dict]: - """Return proposals linked to a specific gap.""" - return db.get_proposals_for_gap(gap_id) diff --git a/src/webui/data/__init__.py b/src/webui/data/__init__.py new file mode 100644 index 0000000..8aac5b3 --- /dev/null +++ b/src/webui/data/__init__.py @@ -0,0 +1,97 @@ +"""Data access layer for the web dashboard. + +Thin wrapper around ietf_analyzer.db.Database that returns plain dicts +ready for JSON serialization or Jinja2 template rendering. + +All public functions are re-exported here for backward compatibility: + from webui.data import get_overview_stats +""" +from __future__ import annotations + +# Shared utilities +from webui.data._shared import get_db, _cached, _extract_month # noqa: F401 + +# Drafts +from webui.data.drafts import ( # noqa: F401 + OverviewStats, + DraftListItem, + DraftsPage, + get_overview_stats, + get_category_counts, + get_category_summary, + get_drafts_page, + get_draft_detail, + get_generated_drafts, + read_generated_draft, +) + +# Authors +from webui.data.authors import ( # noqa: F401 + AuthorInfo, + AuthorNetworkNode, + AuthorNetworkEdge, + AuthorCluster, + AuthorNetwork, + get_top_authors, + get_org_data, + get_coauthor_network, + get_cross_org_data, + get_author_network_full, +) + +# Ratings +from webui.data.ratings import ( # noqa: F401 + get_rating_distributions, + get_category_radar_data, + get_score_histogram, + get_false_positive_profile, +) + +# Gaps +from webui.data.gaps import ( # noqa: F401 + get_all_gaps, + get_gap_detail, +) + +# Analysis & Visualization +from webui.data.analysis import ( # noqa: F401 + TimelineData, + SimilarityGraphStats, + SimilarityGraph, + CitationGraphStats, + CitationGraph, + MonitorCost, + MonitorPipeline, + MonitorStatus, + get_ideas_by_type, 
+ get_timeline_data, + get_similarity_graph, + get_idea_clusters, + get_timeline_animation_data, + get_monitor_status, + get_citation_graph, + get_landscape_tsne, + get_comparison_data, + get_architecture, + get_idea_analysis, + get_trends_data, + get_complexity_data, + get_source_comparison, + get_citation_influence, + get_bcp_analysis, +) + +# Search +from webui.data.search import ( # noqa: F401 + SearchResults, + global_search, + get_ask_search, + get_ask_synthesize, +) + +# Proposals +from webui.data.proposals import ( # noqa: F401 + get_all_proposals, + get_proposal_detail, + get_proposals_for_gap, +) diff --git a/src/webui/data/_shared.py b/src/webui/data/_shared.py new file mode 100644 index 0000000..9ab8e57 --- /dev/null +++ b/src/webui/data/_shared.py @@ -0,0 +1,46 @@ +"""Shared utilities for webui data modules.""" +from __future__ import annotations + +import sys +import time +from pathlib import Path + +# Ensure project src is on path +_project_root = Path(__file__).resolve().parent.parent.parent.parent +if str(_project_root) not in sys.path: + sys.path.insert(0, str(_project_root / "src")) + +from ietf_analyzer.config import Config +from ietf_analyzer.db import Database +from ietf_analyzer.readiness import compute_readiness, compute_readiness_batch + +# Simple TTL cache for expensive computations (t-SNE, clustering, similarity) +_cache: dict[str, tuple[float, object]] = {} +_CACHE_TTL = 300 # 5 minutes + + +def _extract_month(time_str: str | None) -> str: + """Normalize a date string to YYYY-MM format.""" + if not time_str: + return "unknown" + if len(time_str) >= 7 and time_str[4] == '-': + return time_str[:7] # Already YYYY-MM-DD + if len(time_str) >= 6 and time_str[:4].isdigit(): + return time_str[:4] + '-' + time_str[4:6] # YYYYMMDD → YYYY-MM + return time_str[:7] + +def _cached(key: str, fn, ttl: float = _CACHE_TTL): + """Return cached result or compute and cache it.""" + now = time.monotonic() + if key in _cache: + ts, val = _cache[key] + if now - 
ts < ttl: + return val + val = fn() + _cache[key] = (now, val) + return val + +def get_db() -> Database: + """Get a Database instance using default config.""" + config = Config.load() + return Database(config) diff --git a/src/webui/data/analysis.py b/src/webui/data/analysis.py new file mode 100644 index 0000000..6c962b2 --- /dev/null +++ b/src/webui/data/analysis.py @@ -0,0 +1,1968 @@ +"""Analysis, visualization, and complex computation data access functions.""" +from __future__ import annotations + +import json +import re +from collections import Counter, defaultdict +from typing import TypedDict + +import numpy as np +from sklearn.cluster import AgglomerativeClustering +from sklearn.manifold import TSNE +from sklearn.preprocessing import normalize as sk_normalize + +from ietf_analyzer.config import Config +from ietf_analyzer.db import Database +from webui.data._shared import _cached, _extract_month +from webui.data.drafts import get_draft_detail + +_ARCH_LAYERS = [ + {"id": "transport", "label": "Transport & Networking", "order": 0, + "keywords": {"transport", "network", "routing", "tunnel", "packet", "flow", "traffic", "qos", "sdwan", "mpls", "bgp", "ospf", "segment", "srv6", "quic", "http", "grpc", "mqtt", "yang", "snmp", "netconf", "restconf"}}, + {"id": "identity", "label": "Identity & Trust", "order": 1, + "keywords": {"identity", "auth", "authentication", "authorization", "credential", "certificate", "trust", "attestation", "oauth", "token", "signing", "verification", "verifiable", "did", "vc", "pki", "spiffe", "acl"}}, + {"id": "discovery", "label": "Discovery & Registration", "order": 2, + "keywords": {"discovery", "registration", "registry", "catalog", "advertisement", "announce", "capability", "service", "lookup", "resolution", "dns", "directory"}}, + {"id": "communication", "label": "Agent Communication", "order": 3, + "keywords": {"a2a", "agent", "communication", "message", "messaging", "protocol", "exchange", "negotiation", "handshake", "session", 
"dialogue", "interaction", "mcp", "interop"}}, + {"id": "coordination", "label": "Task & Coordination", "order": 4, + "keywords": {"task", "delegation", "orchestration", "workflow", "planning", "coordination", "consensus", "collaboration", "multi-agent", "swarm", "composition", "scheduling"}}, + {"id": "intelligence", "label": "AI & Inference", "order": 5, + "keywords": {"model", "inference", "learning", "training", "ml", "neural", "llm", "embedding", "reasoning", "decision", "prediction", "classification", "generative", "rag", "fine-tuning"}}, + {"id": "safety", "label": "Safety & Governance", "order": 6, + "keywords": {"safety", "ethical", "governance", "policy", "audit", "explainability", "transparency", "accountability", "bias", "fairness", "compliance", "regulation", "risk", "shutdown", "alignment", "adversarial", "privacy", "consent"}}, + {"id": "application", "label": "Application Domains", "order": 7, + "keywords": {"healthcare", "autonomous", "vehicle", "robotics", "iot", "digital twin", "supply chain", "finance", "manufacturing", "energy", "smart", "edge", "cloud", "sensing"}}, +] + +_LAYER_KEYWORDS = {l["id"]: l["keywords"] for l in _ARCH_LAYERS} + + +class TimelineData(TypedDict): + """Monthly category counts from :func:`get_timeline_data`.""" + months: list[str] + series: dict[str, list[int]] + categories: list[str] + +class SimilarityGraphStats(TypedDict): + """Stats sub-dict in similarity graph.""" + node_count: int + edge_count: int + avg_similarity: float + +class SimilarityGraph(TypedDict): + """Draft similarity network from :func:`get_similarity_graph`.""" + nodes: list[dict] + edges: list[dict] + stats: SimilarityGraphStats + +class CitationGraphStats(TypedDict): + """Stats sub-dict in citation graph.""" + node_count: int + edge_count: int + rfc_count: int + draft_count: int + +class CitationGraph(TypedDict): + """Citation network from :func:`get_citation_graph`.""" + nodes: list[dict] + edges: list[dict] + stats: CitationGraphStats + +class 
MonitorCost(TypedDict): + """Cost sub-dict in monitor status.""" + input_tokens: int + output_tokens: int + estimated_usd: float + +class MonitorPipeline(TypedDict): + """Pipeline sub-dict in monitor status.""" + total_drafts: int + rated: int + embedded: int + with_ideas: int + idea_total: int + gap_count: int + +class MonitorStatus(TypedDict): + """Monitor status from :func:`get_monitor_status`.""" + last_run: dict | None + runs: list[dict] + unprocessed: dict[str, int] + total_runs: int + pipeline: MonitorPipeline + cost: MonitorCost + +def get_ideas_by_type(db: Database) -> dict: + """Return ideas grouped by type with counts.""" + all_ideas = db.all_ideas() + type_counts = Counter(i.get("type", "other") or "other" for i in all_ideas) + return { + "total": len(all_ideas), + "by_type": dict(type_counts.most_common()), + "ideas": all_ideas, + } + +def get_timeline_data(db: Database) -> TimelineData: + """Return monthly counts by category for timeline chart.""" + pairs = db.drafts_with_ratings(limit=1000) + all_drafts = db.list_drafts(limit=1000, order_by="time ASC") + rating_map = {d.name: r for d, r in pairs} + + month_cat: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int)) + for d in all_drafts: + month = _extract_month(d.time) + r = rating_map.get(d.name) + if r: + cat = r.categories[0] if r.categories else "Other" + month_cat[month][cat] += 1 + + months = sorted(month_cat.keys()) + cat_totals: Counter = Counter() + for mc in month_cat.values(): + for c, cnt in mc.items(): + cat_totals[c] += cnt + top_cats = [c for c, _ in cat_totals.most_common(10)] + + series = {} + for cat in top_cats: + series[cat] = [month_cat[m].get(cat, 0) for m in months] + + return {"months": months, "series": series, "categories": top_cats} + +def get_similarity_graph(db: Database, threshold: float = 0.75) -> SimilarityGraph: + """Return draft similarity network (cached).""" + return _cached(f"similarity_{threshold}", lambda: _compute_similarity_graph(db, threshold)) + 
+def _compute_similarity_graph(db: Database, threshold: float = 0.75) -> SimilarityGraph: + """Return draft similarity network for force-directed graph. + + Returns {nodes: [{name, title, category, score}], + edges: [{source, target, similarity}], + stats: {node_count, edge_count, avg_similarity}} + """ + + + embeddings = db.all_embeddings() + if len(embeddings) < 2: + return {"nodes": [], "edges": [], "stats": {"node_count": 0, "edge_count": 0, "avg_similarity": 0}} + + pairs = db.drafts_with_ratings(limit=1000) + rating_map = {d.name: r for d, r in pairs} + draft_map = {d.name: d for d, _ in pairs} + + # Filter to drafts with both embeddings and ratings + names = [n for n in embeddings if n in rating_map] + if len(names) < 2: + return {"nodes": [], "edges": [], "stats": {"node_count": 0, "edge_count": 0, "avg_similarity": 0}} + + matrix = np.array([embeddings[n] for n in names]) + + # L2-normalize and compute cosine similarity + norms = np.linalg.norm(matrix, axis=1, keepdims=True) + norms[norms == 0] = 1.0 + normalized = matrix / norms + sim_matrix = normalized @ normalized.T + + # Find pairs above threshold (upper triangle only) + edges = [] + node_set = set() + for i in range(len(names)): + for j in range(i + 1, len(names)): + sim = float(sim_matrix[i, j]) + if sim >= threshold: + edges.append({"source": names[i], "target": names[j], "similarity": round(sim, 4)}) + node_set.add(names[i]) + node_set.add(names[j]) + + # Build nodes from connected drafts only + nodes = [] + for name in names: + if name not in node_set: + continue + r = rating_map[name] + d = draft_map.get(name) + nodes.append({ + "name": name, + "title": d.title if d else name, + "category": r.categories[0] if r.categories else "Other", + "score": round(r.composite_score, 2), + }) + + avg_sim = round(sum(e["similarity"] for e in edges) / max(len(edges), 1), 4) + + return { + "nodes": nodes, + "edges": edges, + "stats": {"node_count": len(nodes), "edge_count": len(edges), "avg_similarity": 
avg_sim}, + } + +def get_idea_clusters(db: Database) -> dict: + """Cluster ideas (cached for 5 min).""" + return _cached("idea_clusters", lambda: _compute_idea_clusters(db)) + +def _compute_idea_clusters(db: Database) -> dict: + """Cluster ideas by embedding similarity, return clusters + t-SNE scatter. + + Uses Ward linkage on L2-normalized embeddings (approximates cosine) with + a target of ~30 clusters for readable groupings. Enriches each cluster + with WG info and category breakdown. + """ + + + embeddings = db.all_idea_embeddings() + if not embeddings: + return {"clusters": [], "scatter": [], "stats": {"total": 0, "clustered": 0, "num_clusters": 0}, "empty": True} + + # Exclude ideas from false-positive drafts + fp_names = db.false_positive_names() + + # Fetch ideas with IDs for metadata lookup + rows = db.conn.execute("SELECT id, title, description, idea_type, draft_name FROM ideas").fetchall() + idea_map = {r["id"]: {"title": r["title"], "description": r["description"], + "type": r["idea_type"], "draft_name": r["draft_name"]} + for r in rows if r["draft_name"] not in fp_names} + + # Remove FP ideas from embeddings too + embeddings = {k: v for k, v in embeddings.items() if k in idea_map} + + # Draft -> WG and category lookup + draft_rows = db.conn.execute('SELECT name, "group", title FROM drafts').fetchall() + draft_wg = {r["name"]: r["group"] or "none" for r in draft_rows} + draft_title_map = {r["name"]: r["title"] for r in draft_rows} + rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings WHERE COALESCE(false_positive, 0) = 0").fetchall() + draft_cats: dict[str, list[str]] = {} + for r in rating_rows: + try: + draft_cats[r["draft_name"]] = json.loads(r["categories"]) if r["categories"] else [] + except (json.JSONDecodeError, TypeError): + draft_cats[r["draft_name"]] = [] + + # Build matrix from embeddings that have matching ideas + idea_ids = [iid for iid in embeddings if iid in idea_map] + if len(idea_ids) < 5: + return {"clusters": 
[], "scatter": [], "stats": {"total": len(idea_ids), "clustered": 0, "num_clusters": 0}, "empty": True} + + matrix = np.array([embeddings[iid] for iid in idea_ids]) + matrix_norm = sk_normalize(matrix) + + # Ward clustering on normalized vectors — target ~30 clusters scaled by dataset size + n_target = max(10, min(40, len(idea_ids) // 12)) + try: + clustering = AgglomerativeClustering(n_clusters=n_target, linkage='ward') + labels = clustering.fit_predict(matrix_norm) + except Exception: + return {"clusters": [], "scatter": [], "stats": {"total": len(idea_ids), "clustered": 0, "num_clusters": 0}, "empty": True} + + # Build cluster data + cluster_ideas_map: dict[int, list] = defaultdict(list) + for idx, iid in enumerate(idea_ids): + cluster_ideas_map[labels[idx]].append(iid) + + stop = {"a", "an", "the", "of", "for", "in", "to", "and", "or", "with", + "on", "by", "is", "as", "at", "from", "that", "this", "it", + "based", "using", "protocol", "mechanism", "framework", "system", + "network", "agent", "agents"} + clusters = [] + for cid in sorted(cluster_ideas_map.keys()): + members = cluster_ideas_map[cid] + ideas_in_cluster = [idea_map[iid] for iid in members if iid in idea_map] + if len(ideas_in_cluster) < 2: + continue + + # Theme: most common significant words in titles + words = Counter() + for idea in ideas_in_cluster: + for w in idea["title"].lower().split(): + w_clean = w.strip("()[].,;:-\"'") + if len(w_clean) > 2 and w_clean not in stop: + words[w_clean] += 1 + top_words = [w for w, _ in words.most_common(4)] + theme = " ".join(top_words).title() if top_words else f"Cluster {cid}" + + drafts = list({idea["draft_name"] for idea in ideas_in_cluster}) + + # Enrich: WG breakdown + wg_counts: dict[str, int] = Counter() + cat_counts: dict[str, int] = Counter() + for dname in drafts: + wg = draft_wg.get(dname, "none") + wg_counts[wg] += 1 + for cat in draft_cats.get(dname, []): + cat_counts[cat] += 1 + + wg_list = [{"wg": wg, "count": cnt} for wg, cnt in 
wg_counts.most_common(5)] + cat_list = [{"cat": cat, "count": cnt} for cat, cnt in cat_counts.most_common(3)] + cross_wg = len([w for w in wg_counts if w != "none"]) >= 2 + + clusters.append({ + "id": len(clusters), + "theme": theme, + "size": len(ideas_in_cluster), + "ideas": ideas_in_cluster[:20], + "drafts": drafts, + "wgs": wg_list, + "categories": cat_list, + "cross_wg": cross_wg, + "wg_count": len(wg_counts), + }) + + clusters.sort(key=lambda c: c["size"], reverse=True) + + # Build mapping: original cluster label -> sorted index + # Each cluster remembers which original label it came from via its member ids + old_label_to_new: dict[int, int] = {} + for new_idx, c in enumerate(clusters): + c["id"] = new_idx + # Find original label for any member of this cluster + for old_cid, members in cluster_ideas_map.items(): + if members and members[0] in [iid for iid in members if iid in idea_map]: + member_titles = {idea_map[m]["title"] for m in members if m in idea_map} + c_titles = {idea["title"] for idea in c["ideas"]} + if member_titles == c_titles or (member_titles & c_titles and len(members) == c["size"]): + old_label_to_new[old_cid] = new_idx + break + + # Fallback: build from idea_id -> label mapping + iid_to_new: dict[int, int] = {} + for old_cid, members in cluster_ideas_map.items(): + new_idx = old_label_to_new.get(old_cid, old_cid) + for iid in members: + iid_to_new[iid] = new_idx + + # t-SNE for scatter + scatter = [] + try: + perp = min(30, len(idea_ids) - 1) + tsne = TSNE(n_components=2, perplexity=perp, random_state=42, max_iter=500) + coords = tsne.fit_transform(matrix_norm) + + for idx, iid in enumerate(idea_ids): + info = idea_map.get(iid, {}) + scatter.append({ + "x": round(float(coords[idx, 0]), 3), + "y": round(float(coords[idx, 1]), 3), + "cluster_id": iid_to_new.get(iid, int(labels[idx])), + "title": info.get("title", ""), + "draft_name": info.get("draft_name", ""), + "wg": draft_wg.get(info.get("draft_name", ""), ""), + }) + except Exception: + 
pass + + # --- Cross-cluster links --- + # Find pairs of clusters whose ideas are semantically related + # Use centroid similarity + best idea-pair links + links = [] + if len(clusters) >= 2: + # Build cluster centroids from normalized embeddings + cluster_centroids = {} + cluster_member_indices: dict[int, list[int]] = defaultdict(list) + for idx, iid in enumerate(idea_ids): + cid = iid_to_new.get(iid, int(labels[idx])) + cluster_member_indices[cid].append(idx) + + for cid, indices in cluster_member_indices.items(): + if indices: + centroid = matrix_norm[indices].mean(axis=0) + norm = np.linalg.norm(centroid) + if norm > 0: + cluster_centroids[cid] = centroid / norm + + # Compute pairwise centroid similarity for all cluster pairs + cids_sorted = sorted(cluster_centroids.keys()) + for ci_idx, ci in enumerate(cids_sorted): + for cj in cids_sorted[ci_idx + 1:]: + sim = float(np.dot(cluster_centroids[ci], cluster_centroids[cj])) + if sim < 0.45: + continue + + # Find the best idea pair across these two clusters + best_sim = 0.0 + best_pair = (None, None) + # Sample up to 20 ideas per cluster to keep it fast + ci_members = cluster_member_indices[ci][:20] + cj_members = cluster_member_indices[cj][:20] + for mi in ci_members: + for mj in cj_members: + pair_sim = float(np.dot(matrix_norm[mi], matrix_norm[mj])) + if pair_sim > best_sim: + best_sim = pair_sim + best_pair = (idea_ids[mi], idea_ids[mj]) + + if best_sim < 0.5: + continue + + # Get theme names + ci_theme = next((c["theme"] for c in clusters if c["id"] == ci), f"Cluster {ci}") + cj_theme = next((c["theme"] for c in clusters if c["id"] == cj), f"Cluster {cj}") + + idea_a = idea_map.get(best_pair[0], {}) + idea_b = idea_map.get(best_pair[1], {}) + + links.append({ + "source": ci, + "target": cj, + "source_theme": ci_theme, + "target_theme": cj_theme, + "similarity": round(sim, 3), + "best_pair_sim": round(best_sim, 3), + "idea_a": idea_a.get("title", ""), + "idea_a_draft": idea_a.get("draft_name", ""), + "idea_b": 
idea_b.get("title", ""), + "idea_b_draft": idea_b.get("draft_name", ""), + }) + + links.sort(key=lambda l: l["best_pair_sim"], reverse=True) + links = links[:50] # cap at top 50 links + + total = len(idea_ids) + clustered = sum(c["size"] for c in clusters) + return { + "clusters": clusters, + "scatter": scatter, + "links": links, + "stats": {"total": total, "clustered": clustered, "num_clusters": len(clusters)}, + "empty": False, + } + +def get_timeline_animation_data(db: Database) -> dict: + """Timeline animation (cached for 5 min).""" + return _cached("timeline_animation", lambda: _compute_timeline_animation_data(db)) + +def _compute_timeline_animation_data(db: Database) -> dict: + """Compute t-SNE on all drafts, return points with month info + category_monthly. + + t-SNE is computed once on ALL drafts so coordinates are stable across + animation frames. Each point carries a ``month`` field (YYYY-MM) so the + front-end can build cumulative animation frames. + """ + + + embeddings = db.all_embeddings() + if len(embeddings) < 5: + return {"points": [], "months": [], "category_monthly": {}} + + pairs = db.drafts_with_ratings(limit=1000) + rating_map = {d.name: r for d, r in pairs} + draft_map = {d.name: d for d, _ in pairs} + + # Filter to drafts that have both embeddings and ratings + names = [n for n in embeddings if n in rating_map] + if len(names) < 5: + return {"points": [], "months": [], "category_monthly": {}} + + matrix = np.array([embeddings[n] for n in names]) + + try: + tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1), + random_state=42, max_iter=500) + coords = tsne.fit_transform(matrix) + except Exception: + return {"points": [], "months": [], "category_monthly": {}} + + # Build points with month + points = [] + month_set: set[str] = set() + category_monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int)) + + for i, name in enumerate(names): + r = rating_map[name] + d = draft_map.get(name) + month = _extract_month(d.time 
if d else None) + cat = r.categories[0] if r.categories else "Other" + month_set.add(month) + category_monthly[month][cat] += 1 + points.append({ + "name": name, + "title": d.title if d else name, + "x": round(float(coords[i, 0]), 3), + "y": round(float(coords[i, 1]), 3), + "category": cat, + "score": round(r.composite_score, 2), + "month": month, + }) + + months = sorted(month_set) + # Convert defaultdict to plain dict for JSON + cat_monthly_plain = {m: dict(cats) for m, cats in category_monthly.items()} + + return { + "points": points, + "months": months, + "category_monthly": cat_monthly_plain, + } + +def get_monitor_status(db: Database) -> MonitorStatus: + """Return monitoring status data for dashboard.""" + runs = db.get_monitor_runs(limit=20) + last = runs[0] if runs else None + total_drafts = db.count_drafts() + rated_count = len(db.drafts_with_ratings(limit=10000)) + unrated = len(db.unrated_drafts(limit=9999)) + unembedded = len(db.drafts_without_embeddings(limit=9999)) + embedded_count = total_drafts - unembedded + no_ideas = len(db.drafts_without_ideas(limit=9999)) + ideas_count = total_drafts - no_ideas + idea_total = db.idea_count() + gap_count = len(db.all_gaps()) + input_tok, output_tok = db.total_tokens_used() + + # Estimate cost (Sonnet pricing: $3/M input, $15/M output) + est_cost = (input_tok * 3.0 / 1_000_000) + (output_tok * 15.0 / 1_000_000) + + return { + "last_run": last, + "runs": runs, + "unprocessed": {"unrated": unrated, "unembedded": unembedded, "no_ideas": no_ideas}, + "total_runs": len(runs), + "pipeline": { + "total_drafts": total_drafts, + "rated": rated_count, + "embedded": embedded_count, + "with_ideas": ideas_count, + "idea_total": idea_total, + "gap_count": gap_count, + }, + "cost": { + "input_tokens": input_tok, + "output_tokens": output_tok, + "estimated_usd": round(est_cost, 2), + }, + } + +def get_citation_graph(db: Database, min_refs: int = 2) -> CitationGraph: + """Return citation graph (cached for 5 min).""" + return 
_cached(f"citation_graph_{min_refs}", lambda: _compute_citation_graph(db, min_refs)) + +def _compute_citation_graph(db: Database, min_refs: int = 2) -> CitationGraph: + """Return citation network data for force-directed graph. + + Returns {nodes: [{id, type, title, influence, ...}], + edges: [{source, target}], + stats: {node_count, edge_count, ...}} + """ + # Get all references + rows = db.conn.execute( + "SELECT draft_name, ref_type, ref_id FROM draft_refs" + ).fetchall() + + # Count in-degree for each referenced item + in_degree: dict[str, int] = Counter() + edges_raw = [] + for r in rows: + ref_key = f"{r['ref_type']}:{r['ref_id']}" + in_degree[ref_key] += 1 + edges_raw.append((r["draft_name"], ref_key)) + + # Also count drafts as source nodes + draft_out: dict[str, int] = Counter() + for draft_name, _ in edges_raw: + draft_out[draft_name] += 1 + + # Get draft titles for labeling + draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() + draft_titles = {r["name"]: r["title"] for r in draft_rows} + + # Get rating categories for draft coloring + rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() + draft_cats = {} + for r in rating_rows: + try: + cats = json.loads(r["categories"]) if r["categories"] else [] + draft_cats[r["draft_name"]] = cats[0] if cats else "Other" + except Exception: + draft_cats[r["draft_name"]] = "Other" + + # Filter: keep RFCs with min_refs+ references and all drafts that reference them + top_refs = {k: v for k, v in in_degree.items() if v >= min_refs} + + # Build node set + node_set = set() + filtered_edges = [] + for draft_name, ref_key in edges_raw: + if ref_key in top_refs: + node_set.add(draft_name) + node_set.add(ref_key) + filtered_edges.append({"source": draft_name, "target": ref_key}) + + # Limit to ~200 nodes max for readability + if len(node_set) > 250: + # Keep only refs with higher in-degree + sorted_refs = sorted(top_refs.items(), key=lambda x: x[1], reverse=True) + 
keep_refs = set(k for k, _ in sorted_refs[:80]) + node_set = set() + filtered_edges = [] + for draft_name, ref_key in edges_raw: + if ref_key in keep_refs: + node_set.add(draft_name) + node_set.add(ref_key) + filtered_edges.append({"source": draft_name, "target": ref_key}) + + # Build nodes + nodes = [] + for nid in node_set: + if ":" in nid and not nid.startswith("draft-"): + # It's a reference node (rfc:1234, bcp:14, etc.) + ref_type, ref_id = nid.split(":", 1) + influence = in_degree.get(nid, 0) + if ref_type == "rfc": + try: + title = f"RFC {int(ref_id)}" + except ValueError: + title = f"RFC {ref_id}" + else: + title = f"{ref_type.upper()} {ref_id}" + nodes.append({ + "id": nid, + "type": ref_type, + "title": title, + "influence": influence, + "ref_id": ref_id, + }) + else: + # It's a draft node + influence = in_degree.get(nid, 0) + draft_out.get(nid, 0) + nodes.append({ + "id": nid, + "type": "draft", + "title": draft_titles.get(nid, nid), + "influence": draft_out.get(nid, 0), + "category": draft_cats.get(nid, "Other"), + }) + + # Stats + rfc_count = sum(1 for n in nodes if n["type"] == "rfc") + draft_count = sum(1 for n in nodes if n["type"] == "draft") + + return { + "nodes": nodes, + "edges": filtered_edges, + "stats": { + "node_count": len(nodes), + "edge_count": len(filtered_edges), + "rfc_count": rfc_count, + "draft_count": draft_count, + }, + } + +def get_landscape_tsne(db: Database) -> list[dict]: + """Compute t-SNE (cached for 5 min).""" + return _cached("landscape_tsne", lambda: _compute_landscape_tsne(db)) + +def _compute_landscape_tsne(db: Database) -> list[dict]: + """Compute t-SNE from embeddings, return [{name, title, x, y, category, score}].""" + + + embeddings = db.all_embeddings() + if len(embeddings) < 5: + return [] + + pairs = db.drafts_with_ratings(limit=1000) + rating_map = {d.name: r for d, r in pairs} + draft_map = {d.name: d for d, _ in pairs} + + # Filter to drafts that have both embeddings and ratings + names = [n for n in 
embeddings if n in rating_map] + if len(names) < 5: + return [] + + matrix = np.array([embeddings[n] for n in names]) + + try: + tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1), + random_state=42, max_iter=500) + coords = tsne.fit_transform(matrix) + except Exception: + return [] + + result = [] + for i, name in enumerate(names): + r = rating_map[name] + d = draft_map.get(name) + result.append({ + "name": name, + "title": d.title if d else name, + "x": round(float(coords[i, 0]), 3), + "y": round(float(coords[i, 1]), 3), + "category": r.categories[0] if r.categories else "Other", + "score": round(r.composite_score, 2), + }) + return result + +def get_comparison_data(db: Database, names: list[str]) -> dict | None: + """Get comparison data for a list of drafts. + + Returns { + drafts: [{name, title, abstract, rating, ideas, refs, ...}], + shared_ideas: [{title, drafts: [name,...]}], + unique_ideas: {name: [{title, description}]}, + shared_refs: [{type, id, drafts: [name,...]}], + unique_refs: {name: [{type, id}]}, + similarities: [{a, b, similarity}], + comparison_text: str | None, + } + """ + + + drafts_data = [] + all_ideas: dict[str, list[dict]] = {} + all_refs: dict[str, list[tuple[str, str]]] = {} + + for name in names: + detail = get_draft_detail(db, name) + if not detail: + continue + drafts_data.append(detail) + all_ideas[name] = detail.get("ideas", []) + all_refs[name] = [(r["type"], r["id"]) for r in detail.get("refs", [])] + + if len(drafts_data) < 2: + return None + + # Find shared vs unique ideas (by title similarity) + idea_title_drafts: dict[str, list[str]] = {} + for name, ideas in all_ideas.items(): + for idea in ideas: + title_lower = idea["title"].lower().strip() + if title_lower not in idea_title_drafts: + idea_title_drafts[title_lower] = [] + idea_title_drafts[title_lower].append(name) + + shared_ideas = [ + {"title": title, "drafts": draft_list} + for title, draft_list in idea_title_drafts.items() + if len(set(draft_list)) > 1 + ] 
+ unique_ideas: dict[str, list[dict]] = {} + for name, ideas in all_ideas.items(): + unique = [] + for idea in ideas: + title_lower = idea["title"].lower().strip() + if len(set(idea_title_drafts.get(title_lower, []))) <= 1: + unique.append({"title": idea["title"], "description": idea.get("description", "")}) + unique_ideas[name] = unique + + # Find shared vs unique references + ref_drafts: dict[tuple[str, str], list[str]] = {} + for name, refs in all_refs.items(): + for ref in refs: + if ref not in ref_drafts: + ref_drafts[ref] = [] + ref_drafts[ref].append(name) + + shared_refs = [ + {"type": ref[0], "id": ref[1], "drafts": draft_list} + for ref, draft_list in ref_drafts.items() + if len(set(draft_list)) > 1 + ] + unique_refs: dict[str, list[dict]] = {} + for name, refs in all_refs.items(): + unique = [] + for ref in refs: + if len(set(ref_drafts.get(ref, []))) <= 1: + unique.append({"type": ref[0], "id": ref[1]}) + unique_refs[name] = unique + + # Pairwise embedding similarities + embeddings = db.all_embeddings() + similarities = [] + valid_names = [d["name"] for d in drafts_data] + for i in range(len(valid_names)): + for j in range(i + 1, len(valid_names)): + a, b = valid_names[i], valid_names[j] + if a in embeddings and b in embeddings: + vec_a = embeddings[a] + vec_b = embeddings[b] + dot = np.dot(vec_a, vec_b) + norm = np.linalg.norm(vec_a) * np.linalg.norm(vec_b) + sim = float(dot / norm) if norm > 0 else 0.0 + similarities.append({"a": a, "b": b, "similarity": round(sim, 4)}) + + return { + "drafts": drafts_data, + "shared_ideas": shared_ideas, + "unique_ideas": unique_ideas, + "shared_refs": shared_refs, + "unique_refs": unique_refs, + "similarities": similarities, + "comparison_text": None, + } + +def _classify_to_layer(text: str) -> str: + """Classify a piece of text to the best-matching architectural layer.""" + text_lower = text.lower() + words = set(re.findall(r"[a-z][a-z0-9-]+", text_lower)) + scores: dict[str, int] = {} + for layer_id, kws in 
_LAYER_KEYWORDS.items(): + scores[layer_id] = len(words & kws) + # Also check for multi-word keywords as substrings + for kw in kws: + if len(kw) > 4 and kw in text_lower: + scores[layer_id] += 1 + best = max(scores, key=lambda k: scores[k]) + return best if scores[best] > 0 else "communication" # default + +def get_architecture(db: Database) -> dict: + """Build system-of-systems architecture from idea clusters, gaps, and source coverage.""" + return _cached("architecture", lambda: _compute_architecture(db), ttl=600) + +def _compute_architecture(db: Database) -> dict: + """Compute the architecture view. + + Returns: + { + "components": [...], # architectural building blocks + "dependencies": [...], # edges between components + "gaps": [...], # gaps mapped to layers + "layers": [...], # layer definitions + "source_coverage": {...}, # per-layer source coverage + "stats": {...} + } + """ + # --- Gather raw data --- + cluster_data = get_idea_clusters(db) + clusters = cluster_data.get("clusters", []) + links = cluster_data.get("links", []) + all_gaps = db.all_gaps() + + # Source coverage: count drafts per source per layer + draft_rows = db.conn.execute( + "SELECT d.name, d.title, d.abstract, d.source, r.categories " + "FROM drafts d LEFT JOIN ratings r ON d.name = r.draft_name " + "WHERE COALESCE(r.false_positive, 0) = 0" + ).fetchall() + + # Build components from idea clusters + components = [] + cluster_to_component: dict[int, int] = {} # cluster_id -> component index + + for cl in clusters: + if cl["size"] < 3: + continue # skip tiny clusters + + # Determine layer from cluster theme + idea titles + text_blob = cl.get("theme", "") + for idea in cl.get("ideas", [])[:10]: + text_blob += " " + idea.get("title", "") + " " + idea.get("description", "") + layer = _classify_to_layer(text_blob) + + # Source coverage for this component's drafts + draft_names = set(cl.get("drafts", [])) + sources: Counter = Counter() + comp_drafts: list[dict] = [] + for dr in draft_rows: + if 
dr["name"] in draft_names: + sources[dr["source"] or "ietf"] += 1 + comp_drafts.append({"name": dr["name"], "title": (dr["title"] or dr["name"])[:80], "source": dr["source"] or "ietf"}) + + # Idea type breakdown + type_counts: Counter = Counter() + for idea in cl.get("ideas", []): + t = idea.get("type", "") + if t: + type_counts[t] += 1 + + # Maturity: rough proxy from idea count and source diversity + maturity = min(5, 1 + len(sources) + (1 if cl["size"] >= 10 else 0) + (1 if cl.get("cross_wg") else 0)) + + comp = { + "id": len(components), + "cluster_id": cl["id"], + "name": cl.get("theme", f"Component {cl['id']}"), + "layer": layer, + "size": cl["size"], + "draft_count": len(draft_names), + "drafts": comp_drafts[:20], + "sources": dict(sources.most_common()), + "type_breakdown": dict(type_counts.most_common(5)), + "maturity": maturity, + "wgs": cl.get("wgs", [])[:3], + "top_ideas": [{"title": i["title"], "type": i.get("type", ""), "draft_name": i.get("draft_name", "")} + for i in cl.get("ideas", [])[:5]], + "categories": cl.get("categories", []), + } + cluster_to_component[cl["id"]] = comp["id"] + components.append(comp) + + # Build dependencies from cross-cluster links + dependencies = [] + for link in links: + src_comp = cluster_to_component.get(link["source"]) + tgt_comp = cluster_to_component.get(link["target"]) + if src_comp is not None and tgt_comp is not None and src_comp != tgt_comp: + dependencies.append({ + "source": src_comp, + "target": tgt_comp, + "similarity": link.get("best_pair_sim", link.get("similarity", 0)), + "idea_a": link.get("idea_a", ""), + "idea_b": link.get("idea_b", ""), + }) + + # Map gaps to layers + gap_items = [] + for gap in all_gaps: + text = gap["topic"] + " " + gap.get("description", "") + " " + gap.get("category", "") + layer = _classify_to_layer(text) + gap_items.append({ + "id": gap["id"], + "topic": gap["topic"], + "description": gap["description"], + "evidence": gap.get("evidence", ""), + "severity": gap.get("severity", 
"medium"), + "category": gap.get("category", ""), + "layer": layer, + }) + + # Source coverage per layer + source_coverage: dict[str, dict[str, int]] = {l["id"]: Counter() for l in _ARCH_LAYERS} + for dr in draft_rows: + text = (dr["title"] or "") + " " + (dr["abstract"] or "")[:200] + layer = _classify_to_layer(text) + source_coverage[layer][dr["source"] or "ietf"] += 1 + # Convert Counters to dicts + source_coverage = {k: dict(v) for k, v in source_coverage.items()} + + # Layer summary stats + layer_info = [] + for l in _ARCH_LAYERS: + lid = l["id"] + comp_count = sum(1 for c in components if c["layer"] == lid) + idea_count = sum(c["size"] for c in components if c["layer"] == lid) + gap_count = sum(1 for g in gap_items if g["layer"] == lid) + layer_info.append({ + "id": l["id"], + "label": l["label"], + "order": l["order"], + "component_count": comp_count, + "idea_count": idea_count, + "gap_count": gap_count, + "coverage": source_coverage.get(lid, {}), + "total_drafts": sum(source_coverage.get(lid, {}).values()), + }) + + return { + "components": components, + "dependencies": dependencies, + "gaps": gap_items, + "layers": layer_info, + "stats": { + "total_components": len(components), + "total_dependencies": len(dependencies), + "total_gaps": len(gap_items), + "layers_with_gaps": len(set(g["layer"] for g in gap_items)), + }, + } + +def get_idea_analysis(db: Database) -> dict: + """Return comprehensive idea analysis data for the idea-analysis page. + + Includes novelty distribution, type breakdown with avg novelty, + top novel ideas, ideas-per-draft distribution, cross-tab of type x source, + shared ideas across drafts, and idea novelty vs draft rating correlation. 
+ """ + from collections import Counter, defaultdict + from difflib import SequenceMatcher + + # Fetch raw data + all_ideas = db.conn.execute( + """SELECT i.id, i.draft_name, i.title, i.description, i.idea_type, + i.novelty_score + FROM ideas i ORDER BY i.novelty_score DESC NULLS LAST""" + ).fetchall() + all_ideas = [dict(r) for r in all_ideas] + + # Draft ratings lookup + ratings_rows = db.conn.execute( + """SELECT d.name, d.title as draft_title, d.source, + r.novelty AS r_novelty, r.maturity, r.overlap, r.momentum, r.relevance + FROM drafts d LEFT JOIN ratings r ON d.name = r.draft_name""" + ).fetchall() + draft_info = {} + for r in ratings_rows: + row = dict(r) + # Compute composite score (average of 5 dimensions) + dims = [row.get("r_novelty"), row.get("maturity"), row.get("overlap"), + row.get("momentum"), row.get("relevance")] + valid = [d for d in dims if d is not None] + row["composite_score"] = sum(valid) / len(valid) if valid else None + draft_info[row["name"]] = row + + total = len(all_ideas) + scored = [i for i in all_ideas if i.get("novelty_score") is not None] + unscored = total - len(scored) + avg_novelty = sum(i["novelty_score"] for i in scored) / len(scored) if scored else 0 + + # Embedding coverage + embed_count = db.conn.execute("SELECT COUNT(*) FROM idea_embeddings").fetchone()[0] + + # --- Novelty score distribution (histogram) --- + novelty_dist = Counter(i["novelty_score"] for i in scored) + novelty_histogram = { + "labels": [1, 2, 3, 4, 5], + "values": [novelty_dist.get(s, 0) for s in [1, 2, 3, 4, 5]], + } + + # --- Ideas by type with counts and avg novelty --- + type_data = defaultdict(lambda: {"count": 0, "novelty_sum": 0, "novelty_n": 0}) + for idea in all_ideas: + t = idea.get("idea_type") or "other" + type_data[t]["count"] += 1 + if idea.get("novelty_score") is not None: + type_data[t]["novelty_sum"] += idea["novelty_score"] + type_data[t]["novelty_n"] += 1 + + by_type = [] + for t, d in sorted(type_data.items(), key=lambda x: 
x[1]["count"], reverse=True): + avg = d["novelty_sum"] / d["novelty_n"] if d["novelty_n"] > 0 else 0 + by_type.append({"type": t, "count": d["count"], "avg_novelty": round(avg, 2)}) + + type_names = [t["type"] for t in by_type] + + # --- Top 20 most novel ideas (score 4-5) --- + top_novel = [] + for idea in all_ideas: + if idea.get("novelty_score") and idea["novelty_score"] >= 4: + di = draft_info.get(idea["draft_name"], {}) + top_novel.append({ + "title": idea["title"], + "description": idea["description"], + "type": idea.get("idea_type", "other"), + "novelty_score": idea["novelty_score"], + "draft_name": idea["draft_name"], + "draft_title": di.get("draft_title", ""), + "draft_score": di.get("composite_score"), + }) + top_novel.sort(key=lambda x: (x["novelty_score"], x.get("draft_score") or 0), reverse=True) + top_novel = top_novel[:20] + + # --- Ideas per draft distribution --- + ideas_per_draft = Counter(i["draft_name"] for i in all_ideas) + ipd_dist = Counter(ideas_per_draft.values()) + ideas_per_draft_hist = { + "labels": sorted(ipd_dist.keys()), + "values": [ipd_dist[k] for k in sorted(ipd_dist.keys())], + } + # Also top drafts by idea count + top_idea_drafts = [] + for name, count in ideas_per_draft.most_common(10): + di = draft_info.get(name, {}) + top_idea_drafts.append({ + "name": name, + "draft_title": di.get("draft_title", ""), + "idea_count": count, + "score": di.get("composite_score"), + }) + + # --- Cross-tabulation: idea_type x source --- + type_source = defaultdict(lambda: defaultdict(int)) + for idea in all_ideas: + t = idea.get("idea_type") or "other" + di = draft_info.get(idea["draft_name"], {}) + source = di.get("source", "ietf") or "ietf" + type_source[t][source] += 1 + + sources = sorted(set( + di.get("source", "ietf") or "ietf" for di in draft_info.values() + )) + cross_tab = [] + for t in type_names: + row = {"type": t} + for s in sources: + row[s] = type_source[t].get(s, 0) + cross_tab.append(row) + + # --- Shared ideas across drafts --- + 
idea_groups: list[dict] = [] + for idea in all_ideas: + title_lower = idea["title"].lower().strip() + matched = False + for group in idea_groups: + ratio = SequenceMatcher(None, title_lower, group["canonical"]).ratio() + if ratio >= 0.75: + group["ideas"].append(idea) + group["drafts"].add(idea["draft_name"]) + matched = True + break + if not matched: + idea_groups.append({ + "canonical": title_lower, + "title": idea["title"], + "ideas": [idea], + "drafts": {idea["draft_name"]}, + }) + + shared_ideas = [] + for g in sorted(idea_groups, key=lambda x: len(x["drafts"]), reverse=True): + if len(g["drafts"]) < 2: + break + shared_ideas.append({ + "title": g["title"], + "appearances": len(g["drafts"]), + "drafts": sorted(g["drafts"])[:8], + "types": list(set(i.get("idea_type", "other") for i in g["ideas"])), + }) + + # --- Scatter: draft avg idea novelty vs draft relevance --- + draft_idea_novelty = defaultdict(list) + for idea in scored: + draft_idea_novelty[idea["draft_name"]].append(idea["novelty_score"]) + + scatter_data = [] + for name, scores in draft_idea_novelty.items(): + di = draft_info.get(name, {}) + if di.get("relevance") is not None and di.get("composite_score") is not None: + scatter_data.append({ + "name": name, + "avg_idea_novelty": round(sum(scores) / len(scores), 2), + "relevance": di["relevance"], + "score": di["composite_score"], + "idea_count": len(scores), + "source": di.get("source", "ietf") or "ietf", + }) + + # --- Sunburst data: type -> novelty band --- + sunburst_labels = [] + sunburst_parents = [] + sunburst_values = [] + # Root + sunburst_labels.append("All Ideas") + sunburst_parents.append("") + sunburst_values.append(total) + + novelty_bands = {"High (4-5)": lambda s: s is not None and s >= 4, + "Medium (3)": lambda s: s is not None and s == 3, + "Low (1-2)": lambda s: s is not None and s <= 2, + "Unscored": lambda s: s is None} + + for t_info in by_type: + t = t_info["type"] + sunburst_labels.append(t) + sunburst_parents.append("All 
Ideas") + sunburst_values.append(t_info["count"]) + # Sub-bands + type_ideas = [i for i in all_ideas if (i.get("idea_type") or "other") == t] + for band, fn in novelty_bands.items(): + cnt = sum(1 for i in type_ideas if fn(i.get("novelty_score"))) + if cnt > 0: + sunburst_labels.append(f"{t} - {band}") + sunburst_parents.append(t) + sunburst_values.append(cnt) + + return { + "total": total, + "scored": len(scored), + "unscored": unscored, + "avg_novelty": round(avg_novelty, 2), + "embed_count": embed_count, + "embed_pct": round(embed_count / total * 100, 1) if total > 0 else 0, + "type_count": len(by_type), + "novelty_histogram": novelty_histogram, + "by_type": by_type, + "top_novel": top_novel, + "ideas_per_draft_hist": ideas_per_draft_hist, + "top_idea_drafts": top_idea_drafts, + "cross_tab": cross_tab, + "sources": sources, + "shared_ideas": shared_ideas, + "scatter_data": scatter_data, + "sunburst": { + "labels": sunburst_labels, + "parents": sunburst_parents, + "values": sunburst_values, + }, + } + +def get_trends_data(db: Database) -> dict: + """Return temporal evolution data for the /trends page. + + Returns dict with: + - monthly_submissions: [{month, source, count}, ...] + - monthly_ratings: [{month, novelty, maturity, overlap, momentum, relevance}, ...] + - monthly_categories: [{month, category, count}, ...] + - safety_ratio: [{month, safety, capability, ratio}, ...] + - cumulative_ideas: [{month, total}, ...] + - monthly_new_authors: [{month, count}, ...] + - stats: {fastest_growing, newest_active} + - monthly_table: [{month, total, sources: {}, avg_score}, ...] + """ + conn = db.conn + + # 1. Monthly submissions by source + rows = conn.execute(""" + SELECT substr(time, 1, 7) AS month, source, COUNT(*) AS cnt + FROM drafts + WHERE time IS NOT NULL AND time != '' + GROUP BY month, source + ORDER BY month + """).fetchall() + monthly_submissions = [{"month": r["month"], "source": r["source"], "count": r["cnt"]} for r in rows] + + # 2. 
Monthly average ratings (all 5 dimensions) + rows = conn.execute(""" + SELECT substr(d.time, 1, 7) AS month, + AVG(r.novelty) AS novelty, AVG(r.maturity) AS maturity, + AVG(r.overlap) AS overlap, AVG(r.momentum) AS momentum, + AVG(r.relevance) AS relevance, + COUNT(*) AS cnt + FROM drafts d + JOIN ratings r ON d.name = r.draft_name + WHERE d.time IS NOT NULL AND d.time != '' AND r.false_positive = 0 + GROUP BY month + ORDER BY month + """).fetchall() + monthly_ratings = [{ + "month": r["month"], + "novelty": round(r["novelty"], 2), + "maturity": round(r["maturity"], 2), + "overlap": round(r["overlap"], 2), + "momentum": round(r["momentum"], 2), + "relevance": round(r["relevance"], 2), + "count": r["cnt"], + } for r in rows] + + # 3. Monthly category distribution + rows = conn.execute(""" + SELECT substr(d.time, 1, 7) AS month, r.categories + FROM drafts d + JOIN ratings r ON d.name = r.draft_name + WHERE d.time IS NOT NULL AND d.time != '' AND r.false_positive = 0 + """).fetchall() + cat_monthly: dict[str, Counter] = defaultdict(Counter) + all_cats: Counter = Counter() + for r in rows: + month = r["month"] + try: + cats = json.loads(r["categories"]) if r["categories"] else [] + except (json.JSONDecodeError, TypeError): + cats = [] + for c in cats: + cat_monthly[month][c] += 1 + all_cats[c] += 1 + + # Top 8 categories + top_cats = [c for c, _ in all_cats.most_common(8)] + months_sorted = sorted(cat_monthly.keys()) + monthly_categories = [] + for month in months_sorted: + for cat in top_cats: + monthly_categories.append({ + "month": month, + "category": cat, + "count": cat_monthly[month].get(cat, 0), + }) + + # 4. 
Safety ratio over time + safety_ratio = [] + for month in months_sorted: + safety = sum(cat_monthly[month].get(c, 0) for c in SAFETY_CATEGORIES) + capability = sum(cat_monthly[month].get(c, 0) for c in CAPABILITY_CATEGORIES) + ratio = round(safety / capability, 2) if capability > 0 else 0 + safety_ratio.append({ + "month": month, + "safety": safety, + "capability": capability, + "ratio": ratio, + }) + + # 5. Cumulative idea count over time + rows = conn.execute(""" + SELECT substr(d.time, 1, 7) AS month, COUNT(i.id) AS cnt + FROM ideas i + JOIN drafts d ON i.draft_name = d.name + WHERE d.time IS NOT NULL AND d.time != '' + GROUP BY month + ORDER BY month + """).fetchall() + cumulative = 0 + cumulative_ideas = [] + for r in rows: + cumulative += r["cnt"] + cumulative_ideas.append({"month": r["month"], "total": cumulative}) + + # 6. Monthly new author count (first-time contributors) + rows = conn.execute(""" + SELECT da.person_id, MIN(substr(d.time, 1, 7)) AS first_month + FROM draft_authors da + JOIN drafts d ON da.draft_name = d.name + WHERE d.time IS NOT NULL AND d.time != '' + GROUP BY da.person_id + """).fetchall() + new_author_monthly: Counter = Counter() + for r in rows: + if r["first_month"]: + new_author_monthly[r["first_month"]] += 1 + monthly_new_authors = [ + {"month": m, "count": new_author_monthly.get(m, 0)} + for m in months_sorted + ] + + # 7. 
Stats: fastest growing category, newest active category + fastest_growing = "" + newest_active = "" + if len(months_sorted) >= 4: + mid = len(months_sorted) // 2 + early_months = months_sorted[:mid] + late_months = months_sorted[mid:] + best_growth = -999 + for cat in top_cats: + early = sum(cat_monthly[m].get(cat, 0) for m in early_months) + late = sum(cat_monthly[m].get(cat, 0) for m in late_months) + if early > 0: + growth = (late - early) / early + elif late > 0: + growth = float("inf") + else: + growth = 0 + if growth > best_growth: + best_growth = growth + fastest_growing = cat + + # Newest active: category with latest first appearance + cat_first_month: dict[str, str] = {} + for month in months_sorted: + for cat in all_cats: + if cat not in cat_first_month and cat_monthly[month].get(cat, 0) > 0: + cat_first_month[cat] = month + if cat_first_month: + newest_active = max(cat_first_month, key=lambda c: cat_first_month[c]) + + # 8. Monthly breakdown table + monthly_table = [] + for month in months_sorted: + # Get per-source counts + sources: dict[str, int] = {} + total = 0 + for s in monthly_submissions: + if s["month"] == month: + sources[s["source"]] = s["count"] + total += s["count"] + # Get avg score + avg_row = conn.execute(""" + SELECT AVG((r.novelty + r.maturity + r.overlap + r.momentum + r.relevance) / 5.0) AS avg_score + FROM drafts d JOIN ratings r ON d.name = r.draft_name + WHERE substr(d.time, 1, 7) = ? 
AND r.false_positive = 0 + """, (month,)).fetchone() + avg_score = round(avg_row["avg_score"], 2) if avg_row and avg_row["avg_score"] else 0 + monthly_table.append({ + "month": month, + "total": total, + "sources": sources, + "avg_score": avg_score, + }) + + return { + "monthly_submissions": monthly_submissions, + "monthly_ratings": monthly_ratings, + "monthly_categories": monthly_categories, + "safety_ratio": safety_ratio, + "cumulative_ideas": cumulative_ideas, + "monthly_new_authors": monthly_new_authors, + "top_categories": top_cats, + "months": months_sorted, + "stats": { + "fastest_growing": fastest_growing, + "newest_active": newest_active, + }, + "monthly_table": monthly_table, + } + +def get_complexity_data(db: Database) -> dict: + """Return draft complexity analysis data for the /complexity page. + + For each rated draft, compute structural complexity metrics and + correlate with rating dimensions. + + Returns dict with: + - drafts: [{name, title, pages, author_count, citation_count, idea_count, + category_count, novelty, maturity, overlap, momentum, relevance, + score, composite_complexity}, ...] + - correlations: {metric: {dimension: r_value}} + - top_complex: top 10 most complex drafts + - top_efficient: top 10 high-rating low-complexity drafts + - stats: {avg_pages, avg_authors, avg_citations, pages_coverage_pct} + - category_complexity: [{category, avg_pages, avg_authors, avg_citations, count}, ...] + - source_complexity: [{source, avg_pages, avg_authors, avg_citations, count}, ...] 
+ """ + conn = db.conn + + # Build per-draft complexity data + rows = conn.execute(""" + SELECT d.name, d.title, d.pages, d.source, + r.novelty, r.maturity, r.overlap, r.momentum, r.relevance, + r.categories, + (r.novelty + r.maturity + r.overlap + r.momentum + r.relevance) / 5.0 AS score + FROM drafts d + JOIN ratings r ON d.name = r.draft_name + WHERE r.false_positive = 0 + """).fetchall() + + # Author counts + author_counts = db.draft_author_count_map() + + # Citation counts (outgoing refs) + citation_counts = {} + for row in conn.execute(""" + SELECT draft_name, COUNT(*) AS cnt FROM draft_refs GROUP BY draft_name + """).fetchall(): + citation_counts[row["draft_name"]] = row["cnt"] + + # Idea counts + idea_counts = {} + for row in conn.execute(""" + SELECT draft_name, COUNT(*) AS cnt FROM ideas GROUP BY draft_name + """).fetchall(): + idea_counts[row["draft_name"]] = row["cnt"] + + drafts_data = [] + total_with_pages = 0 + total_drafts = 0 + for r in rows: + total_drafts += 1 + pages = r["pages"] + if pages is not None: + total_with_pages += 1 + try: + cats = json.loads(r["categories"]) if r["categories"] else [] + except (json.JSONDecodeError, TypeError): + cats = [] + ac = author_counts.get(r["name"], 0) + cc = citation_counts.get(r["name"], 0) + ic = idea_counts.get(r["name"], 0) + cat_count = len(cats) + # Composite complexity: normalize each metric to 0-1 scale and average + # (raw values stored; composite calculated after we know max values) + drafts_data.append({ + "name": r["name"], + "title": r["title"], + "pages": pages, + "source": r["source"] or "ietf", + "author_count": ac, + "citation_count": cc, + "idea_count": ic, + "category_count": cat_count, + "categories": cats, + "novelty": r["novelty"], + "maturity": r["maturity"], + "overlap": r["overlap"], + "momentum": r["momentum"], + "relevance": r["relevance"], + "score": round(r["score"], 2), + }) + + # Compute composite complexity score (normalized 0-1 each, then averaged) + max_pages = 
max((d["pages"] for d in drafts_data if d["pages"] is not None), default=1) or 1 + max_authors = max((d["author_count"] for d in drafts_data), default=1) or 1 + max_citations = max((d["citation_count"] for d in drafts_data), default=1) or 1 + max_ideas = max((d["idea_count"] for d in drafts_data), default=1) or 1 + + for d in drafts_data: + p = (d["pages"] / max_pages) if d["pages"] is not None else 0.3 # default to median-ish + a = d["author_count"] / max_authors + c = d["citation_count"] / max_citations + i = d["idea_count"] / max_ideas + d["composite_complexity"] = round((p + a + c + i) / 4, 3) + + # Correlation matrix: complexity metrics vs rating dimensions + metrics = ["pages", "author_count", "citation_count", "idea_count", "category_count"] + dimensions = ["novelty", "maturity", "overlap", "momentum", "relevance"] + + def _pearson(xs: list[float], ys: list[float]) -> float: + """Compute Pearson correlation coefficient.""" + n = len(xs) + if n < 3: + return 0.0 + mean_x = sum(xs) / n + mean_y = sum(ys) / n + cov = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)) + std_x = (sum((x - mean_x) ** 2 for x in xs)) ** 0.5 + std_y = (sum((y - mean_y) ** 2 for y in ys)) ** 0.5 + if std_x == 0 or std_y == 0: + return 0.0 + return round(cov / (std_x * std_y), 3) + + correlations: dict[str, dict[str, float]] = {} + for metric in metrics: + correlations[metric] = {} + for dim in dimensions: + if metric == "pages": + # Filter to drafts with pages data + pairs = [(d[metric], d[dim]) for d in drafts_data if d[metric] is not None] + else: + pairs = [(d[metric], d[dim]) for d in drafts_data] + if len(pairs) >= 3: + xs, ys = zip(*pairs) + correlations[metric][dim] = _pearson(list(xs), list(ys)) + else: + correlations[metric][dim] = 0.0 + + # Top 10 most complex + sorted_by_complexity = sorted(drafts_data, key=lambda d: d["composite_complexity"], reverse=True) + top_complex = sorted_by_complexity[:10] + + # Top 10 efficient: high score but low complexity + # Efficiency 
= score / (composite_complexity + 0.1) (avoid div by zero) + for d in drafts_data: + d["efficiency"] = round(d["score"] / (d["composite_complexity"] + 0.1), 2) + sorted_by_efficiency = sorted(drafts_data, key=lambda d: d["efficiency"], reverse=True) + top_efficient = sorted_by_efficiency[:10] + + # Stats + pages_vals = [d["pages"] for d in drafts_data if d["pages"] is not None] + avg_pages = round(sum(pages_vals) / len(pages_vals), 1) if pages_vals else 0 + avg_authors = round(sum(d["author_count"] for d in drafts_data) / len(drafts_data), 1) if drafts_data else 0 + avg_citations = round(sum(d["citation_count"] for d in drafts_data) / len(drafts_data), 1) if drafts_data else 0 + pages_coverage = round(total_with_pages / total_drafts * 100, 1) if total_drafts else 0 + + # Category complexity averages + cat_data: dict[str, list[dict]] = defaultdict(list) + for d in drafts_data: + for cat in d.get("categories", []): + cat_data[cat].append(d) + + category_complexity = [] + for cat, ds in sorted(cat_data.items(), key=lambda x: -len(x[1])): + p_vals = [d["pages"] for d in ds if d["pages"] is not None] + category_complexity.append({ + "category": cat, + "avg_pages": round(sum(p_vals) / len(p_vals), 1) if p_vals else 0, + "avg_authors": round(sum(d["author_count"] for d in ds) / len(ds), 1), + "avg_citations": round(sum(d["citation_count"] for d in ds) / len(ds), 1), + "avg_score": round(sum(d["score"] for d in ds) / len(ds), 2), + "count": len(ds), + }) + + # Source complexity + source_data: dict[str, list[dict]] = defaultdict(list) + for d in drafts_data: + source_data[d["source"]].append(d) + + source_complexity = [] + for src, ds in sorted(source_data.items(), key=lambda x: -len(x[1])): + p_vals = [d["pages"] for d in ds if d["pages"] is not None] + source_complexity.append({ + "source": src, + "avg_pages": round(sum(p_vals) / len(p_vals), 1) if p_vals else 0, + "avg_authors": round(sum(d["author_count"] for d in ds) / len(ds), 1), + "avg_citations": 
round(sum(d["citation_count"] for d in ds) / len(ds), 1), + "avg_score": round(sum(d["score"] for d in ds) / len(ds), 2), + "count": len(ds), + }) + + return { + "drafts": drafts_data, + "correlations": correlations, + "metrics": metrics, + "dimensions": dimensions, + "top_complex": top_complex, + "top_efficient": top_efficient, + "stats": { + "avg_pages": avg_pages, + "avg_authors": avg_authors, + "avg_citations": avg_citations, + "pages_coverage_pct": pages_coverage, + "total_drafts": total_drafts, + }, + "category_complexity": category_complexity, + "source_complexity": source_complexity, + } + +def get_source_comparison(db: Database) -> dict: + """Cross-source comparison: ratings, categories, counts by standards body.""" + pairs_all = db.drafts_with_ratings(limit=2000) + # Also include false positives for completeness of source counts + pairs_fp = db.drafts_with_ratings(limit=2000, include_false_positives=True) + + # Build per-source data + source_stats: dict[str, dict] = {} + source_categories: dict[str, Counter] = defaultdict(Counter) + source_ratings: dict[str, dict[str, list]] = defaultdict(lambda: { + "novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": [], "scores": [], + }) + # Collect author counts per source + all_authors_by_source: dict[str, set] = defaultdict(set) + + for draft, rating in pairs_all: + src = getattr(draft, "source", "ietf") or "ietf" + source_ratings[src]["novelty"].append(rating.novelty) + source_ratings[src]["maturity"].append(rating.maturity) + source_ratings[src]["overlap"].append(rating.overlap) + source_ratings[src]["momentum"].append(rating.momentum) + source_ratings[src]["relevance"].append(rating.relevance) + source_ratings[src]["scores"].append(round(rating.composite_score, 2)) + for cat in rating.categories: + source_categories[src][cat] += 1 + + # Get all drafts (including unrated) for draft counts + all_drafts = db.list_drafts(limit=5000) + source_draft_counts: Counter = Counter() + for d in 
all_drafts: + src = getattr(d, "source", "ietf") or "ietf" + source_draft_counts[src] += 1 + + # Author counts by source + try: + rows = db.conn.execute( + """SELECT d.source, COUNT(DISTINCT da.person_id) as author_count + FROM drafts d + JOIN draft_authors da ON d.name = da.draft_name + GROUP BY d.source""" + ).fetchall() + for r in rows: + src = r["source"] or "ietf" + all_authors_by_source[src] = r["author_count"] + except Exception: + pass + + # Idea counts by source + source_idea_counts: Counter = Counter() + try: + rows = db.conn.execute( + """SELECT d.source, COUNT(*) as idea_count + FROM ideas i + JOIN drafts d ON i.draft_name = d.name + GROUP BY d.source""" + ).fetchall() + for r in rows: + src = r["source"] or "ietf" + source_idea_counts[src] = r["idea_count"] + except Exception: + pass + + # Build summary table + all_sources = sorted(set(source_draft_counts.keys()) | set(source_ratings.keys())) + summary = [] + for src in all_sources: + rats = source_ratings.get(src, {"scores": []}) + cats = source_categories.get(src, Counter()) + top_cat = cats.most_common(1)[0][0] if cats else "N/A" + avg_score = round(sum(rats["scores"]) / len(rats["scores"]), 2) if rats["scores"] else 0.0 + summary.append({ + "source": src, + "drafts": source_draft_counts.get(src, 0), + "rated": len(rats["scores"]), + "authors": all_authors_by_source.get(src, 0), + "ideas": source_idea_counts.get(src, 0), + "avg_score": avg_score, + "top_category": top_cat, + }) + + # Radar data: average of each dimension per source + radar = {} + for src, rats in source_ratings.items(): + if not rats["scores"]: + continue + n = len(rats["scores"]) + radar[src] = { + "novelty": round(sum(rats["novelty"]) / n, 2), + "maturity": round(sum(rats["maturity"]) / n, 2), + "overlap": round(sum(rats["overlap"]) / n, 2), + "momentum": round(sum(rats["momentum"]) / n, 2), + "relevance": round(sum(rats["relevance"]) / n, 2), + "count": n, + } + + # Category distribution by source (for stacked bar / heatmap) + 
all_cats = sorted({cat for cats in source_categories.values() for cat in cats}) + heatmap = { + "sources": list(source_categories.keys()), + "categories": all_cats, + "values": [], + } + for src in heatmap["sources"]: + row = [source_categories[src].get(cat, 0) for cat in all_cats] + heatmap["values"].append(row) + + # Unique/shared categories analysis + source_cat_sets = {src: set(cats.keys()) for src, cats in source_categories.items()} + unique_cats = {} + for src, cats in source_cat_sets.items(): + others = set() + for s2, c2 in source_cat_sets.items(): + if s2 != src: + others |= c2 + unique_cats[src] = sorted(cats - others) + + shared_cats = set() + for src, cats in source_cat_sets.items(): + for s2, c2 in source_cat_sets.items(): + if s2 != src: + shared_cats |= (cats & c2) + shared_cats = sorted(shared_cats) + + return { + "summary": summary, + "radar": radar, + "heatmap": heatmap, + "unique_categories": unique_cats, + "shared_categories": shared_cats, + } + +def get_citation_influence(db: Database) -> dict: + """Return citation influence analysis data (cached for 5 min).""" + return _cached("citation_influence", lambda: _compute_citation_influence(db)) + +def _compute_citation_influence(db: Database) -> dict: + """Compute citation influence metrics from the draft_refs table. 
+ + Returns dict with: + - top_cited_rfcs: top 20 most-cited RFCs with citation counts and citing drafts + - top_citing_drafts: top 20 drafts that cite the most references + - citations_by_category: average citations per category + - stats: total citations, unique RFCs, avg refs per draft + - draft_network: draft-to-draft citation edges for visualization + """ + # Get all references + rows = db.conn.execute( + "SELECT draft_name, ref_type, ref_id FROM draft_refs" + ).fetchall() + + # Get draft titles and categories + draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() + draft_titles = {r["name"]: r["title"] for r in draft_rows} + + rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() + draft_cats: dict[str, str] = {} + for r in rating_rows: + try: + cats = json.loads(r["categories"]) if r["categories"] else [] + draft_cats[r["draft_name"]] = cats[0] if cats else "Other" + except Exception: + draft_cats[r["draft_name"]] = "Other" + + # Well-known RFC names + rfc_names = { + "2119": "Key words (MUST/SHALL/MAY)", "8174": "Key words update", + "8259": "JSON", "7519": "JWT", "6749": "OAuth 2.0", + "7540": "HTTP/2", "9110": "HTTP Semantics", "7525": "TLS Recommendations", + "8446": "TLS 1.3", "3986": "URIs", "7230": "HTTP/1.1 Syntax", + "7231": "HTTP/1.1 Semantics", "8288": "Web Linking", "6125": "TLS Server Identity", + "7515": "JWS", "7516": "JWE", "7517": "JWK", "7518": "JWA", + "9449": "DPoP", "6750": "OAuth Bearer", "8725": "JWT Best Practices", + "9396": "Rich Authorization Requests", "9101": "JAR", + "8414": "OAuth Server Metadata", "7591": "Dynamic Client Registration", + "8705": "mTLS for OAuth", "9068": "JWT Access Tokens", + "6819": "OAuth Threat Model", "9200": "ACE-OAuth", "9052": "COSE", + "8392": "CWT", "7252": "CoAP", + } + + # In-degree: how many times each RFC is cited + rfc_citations: dict[str, list[str]] = defaultdict(list) + draft_out_count: dict[str, int] = Counter() + draft_to_draft_edges = [] 
+ total_citations = 0 + + for r in rows: + draft_name = r["draft_name"] + ref_type = r["ref_type"] + ref_id = r["ref_id"] + total_citations += 1 + draft_out_count[draft_name] += 1 + + if ref_type == "rfc": + rfc_citations[ref_id].append(draft_name) + elif ref_type == "draft": + draft_to_draft_edges.append({ + "source": draft_name, + "target": ref_id, + "source_title": draft_titles.get(draft_name, draft_name), + "target_title": draft_titles.get(ref_id, ref_id), + }) + + # Top 20 most-cited RFCs + rfc_sorted = sorted(rfc_citations.items(), key=lambda x: len(x[1]), reverse=True) + top_cited_rfcs = [] + for ref_id, citing_drafts in rfc_sorted[:20]: + top_cited_rfcs.append({ + "rfc_id": ref_id, + "name": rfc_names.get(ref_id, ""), + "count": len(citing_drafts), + "drafts": citing_drafts[:10], # Limit to first 10 for display + "total_drafts": len(citing_drafts), + }) + + # Top 20 most-citing drafts (out-degree) + draft_sorted = sorted(draft_out_count.items(), key=lambda x: x[1], reverse=True) + top_citing_drafts = [] + for draft_name, count in draft_sorted[:20]: + top_citing_drafts.append({ + "name": draft_name, + "title": draft_titles.get(draft_name, draft_name), + "count": count, + "category": draft_cats.get(draft_name, "Other"), + }) + + # Citation density by category + cat_totals: dict[str, int] = Counter() + cat_counts: dict[str, int] = Counter() + for draft_name, count in draft_out_count.items(): + cat = draft_cats.get(draft_name, "Other") + cat_totals[cat] += count + cat_counts[cat] += 1 + + citations_by_category = [] + for cat in sorted(cat_totals.keys()): + avg = cat_totals[cat] / cat_counts[cat] if cat_counts[cat] > 0 else 0 + citations_by_category.append({ + "category": cat, + "total_citations": cat_totals[cat], + "draft_count": cat_counts[cat], + "avg_citations": round(avg, 1), + }) + citations_by_category.sort(key=lambda x: x["avg_citations"], reverse=True) + + # PageRank-style influence: drafts that cite highly-cited RFCs + # Simple approximation: sum of (1 
/ citation_count) for each RFC cited + rfc_influence = {rid: len(drafts) for rid, drafts in rfc_citations.items()} + draft_pagerank: dict[str, float] = Counter() + for r in rows: + if r["ref_type"] == "rfc" and r["ref_id"] in rfc_influence: + # Higher score for citing highly-cited RFCs + draft_pagerank[r["draft_name"]] += rfc_influence[r["ref_id"]] + + pagerank_sorted = sorted(draft_pagerank.items(), key=lambda x: x[1], reverse=True) + top_pagerank = [] + for draft_name, score in pagerank_sorted[:20]: + top_pagerank.append({ + "name": draft_name, + "title": draft_titles.get(draft_name, draft_name), + "score": round(score, 1), + "category": draft_cats.get(draft_name, "Other"), + "out_degree": draft_out_count.get(draft_name, 0), + }) + + # Stats + unique_rfcs = len(rfc_citations) + drafts_with_refs = len(draft_out_count) + avg_refs = total_citations / drafts_with_refs if drafts_with_refs > 0 else 0 + + return { + "top_cited_rfcs": top_cited_rfcs, + "top_citing_drafts": top_citing_drafts, + "top_pagerank": top_pagerank, + "citations_by_category": citations_by_category, + "draft_network": draft_to_draft_edges[:200], # Limit for perf + "stats": { + "total_citations": total_citations, + "unique_rfcs": unique_rfcs, + "drafts_with_refs": drafts_with_refs, + "avg_refs_per_draft": round(avg_refs, 1), + }, + } + +def get_bcp_analysis(db: Database) -> dict: + """Return BCP dependency analysis data (cached for 5 min).""" + return _cached("bcp_analysis", lambda: _compute_bcp_analysis(db)) + +def _compute_bcp_analysis(db: Database) -> dict: + """Compute BCP dependency analysis. 
+ + Returns dict with: + - bcps: all BCPs with citation counts and citing drafts + - co_citation: which BCPs tend to be co-cited + - by_category: BCP citation patterns by category + - coverage: what % of drafts cite at least one BCP + """ + # Get all BCP references + bcp_rows = db.conn.execute( + "SELECT draft_name, ref_id FROM draft_refs WHERE ref_type = 'bcp'" + ).fetchall() + + # Get draft titles and categories + draft_rows = db.conn.execute("SELECT name, title FROM drafts").fetchall() + draft_titles = {r["name"]: r["title"] for r in draft_rows} + total_drafts = len(draft_titles) + + rating_rows = db.conn.execute("SELECT draft_name, categories FROM ratings").fetchall() + draft_cats: dict[str, str] = {} + for r in rating_rows: + try: + cats = json.loads(r["categories"]) if r["categories"] else [] + draft_cats[r["draft_name"]] = cats[0] if cats else "Other" + except Exception: + draft_cats[r["draft_name"]] = "Other" + + # BCP citation counts + bcp_citations: dict[str, list[str]] = defaultdict(list) + draft_bcps: dict[str, list[str]] = defaultdict(list) + + for r in bcp_rows: + bcp_citations[r["ref_id"]].append(r["draft_name"]) + draft_bcps[r["draft_name"]].append(r["ref_id"]) + + # All BCPs with counts + bcps = [] + for bcp_id, citing_drafts in sorted(bcp_citations.items(), + key=lambda x: len(x[1]), reverse=True): + bcps.append({ + "bcp_id": bcp_id, + "count": len(citing_drafts), + "drafts": citing_drafts[:10], + "total_drafts": len(citing_drafts), + }) + + # Co-citation matrix: which BCPs appear together in the same draft + bcp_ids = sorted(bcp_citations.keys()) + co_citation = [] + for i, bcp_a in enumerate(bcp_ids): + drafts_a = set(bcp_citations[bcp_a]) + for j, bcp_b in enumerate(bcp_ids): + if j <= i: + continue + drafts_b = set(bcp_citations[bcp_b]) + shared = len(drafts_a & drafts_b) + if shared > 0: + co_citation.append({ + "bcp_a": bcp_a, + "bcp_b": bcp_b, + "count": shared, + }) + + # Heatmap data: full matrix for all BCPs (top 20 by citation count) + 
top_bcp_ids = [b["bcp_id"] for b in bcps[:20]] + heatmap_matrix = [] + for bcp_a in top_bcp_ids: + row = [] + drafts_a = set(bcp_citations.get(bcp_a, [])) + for bcp_b in top_bcp_ids: + drafts_b = set(bcp_citations.get(bcp_b, [])) + shared = len(drafts_a & drafts_b) + row.append(shared) + heatmap_matrix.append(row) + + # BCP citations by category + cat_bcp_count: dict[str, Counter] = defaultdict(Counter) + for draft_name, bcp_list in draft_bcps.items(): + cat = draft_cats.get(draft_name, "Other") + for bcp_id in bcp_list: + cat_bcp_count[cat][bcp_id] += 1 + + by_category = [] + for cat in sorted(cat_bcp_count.keys()): + top_bcps = cat_bcp_count[cat].most_common(5) + by_category.append({ + "category": cat, + "total_bcp_refs": sum(cat_bcp_count[cat].values()), + "unique_bcps": len(cat_bcp_count[cat]), + "top_bcps": [{"bcp_id": bid, "count": c} for bid, c in top_bcps], + }) + by_category.sort(key=lambda x: x["total_bcp_refs"], reverse=True) + + # Coverage + drafts_with_bcp = len(draft_bcps) + coverage_pct = (drafts_with_bcp / total_drafts * 100) if total_drafts > 0 else 0 + + return { + "bcps": bcps, + "co_citation": co_citation, + "heatmap_labels": top_bcp_ids, + "heatmap_matrix": heatmap_matrix, + "by_category": by_category, + "coverage": { + "total_drafts": total_drafts, + "drafts_with_bcp": drafts_with_bcp, + "coverage_pct": round(coverage_pct, 1), + "unique_bcps": len(bcp_citations), + "total_bcp_refs": len(bcp_rows), + }, + } diff --git a/src/webui/data/authors.py b/src/webui/data/authors.py new file mode 100644 index 0000000..b6268b9 --- /dev/null +++ b/src/webui/data/authors.py @@ -0,0 +1,276 @@ +"""Author-related data access functions.""" +from __future__ import annotations + +import re +from collections import Counter, defaultdict +from typing import TypedDict + +from ietf_analyzer.db import Database +from webui.data._shared import _cached + + +class AuthorInfo(TypedDict): + """Author entry from :func:`get_top_authors`.""" + name: str + affiliation: str + 
draft_count: int + drafts: list[str] + +class AuthorNetworkNode(TypedDict): + """Node in the author network graph.""" + id: str + name: str + org: str + draft_count: int + avg_score: float + drafts: list[str] + +class AuthorNetworkEdge(TypedDict): + """Edge in the author network graph.""" + source: str + target: str + weight: int + +class AuthorCluster(TypedDict): + """Cluster in the author network.""" + id: int + members: list[str] + org_mix: dict[str, int] + size: int + drafts: list[dict[str, str]] + draft_count: int + +class AuthorNetwork(TypedDict): + """Full author network from :func:`get_author_network_full`.""" + nodes: list[AuthorNetworkNode] + edges: list[AuthorNetworkEdge] + clusters: list[AuthorCluster] + +def get_top_authors(db: Database, limit: int = 30) -> list[AuthorInfo]: + """Return top authors by draft count.""" + rows = db.top_authors(limit=limit) + return [ + {"name": name, "affiliation": aff, "draft_count": cnt, "drafts": drafts} + for name, aff, cnt, drafts in rows + ] + +def get_org_data(db: Database, limit: int = 20) -> list[dict]: + """Return organization contribution data.""" + rows = db.top_orgs(limit=limit) + return [ + {"org": org, "author_count": authors, "draft_count": drafts} + for org, authors, drafts in rows + ] + +def get_coauthor_network(db: Database, min_shared: int = 1) -> dict: + """Return co-authorship network data for force-directed graph. 
+ + Returns {nodes: [{id, name, org, draft_count}], edges: [{source, target, weight}]} + """ + pairs = db.coauthor_pairs() + top = db.top_authors(limit=100) + + # Build node set from authors who have co-authorships + author_info = {name: {"org": aff, "draft_count": cnt} for name, aff, cnt, _ in top} + node_set = set() + edges = [] + for a, b, shared in pairs: + if shared >= min_shared: + node_set.add(a) + node_set.add(b) + edges.append({"source": a, "target": b, "weight": shared}) + + nodes = [] + for name in node_set: + info = author_info.get(name, {"org": "", "draft_count": 1}) + nodes.append({ + "id": name, + "name": name, + "org": info["org"], + "draft_count": info["draft_count"], + }) + + return {"nodes": nodes, "edges": edges} + +def get_cross_org_data(db: Database, limit: int = 20) -> list[dict]: + """Return cross-org collaboration pairs.""" + rows = db.cross_org_collaborations(limit=limit) + return [ + {"org_a": a, "org_b": b, "shared_drafts": cnt} + for a, b, cnt in rows + ] + +def get_author_network_full(db: Database) -> AuthorNetwork: + """Return author network (cached for 5 min).""" + return _cached("author_network", lambda: _compute_author_network_full(db)) + +def _compute_author_network_full(db: Database) -> AuthorNetwork: + """Return enriched co-authorship network with avg scores and cluster info. 
+ + Returns { + nodes: [{id, name, org, draft_count, avg_score, drafts: [name,...]}], + edges: [{source, target, weight}], + clusters: [{id, members: [name,...], org_mix: {org: count}, size}], + } + """ + pairs = db.coauthor_pairs() + top = db.top_authors(limit=500) + + # Build rating lookup for avg scores + rated = db.drafts_with_ratings(limit=2000) + draft_score = {d.name: r.composite_score for d, r in rated} + + # Author info map + author_info = {} + for name, aff, cnt, drafts in top: + scores = [draft_score[dn] for dn in drafts if dn in draft_score] + avg = round(sum(scores) / len(scores), 2) if scores else 0 + author_info[name] = { + "org": aff, "draft_count": cnt, "drafts": drafts, "avg_score": avg + } + + # Build node set: authors with meaningful collaboration (2+ shared drafts) + node_set = set() + edges = [] + for a, b, shared in pairs: + if shared >= 2: + node_set.add(a) + node_set.add(b) + edges.append({"source": a, "target": b, "weight": shared}) + + # Also include authors with 3+ drafts even if no co-authorships + for name, info in author_info.items(): + if info["draft_count"] >= 3: + node_set.add(name) + + nodes = [] + for name in node_set: + info = author_info.get(name, {"org": "", "draft_count": 1, "drafts": [], "avg_score": 0}) + nodes.append({ + "id": name, + "name": name, + "org": info["org"], + "draft_count": info["draft_count"], + "avg_score": info["avg_score"], + "drafts": info["drafts"][:8], # cap for JSON size + }) + + # Cluster detection via connected components (BFS) + adjacency: dict[str, set[str]] = defaultdict(set) + for e in edges: + adjacency[e["source"]].add(e["target"]) + adjacency[e["target"]].add(e["source"]) + + visited: set[str] = set() + clusters = [] + + # Batch-load all drafts referenced by authors (avoid N+1 in cluster loop) + _all_dn = set() + for _ai in author_info.values(): + _all_dn.update(_ai.get("drafts", [])) + _all_drafts_map = db.get_drafts_by_names(list(_all_dn)) + + for node in sorted(node_set): + if node in 
visited: + continue + component: list[str] = [] + queue = [node] + while queue: + current = queue.pop(0) + if current in visited: + continue + visited.add(current) + component.append(current) + for neighbor in adjacency.get(current, []): + if neighbor not in visited: + queue.append(neighbor) + + if len(component) >= 2: + org_mix: dict[str, int] = Counter() + member_orgs: dict[str, str] = {} + cluster_drafts: dict[str, str] = {} # name -> title + for m in component: + org = author_info.get(m, {}).get("org", "") + if org: + org_mix[org] += 1 + member_orgs[m] = org + for dn in author_info.get(m, {}).get("drafts", []): + if dn not in cluster_drafts: + d = _all_drafts_map.get(dn) + cluster_drafts[dn] = d.title[:80] if d else dn + clusters.append({ + "id": len(clusters), + "members": component, + "member_orgs": member_orgs, + "org_mix": dict(org_mix.most_common()), + "size": len(component), + "drafts": [{"name": n, "title": t} for n, t in list(cluster_drafts.items())], + "draft_count": len(cluster_drafts), + }) + + clusters.sort(key=lambda c: c["size"], reverse=True) + + # Generate meaningful names for clusters + for cl in clusters: + cl["name"] = _author_cluster_name(cl) + + return {"nodes": nodes, "edges": edges, "clusters": clusters} + +def _normalize_org(name: str) -> str: + """Shorten verbose org names for display.""" + # Remove common suffixes + for suffix in (", Inc.", " Inc.", ", Ltd.", " Ltd.", " Co.", " Technologies", + " Corporation", " Corp.", " Limited", " GmbH", " AG", + " Europe Ltd", " Research", " Systems"): + name = name.replace(suffix, "") + return name.strip().rstrip(",").rstrip("&").rstrip() + +def _author_cluster_name(cluster: dict) -> str: + """Derive a meaningful name for an author cluster from orgs and draft titles.""" + # Org part: top 1-2 orgs, normalized + raw_orgs = list(cluster.get("org_mix", {}).keys()) + orgs = [] + seen_short: set[str] = set() + for o in raw_orgs: + short = _normalize_org(o) + if short.lower() not in seen_short: + 
seen_short.add(short.lower()) + orgs.append(short) + if len(orgs) >= 2: + org_label = f"{orgs[0]} + {orgs[1]}" + elif orgs: + org_label = orgs[0] + else: + # Fall back to first member's last name + members = cluster.get("members", []) + org_label = members[0].split()[-1] if members else "Unknown" + + # Topic part: extract common keywords from draft titles + stopwords = { + "a", "an", "the", "of", "for", "in", "to", "and", "on", "with", + "using", "based", "draft", "internet", "ietf", "protocol", "framework", + "requirements", "architecture", "considerations", "use", "cases", "via", + "towards", "over", "from", "into", "between", "specification", "extension", + "extensions", "mechanisms", "mechanism", "version", "new", "general", + } + word_counts: Counter = Counter() + for d in cluster.get("drafts", []): + title = d.get("title", "") + words = re.findall(r"[A-Za-z]{3,}", title) + for w in words: + wl = w.lower() + if wl not in stopwords: + word_counts[wl] += 1 + + # Pick top keyword(s) that appear in multiple drafts + top_words = [w for w, c in word_counts.most_common(3) if c >= 2] + if not top_words: + top_words = [w for w, _ in word_counts.most_common(1)] + + if top_words: + topic = " ".join(w.capitalize() for w in top_words[:2]) + name = f"{org_label} — {topic}" + else: + name = org_label + # Truncate if too long for display + return name if len(name) <= 50 else name[:47] + "…" diff --git a/src/webui/data/drafts.py b/src/webui/data/drafts.py new file mode 100644 index 0000000..a5d9e45 --- /dev/null +++ b/src/webui/data/drafts.py @@ -0,0 +1,381 @@ +"""Draft-related data access functions.""" +from __future__ import annotations + +import json +import re +from collections import Counter, defaultdict +from pathlib import Path +from typing import TypedDict + +from ietf_analyzer.db import Database +from ietf_analyzer.readiness import compute_readiness, compute_readiness_batch +from webui.data._shared import _project_root + + +class OverviewStats(TypedDict): + 
"""High-level dashboard statistics from :func:`get_overview_stats`.""" + total_drafts: int + rated_count: int + author_count: int + idea_count: int + gap_count: int + input_tokens: int + output_tokens: int + false_positive_count: int + +class DraftListItem(TypedDict): + """Single draft in the paginated listing from :func:`get_drafts_page`.""" + name: str + title: str + date: str | None + url: str + pages: int + group: str + source: str + score: float + novelty: float + maturity: float + overlap: float + momentum: float + relevance: float + categories: list[str] + summary: str + readiness: float + +class DraftsPage(TypedDict): + """Paginated draft listing from :func:`get_drafts_page`.""" + drafts: list[DraftListItem] + total: int + page: int + per_page: int + pages: int + +def get_overview_stats(db: Database) -> OverviewStats: + """Return high-level stats for the dashboard home page. + + Excludes drafts flagged as false positives from rated counts. + """ + total_drafts = db.count_drafts(include_false_positives=False) + rated_pairs = db.drafts_with_ratings(limit=1000) # already excludes FPs + rated_count = len(rated_pairs) + author_count = db.author_count() + idea_count = db.idea_count() + gaps = db.all_gaps() + input_tok, output_tok = db.total_tokens_used() + + # Count false positives separately for transparency + total_all = db.count_drafts(include_false_positives=True) + false_positive_count = total_all - total_drafts + + return { + "total_drafts": total_drafts, + "rated_count": rated_count, + "author_count": author_count, + "idea_count": idea_count, + "gap_count": len(gaps), + "input_tokens": input_tok, + "output_tokens": output_tok, + "false_positive_count": false_positive_count, + } + +def get_category_counts(db: Database) -> dict[str, int]: + """Return {category: draft_count} for all categories.""" + return db.category_counts() + +def get_category_summary(db: Database, category: str) -> dict | None: + """Build a data-driven summary for a category. 
Returns None if category not found.""" + pairs = db.drafts_with_ratings(limit=2000) + all_authors = db.top_authors(limit=500) + + # Filter to drafts in this category + cat_pairs = [(d, r) for d, r in pairs if category in r.categories] + if not cat_pairs: + return None + + # Author lookup: draft_name -> [author names] + author_drafts_map: dict[str, list[str]] = defaultdict(list) + for name, aff, cnt, drafts in all_authors: + for dn in drafts: + author_drafts_map[dn].append(name) + + # Dimension averages + n = len(cat_pairs) + avg = lambda vals: round(sum(vals) / len(vals), 1) if vals else 0 + novelty_vals = [r.novelty for _, r in cat_pairs] + maturity_vals = [r.maturity for _, r in cat_pairs] + overlap_vals = [r.overlap for _, r in cat_pairs] + momentum_vals = [r.momentum for _, r in cat_pairs] + relevance_vals = [r.relevance for _, r in cat_pairs] + scores = [r.composite_score for _, r in cat_pairs] + + # Top drafts + sorted_pairs = sorted(cat_pairs, key=lambda p: p[1].composite_score, reverse=True) + top_3 = [(d.name, d.title, round(r.composite_score, 1)) for d, r in sorted_pairs[:3]] + + # Top authors in this category + author_counter: Counter = Counter() + org_counter: Counter = Counter() + author_aff: dict[str, str] = {} + for name, aff, cnt, drafts in all_authors: + author_aff[name] = aff or "" + for d, r in cat_pairs: + for a in author_drafts_map.get(d.name, []): + author_counter[a] += 1 + if author_aff.get(a): + org_counter[author_aff[a]] += 1 + top_authors = author_counter.most_common(5) + top_orgs = org_counter.most_common(5) + + # Strongest and weakest dimensions + dim_avgs = { + "Novelty": avg(novelty_vals), + "Maturity": avg(maturity_vals), + "Overlap": avg(overlap_vals), + "Momentum": avg(momentum_vals), + "Relevance": avg(relevance_vals), + } + strongest = max(dim_avgs, key=dim_avgs.get) + weakest = min(dim_avgs, key=dim_avgs.get) + + # Activity trend: how many are recent (last 6 months)? 
+ recent = sum(1 for d, _ in cat_pairs if d.time and d.time >= "2025-09") + total_all = len(pairs) + + # Build text summary + lines = [] + lines.append(f"**{n} drafts** ({n * 100 // total_all}% of all rated drafts) " + f"with an average composite score of **{avg(scores):.1f}/5.0**.") + + # Dimension profile + lines.append(f"Strongest dimension: **{strongest}** ({dim_avgs[strongest]}), " + f"weakest: **{weakest}** ({dim_avgs[weakest]}).") + + # Maturity vs novelty insight + if dim_avgs["Maturity"] < 2.5 and dim_avgs["Novelty"] >= 3.0: + lines.append("This category has **high novelty but low maturity** — many early-stage proposals with fresh ideas that haven't been fully developed yet.") + elif dim_avgs["Maturity"] >= 3.0 and dim_avgs["Novelty"] < 2.5: + lines.append("This category is **mature but less novel** — established approaches being refined rather than introducing fundamentally new concepts.") + elif dim_avgs["Maturity"] >= 3.0 and dim_avgs["Novelty"] >= 3.0: + lines.append("This category shows **both high novelty and maturity** — well-developed proposals with genuinely new contributions.") + + # Overlap insight + if dim_avgs["Overlap"] >= 3.5: + lines.append(f"High overlap ({dim_avgs['Overlap']}) suggests **significant duplication** — multiple drafts cover similar ground, which may indicate convergence or fragmentation.") + elif dim_avgs["Overlap"] <= 2.0: + lines.append(f"Low overlap ({dim_avgs['Overlap']}) indicates **diverse approaches** — drafts in this category tackle distinct problems with little redundancy.") + + # Activity + if recent > 0: + lines.append(f"**{recent} draft{'s' if recent != 1 else ''}** submitted in the last 6 months, " + f"suggesting {'active' if recent >= 3 else 'moderate'} development.") + + return { + "text": " ".join(lines), + "count": n, + "avg_score": avg(scores), + "dimensions": dim_avgs, + "top_drafts": top_3, + "top_authors": top_authors, + "top_orgs": top_orgs, + "strongest": strongest, + "weakest": weakest, + } + +def 
get_drafts_page( + db: Database, + page: int = 1, + per_page: int = 50, + search: str = "", + category: str = "", + min_score: float = 0.0, + sort: str = "score", + sort_dir: str = "desc", + source: str = "", +) -> DraftsPage: + """Return a paginated, filtered list of drafts with ratings. + + Returns dict with keys: drafts, total, page, per_page, pages. + """ + pairs = db.drafts_with_ratings(limit=1000) + + # Build author lookup for search (draft_name -> "author1 author2 ...") + author_text_by_draft: dict[str, str] = {} + if search: + rows = db.conn.execute( + """SELECT da.draft_name, GROUP_CONCAT(a.name, ' ') as names + FROM draft_authors da JOIN authors a ON da.person_id = a.person_id + GROUP BY da.draft_name""" + ).fetchall() + for r in rows: + author_text_by_draft[r[0]] = r[1] or "" + + # Filter + filtered = [] + for draft, rating in pairs: + if min_score > 0 and rating.composite_score < min_score: + continue + if category and category not in rating.categories: + continue + if source and draft.source != source: + continue + if search: + author_names = author_text_by_draft.get(draft.name, "") + haystack = f"{draft.name} {draft.title} {rating.summary} {author_names}".lower() + if not all(w in haystack for w in search.lower().split()): + continue + filtered.append((draft, rating)) + + # Sort + sort_keys = { + "score": lambda p: p[1].composite_score, + "name": lambda p: p[0].name, + "date": lambda p: p[0].time or "", + "novelty": lambda p: p[1].novelty, + "maturity": lambda p: p[1].maturity, + "relevance": lambda p: p[1].relevance, + "overlap": lambda p: p[1].overlap, + "momentum": lambda p: p[1].momentum, + "readiness": lambda p: (1.0 if p[0].name.startswith("draft-ietf-") else 0.0) * 0.25 + + min(int(p[0].rev or "0") / 5.0, 1.0) * 0.15 + + ((p[1].momentum - 1) / 4.0) * 0.15, + } + key_fn = sort_keys.get(sort, sort_keys["score"]) + reverse = sort_dir == "desc" + filtered.sort(key=key_fn, reverse=reverse) + + total = len(filtered) + pages = max(1, (total + per_page 
- 1) // per_page) + page = max(1, min(page, pages)) + start = (page - 1) * per_page + page_items = filtered[start : start + per_page] + + # Pre-compute readiness in batch (~6 queries total instead of ~200) + + readiness_cache = compute_readiness_batch(db, [d.name for d, _ in page_items]) + + drafts = [] + for draft, rating in page_items: + r_score = readiness_cache.get(draft.name, {}).get("score", 0) + drafts.append({ + "name": draft.name, + "title": draft.title, + "date": draft.date, + "url": draft.source_url if draft.source != "ietf" else draft.datatracker_url, + "pages": draft.pages or 0, + "group": draft.group or "individual", + "source": draft.source or "ietf", + "score": round(rating.composite_score, 2), + "novelty": rating.novelty, + "maturity": rating.maturity, + "overlap": rating.overlap, + "momentum": rating.momentum, + "relevance": rating.relevance, + "categories": rating.categories, + "summary": rating.summary, + "readiness": r_score, + }) + + return { + "drafts": drafts, + "total": total, + "page": page, + "per_page": per_page, + "pages": pages, + } + +def get_draft_detail(db: Database, name: str) -> dict | None: + """Return full detail for a single draft.""" + draft = db.get_draft(name) + if not draft: + return None + + rating = db.get_rating(name) + authors = db.get_authors_for_draft(name) + ideas = db.get_ideas_for_draft(name) + refs = db.get_refs_for_draft(name) + + result = { + "name": draft.name, + "title": draft.title, + "rev": draft.rev, + "abstract": draft.abstract, + "date": draft.date, + "time": draft.time, + "url": draft.datatracker_url, + "text_url": draft.text_url, + "pages": draft.pages, + "words": draft.words, + "group": draft.group or "individual", + "categories": draft.categories, + "tags": draft.tags, + "authors": [ + {"name": a.name, "affiliation": a.affiliation, "person_id": a.person_id} + for a in authors + ], + "ideas": ideas, + "refs": [{"type": t, "id": rid} for t, rid in refs], + } + + if rating: + result["rating"] = { + 
"score": round(rating.composite_score, 2), + "novelty": rating.novelty, + "maturity": rating.maturity, + "overlap": rating.overlap, + "momentum": rating.momentum, + "relevance": rating.relevance, + "summary": rating.summary, + "novelty_note": rating.novelty_note, + "maturity_note": rating.maturity_note, + "overlap_note": rating.overlap_note, + "momentum_note": rating.momentum_note, + "relevance_note": rating.relevance_note, + "categories": rating.categories, + } + + # Readiness score + + result["readiness"] = compute_readiness(db, name) + + # Annotation + annotation = db.get_annotation(name) + result["annotation"] = annotation + + return result + +def get_generated_drafts() -> list[dict]: + """Return list of pre-generated draft files in data/reports/generated-drafts/.""" + drafts_dir = _project_root / "data" / "reports" / "generated-drafts" + if not drafts_dir.exists(): + return [] + results = [] + for f in sorted(drafts_dir.glob("draft-*.txt")): + # Extract title from first non-empty content line after header + title = f.stem + text = f.read_text(errors="replace") + for line in text.splitlines(): + stripped = line.strip() + if stripped and not stripped.startswith("Internet-Draft") and \ + not stripped.startswith("Intended status") and \ + not stripped.startswith("Expires:") and stripped != "": + title = stripped + break + results.append({ + "filename": f.name, + "stem": f.stem, + "title": title, + "size": f.stat().st_size, + "path": str(f), + }) + return results + +def read_generated_draft(filename: str) -> str | None: + """Read a generated draft file by filename. 
def read_generated_draft(filename: str) -> str | None:
    """Read a pre-generated draft file by filename.  Returns text or None.

    None is returned when the file is missing or when *filename* escapes
    the generated-drafts directory.
    """
    drafts_dir = _project_root / "data" / "reports" / "generated-drafts"
    path = resolve_generated_path(drafts_dir, filename)
    if path is None or not path.is_file():
        return None
    return path.read_text(errors="replace")


def resolve_generated_path(base_dir: Path, filename: str) -> Path | None:
    """Resolve *filename* inside *base_dir*; return None if it escapes it.

    Internal helper.  Uses Path.is_relative_to for the containment check:
    the previous str.startswith() comparison was separator-unaware and
    also accepted *sibling* directories whose names merely share the
    prefix, e.g. ".../generated-drafts-archive/x" passed a
    ".../generated-drafts" prefix test.
    """
    resolved = (base_dir / filename).resolve()
    if not resolved.is_relative_to(base_dir.resolve()):
        return None
    return resolved


# ==== new file: src/webui/data/gaps.py ====
# """Gap analysis data access functions."""
# (module imports in the real file: from __future__ import annotations;
#  from ietf_analyzer.db import Database)


def get_all_gaps(db: Database) -> list[dict]:
    """Return all gap analysis results, sorted by severity (critical first)."""
    severity_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    gaps = db.all_gaps()
    # Unknown severity strings sort after all known ones (rank 99).
    gaps.sort(key=lambda gap: severity_rank.get(gap.get("severity", "low"), 99))
    return gaps


def get_gap_detail(db: Database, gap_id: int) -> dict | None:
    """Return a single gap by ID, or None if not found."""
    return next((gap for gap in db.all_gaps() if gap["id"] == gap_id), None)


# ==== new file: src/webui/data/proposals.py ====
# """Proposal data access functions."""
# (module imports in the real file: from __future__ import annotations;
#  from ietf_analyzer.db import Database)


def get_all_proposals(db: Database) -> list[dict]:
    """Return all proposals with their linked gap records attached as p["gaps"]."""
    proposals = db.all_proposals()
    gaps_by_id = {gap["id"]: gap for gap in db.all_gaps()}
    for proposal in proposals:
        proposal["gaps"] = [gaps_by_id[gid]
                            for gid in proposal.get("gap_ids", [])
                            if gid in gaps_by_id]
    return proposals
db.all_gaps()} + p["gaps"] = [gaps[gid] for gid in p.get("gap_ids", []) if gid in gaps] + return p + +def get_proposals_for_gap(db: Database, gap_id: int) -> list[dict]: + """Return proposals linked to a specific gap.""" + return db.get_proposals_for_gap(gap_id) diff --git a/src/webui/data/ratings.py b/src/webui/data/ratings.py new file mode 100644 index 0000000..172954e --- /dev/null +++ b/src/webui/data/ratings.py @@ -0,0 +1,155 @@ +"""Rating-related data access functions.""" +from __future__ import annotations + +import json +from collections import Counter, defaultdict + +from ietf_analyzer.db import Database + + +def get_rating_distributions(db: Database) -> dict: + """Return arrays for each rating dimension, suitable for Plotly.""" + pairs = db.drafts_with_ratings(limit=1000) + dims = { + "novelty": [], + "maturity": [], + "overlap": [], + "momentum": [], + "relevance": [], + "scores": [], + "categories": [], + "names": [], + "sources": [], + } + for draft, rating in pairs: + dims["novelty"].append(rating.novelty) + dims["maturity"].append(rating.maturity) + dims["overlap"].append(rating.overlap) + dims["momentum"].append(rating.momentum) + dims["relevance"].append(rating.relevance) + dims["scores"].append(round(rating.composite_score, 2)) + dims["categories"].append(rating.categories[0] if rating.categories else "Other") + dims["names"].append(draft.name) + dims["sources"].append(getattr(draft, "source", "ietf") or "ietf") + return dims + +def get_category_radar_data(db: Database) -> dict: + """Return average rating profiles per category for radar chart.""" + pairs = db.drafts_with_ratings(limit=1000) + cat_ratings: dict[str, list] = defaultdict(list) + for _, r in pairs: + for c in r.categories: + cat_ratings[c].append(r) + + top_cats = sorted(cat_ratings.keys(), key=lambda c: len(cat_ratings[c]), reverse=True)[:8] + result = {} + for cat in top_cats: + ratings = cat_ratings[cat] + n = len(ratings) + result[cat] = { + "count": n, + "novelty": 
def get_score_histogram(db: Database) -> list[float]:
    """Return list of composite scores (rounded to 2 dp) for the histogram."""
    pairs = db.drafts_with_ratings(limit=1000)
    return [round(r.composite_score, 2) for _, r in pairs]


# Words too generic to be informative when profiling false-positive texts.
_FP_STOP_WORDS = frozenset({
    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
    "of", "with", "by", "from", "is", "it", "that", "this", "are", "was",
    "be", "as", "can", "may", "will", "not", "has", "have", "been", "which",
    "their", "its", "also", "such", "these", "would", "should", "could",
    "more", "other", "than", "into", "about", "between", "over", "after",
    "all", "one", "two", "new", "they", "we", "our", "each", "some", "any",
    "there", "what", "when", "how", "where", "who", "does", "do", "did",
    "no", "if", "so", "up", "out", "only", "used", "using", "use", "based",
    "through", "both", "well", "within", "must", "while", "had", "were",
})


def count_fp_terms(rows) -> list[tuple[str, int]]:
    """Return the 30 most common informative words across the rows' texts.

    *rows* are mappings with "abstract" and "title" keys (sqlite3.Row or
    dict); either value may be None.  Words are lowercased, must be at
    least 3 letters, and stop words are dropped.  Internal helper for
    get_false_positive_profile.
    """
    # BUG FIX: the original inlined this logic and called re.findall even
    # though this module never imported `re`, so profiling raised a
    # NameError at runtime as soon as any false positive existed.
    import re
    counts: Counter = Counter()
    for row in rows:
        text = (row["abstract"] or "").lower() + " " + (row["title"] or "").lower()
        for word in re.findall(r"[a-z]{3,}", text):
            if word not in _FP_STOP_WORDS:
                counts[word] += 1
    return counts.most_common(30)


def get_false_positive_profile(db: Database) -> dict:
    """Profile drafts flagged as false positives.

    Compares the flagged drafts against the non-flagged rated drafts:
    per-dimension score arrays, category/source breakdowns, coverage
    percentages, and the most frequent title/abstract terms.
    """
    fp_rows = db.false_positive_drafts_raw()
    nonfp_rows = db.non_false_positive_ratings_raw()
    total_rated = db.rated_count()
    total_drafts = db.count_drafts(include_false_positives=True)

    fp_list = []
    fp_categories: Counter = Counter()
    fp_sources: Counter = Counter()
    fp_dims: dict[str, list] = {
        "novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": [],
    }

    for row in fp_rows:
        cats = json.loads(row["r_categories"]) if row["r_categories"] else []
        src = row["source"] or "ietf"
        fp_list.append({
            "name": row["name"],
            "title": row["title"],
            "source": src,
            "categories": cats,
            "relevance": row["relevance"],
            "novelty": row["novelty"],
            "maturity": row["maturity"],
            "overlap": row["overlap"],
            "momentum": row["momentum"],
            "summary": row["summary"] or "",
        })
        for cat in cats:
            fp_categories[cat] += 1
        fp_sources[src] += 1
        for dim in fp_dims:
            fp_dims[dim].append(row[dim])

    # Non-FP dimensions and categories for side-by-side comparison.
    nonfp_dims: dict[str, list] = {
        "novelty": [], "maturity": [], "overlap": [], "momentum": [], "relevance": [],
    }
    nonfp_categories: Counter = Counter()
    for row in nonfp_rows:
        for dim in nonfp_dims:
            nonfp_dims[dim].append(row[dim])
        cats = json.loads(row["r_categories"]) if row["r_categories"] else []
        for cat in cats:
            nonfp_categories[cat] += 1

    top_terms = count_fp_terms(fp_rows)

    return {
        "count": len(fp_list),
        "total_rated": total_rated,
        "total_drafts": total_drafts,
        "pct_of_total": round(100 * len(fp_list) / total_drafts, 1) if total_drafts else 0,
        "pct_of_rated": round(100 * len(fp_list) / total_rated, 1) if total_rated else 0,
        "fp_list": fp_list,
        "fp_categories": dict(fp_categories.most_common()),
        "fp_sources": dict(fp_sources.most_common()),
        "fp_dims": fp_dims,
        "nonfp_dims": nonfp_dims,
        "top_terms": top_terms,
        "nonfp_categories": dict(nonfp_categories.most_common(20)),
    }
+ LIMIT 50""", + (like, like, like), + ).fetchall() + for r in rows: + results["drafts"].append({ + "name": r["name"], + "title": r["title"], + "abstract": (r["abstract"] or "")[:200], + "date": r["time"], + "group": r["group"] or "individual", + }) + + # 2. Ideas via LIKE + like = f"%{q}%" + rows = db.conn.execute( + """SELECT id, title, description, idea_type, draft_name FROM ideas + WHERE title LIKE ? OR description LIKE ? + ORDER BY id LIMIT 50""", + (like, like), + ).fetchall() + for r in rows: + results["ideas"].append({ + "id": r["id"], + "title": r["title"], + "description": (r["description"] or "")[:200], + "type": r["idea_type"], + "draft_name": r["draft_name"], + }) + + # 3. Authors via LIKE + results["authors"] = db.search_authors(q, limit=50) + + # 4. Gaps via LIKE + results["gaps"] = db.search_gaps(q, limit=50) + + return results + +def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict: + """Search-only (free) — returns sources + cached answer if available.""" + config = Config.load() + searcher = HybridSearch(config, db) + return searcher.search_only(question, top_k=top_k) + +def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict: + """Run Claude synthesis (costs tokens, result is cached permanently).""" + config = Config.load() + searcher = HybridSearch(config, db) + return searcher.ask(question, top_k=top_k, cheap=cheap) diff --git a/src/webui/templates/errors/404.html b/src/webui/templates/errors/404.html new file mode 100644 index 0000000..8ca19ee --- /dev/null +++ b/src/webui/templates/errors/404.html @@ -0,0 +1,23 @@ +{% extends "base.html" %} + +{% block title %}404 — Not Found{% endblock %} + +{% block content %} +
  <div class="error-content">
    <div class="error-code">
      404
    </div>
    <div class="error-title">
      Page Not Found
    </div>
    <div class="error-message">
      The page you're looking for doesn't exist or has been moved.
    </div>
    <a href="/" class="error-link">&larr; Back to overview</a>
  </div>
</div>
{% endblock %}
diff --git a/src/webui/templates/errors/500.html b/src/webui/templates/errors/500.html
new file mode 100644
index 0000000..59bb879
--- /dev/null
+++ b/src/webui/templates/errors/500.html
@@ -0,0 +1,20 @@
{% extends "base.html" %}

{% block title %}500 — Server Error{% endblock %}

{% block content %}
<div class="error-page">
  <div class="error-content">
    <div class="error-code">
      500
    </div>
    <div class="error-title">
      Internal Server Error
    </div>
    <div class="error-message">
      Something went wrong on our end. Please try again later.
    </div>
    <a href="/" class="error-link">&larr; Back to overview</a>
  </div>
</div>
{% endblock %}