Run pipeline, write Post 08, commit untracked files
Pipeline: - Extract ideas for 38 new drafts → 462 ideas total - Convergence analysis: 132 cross-org convergent ideas (33% rate) - Fetch authors for 102 drafts → 709 authors (up from 403) - Refresh gap analysis: 12 gaps across full 474-draft corpus - Update verified counts with new totals Post 08: - Complete rewrite of "Agents Building the Agent Analysis" (2,953 words) - Covers 3 phases: writing team → review cycle → fix cycle - Meta-irony table mapping team coordination to IETF gap names - Specific examples from dev journal (SQL injection, consent conflation, ideas mismatch) Untracked files committed: - scripts/: backfill-wg-names, classify-unrated, compare-classifiers, download-relevant-text, run-webui - src/ietf_analyzer/classifier.py: two-stage Ollama classifier - src/webui/: analytics (GDPR-compliant), auth, obsidian_export - tests/test_obsidian_export.py (10 tests) - data/reports/: wg-analysis, generated draft for gap #37 Housekeeping: - .gitignore: exclude LaTeX artifacts, stale DBs, analytics.db Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
244
src/webui/analytics.py
Normal file
244
src/webui/analytics.py
Normal file
@@ -0,0 +1,244 @@
|
||||
"""Lightweight, GDPR-compliant analytics using SQLite.
|
||||
|
||||
No cookies, no personal data, no consent needed.
|
||||
Visitor uniqueness is estimated via daily-salted IP hash (not stored raw).
|
||||
Data lives in a separate analytics.db to keep the main DB clean.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import sqlite3
|
||||
import time
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import date, datetime, timedelta
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from flask import Flask, request, g
|
||||
|
||||
# SQL schema for the analytics database, applied idempotently on every
# connect (all statements are IF NOT EXISTS).
# Two tables:
#   page_views — one row per tracked request; ts/date default to insert time
#                so callers only supply the payload columns.
#   downloads  — one row per vault export, tracked even for bots.
# Indexes cover the date-range and per-path queries in get_analytics_data().
_SCHEMA = """
CREATE TABLE IF NOT EXISTS page_views (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%S', 'now')),
    date TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d', 'now')),
    path TEXT NOT NULL,
    referrer TEXT,
    visitor TEXT,
    ua_type TEXT
);

CREATE INDEX IF NOT EXISTS idx_pv_date ON page_views(date);
CREATE INDEX IF NOT EXISTS idx_pv_path ON page_views(path);

CREATE TABLE IF NOT EXISTS downloads (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%S', 'now')),
    date TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d', 'now')),
    file_type TEXT NOT NULL,
    visitor TEXT
);

CREATE INDEX IF NOT EXISTS idx_dl_date ON downloads(date);
"""

# Daily salt rotates so yesterday's hashes can't be correlated with today's
# Cached as (iso_date, salt_hex); lazily refreshed when the date changes.
_daily_salt: tuple[str, str] = ("", "")
|
||||
|
||||
|
||||
def _get_salt() -> str:
    """Return today's visitor-hash salt, regenerating it on date rollover.

    The salt is derived deterministically from the current ISO date, so all
    workers agree on it without coordination, yet it changes every day.
    """
    global _daily_salt
    cached_day, cached_salt = _daily_salt
    current_day = date.today().isoformat()
    if cached_day != current_day:
        digest = hashlib.sha256(f"ietf-analytics-{current_day}".encode()).hexdigest()
        cached_salt = digest[:16]
        _daily_salt = (current_day, cached_salt)
    return cached_salt
|
||||
|
||||
|
||||
def _hash_visitor(ip: str) -> str:
    """Create a daily-rotating hash from IP. Cannot be reversed or correlated across days."""
    # Salt + IP go through SHA-256; only a short prefix is kept, which is
    # plenty for uniqueness estimation and stores no raw address.
    payload = f"{_get_salt()}:{ip}".encode()
    return hashlib.sha256(payload).hexdigest()[:12]
|
||||
|
||||
|
||||
def _classify_ua(ua: str) -> str:
|
||||
"""Rough bot/browser classification."""
|
||||
ua_lower = ua.lower()
|
||||
if any(b in ua_lower for b in ("bot", "spider", "crawl", "slurp", "wget", "curl", "python-requests")):
|
||||
return "bot"
|
||||
if "mobile" in ua_lower:
|
||||
return "mobile"
|
||||
return "browser"
|
||||
|
||||
|
||||
def _get_analytics_db() -> sqlite3.Connection:
    """Get or create the analytics DB connection for this request.

    The connection is cached on Flask's ``g`` for the lifetime of the
    request and closed by the teardown hook registered in init_analytics().

    Returns:
        An open sqlite3 connection with Row factory and schema applied.
    """
    if "analytics_db" not in g:
        # BUG FIX: the previous code computed a fallback path from
        # ``request.app_root`` first — an attribute Flask's request object
        # does not have — so every call raised AttributeError (silently
        # swallowed by the caller) and nothing was ever recorded.
        # Prefer the path configured by init_analytics() instead.
        if hasattr(g, "_analytics_db_path"):
            db_path = Path(g._analytics_db_path)
        else:
            # analytics.py lives in src/webui/, so parents[2] is the project
            # root — mirrors init_analytics's Path(app.root_path).parent.parent.
            db_path = Path(__file__).resolve().parents[2] / "data" / "analytics.db"
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(db_path))
        conn.row_factory = sqlite3.Row
        conn.executescript(_SCHEMA)  # idempotent: IF NOT EXISTS throughout
        g.analytics_db = conn
    return g.analytics_db
|
||||
|
||||
|
||||
# Paths to skip (static assets, API calls, etc.)
# Matched by str.startswith in track_pageview before any DB work happens.
_SKIP_PREFIXES = ("/static/", "/api/", "/favicon", "/robots.txt", "/admin/")
|
||||
|
||||
|
||||
def init_analytics(app: Flask, db_path: str | None = None):
    """Register analytics hooks on the Flask app.

    Args:
        app: The Flask application to instrument.
        db_path: Optional explicit path to the analytics SQLite database.
            Defaults to ``<project-root>/data/analytics.db`` derived from
            the app's root path (src/webui → two levels up).
    """
    _resolved_db_path = Path(db_path) if db_path else (
        Path(app.root_path).parent.parent / "data" / "analytics.db"
    )

    @app.before_request
    def track_pageview():
        path = request.path

        # Skip static/API/admin routes
        if any(path.startswith(p) for p in _SKIP_PREFIXES):
            return

        # Hand the resolved path to _get_analytics_db() via g.
        g._analytics_db_path = _resolved_db_path

        try:
            conn = _get_analytics_db()
            ip = request.remote_addr or "unknown"
            visitor = _hash_visitor(ip)
            ua = request.headers.get("User-Agent", "")
            ua_type = _classify_ua(ua)

            # Track downloads separately — including bot downloads.
            if path == "/export/obsidian":
                conn.execute(
                    "INSERT INTO downloads (file_type, visitor) VALUES (?, ?)",
                    ("obsidian", visitor),
                )
                conn.commit()

            # Bots are excluded from page-view counts entirely.
            # BUG FIX: previously a bot hitting /export/obsidian fell through
            # and was ALSO inserted into page_views, contradicting the
            # "skip bots from page view counts" intent.
            if ua_type == "bot":
                return

            referrer = request.headers.get("Referer", "")
            # Only keep the domain of referrer (privacy: drop path/query).
            if referrer:
                try:
                    parsed = urlparse(referrer)
                    referrer = parsed.netloc or ""
                except Exception:
                    referrer = ""

            conn.execute(
                "INSERT INTO page_views (path, referrer, visitor, ua_type) VALUES (?, ?, ?, ?)",
                (path, referrer, visitor, ua_type),
            )
            conn.commit()
        except Exception:
            pass  # Analytics should never break the app

    @app.teardown_appcontext
    def close_analytics_db(exception=None):
        # Release the per-request connection cached by _get_analytics_db().
        conn = g.pop("analytics_db", None)
        if conn is not None:
            conn.close()
|
||||
|
||||
|
||||
def get_analytics_data(db_path: str | Path) -> dict:
    """Query analytics data for the dashboard. Returns dicts ready for rendering.

    Opens its own short-lived connection (independent of the request-scoped
    one) so it can be called from any context.

    Args:
        db_path: Filesystem path to the analytics SQLite database.

    Returns:
        Dict with keys: "stats" (headline counters), "daily" (30-day series),
        "top_pages", "top_referrers", "downloads_daily", "hourly".
    """
    conn = sqlite3.connect(str(db_path))
    # FIX: always release the connection — the previous version leaked the
    # handle if any query raised before the final conn.close().
    try:
        conn.row_factory = sqlite3.Row
        conn.executescript(_SCHEMA)  # idempotent: creates tables on first use

        today = date.today()
        week_ago = (today - timedelta(days=7)).isoformat()
        month_ago = (today - timedelta(days=30)).isoformat()

        # --- Overall stats ---
        total_views = conn.execute("SELECT COUNT(*) FROM page_views").fetchone()[0]
        # Visitor hashes rotate daily, so "unique visitors" means per-day
        # unique: count distinct (visitor || date) pairs.
        total_visitors = conn.execute("SELECT COUNT(DISTINCT visitor || date) FROM page_views").fetchone()[0]
        total_downloads = conn.execute("SELECT COUNT(*) FROM downloads").fetchone()[0]

        today_views = conn.execute(
            "SELECT COUNT(*) FROM page_views WHERE date = ?", (today.isoformat(),)
        ).fetchone()[0]
        today_visitors = conn.execute(
            "SELECT COUNT(DISTINCT visitor) FROM page_views WHERE date = ?", (today.isoformat(),)
        ).fetchone()[0]

        week_views = conn.execute(
            "SELECT COUNT(*) FROM page_views WHERE date >= ?", (week_ago,)
        ).fetchone()[0]
        month_views = conn.execute(
            "SELECT COUNT(*) FROM page_views WHERE date >= ?", (month_ago,)
        ).fetchone()[0]

        # --- Daily views (last 30 days) ---
        daily_rows = conn.execute(
            "SELECT date, COUNT(*) as views, COUNT(DISTINCT visitor) as visitors "
            "FROM page_views WHERE date >= ? GROUP BY date ORDER BY date",
            (month_ago,),
        ).fetchall()
        daily = {
            "dates": [r["date"] for r in daily_rows],
            "views": [r["views"] for r in daily_rows],
            "visitors": [r["visitors"] for r in daily_rows],
        }

        # --- Top pages (last 30 days) ---
        page_rows = conn.execute(
            "SELECT path, COUNT(*) as views, COUNT(DISTINCT visitor) as visitors "
            "FROM page_views WHERE date >= ? GROUP BY path ORDER BY views DESC LIMIT 20",
            (month_ago,),
        ).fetchall()
        top_pages = [{"path": r["path"], "views": r["views"], "visitors": r["visitors"]} for r in page_rows]

        # --- Top referrers (last 30 days) ---
        ref_rows = conn.execute(
            "SELECT referrer, COUNT(*) as count FROM page_views "
            "WHERE date >= ? AND referrer != '' GROUP BY referrer ORDER BY count DESC LIMIT 15",
            (month_ago,),
        ).fetchall()
        top_referrers = [{"referrer": r["referrer"], "count": r["count"]} for r in ref_rows]

        # --- Downloads over time ---
        dl_rows = conn.execute(
            "SELECT date, COUNT(*) as count FROM downloads GROUP BY date ORDER BY date"
        ).fetchall()
        downloads_daily = {
            "dates": [r["date"] for r in dl_rows],
            "counts": [r["count"] for r in dl_rows],
        }

        # --- Hourly pattern (last 7 days) ---
        hourly_rows = conn.execute(
            "SELECT CAST(strftime('%H', ts) AS INTEGER) as hour, COUNT(*) as views "
            "FROM page_views WHERE date >= ? GROUP BY hour ORDER BY hour",
            (week_ago,),
        ).fetchall()
        hourly = {r["hour"]: r["views"] for r in hourly_rows}
        # Fill the gaps so the chart always has all 24 hours.
        hourly_full = {"hours": list(range(24)), "views": [hourly.get(h, 0) for h in range(24)]}

        return {
            "stats": {
                "total_views": total_views,
                "total_visitors": total_visitors,
                "total_downloads": total_downloads,
                "today_views": today_views,
                "today_visitors": today_visitors,
                "week_views": week_views,
                "month_views": month_views,
            },
            "daily": daily,
            "top_pages": top_pages,
            "top_referrers": top_referrers,
            "downloads_daily": downloads_daily,
            "hourly": hourly_full,
        }
    finally:
        conn.close()
|
||||
55
src/webui/auth.py
Normal file
55
src/webui/auth.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""Admin authentication with two run modes.
|
||||
|
||||
Production (default):
|
||||
python src/webui/app.py
|
||||
All admin routes return 404. No way to access private features.
|
||||
|
||||
Development:
|
||||
python src/webui/app.py --dev
|
||||
Every request is auto-authenticated as admin. No login needed.
|
||||
|
||||
The mode is set once at startup and cannot be changed at runtime.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import wraps
|
||||
|
||||
from flask import abort, g
|
||||
|
||||
# Module-level flag set by init_auth()
# True → every request is treated as admin (development runs only).
_dev_mode: bool = False
# Guards against registering the Flask hooks more than once.
_initialized: bool = False
|
||||
|
||||
|
||||
def is_admin() -> bool:
    """Check if the current request has admin access.

    Admin access is purely a function of the process-wide mode flag:
    dev mode means everyone is admin, production means nobody is.
    """
    return _dev_mode
|
||||
|
||||
|
||||
def admin_required(f):
    """Decorator: returns 404 for non-admin users so routes stay hidden.

    A 404 (rather than 401/403) avoids even revealing that the route exists.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        if is_admin():
            return f(*args, **kwargs)
        abort(404)
    return wrapper
|
||||
|
||||
|
||||
def init_auth(app, dev: bool = False):
    """Set the auth mode and register Flask hooks (once only).

    The mode flag is refreshed on every call, but the request hooks are
    registered only the first time to avoid duplicate handlers.
    """
    global _dev_mode, _initialized
    _dev_mode = dev

    if _initialized:
        return
    _initialized = True

    @app.before_request
    def _stash_admin_flag():
        # Cache the per-request admin verdict on g for handlers to read.
        g.is_admin = is_admin()

    @app.context_processor
    def _expose_admin_flag():
        # Expose {{ is_admin }} to every template render.
        return {"is_admin": g.get("is_admin", False)}
|
||||
508
src/webui/obsidian_export.py
Normal file
508
src/webui/obsidian_export.py
Normal file
@@ -0,0 +1,508 @@
|
||||
"""Export research data as an Obsidian-compatible vault (ZIP).
|
||||
|
||||
Generates interlinked markdown files with YAML frontmatter,
|
||||
[[wikilinks]], #tags, and Mermaid diagrams that Obsidian renders natively.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import date
|
||||
|
||||
from ietf_analyzer.db import Database
|
||||
|
||||
from webui.data import _extract_month
|
||||
|
||||
|
||||
def _safe_filename(name: str) -> str:
|
||||
"""Sanitize a string for use as a filename."""
|
||||
return name.replace("/", "-").replace("\\", "-").replace(":", "-").replace('"', "")
|
||||
|
||||
|
||||
def _score_bar(val: float, max_val: float = 5.0) -> str:
|
||||
"""Render a simple text progress bar."""
|
||||
filled = round(val / max_val * 10)
|
||||
return "`" + "\u2588" * filled + "\u2591" * (10 - filled) + f"` {val}/{max_val}"
|
||||
|
||||
|
||||
def _mermaid_pie(title: str, data: dict[str, int], limit: int = 12) -> str:
|
||||
"""Generate a Mermaid pie chart."""
|
||||
items = list(data.items())[:limit]
|
||||
if not items:
|
||||
return ""
|
||||
lines = [f'```mermaid\npie title {title}']
|
||||
for label, count in items:
|
||||
safe_label = label.replace('"', "'")
|
||||
lines.append(f' "{safe_label}" : {count}')
|
||||
lines.append("```")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _mermaid_bar(title: str, data: dict[str, float], limit: int = 15) -> str:
|
||||
"""Generate a Mermaid xychart bar chart."""
|
||||
items = list(data.items())[:limit]
|
||||
if not items:
|
||||
return ""
|
||||
labels = [f'"{k[:20]}"' for k, _ in items]
|
||||
values = [str(round(v, 1)) for _, v in items]
|
||||
return f"""```mermaid
|
||||
xychart-beta
|
||||
title "{title}"
|
||||
x-axis [{", ".join(labels)}]
|
||||
y-axis "Score"
|
||||
bar [{", ".join(values)}]
|
||||
```"""
|
||||
|
||||
|
||||
def _mermaid_timeline_chart(monthly: dict[str, int]) -> str:
|
||||
"""Generate a Mermaid xychart for submissions over time."""
|
||||
if len(monthly) < 2:
|
||||
return ""
|
||||
months = sorted(monthly.keys())
|
||||
# Show every 3rd label to avoid clutter
|
||||
labels = []
|
||||
for i, m in enumerate(months):
|
||||
if i % 3 == 0:
|
||||
labels.append(f'"{m}"')
|
||||
else:
|
||||
labels.append('" "')
|
||||
values = [str(monthly[m]) for m in months]
|
||||
return f"""```mermaid
|
||||
xychart-beta
|
||||
title "Draft Submissions Over Time"
|
||||
x-axis [{", ".join(labels)}]
|
||||
y-axis "Drafts"
|
||||
bar [{", ".join(values)}]
|
||||
```"""
|
||||
|
||||
|
||||
def build_obsidian_vault(db: Database) -> bytes:
    """Build a ZIP file containing an Obsidian vault with all research data.

    Vault layout (everything under one top-level folder):
      Dashboard.md       — entry point: stats, charts, navigation
      Drafts/<name>.md   — one note per draft (frontmatter, rating, ideas)
      Authors/…          — index + one note per author with co-author links
      Categories/…       — index + one note per category with ranked drafts
      Analysis/…         — score distribution, top rated, ideas, timeline, glossary
      .obsidian/graph.json — graph-view color groups per folder

    Fixes over the previous revision:
      * category tables no longer crash (TypeError) when a draft title is None
      * author notes handle missing/0.0 scores and None titles correctly
      * "Top Rated" analysis really lists up to 30 drafts (the list was
        pre-truncated to 15 for the dashboard and then reused)
      * removed unused locals (author_info, score_buckets/score_dist)

    Args:
        db: Project database handle (drafts, ratings, ideas, authors).

    Returns:
        The ZIP archive as raw bytes, ready to stream to the client.
    """
    buf = io.BytesIO()
    prefix = "IETF-AI-Agent-Drafts"

    pairs = db.drafts_with_ratings(limit=2000)
    all_drafts_list = db.list_drafts(limit=2000, order_by="time DESC")
    draft_map = {d.name: d for d in all_drafts_list}
    all_ideas = db.all_ideas()
    all_authors = db.top_authors(limit=500)

    # Build lookup maps
    cat_counts: Counter = Counter()
    cat_drafts: dict[str, list[str]] = defaultdict(list)
    score_map: dict[str, float] = {}
    rating_map: dict[str, object] = {}

    for d, r in pairs:
        score_map[d.name] = r.composite_score
        rating_map[d.name] = r
        for cat in r.categories:
            cat_counts[cat] += 1
            cat_drafts[cat].append(d.name)

    # Monthly submission counts
    monthly: Counter = Counter()
    for d in all_drafts_list:
        monthly[_extract_month(d.time)] += 1

    # Ideas by draft
    ideas_by_draft: dict[str, list[dict]] = defaultdict(list)
    for idea in all_ideas:
        ideas_by_draft[idea.get("draft_name", "")].append(idea)

    # Author names per draft (reverse index of top_authors)
    author_drafts: dict[str, list[str]] = defaultdict(list)
    for name, aff, cnt, drafts in all_authors:
        for dn in drafts:
            author_drafts[dn].append(name)

    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:

        # --- Dashboard.md ---
        # Keep the full ranking; the dashboard shows 15, Analysis/Top Rated
        # shows up to 30 (previously the slice-to-15 happened first, so the
        # analysis page could never show more than 15).
        ranked = sorted(pairs, key=lambda p: p[1].composite_score, reverse=True)
        top_rated = ranked[:15]
        top_table = "| Draft | Score | Category |\n|---|---|---|\n"
        for d, r in top_rated:
            cat = r.categories[0] if r.categories else ""
            top_table += f"| [[{d.name}]] | **{r.composite_score:.2f}** | {cat} |\n"

        cat_pie = _mermaid_pie("Drafts by Category", dict(cat_counts.most_common(12)))
        timeline_chart = _mermaid_timeline_chart(dict(sorted(monthly.items())))

        dashboard = f"""---
tags: [dashboard, ietf, ai-agents]
generated: {date.today().isoformat()}
---

# IETF AI/Agent Draft Analysis

> Automated analysis of {len(all_drafts_list)} Internet-Drafts on AI and agent topics.
> Generated by [IETF Draft Analyzer](https://github.com) on {date.today().isoformat()}.

## Key Stats

| Metric | Value |
|---|---|
| Total Drafts | **{len(all_drafts_list)}** |
| Rated Drafts | **{len(pairs)}** |
| Authors | **{len(all_authors)}** |
| Ideas Extracted | **{len(all_ideas)}** |
| Categories | **{len(cat_counts)}** |

## Categories

{cat_pie}

### Category Index

{chr(10).join(f"- [[{cat}]] ({count} drafts)" for cat, count in cat_counts.most_common())}

## Submissions Over Time

{timeline_chart}

## Top Rated Drafts

{top_table}

## Navigation

- **[[Categories/index|Categories]]** — Browse by topic
- **[[Authors/index|Authors]]** — Browse by author
- **[[Analysis/Score Distribution|Score Distribution]]** — Rating analytics
- **[[Analysis/Top Rated|Top Rated]]** — Highest-scored drafts
- **[[Analysis/Ideas Overview|Ideas]]** — Extracted technical ideas
- **[[Analysis/Glossary|Glossary]]** — Terms, abbreviations, and scoring methodology
"""
        zf.writestr(f"{prefix}/Dashboard.md", dashboard)

        # --- Individual Draft Notes ---
        for d_obj in all_drafts_list:
            name = d_obj.name
            draft = draft_map.get(name, d_obj)
            r = rating_map.get(name)  # None for unrated drafts
            ideas = ideas_by_draft.get(name, [])
            authors = author_drafts.get(name, [])
            month = _extract_month(draft.time)

            # YAML frontmatter (titles are quoted; embedded " becomes ')
            fm_lines = [
                "---",
                f'title: "{(draft.title or name).replace(chr(34), chr(39))}"',
                f"date: {draft.time or 'unknown'}",
                f"rev: {draft.rev or '00'}",
            ]
            if r:
                fm_lines.append(f"score: {r.composite_score:.2f}")
                fm_lines.append(f"novelty: {r.novelty}")
                fm_lines.append(f"maturity: {r.maturity}")
                fm_lines.append(f"overlap: {r.overlap}")
                fm_lines.append(f"momentum: {r.momentum}")
                fm_lines.append(f"relevance: {r.relevance}")
                if r.categories:
                    fm_lines.append(f"categories: [{', '.join(r.categories)}]")
            if authors:
                # Commas inside names would break the YAML list — strip them.
                fm_lines.append(f"authors: [{', '.join(a.replace(',', '') for a in authors)}]")
            fm_lines.append(f"tags: [draft, ietf, {month}]")
            fm_lines.append("---")
            frontmatter = "\n".join(fm_lines)

            # Note body
            body = f"\n# {draft.title or name}\n\n"
            body += f"**{name}** | rev {draft.rev or '00'} | {draft.time or 'unknown'}\n\n"

            if authors:
                body += "## Authors\n\n"
                body += "\n".join(f"- [[{a}]]" for a in authors) + "\n\n"

            if r:
                body += "## Rating\n\n"
                body += f"**Composite Score: {r.composite_score:.2f}**\n\n"
                body += "| Dimension | Score |\n|---|---|\n"
                body += f"| Novelty | {_score_bar(r.novelty)} |\n"
                body += f"| Maturity | {_score_bar(r.maturity)} |\n"
                body += f"| Overlap | {_score_bar(r.overlap)} |\n"
                body += f"| Momentum | {_score_bar(r.momentum)} |\n"
                body += f"| Relevance | {_score_bar(r.relevance)} |\n\n"
                if r.summary:
                    body += f"> {r.summary}\n\n"
                if r.categories:
                    body += "**Categories:** " + ", ".join(f"[[{c}]]" for c in r.categories) + "\n\n"

            if draft.abstract:
                body += "## Abstract\n\n"
                body += draft.abstract + "\n\n"

            if ideas:
                body += f"## Extracted Ideas ({len(ideas)})\n\n"
                for idea in ideas:
                    novelty = f" `N:{idea.get('novelty_score', '?')}`" if idea.get("novelty_score") else ""
                    itype = f" *{idea.get('type', '')}*" if idea.get("type") else ""
                    body += f"- **{idea.get('title', 'Untitled')}**{itype}{novelty}\n"
                    if idea.get("description"):
                        body += f"  {idea['description']}\n"
                body += "\n"

            body += "## Links\n\n"
            body += f"- [View on IETF Datatracker](https://datatracker.ietf.org/doc/{name}/)\n"
            if draft.rev:
                body += f"- [Read Full Text](https://www.ietf.org/archive/id/{name}-{draft.rev}.txt)\n"

            content = frontmatter + body
            zf.writestr(f"{prefix}/Drafts/{_safe_filename(name)}.md", content)

        # --- Author Notes ---
        author_index_lines = [
            "---\ntags: [index, authors]\n---\n",
            "# Authors\n\n",
            f"**{len(all_authors)}** authors contributing to AI/agent Internet-Drafts.\n\n",
            "| Author | Affiliation | Drafts |\n|---|---|---|\n",
        ]
        for name, aff, cnt, drafts in sorted(all_authors, key=lambda x: x[2], reverse=True):
            author_index_lines.append(f"| [[{name}]] | {aff or ''} | {cnt} |\n")
        zf.writestr(f"{prefix}/Authors/index.md", "".join(author_index_lines))

        for name, aff, cnt, drafts in all_authors:
            fm = f"---\ntags: [author]\naffiliation: \"{aff or ''}\"\ndraft_count: {cnt}\n---\n"
            body = f"\n# {name}\n\n"
            if aff:
                body += f"**Affiliation:** {aff}\n\n"
            body += f"## Drafts ({cnt})\n\n"
            for dn in drafts:
                d = draft_map.get(dn)
                # FIX: fall back to the draft name when the record or its
                # title is missing (a None title used to render as "None").
                title = (d.title or dn) if d else dn
                # FIX: was score_map.get(dn, "") with a truthiness check,
                # which mixed str/float and silently dropped 0.0 scores.
                score = score_map.get(dn)
                score_str = f" (score: {score:.2f})" if score is not None else ""
                body += f"- [[{dn}|{title}]]{score_str}\n"

            # Co-authors: anyone sharing at least one of this author's drafts
            coauthors: Counter = Counter()
            for dn in drafts:
                for other in author_drafts.get(dn, []):
                    if other != name:
                        coauthors[other] += 1
            if coauthors:
                body += "\n## Co-authors\n\n"
                for co, shared in coauthors.most_common(20):
                    body += f"- [[{co}]] ({shared} shared)\n"

            zf.writestr(f"{prefix}/Authors/{_safe_filename(name)}.md", fm + body)

        # --- Category Notes ---
        cat_index_lines = [
            "---\ntags: [index, categories]\n---\n",
            "# Categories\n\n",
            _mermaid_pie("Draft Distribution", dict(cat_counts.most_common(12))),
            "\n\n",
        ]
        for cat, count in cat_counts.most_common():
            cat_index_lines.append(f"- [[{cat}]] — {count} drafts\n")
        zf.writestr(f"{prefix}/Categories/index.md", "".join(cat_index_lines))

        for cat, count in cat_counts.most_common():
            fm = f"---\ntags: [category]\ndraft_count: {count}\n---\n"
            body = f"\n# {cat}\n\n"
            body += f"**{count} drafts** in this category.\n\n"

            # Table of drafts sorted by score (unrated drafts sink to 0)
            draft_names = cat_drafts[cat]
            scored = [(dn, score_map.get(dn, 0)) for dn in draft_names]
            scored.sort(key=lambda x: x[1], reverse=True)

            body += "| Draft | Score |\n|---|---|\n"
            for dn, score in scored:
                d = draft_map.get(dn)
                # FIX: d.title[:60] raised TypeError when title was None.
                title = (d.title or dn)[:60] if d else dn
                body += f"| [[{dn}|{title}]] | {score:.2f} |\n"

            zf.writestr(f"{prefix}/Categories/{_safe_filename(cat)}.md", fm + body)

        # --- Analysis Notes ---

        # Score Distribution
        score_lines = [
            "---\ntags: [analysis]\n---\n",
            "\n# Score Distribution\n\n",
            "Composite scores across all rated drafts (1.0–5.0 scale).\n\n",
        ]
        # Bucket composite scores to one decimal for the bar chart
        buckets: dict[str, int] = defaultdict(int)
        for _, r in pairs:
            buckets[f"{r.composite_score:.1f}"] += 1
        sorted_buckets = dict(sorted(buckets.items()))
        if sorted_buckets:
            labels = [f'"{k}"' for k in sorted_buckets.keys()]
            values = [str(v) for v in sorted_buckets.values()]
            score_lines.append(f"""```mermaid
xychart-beta
  title "Score Distribution"
  x-axis [{", ".join(labels)}]
  y-axis "Count"
  bar [{", ".join(values)}]
```\n\n""")

        # Per-dimension min/avg/max across all rated drafts
        dims = {"Novelty": [], "Maturity": [], "Overlap": [], "Momentum": [], "Relevance": []}
        for _, r in pairs:
            dims["Novelty"].append(r.novelty)
            dims["Maturity"].append(r.maturity)
            dims["Overlap"].append(r.overlap)
            dims["Momentum"].append(r.momentum)
            dims["Relevance"].append(r.relevance)
        score_lines.append("## Dimension Averages\n\n")
        score_lines.append("| Dimension | Average | Min | Max |\n|---|---|---|---|\n")
        for dim, vals in dims.items():
            if vals:
                avg = sum(vals) / len(vals)
                score_lines.append(f"| {dim} | {avg:.2f} | {min(vals)} | {max(vals)} |\n")

        zf.writestr(f"{prefix}/Analysis/Score Distribution.md", "".join(score_lines))

        # Top Rated
        top_lines = [
            "---\ntags: [analysis]\n---\n",
            "\n# Top Rated Drafts\n\n",
            "Drafts ranked by composite score.\n\n",
            "| # | Draft | Score | Novelty | Maturity | Overlap | Momentum | Relevance | Category |\n",
            "|---|---|---|---|---|---|---|---|---|\n",
        ]
        # Use the full ranking here, not the 15-entry dashboard slice
        for i, (d, r) in enumerate(ranked[:30], 1):
            cat = r.categories[0] if r.categories else ""
            top_lines.append(
                f"| {i} | [[{d.name}|{(d.title or d.name)[:45]}]] | **{r.composite_score:.2f}** | "
                f"{r.novelty} | {r.maturity} | {r.overlap} | {r.momentum} | {r.relevance} | {cat} |\n"
            )
        zf.writestr(f"{prefix}/Analysis/Top Rated.md", "".join(top_lines))

        # Ideas Overview
        type_counts = Counter(i.get("type", "other") or "other" for i in all_ideas)
        ideas_lines = [
            "---\ntags: [analysis, ideas]\n---\n",
            "\n# Extracted Ideas\n\n",
            f"**{len(all_ideas)}** technical ideas extracted from rated drafts.\n\n",
            _mermaid_pie("Ideas by Type", dict(type_counts.most_common(10))),
            "\n\n## By Type\n\n",
        ]
        for itype, count in type_counts.most_common():
            ideas_lines.append(f"- **{itype}**: {count} ideas\n")

        ideas_lines.append("\n## Recent Ideas\n\n")
        for idea in all_ideas[:50]:
            dn = idea.get("draft_name", "")
            novelty = f" `N:{idea.get('novelty_score')}`" if idea.get("novelty_score") else ""
            ideas_lines.append(f"- **{idea.get('title', 'Untitled')}**{novelty} — [[{dn}]]\n")
        if len(all_ideas) > 50:
            ideas_lines.append(f"\n*...and {len(all_ideas) - 50} more. See individual draft notes.*\n")

        zf.writestr(f"{prefix}/Analysis/Ideas Overview.md", "".join(ideas_lines))

        # Timeline
        timeline_lines = [
            "---\ntags: [analysis, timeline]\n---\n",
            "\n# Timeline\n\n",
            "Draft submission activity over time.\n\n",
            _mermaid_timeline_chart(dict(sorted(monthly.items()))),
            "\n\n## Monthly Counts\n\n",
            "| Month | Drafts |\n|---|---|\n",
        ]
        for m in sorted(monthly.keys()):
            timeline_lines.append(f"| {m} | {monthly[m]} |\n")
        zf.writestr(f"{prefix}/Analysis/Timeline.md", "".join(timeline_lines))

        # --- Glossary ---
        glossary = """---
tags: [reference, glossary]
---

# Glossary

Reference for all terms, abbreviations, and scoring dimensions used in this vault.

## Scoring Dimensions

Each draft is rated by Claude AI on five dimensions, scored from 1 (lowest) to 5 (highest).

| Dimension | Description |
|---|---|
| **Novelty** | How original is this draft? Does it introduce new ideas, or rehash existing approaches? High = genuinely new contribution. |
| **Maturity** | How complete and well-developed is the specification? High = detailed protocol, clear data formats, ready for implementation. Low = early sketch or position paper. |
| **Overlap** | How much does this draft duplicate existing work? High overlap (5) = very similar to other drafts. Low overlap (1) = unique in the landscape. *Note: In composite score, this is inverted (5 - overlap) so lower overlap contributes positively.* |
| **Momentum** | Is this draft gaining traction? High = active revisions, working group adoption, multiple authors/organizations. Low = single submission, no updates. |
| **Relevance** | How relevant is this draft to AI agent infrastructure? High = directly addresses agent-to-agent communication, identity, authorization. Low = tangentially related. |

## Composite Score

The **composite score** (1.0–5.0) is calculated as:

```
score = (novelty + maturity + (5 - overlap) + momentum + relevance) / 5
```

Overlap is inverted because a *lower* overlap is better (more unique).

## Score Bars

Score bars visualize ratings: `\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2591\u2591\u2591` = 3.5/5.0

- `\u2588` (filled) = earned score
- `\u2591` (empty) = remaining

## Other Terms

| Term | Meaning |
|---|---|
| **Draft / I-D** | Internet-Draft — a working document submitted to the IETF. Not yet an RFC (standard). |
| **RFC** | Request for Comments — a published IETF standard or informational document. |
| **Working Group (WG)** | An IETF group chartered to work on a specific topic (e.g., WIMSE, OAuth). |
| **Category** | Topic classification assigned by Claude during analysis (e.g., "A2A protocols", "AI safety/alignment"). A draft can belong to multiple categories. |
| **Idea** | A distinct technical concept extracted from a draft by Claude. Each idea has a type (protocol, mechanism, framework, etc.) and a novelty score. |
| **Novelty Score (N:1–5)** | Per-idea originality rating. Shown as `N:4` next to ideas. 5 = completely new concept, 1 = well-known approach. |
| **Gap** | An area identified where no existing draft adequately addresses a need in the AI agent ecosystem. |
| **Affiliation** | The organization an author is associated with (from IETF Datatracker records). |
| **Co-authorship** | Two authors who appear together on at least one draft. |
| **Datatracker** | The IETF's official system for tracking Internet-Drafts, RFCs, and working groups (datatracker.ietf.org). |
"""
        zf.writestr(f"{prefix}/Analysis/Glossary.md", glossary)

        # --- .obsidian settings for graph colors ---
        graph_json = """{
  "collapse-filter": false,
  "search": "",
  "showTags": true,
  "showAttachments": false,
  "hideUnresolved": false,
  "showOrphans": true,
  "collapse-color-groups": false,
  "colorGroups": [
    {"query": "path:Drafts", "color": {"a": 1, "rgb": 3444735}},
    {"query": "path:Authors", "color": {"a": 1, "rgb": 10092441}},
    {"query": "path:Categories", "color": {"a": 1, "rgb": 16744448}},
    {"query": "path:Analysis", "color": {"a": 1, "rgb": 2293541}}
  ],
  "collapse-display": false,
  "showArrow": true,
  "textFadeMultiplier": 0,
  "nodeSizeMultiplier": 1.2,
  "lineSizeMultiplier": 1,
  "collapse-forces": true,
  "centerStrength": 0.5,
  "repelStrength": 10,
  "linkStrength": 1,
  "linkDistance": 100
}"""
        zf.writestr(f"{prefix}/.obsidian/graph.json", graph_json)

    buf.seek(0)
    return buf.getvalue()
|
||||
Reference in New Issue
Block a user