v0.3.0: Gap-to-Draft pipeline, Living Standards Observatory, blog series

Gap-to-Draft Pipeline (ietf pipeline):
- Context builder assembles ideas, RFC foundations, similar drafts, ecosystem vision
- Generator produces outlines + sections using rich context with Claude
- Quality gates: novelty (embedding similarity), references, format, self-rating
- Family coordinator generates 5-draft ecosystem (AEM/ATD/HITL/AEPB/APAE)
- I-D formatter with proper headers, references, 72-char wrapping

Living Standards Observatory (ietf observatory):
- Source abstraction with IETF + W3C fetchers
- 7-step update pipeline: snapshot, fetch, analyze, embed, ideas, gaps, record
- Static GitHub Pages dashboard (explorer, gap tracker, timeline)
- Weekly CI/CD automation via GitHub Actions

Also includes:
- 361 drafts (expanded from 260 with 6 new keywords), 403 authors, 1,262 ideas, 12 gaps
- Blog series (8 posts planned), reports, arXiv paper figures
- Agent team infrastructure (CLAUDE.md, scripts, dev journal)
- 5 new DB tables, schema migration, ~15 new query methods

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-04 00:48:57 +01:00
parent be9cf9c5d9
commit d6beb9c0a0
87 changed files with 24471 additions and 401 deletions

View File

@@ -77,7 +77,7 @@ Abstract: {abstract}
{text_excerpt}
Return 3-8 ideas. Focus on CONCRETE technical contributions, not general statements.
Return 0-8 ideas. Only include CONCRETE, NOVEL technical contributions not restatements of the abstract or general goals. If the draft has no substantive technical ideas (e.g. it is a problem statement, administrative document, or off-topic), return an empty array [].
JSON array only, no fences."""
BATCH_IDEAS_PROMPT = """\
@@ -86,7 +86,7 @@ Per idea: {{"title":"short name","description":"1 sentence","type":"mechanism|pr
{drafts_block}
3-8 ideas per draft. CONCRETE technical contributions only.
0-8 ideas per draft. Only include CONCRETE, NOVEL technical contributions. If a draft has no substantive ideas, map it to an empty array. Do not pad with restatements of the abstract.
Return ONLY a JSON object like {{"draft-name":[...], ...}}, no fences."""
GAP_ANALYSIS_PROMPT = """\
@@ -397,16 +397,16 @@ class Analyzer:
count = 0
for d in drafts:
ideas = results.get(d.name, [])
if not isinstance(ideas, list):
ideas = [ideas] if ideas else []
self.db.cache_response(
d.name, _prompt_hash(f"batch-ideas-{phash}-{d.name}"),
self.config.claude_model_cheap if cheap else self.config.claude_model,
f"batch-ideas[{d.name}]", json.dumps(ideas),
in_tok // len(drafts), out_tok // len(drafts),
)
self.db.insert_ideas(d.name, ideas)
if ideas:
if not isinstance(ideas, list):
ideas = [ideas]
self.db.cache_response(
d.name, _prompt_hash(f"batch-ideas-{phash}-{d.name}"),
self.config.claude_model_cheap if cheap else self.config.claude_model,
f"batch-ideas[{d.name}]", json.dumps(ideas),
in_tok // len(drafts), out_tok // len(drafts),
)
self.db.insert_ideas(d.name, ideas)
count += 1
return count
except (json.JSONDecodeError, anthropic.APIError) as e:

File diff suppressed because it is too large Load Diff

View File

@@ -16,6 +16,12 @@ DEFAULT_KEYWORDS = [
"autonomous",
"machine-learning",
"artificial-intelligence",
"mcp",
"agentic",
"inference",
"generative",
"intelligent",
"aipref",
]
@@ -32,6 +38,15 @@ class Config:
fetch_since: str = "2024-01-01"
# Polite delay between API requests (seconds)
fetch_delay: float = 0.5
# Pipeline
generation_max_tokens: int = 4096
generation_model: str = "" # defaults to claude_model
# Observatory
observatory_sources: list[str] = field(default_factory=lambda: ["ietf"])
dashboard_dir: str = str(DEFAULT_DATA_DIR.parent / "docs")
w3c_groups: list[str] = field(default_factory=lambda: [
"webmachinelearning", "wot", "credentials", "did", "vc"
])
def save(self) -> None:
Path(self.data_dir).mkdir(parents=True, exist_ok=True)

View File

@@ -0,0 +1,981 @@
"""Static dashboard generator for GitHub Pages — Living Standards Observatory."""
from __future__ import annotations
import json
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path
from .config import Config
from .db import Database
from .models import Rating
console = None


def _get_console():
    """Return the module-wide rich Console, creating it lazily on first use.

    The ``rich`` import is deferred into the function body so that importing
    this module stays cheap when no console output is ever produced.
    """
    global console
    if console is not None:
        return console
    from rich.console import Console
    console = Console()
    return console
class DashboardGenerator:
"""Generate a static GitHub Pages site under docs/."""
def __init__(self, config: Config | None = None, db: Database | None = None):
self.config = config or Config.load()
self.db = db or Database(self.config)
self.output_dir = Path(self.config.dashboard_dir)
def generate(self) -> str:
"""Generate full static site. Returns path to docs/."""
con = _get_console()
self.output_dir.mkdir(parents=True, exist_ok=True)
(self.output_dir / "observatory").mkdir(exist_ok=True)
(self.output_dir / "data").mkdir(exist_ok=True)
(self.output_dir / "assets").mkdir(exist_ok=True)
con.print("[bold]Generating dashboard...[/]")
self._generate_data_files()
con.print(" [green]OK[/] Data files")
self._generate_style()
con.print(" [green]OK[/] Styles")
self._generate_index()
con.print(" [green]OK[/] Index page")
self._generate_explorer()
con.print(" [green]OK[/] Explorer page")
self._generate_gaps_page()
con.print(" [green]OK[/] Gaps page")
self._generate_timeline_page()
con.print(" [green]OK[/] Timeline page")
con.print(f"\n[bold green]Dashboard generated at {self.output_dir}/[/]")
return str(self.output_dir)
# ── Data files ──────────────────────────────────────────────────────────
    def _generate_data_files(self) -> None:
        """Write JSON data files to docs/data/.

        Emits four files consumed by the static pages' JavaScript:
        observatory.json (headline metrics), drafts.json (rated documents),
        gaps.json (current gaps + history) and timeline.json (monthly counts),
        plus meta.json with generation provenance. Keys in these dicts are a
        contract with the page scripts — do not rename them casually.
        """
        data_dir = self.output_dir / "data"
        # observatory.json — key metrics for the landing-page cards
        total = self.db.count_drafts()
        sources = self.db.all_sources()
        gaps = self.db.all_gaps()
        snapshots = self.db.get_snapshots(limit=1)  # most recent snapshot only
        unrated = len(self.db.unrated_drafts(limit=10000))
        idea_count = self.db.idea_count()
        author_count = self.db.author_count()
        observatory_data = {
            "total_docs": total,
            "sources": {s["name"]: s["doc_count"] for s in sources},
            "gaps_count": len(gaps),
            "unrated": unrated,
            "ideas": idea_count,
            "authors": author_count,
            # None until the first snapshot has been recorded.
            "last_update": snapshots[0]["snapshot_at"] if snapshots else None,
        }
        (data_dir / "observatory.json").write_text(json.dumps(observatory_data, indent=2))
        # drafts.json — all docs with ratings, flattened for the explorer table
        pairs = self.db.drafts_with_ratings(limit=1000)
        drafts_data = []
        for d, r in pairs:
            drafts_data.append({
                "name": d.name,
                "title": d.title,
                "date": d.date,
                "source": d.source or "ietf",
                "url": d.source_url or d.datatracker_url,
                "pages": d.pages or 0,
                "group": d.group or "individual",
                "score": round(r.composite_score, 2),
                "novelty": r.novelty,
                "maturity": r.maturity,
                "overlap": r.overlap,
                "momentum": r.momentum,
                "relevance": r.relevance,
                "categories": r.categories,
                "summary": r.summary,
                "novelty_note": r.novelty_note,
                "maturity_note": r.maturity_note,
                "overlap_note": r.overlap_note,
                "momentum_note": r.momentum_note,
                "relevance_note": r.relevance_note,
                "doc_status": d.doc_status or "",
            })
        (data_dir / "drafts.json").write_text(json.dumps(drafts_data, indent=2))
        # gaps.json — current gaps + history
        gap_history = self.db.gap_history_timeline()
        gaps_data = {
            "current": gaps,
            "history": gap_history,
        }
        (data_dir / "gaps.json").write_text(json.dumps(gaps_data, indent=2))
        # timeline.json — monthly counts by source and category
        all_drafts = self.db.list_drafts(limit=2000, order_by="time ASC")
        # NOTE(review): rating_map comes from the 1000-row `pairs` query while
        # all_drafts may hold up to 2000 rows, so drafts outside the rated set
        # contribute no category counts — confirm this asymmetry is intended.
        rating_map = {d.name: r for d, r in pairs}
        monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
        monthly_source: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
        for d in all_drafts:
            # YYYY-MM bucket; assumes d.time is ISO-like — TODO confirm.
            month = d.time[:7] if d.time else "unknown"
            src = d.source or "ietf"
            monthly_source[month][src] += 1
            r = rating_map.get(d.name)
            if r:
                for c in r.categories:
                    monthly[month][c] += 1
        months = sorted(set(list(monthly.keys()) + list(monthly_source.keys())))
        all_cats: set[str] = set()
        for mc in monthly.values():
            all_cats.update(mc.keys())
        all_sources_set: set[str] = set()
        for ms in monthly_source.values():
            all_sources_set.update(ms.keys())
        timeline_data = {
            "months": months,
            # dict(...) strips the defaultdict wrapper for clean JSON dumps.
            "by_category": {m: dict(monthly.get(m, {})) for m in months},
            "by_source": {m: dict(monthly_source.get(m, {})) for m in months},
            "categories": sorted(all_cats),
            "sources": sorted(all_sources_set),
        }
        (data_dir / "timeline.json").write_text(json.dumps(timeline_data, indent=2))
        # meta.json — generation provenance (timestamp, version, project name)
        meta = {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "version": "0.3.0",
            "project": "IETF Living Standards Observatory",
        }
        (data_dir / "meta.json").write_text(json.dumps(meta, indent=2))
# ── Style ───────────────────────────────────────────────────────────────
    def _generate_style(self) -> None:
        """Write the shared stylesheet used by every page to docs/assets/style.css.

        The CSS below is a plain (non-f) string, so braces need no escaping.
        Class names are a contract with the HTML emitted by the page
        generators in this module.
        """
        css = """\
:root {
--bg: #f5f7fa;
--card-bg: #ffffff;
--text: #1a1a2e;
--text-dim: #666;
--accent: #4a6cf7;
--accent-light: rgba(74,108,247,0.1);
--green: #10b981;
--orange: #f59e0b;
--red: #ef4444;
--border: #e5e7eb;
--shadow: 0 1px 4px rgba(0,0,0,0.08);
--radius: 10px;
}
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
background: var(--bg); color: var(--text);
line-height: 1.5;
}
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }
/* Layout */
.container { max-width: 1200px; margin: 0 auto; padding: 20px; }
.header {
background: var(--card-bg); border-bottom: 1px solid var(--border);
padding: 16px 0; margin-bottom: 24px;
}
.header .container { display: flex; align-items: center; justify-content: space-between; }
.header h1 { font-size: 1.3rem; }
.header nav { display: flex; gap: 20px; font-size: 0.9rem; }
.header nav a { color: var(--text-dim); font-weight: 500; }
.header nav a:hover, .header nav a.active { color: var(--accent); text-decoration: none; }
/* Cards */
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 16px; margin-bottom: 24px; }
.card {
background: var(--card-bg); border-radius: var(--radius);
padding: 20px; box-shadow: var(--shadow);
}
.card .label { font-size: 0.8rem; color: var(--text-dim); text-transform: uppercase; letter-spacing: 0.5px; }
.card .value { font-size: 2rem; font-weight: 700; margin-top: 4px; }
.card .sub { font-size: 0.8rem; color: var(--text-dim); margin-top: 4px; }
/* Tables */
.panel {
background: var(--card-bg); border-radius: var(--radius);
box-shadow: var(--shadow); overflow: hidden; margin-bottom: 24px;
}
.panel-header { padding: 16px 20px; border-bottom: 1px solid var(--border); font-weight: 600; }
table { width: 100%; border-collapse: collapse; }
th {
background: #f8f9fb; padding: 10px 12px; text-align: left;
font-size: 0.78rem; color: var(--text-dim); cursor: pointer; user-select: none;
white-space: nowrap; border-bottom: 2px solid var(--border);
}
th:hover { color: var(--accent); }
td { padding: 10px 12px; border-bottom: 1px solid #f0f0f0; font-size: 0.83rem; vertical-align: top; }
tr:hover { background: #fafbff; }
/* Controls */
.controls {
background: var(--card-bg); border-radius: var(--radius);
padding: 16px 20px; margin-bottom: 16px; box-shadow: var(--shadow);
}
.controls-row { display: flex; gap: 16px; align-items: center; flex-wrap: wrap; margin-bottom: 10px; }
.controls-row:last-child { margin-bottom: 0; }
.search-box {
flex: 1; min-width: 250px; padding: 8px 14px;
border: 1px solid var(--border); border-radius: 6px;
font-size: 0.9rem; outline: none;
}
.search-box:focus { border-color: var(--accent); box-shadow: 0 0 0 2px var(--accent-light); }
.slider-group { display: flex; align-items: center; gap: 6px; font-size: 0.8rem; color: var(--text-dim); }
.slider-group input[type=range] { width: 100px; cursor: pointer; }
.slider-val { font-weight: 600; min-width: 24px; text-align: center; }
/* Chips */
.chip-row { display: flex; flex-wrap: wrap; gap: 6px; }
.chip {
display: inline-block; padding: 3px 10px; border-radius: 12px;
font-size: 0.75rem; cursor: pointer; border: 1px solid var(--border);
background: var(--card-bg); transition: all 0.15s; user-select: none;
}
.chip.active { background: var(--accent); color: #fff; border-color: var(--accent); }
.chip:hover { border-color: var(--accent); }
/* Badges */
.score-badge {
display: inline-block; padding: 2px 8px; border-radius: 10px;
font-weight: 600; font-size: 0.8rem;
}
.score-high { background: #d4edda; color: #155724; }
.score-mid { background: #fff3cd; color: #856404; }
.score-low { background: #f8d7da; color: #721c24; }
.cat-badge {
display: inline-block; padding: 1px 7px; border-radius: 8px;
font-size: 0.68rem; margin: 1px 2px; background: #e8eaf6; color: #3949ab;
}
.source-badge {
display: inline-block; padding: 1px 7px; border-radius: 8px;
font-size: 0.68rem; margin: 1px 2px;
}
.source-ietf { background: #e3f2fd; color: #1565c0; }
.source-w3c { background: #fce4ec; color: #c62828; }
/* Severity */
.sev-critical { color: var(--red); font-weight: 600; }
.sev-high { color: var(--orange); font-weight: 600; }
.sev-medium { color: var(--text); }
.sev-low { color: var(--text-dim); }
/* Bar */
.bar { display: inline-block; height: 10px; border-radius: 3px; background: var(--accent); vertical-align: middle; }
/* Detail */
.detail-row td { padding: 12px 20px; background: #f8faff; }
.detail-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 10px; max-width: 800px; }
.detail-item { font-size: 0.82rem; }
.detail-item strong { color: #333; }
.detail-item .note { color: var(--text-dim); font-size: 0.78rem; }
.summary-text { font-size: 0.82rem; color: #444; margin-top: 6px; line-height: 1.4; }
/* Chart container */
.chart-container {
background: var(--card-bg); border-radius: var(--radius);
box-shadow: var(--shadow); padding: 20px; margin-bottom: 24px;
}
/* Gap cards */
.gap-card {
background: var(--card-bg); border-radius: var(--radius);
border-left: 4px solid var(--accent); padding: 16px 20px;
box-shadow: var(--shadow); margin-bottom: 12px;
}
.gap-card h3 { font-size: 0.95rem; margin-bottom: 4px; }
.gap-card p { font-size: 0.83rem; color: var(--text-dim); margin-bottom: 4px; }
.gap-card .meta { font-size: 0.75rem; color: var(--text-dim); }
.gap-card.critical { border-left-color: var(--red); }
.gap-card.high { border-left-color: var(--orange); }
.dim { font-size: 0.75rem; color: var(--text-dim); }
.clickable { cursor: pointer; }
.reset-btn {
padding: 4px 12px; border: 1px solid var(--border); border-radius: 6px;
background: var(--card-bg); cursor: pointer; font-size: 0.78rem; color: var(--text-dim);
}
.reset-btn:hover { border-color: var(--accent); color: var(--accent); }
.result-count { font-size: 0.85rem; color: var(--text-dim); margin: 10px 0 8px; }
/* Timeline bars */
.tl-bar {
display: inline-block; height: 16px; border-radius: 3px;
vertical-align: middle; min-width: 2px;
}
@media (max-width: 768px) {
.cards { grid-template-columns: 1fr 1fr; }
.controls-row { flex-direction: column; align-items: stretch; }
.detail-grid { grid-template-columns: 1fr; }
}
"""
        # Single shared stylesheet; all pages link to it relatively.
        (self.output_dir / "assets" / "style.css").write_text(css)
# ── Shared HTML pieces ──────────────────────────────────────────────────
def _header_html(self, active: str = "") -> str:
def active_cls(page: str) -> str:
return ' class="active"' if page == active else ""
return f"""\
<div class="header">
<div class="container">
<h1>Living Standards Observatory</h1>
<nav>
<a href="../index.html"{active_cls("index")}>Dashboard</a>
<a href="explorer.html"{active_cls("explorer")}>Explorer</a>
<a href="gaps.html"{active_cls("gaps")}>Gaps</a>
<a href="timeline.html"{active_cls("timeline")}>Timeline</a>
</nav>
</div>
</div>"""
    def _index_header_html(self) -> str:
        """Return the page header for the root landing page (docs/index.html).

        Same markup as _header_html but with links relative to the site root
        and "Dashboard" fixed as the active entry.
        """
        return """\
<div class="header">
<div class="container">
<h1>Living Standards Observatory</h1>
<nav>
<a href="index.html" class="active">Dashboard</a>
<a href="observatory/explorer.html">Explorer</a>
<a href="observatory/gaps.html">Gaps</a>
<a href="observatory/timeline.html">Timeline</a>
</nav>
</div>
</div>"""
# ── Index page ──────────────────────────────────────────────────────────
    def _generate_index(self) -> None:
        """Write the landing page (docs/index.html) with the metrics dashboard.

        The page fetches data/observatory.json, data/drafts.json and
        data/gaps.json at view time; the doubled braces in the template
        escape literal braces inside this f-string — only
        {self._index_header_html()} is interpolated.
        """
        html = f"""\
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Living Standards Observatory</title>
<link rel="stylesheet" href="assets/style.css">
</head>
<body>
{self._index_header_html()}
<div class="container">
<div class="cards" id="metricsCards">
<div class="card"><div class="label">Total Documents</div><div class="value" id="totalDocs">--</div><div class="sub" id="sourceSub"></div></div>
<div class="card"><div class="label">Standards Bodies</div><div class="value" id="sourceCount">--</div><div class="sub">Active sources</div></div>
<div class="card"><div class="label">Open Gaps</div><div class="value" id="gapCount">--</div><div class="sub">Identified coverage gaps</div></div>
<div class="card"><div class="label">Ideas Extracted</div><div class="value" id="ideaCount">--</div><div class="sub">Technical contributions</div></div>
<div class="card"><div class="label">Authors Tracked</div><div class="value" id="authorCount">--</div><div class="sub">Individual contributors</div></div>
<div class="card"><div class="label">Last Update</div><div class="value" id="lastUpdate" style="font-size:1rem">--</div><div class="sub" id="updateSub"></div></div>
</div>
<div class="panel">
<div class="panel-header">Top Rated Documents</div>
<table>
<thead>
<tr><th>Score</th><th>Document</th><th>Source</th><th>Date</th><th>Categories</th></tr>
</thead>
<tbody id="topDrafts"></tbody>
</table>
</div>
<div class="panel">
<div class="panel-header">Critical &amp; High Severity Gaps</div>
<div id="gapsList" style="padding: 16px;"></div>
</div>
</div>
<script>
function escHtml(s) {{ const d = document.createElement('div'); d.textContent = s || ''; return d.innerHTML; }}
function scoreBadge(s) {{
const cls = s >= 4.0 ? 'score-high' : s >= 3.0 ? 'score-mid' : 'score-low';
return '<span class="score-badge ' + cls + '">' + s.toFixed(1) + '</span>';
}}
async function init() {{
const [obs, drafts, gaps] = await Promise.all([
fetch('data/observatory.json').then(r => r.json()),
fetch('data/drafts.json').then(r => r.json()),
fetch('data/gaps.json').then(r => r.json()),
]);
// Metrics
document.getElementById('totalDocs').textContent = obs.total_docs;
const srcNames = Object.keys(obs.sources || {{}});
document.getElementById('sourceCount').textContent = srcNames.length || 1;
document.getElementById('sourceSub').textContent = srcNames.map(s => s.toUpperCase() + ': ' + (obs.sources[s] || 0)).join(' | ') || '';
document.getElementById('gapCount').textContent = obs.gaps_count;
document.getElementById('ideaCount').textContent = obs.ideas;
document.getElementById('authorCount').textContent = obs.authors;
if (obs.last_update) {{
document.getElementById('lastUpdate').textContent = obs.last_update.substring(0, 10);
}}
// Top drafts
const top = drafts.sort((a, b) => b.score - a.score).slice(0, 15);
const tbody = document.getElementById('topDrafts');
top.forEach(d => {{
const tr = document.createElement('tr');
const srcClass = 'source-' + (d.source || 'ietf');
tr.innerHTML =
'<td>' + scoreBadge(d.score) + '</td>' +
'<td><a href="' + escHtml(d.url) + '" target="_blank">' + escHtml(d.name) + '</a><br><span class="dim">' + escHtml(d.title.substring(0,80)) + '</span></td>' +
'<td><span class="source-badge ' + srcClass + '">' + (d.source || 'ietf').toUpperCase() + '</span></td>' +
'<td class="dim">' + d.date + '</td>' +
'<td>' + d.categories.map(c => '<span class="cat-badge">' + escHtml(c) + '</span>').join('') + '</td>';
tbody.appendChild(tr);
}});
// Gaps
const gapsList = document.getElementById('gapsList');
const critical = (gaps.current || []).filter(g => g.severity === 'critical' || g.severity === 'high');
if (critical.length === 0) {{
gapsList.innerHTML = '<p class="dim">No critical or high severity gaps found.</p>';
}} else {{
critical.forEach(g => {{
const cls = g.severity === 'critical' ? 'critical' : 'high';
gapsList.innerHTML +=
'<div class="gap-card ' + cls + '">' +
'<h3>' + escHtml(g.topic) + '</h3>' +
'<p>' + escHtml(g.description) + '</p>' +
'<div class="meta"><span class="sev-' + g.severity + '">' + g.severity.toUpperCase() + '</span> &middot; ' + escHtml(g.category || '') + '</div>' +
'</div>';
}});
}}
}}
init();
</script>
</body>
</html>"""
        # Landing page lives at the site root, next to assets/ and data/.
        (self.output_dir / "index.html").write_text(html)
# ── Explorer page ───────────────────────────────────────────────────────
    def _generate_explorer(self) -> None:
        """Write the explorer page: a multi-source draft browser with search,
        source/category filters, score sliders, sortable columns and an
        expandable per-draft detail row.

        Reads ../data/drafts.json at view time. Doubled braces escape literal
        braces in this f-string; only {self._header_html("explorer")} is
        interpolated.
        """
        html = f"""\
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Explorer - Living Standards Observatory</title>
<link rel="stylesheet" href="../assets/style.css">
</head>
<body>
{self._header_html("explorer")}
<div class="container">
<div class="controls">
<div class="controls-row">
<input type="text" class="search-box" id="searchBox" placeholder="Search by name, title, summary, or keyword...">
<select id="sourceFilter" style="padding:8px;border:1px solid var(--border);border-radius:6px;font-size:0.85rem">
<option value="">All sources</option>
</select>
<div class="slider-group">Min score: <input type="range" id="minScore" min="1" max="5" step="0.1" value="1"><span class="slider-val" id="minScoreVal">1.0</span></div>
<div class="slider-group">Min novelty: <input type="range" id="minNovelty" min="1" max="5" step="1" value="1"><span class="slider-val" id="minNoveltyVal">1</span></div>
<div class="slider-group">Max overlap: <input type="range" id="maxOverlap" min="1" max="5" step="1" value="5"><span class="slider-val" id="maxOverlapVal">5</span></div>
<button class="reset-btn" onclick="resetFilters()">Reset</button>
</div>
<div class="controls-row">
<div class="chip-row" id="catChips"></div>
</div>
</div>
<div class="result-count" id="resultCount"></div>
<table>
<thead>
<tr>
<th onclick="sortBy('score')" width="60">Score <span class="sort-arrow" id="sort-score"></span></th>
<th onclick="sortBy('name')">Draft <span class="sort-arrow" id="sort-name"></span></th>
<th onclick="sortBy('source')" width="60">Src <span class="sort-arrow" id="sort-source"></span></th>
<th onclick="sortBy('date')" width="90">Date <span class="sort-arrow" id="sort-date"></span></th>
<th onclick="sortBy('novelty')" width="30">N</th>
<th onclick="sortBy('maturity')" width="30">M</th>
<th onclick="sortBy('overlap')" width="30">O</th>
<th onclick="sortBy('momentum')" width="30">Mom</th>
<th onclick="sortBy('relevance')" width="30">R</th>
<th>Categories</th>
</tr>
</thead>
<tbody id="tableBody"></tbody>
</table>
</div>
<script>
let DRAFTS = [];
let ALL_CATS = [];
let activeCats = new Set();
let sortField = 'score';
let sortAsc = false;
let expandedRow = null;
function escHtml(s) {{ const d = document.createElement('div'); d.textContent = s || ''; return d.innerHTML; }}
function scoreBadge(s) {{
const cls = s >= 4.0 ? 'score-high' : s >= 3.0 ? 'score-mid' : 'score-low';
return '<span class="score-badge ' + cls + '">' + s.toFixed(1) + '</span>';
}}
function dimBar(v) {{ return '<span class="bar" style="width:' + (v * 12) + 'px"></span> ' + v; }}
const searchBox = document.getElementById('searchBox');
const sourceFilter = document.getElementById('sourceFilter');
const minScore = document.getElementById('minScore');
const minNovelty = document.getElementById('minNovelty');
const maxOverlap = document.getElementById('maxOverlap');
searchBox.oninput = render;
sourceFilter.onchange = render;
minScore.oninput = () => {{ document.getElementById('minScoreVal').textContent = parseFloat(minScore.value).toFixed(1); render(); }};
minNovelty.oninput = () => {{ document.getElementById('minNoveltyVal').textContent = minNovelty.value; render(); }};
maxOverlap.oninput = () => {{ document.getElementById('maxOverlapVal').textContent = maxOverlap.value; render(); }};
function resetFilters() {{
searchBox.value = '';
sourceFilter.value = '';
minScore.value = 1; document.getElementById('minScoreVal').textContent = '1.0';
minNovelty.value = 1; document.getElementById('minNoveltyVal').textContent = '1';
maxOverlap.value = 5; document.getElementById('maxOverlapVal').textContent = '5';
activeCats.clear();
document.querySelectorAll('.chip').forEach(c => c.classList.remove('active'));
sortField = 'score'; sortAsc = false;
render();
}}
function sortBy(field) {{
if (sortField === field) sortAsc = !sortAsc;
else {{ sortField = field; sortAsc = field === 'name' || field === 'date'; }}
render();
}}
function cmp(a, b) {{
let va = a[sortField], vb = b[sortField];
if (typeof va === 'string') return sortAsc ? va.localeCompare(vb) : vb.localeCompare(va);
return sortAsc ? va - vb : vb - va;
}}
function render() {{
const q = searchBox.value.toLowerCase().trim();
const src = sourceFilter.value;
const ms = parseFloat(minScore.value);
const mn = parseInt(minNovelty.value);
const mo = parseInt(maxOverlap.value);
let filtered = DRAFTS.filter(d => {{
if (d.score < ms) return false;
if (d.novelty < mn) return false;
if (d.overlap > mo) return false;
if (src && (d.source || 'ietf') !== src) return false;
if (activeCats.size > 0 && !d.categories.some(c => activeCats.has(c))) return false;
if (q) {{
const hay = (d.name + ' ' + d.title + ' ' + d.summary + ' ' + d.categories.join(' ')).toLowerCase();
const words = q.split(/\\s+/);
if (!words.every(w => hay.includes(w))) return false;
}}
return true;
}});
filtered.sort(cmp);
document.querySelectorAll('.sort-arrow').forEach(el => el.textContent = '');
const arrow = document.getElementById('sort-' + sortField);
if (arrow) arrow.textContent = sortAsc ? '\\u25B2' : '\\u25BC';
const tbody = document.getElementById('tableBody');
tbody.innerHTML = '';
expandedRow = null;
filtered.forEach(d => {{
const tr = document.createElement('tr');
tr.className = 'clickable';
const srcClass = 'source-' + (d.source || 'ietf');
tr.innerHTML =
'<td>' + scoreBadge(d.score) + '</td>' +
'<td style="max-width:300px"><a href="' + escHtml(d.url) + '" target="_blank" onclick="event.stopPropagation()" style="color:var(--accent);font-weight:500">' + escHtml(d.name) + '</a>' +
'<br><span class="dim">' + escHtml(d.title.substring(0, 80)) + '</span></td>' +
'<td><span class="source-badge ' + srcClass + '">' + (d.source || 'ietf').toUpperCase() + '</span></td>' +
'<td class="dim">' + d.date + '</td>' +
'<td>' + dimBar(d.novelty) + '</td>' +
'<td>' + dimBar(d.maturity) + '</td>' +
'<td>' + dimBar(d.overlap) + '</td>' +
'<td>' + dimBar(d.momentum) + '</td>' +
'<td>' + dimBar(d.relevance) + '</td>' +
'<td>' + d.categories.map(c => '<span class="cat-badge">' + escHtml(c) + '</span>').join('') + '</td>';
tr.onclick = () => toggleDetail(tr, d);
tbody.appendChild(tr);
}});
document.getElementById('resultCount').textContent =
'Showing ' + filtered.length + ' of ' + DRAFTS.length + ' drafts';
}}
function toggleDetail(tr, d) {{
if (expandedRow) {{
expandedRow.previousElementSibling?.classList.remove('expanded');
expandedRow.remove();
if (expandedRow._draftName === d.name) {{ expandedRow = null; return; }}
}}
tr.classList.add('expanded');
const detail = document.createElement('tr');
detail.className = 'detail-row';
detail._draftName = d.name;
function detailItem(label, score, note) {{
return '<div class="detail-item"><strong>' + label + ':</strong> ' + score + '/5 ' +
'<span class="bar" style="width:' + (score * 16) + 'px"></span>' +
(note ? '<div class="note">' + escHtml(note) + '</div>' : '') + '</div>';
}}
detail.innerHTML = '<td colspan="10">' +
'<div class="summary-text"><strong>Summary:</strong> ' + escHtml(d.summary) + '</div>' +
'<div class="detail-grid" style="margin-top:10px">' +
detailItem('Novelty', d.novelty, d.novelty_note) +
detailItem('Maturity', d.maturity, d.maturity_note) +
detailItem('Overlap', d.overlap, d.overlap_note) +
detailItem('Momentum', d.momentum, d.momentum_note) +
detailItem('Relevance', d.relevance, d.relevance_note) +
'<div class="detail-item"><strong>Source:</strong> ' + (d.source || 'ietf').toUpperCase() + ' &middot; <strong>Pages:</strong> ' + d.pages + '</div>' +
'</div>' +
'<div style="margin-top:8px"><a href="' + escHtml(d.url) + '" target="_blank" style="color:var(--accent)">Open document \\u2192</a></div>' +
'</td>';
tr.after(detail);
expandedRow = detail;
}}
async function init() {{
DRAFTS = await fetch('../data/drafts.json').then(r => r.json());
// Build categories
const catSet = new Set();
const sources = new Set();
DRAFTS.forEach(d => {{
d.categories.forEach(c => catSet.add(c));
sources.add(d.source || 'ietf');
}});
ALL_CATS = [...catSet].sort();
// Source filter options
sources.forEach(s => {{
const opt = document.createElement('option');
opt.value = s;
opt.textContent = s.toUpperCase();
sourceFilter.appendChild(opt);
}});
// Category chips
const chipBox = document.getElementById('catChips');
ALL_CATS.forEach(cat => {{
const el = document.createElement('span');
el.className = 'chip';
const count = DRAFTS.filter(d => d.categories.includes(cat)).length;
el.innerHTML = escHtml(cat) + '<span style="font-size:0.65rem;opacity:0.7;margin-left:2px">(' + count + ')</span>';
el.onclick = () => {{
if (activeCats.has(cat)) {{ activeCats.delete(cat); el.classList.remove('active'); }}
else {{ activeCats.add(cat); el.classList.add('active'); }}
render();
}};
chipBox.appendChild(el);
}});
render();
}}
init();
</script>
</body>
</html>"""
        # Explorer lives one level down, under docs/observatory/.
        (self.output_dir / "observatory" / "explorer.html").write_text(html)
# ── Gaps page ───────────────────────────────────────────────────────────
    def _generate_gaps_page(self) -> None:
        """Write the gap-tracker page: filterable current gaps plus a history
        table of how gaps evolved across observatory snapshots.

        Reads ../data/gaps.json at view time. Doubled braces escape literal
        braces in this f-string; only {self._header_html("gaps")} is
        interpolated.
        """
        html = f"""\
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Gaps - Living Standards Observatory</title>
<link rel="stylesheet" href="../assets/style.css">
</head>
<body>
{self._header_html("gaps")}
<div class="container">
<h2 style="margin-bottom:16px">Coverage Gaps</h2>
<p class="dim" style="margin-bottom:20px">Areas, problems, or technical challenges not adequately addressed by existing standards documents.</p>
<div class="controls">
<div class="controls-row">
<select id="sevFilter" style="padding:8px;border:1px solid var(--border);border-radius:6px;font-size:0.85rem">
<option value="">All severities</option>
<option value="critical">Critical</option>
<option value="high">High</option>
<option value="medium">Medium</option>
<option value="low">Low</option>
</select>
<input type="text" class="search-box" id="gapSearch" placeholder="Filter gaps..." style="max-width:400px">
</div>
</div>
<div id="gapsList"></div>
<h2 style="margin:32px 0 16px">Gap History</h2>
<p class="dim" style="margin-bottom:20px">How gaps have evolved across observatory snapshots.</p>
<div class="panel">
<table>
<thead>
<tr><th>Snapshot</th><th>Topic</th><th>Severity</th><th>Status</th></tr>
</thead>
<tbody id="historyBody"></tbody>
</table>
</div>
</div>
<script>
function escHtml(s) {{ const d = document.createElement('div'); d.textContent = s || ''; return d.innerHTML; }}
let GAPS_DATA = null;
function renderGaps() {{
const sev = document.getElementById('sevFilter').value;
const q = document.getElementById('gapSearch').value.toLowerCase().trim();
const list = document.getElementById('gapsList');
list.innerHTML = '';
let current = GAPS_DATA.current || [];
if (sev) current = current.filter(g => g.severity === sev);
if (q) current = current.filter(g => (g.topic + ' ' + g.description + ' ' + (g.category || '')).toLowerCase().includes(q));
if (current.length === 0) {{
list.innerHTML = '<p class="dim" style="padding:16px">No gaps match the current filters.</p>';
return;
}}
const order = {{'critical': 0, 'high': 1, 'medium': 2, 'low': 3}};
current.sort((a, b) => (order[a.severity] || 2) - (order[b.severity] || 2));
current.forEach(g => {{
const cls = (g.severity === 'critical' || g.severity === 'high') ? g.severity : '';
list.innerHTML +=
'<div class="gap-card ' + cls + '">' +
'<h3>' + escHtml(g.topic) + '</h3>' +
'<p>' + escHtml(g.description) + '</p>' +
'<div class="meta">' +
'<span class="sev-' + g.severity + '">' + (g.severity || 'medium').toUpperCase() + '</span>' +
(g.category ? ' &middot; ' + escHtml(g.category) : '') +
(g.evidence ? '<br><em>' + escHtml(g.evidence) + '</em>' : '') +
'</div></div>';
}});
}}
async function init() {{
GAPS_DATA = await fetch('../data/gaps.json').then(r => r.json());
document.getElementById('sevFilter').onchange = renderGaps;
document.getElementById('gapSearch').oninput = renderGaps;
renderGaps();
// History table
const history = GAPS_DATA.history || [];
const tbody = document.getElementById('historyBody');
if (history.length === 0) {{
tbody.innerHTML = '<tr><td colspan="4" class="dim">No history recorded yet.</td></tr>';
}} else {{
history.slice(-50).reverse().forEach(h => {{
const tr = document.createElement('tr');
tr.innerHTML =
'<td class="dim">' + (h.snapshot_at || h.recorded_at || '').substring(0, 10) + '</td>' +
'<td>' + escHtml(h.gap_topic) + '</td>' +
'<td><span class="sev-' + (h.severity || 'medium') + '">' + (h.severity || 'medium').toUpperCase() + '</span></td>' +
'<td>' + escHtml(h.status || 'open') + '</td>';
tbody.appendChild(tr);
}});
}}
}}
init();
</script>
</body>
</html>"""
        # Gaps page lives one level down, under docs/observatory/.
        (self.output_dir / "observatory" / "gaps.html").write_text(html)
# ── Timeline page ───────────────────────────────────────────────────────
def _generate_timeline_page(self) -> None:
    """Submission timeline across sources.

    Renders a fully static HTML page (no server-side data baked in) and
    writes it to <output_dir>/observatory/timeline.html. At view time the
    page fetches ../data/timeline.json and draws monthly stacked bars,
    toggleable between a by-source and a by-category breakdown.
    """
    # NOTE: inside this f-string, doubled braces {{ }} are literal CSS/JS
    # braces; the only Python interpolation is {self._header_html("timeline")}.
    html = f"""\
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Timeline - Living Standards Observatory</title>
<link rel="stylesheet" href="../assets/style.css">
<style>
.tl-row {{ display: flex; align-items: center; gap: 8px; padding: 6px 0; border-bottom: 1px solid #f0f0f0; }}
.tl-month {{ min-width: 80px; font-size: 0.82rem; color: var(--text-dim); font-family: monospace; }}
.tl-bars {{ flex: 1; display: flex; gap: 1px; align-items: center; }}
.tl-count {{ min-width: 30px; text-align: right; font-size: 0.78rem; color: var(--text-dim); }}
.legend {{ display: flex; gap: 16px; flex-wrap: wrap; margin-bottom: 16px; }}
.legend-item {{ display: flex; align-items: center; gap: 4px; font-size: 0.8rem; }}
.legend-swatch {{ width: 14px; height: 14px; border-radius: 3px; }}
.view-toggle {{ display: flex; gap: 8px; margin-bottom: 16px; }}
.view-btn {{ padding: 6px 16px; border: 1px solid var(--border); border-radius: 6px; background: var(--card-bg); cursor: pointer; font-size: 0.82rem; }}
.view-btn.active {{ background: var(--accent); color: #fff; border-color: var(--accent); }}
</style>
</head>
<body>
{self._header_html("timeline")}
<div class="container">
<h2 style="margin-bottom:8px">Submission Timeline</h2>
<p class="dim" style="margin-bottom:20px">Monthly document submissions across standards bodies and categories.</p>
<div class="view-toggle">
<button class="view-btn active" id="btnSource" onclick="setView('source')">By Source</button>
<button class="view-btn" id="btnCategory" onclick="setView('category')">By Category</button>
</div>
<div class="legend" id="legend"></div>
<div class="chart-container" id="timeline"></div>
<div class="panel">
<div class="panel-header">Monthly Totals</div>
<table>
<thead><tr><th>Month</th><th>Total</th><th id="breakdownHeader">By Source</th></tr></thead>
<tbody id="monthTable"></tbody>
</table>
</div>
</div>
<script>
function escHtml(s) {{ const d = document.createElement('div'); d.textContent = s || ''; return d.innerHTML; }}
const COLORS_SOURCE = {{'ietf': '#4a6cf7', 'w3c': '#ef4444', 'ieee': '#10b981', 'other': '#9ca3af'}};
const COLORS_CAT = [
'#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A',
'#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52',
'#7C8CF5', '#FF8C69', '#66CDAA', '#BA55D3', '#FFD700',
];
let TL_DATA = null;
let currentView = 'source';
function setView(view) {{
currentView = view;
document.getElementById('btnSource').className = 'view-btn' + (view === 'source' ? ' active' : '');
document.getElementById('btnCategory').className = 'view-btn' + (view === 'category' ? ' active' : '');
document.getElementById('breakdownHeader').textContent = view === 'source' ? 'By Source' : 'By Category';
renderTimeline();
}}
function renderTimeline() {{
if (!TL_DATA) return;
const months = TL_DATA.months;
const isSource = currentView === 'source';
const dataMap = isSource ? TL_DATA.by_source : TL_DATA.by_category;
const keys = isSource ? TL_DATA.sources : TL_DATA.categories;
// Assign colors
const colorMap = {{}};
if (isSource) {{
keys.forEach(k => {{ colorMap[k] = COLORS_SOURCE[k] || '#9ca3af'; }});
}} else {{
keys.forEach((k, i) => {{ colorMap[k] = COLORS_CAT[i % COLORS_CAT.length]; }});
}}
// Max for scaling
let maxTotal = 0;
months.forEach(m => {{
const d = dataMap[m] || {{}};
let t = 0;
keys.forEach(k => {{ t += d[k] || 0; }});
if (t > maxTotal) maxTotal = t;
}});
const scale = maxTotal > 0 ? 500 / maxTotal : 1;
// Legend
const legendEl = document.getElementById('legend');
legendEl.innerHTML = '';
keys.forEach(k => {{
legendEl.innerHTML += '<div class="legend-item"><div class="legend-swatch" style="background:' + colorMap[k] + '"></div>' + escHtml(k) + '</div>';
}});
// Chart
const container = document.getElementById('timeline');
container.innerHTML = '';
months.forEach(m => {{
const d = dataMap[m] || {{}};
let total = 0;
keys.forEach(k => {{ total += d[k] || 0; }});
let barsHtml = '';
keys.forEach(k => {{
const v = d[k] || 0;
if (v > 0) {{
const w = Math.max(v * scale, 2);
barsHtml += '<div class="tl-bar" style="width:' + w + 'px;background:' + colorMap[k] + '" title="' + escHtml(k) + ': ' + v + '"></div>';
}}
}});
container.innerHTML += '<div class="tl-row"><span class="tl-month">' + m + '</span><div class="tl-bars">' + barsHtml + '</div><span class="tl-count">' + total + '</span></div>';
}});
// Table
const tbody = document.getElementById('monthTable');
tbody.innerHTML = '';
[...months].reverse().forEach(m => {{
const d = dataMap[m] || {{}};
let total = 0;
const parts = [];
keys.forEach(k => {{
const v = d[k] || 0;
total += v;
if (v > 0) parts.push(k + ': ' + v);
}});
if (total > 0) {{
const tr = document.createElement('tr');
tr.innerHTML = '<td class="dim">' + m + '</td><td>' + total + '</td><td class="dim">' + parts.join(', ') + '</td>';
tbody.appendChild(tr);
}}
}});
}}
async function init() {{
TL_DATA = await fetch('../data/timeline.json').then(r => r.json());
renderTimeline();
}}
init();
</script>
</body>
</html>"""
    (self.output_dir / "observatory" / "timeline.html").write_text(html)

View File

@@ -10,7 +10,7 @@ from pathlib import Path
import numpy as np
from .config import Config
from .models import Author, Draft, Rating
from .models import Author, Draft, Rating, normalize_category
SCHEMA = """
CREATE TABLE IF NOT EXISTS drafts (
@@ -117,6 +117,73 @@ CREATE TABLE IF NOT EXISTS gaps (
analyzed_at TEXT
);
-- Cross-references (RFC, draft, BCP references found in draft text)
CREATE TABLE IF NOT EXISTS draft_refs (
draft_name TEXT NOT NULL REFERENCES drafts(name),
ref_type TEXT NOT NULL, -- 'rfc', 'draft', 'bcp'
ref_id TEXT NOT NULL, -- e.g. '8259', 'draft-ietf-httpbis-semantics', 'BCP14'
UNIQUE(draft_name, ref_type, ref_id)
);
CREATE INDEX IF NOT EXISTS idx_draft_refs_ref ON draft_refs(ref_type, ref_id);
-- Generated drafts from gap-to-draft pipeline
CREATE TABLE IF NOT EXISTS generated_drafts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
gap_topic TEXT NOT NULL,
draft_name TEXT NOT NULL,
title TEXT NOT NULL,
abstract TEXT NOT NULL DEFAULT '',
outline_json TEXT DEFAULT '{}',
sections_json TEXT DEFAULT '[]',
full_text TEXT,
family_name TEXT DEFAULT '',
family_role TEXT DEFAULT '',
version INTEGER DEFAULT 0,
rating_json TEXT DEFAULT '{}',
novelty_score REAL DEFAULT 0.0,
quality_score REAL DEFAULT 0.0,
status TEXT DEFAULT 'draft',
created_at TEXT
);
CREATE TABLE IF NOT EXISTS generation_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
family_name TEXT DEFAULT '',
gap_ids TEXT DEFAULT '[]',
total_input_tokens INTEGER DEFAULT 0,
total_output_tokens INTEGER DEFAULT 0,
model_used TEXT DEFAULT '',
status TEXT DEFAULT 'running',
started_at TEXT,
completed_at TEXT
);
-- Observatory tables
CREATE TABLE IF NOT EXISTS sources (
name TEXT PRIMARY KEY,
last_fetch TEXT,
doc_count INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS observatory_snapshots (
id INTEGER PRIMARY KEY AUTOINCREMENT,
snapshot_at TEXT NOT NULL,
total_docs INTEGER DEFAULT 0,
new_since_last INTEGER DEFAULT 0,
changed_gaps INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS gap_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
snapshot_id INTEGER REFERENCES observatory_snapshots(id),
gap_topic TEXT NOT NULL,
gap_description TEXT NOT NULL,
severity TEXT DEFAULT 'medium',
status TEXT DEFAULT 'open',
recorded_at TEXT
);
-- Triggers to keep FTS index in sync
CREATE TRIGGER IF NOT EXISTS drafts_ai AFTER INSERT ON drafts BEGIN
INSERT INTO drafts_fts(rowid, name, title, abstract, full_text)
@@ -152,8 +219,23 @@ class Database:
self._conn.execute("PRAGMA journal_mode=WAL")
self._conn.execute("PRAGMA foreign_keys=ON")
self._conn.executescript(SCHEMA)
self._migrate_schema()
return self._conn
def _migrate_schema(self) -> None:
"""Additive migration — add columns if missing."""
cols = {r[1] for r in self._conn.execute("PRAGMA table_info(drafts)").fetchall()}
migrations = [
("source", "TEXT DEFAULT 'ietf'"),
("source_id", "TEXT DEFAULT ''"),
("source_url", "TEXT DEFAULT ''"),
("doc_status", "TEXT DEFAULT ''"),
]
for col, typedef in migrations:
if col not in cols:
self._conn.execute(f"ALTER TABLE drafts ADD COLUMN {col} {typedef}")
self._conn.commit()
def close(self) -> None:
    """Close the underlying SQLite connection, if one was opened."""
    conn = self._conn
    if not conn:
        return
    conn.close()
@@ -303,7 +385,7 @@ class Database:
novelty_note=r["novelty_note"], maturity_note=r["maturity_note"],
overlap_note=r["overlap_note"], momentum_note=r["momentum_note"],
relevance_note=r["relevance_note"],
categories=json.loads(r["r_categories"]) if r["r_categories"] else [],
categories=[normalize_category(c) for c in json.loads(r["r_categories"])] if r["r_categories"] else [],
rated_at=r["rated_at"],
)
results.append((draft, rating))
@@ -503,6 +585,30 @@ class Database:
).fetchall()
return [(r["org_a"], r["org_b"], r["shared"]) for r in rows]
def org_data_raw(self) -> list[tuple[str, int, str]]:
    """Return (affiliation, person_id, draft_name) for all draft_authors with affiliation."""
    cur = self.conn.execute(
        "SELECT affiliation, person_id, draft_name FROM draft_authors WHERE affiliation != ''"
    )
    return [(affiliation, pid, draft) for affiliation, pid, draft in cur.fetchall()]

def author_draft_counts(self) -> dict[int, int]:
    """Return {person_id: draft_count} for all authors."""
    cur = self.conn.execute(
        "SELECT person_id, COUNT(*) FROM draft_authors GROUP BY person_id"
    )
    return {pid: count for pid, count in cur.fetchall()}

def author_draft_sets(self) -> dict[int, set[str]]:
    """Return {person_id: set(draft_names)} for all authors."""
    cur = self.conn.execute(
        "SELECT person_id, draft_name FROM draft_authors"
    )
    by_author: dict[int, set[str]] = {}
    for pid, draft_name in cur.fetchall():
        bucket = by_author.get(pid)
        if bucket is None:
            by_author[pid] = bucket = set()
        bucket.add(draft_name)
    return by_author
# --- Ideas ---
def insert_ideas(self, draft_name: str, ideas: list[dict]) -> None:
@@ -529,7 +635,9 @@ class Database:
rows = self.conn.execute(
"""SELECT d.name FROM drafts d
LEFT JOIN ideas i ON d.name = i.draft_name
WHERE i.draft_name IS NULL
LEFT JOIN llm_cache lc ON d.name = lc.draft_name
AND lc.request_json LIKE 'batch-ideas[%'
WHERE i.draft_name IS NULL AND lc.draft_name IS NULL
LIMIT ?""",
(limit,),
).fetchall()
@@ -565,6 +673,314 @@ class Database:
"category": r["category"], "evidence": r["evidence"],
"severity": r["severity"]} for r in rows]
# --- Refs ---
def insert_refs(self, draft_name: str, refs: list[tuple[str, str]]) -> None:
    """Insert cross-references for a draft. refs = [(ref_type, ref_id), ...].

    Duplicate (draft, type, id) triples are silently ignored.
    """
    self.conn.executemany(
        """INSERT OR IGNORE INTO draft_refs (draft_name, ref_type, ref_id)
        VALUES (?, ?, ?)""",
        [(draft_name, ref_type, ref_id) for ref_type, ref_id in refs],
    )
    self.conn.commit()

def get_refs_for_draft(self, draft_name: str) -> list[tuple[str, str]]:
    """Return [(ref_type, ref_id)] for a draft."""
    cur = self.conn.execute(
        "SELECT ref_type, ref_id FROM draft_refs WHERE draft_name = ?",
        (draft_name,),
    )
    return [(row["ref_type"], row["ref_id"]) for row in cur.fetchall()]
def top_referenced(self, ref_type: str = "rfc", limit: int = 30) -> list[tuple[str, int, list[str]]]:
    """Return (ref_id, count, [draft_names]) for most-referenced items."""
    rows = self.conn.execute(
        """SELECT ref_id, COUNT(*) as cnt,
        GROUP_CONCAT(draft_name, '||') as drafts
        FROM draft_refs
        WHERE ref_type = ?
        GROUP BY ref_id
        ORDER BY cnt DESC
        LIMIT ?""",
        (ref_type, limit),
    ).fetchall()
    results: list[tuple[str, int, list[str]]] = []
    for row in rows:
        # GROUP_CONCAT yields NULL for empty groups; map that to [].
        names = row["drafts"].split("||") if row["drafts"] else []
        results.append((row["ref_id"], row["cnt"], names))
    return results

def drafts_referencing(self, ref_type: str, ref_id: str) -> list[str]:
    """Return draft names that reference a specific RFC/draft/BCP."""
    cur = self.conn.execute(
        "SELECT draft_name FROM draft_refs WHERE ref_type = ? AND ref_id = ?",
        (ref_type, ref_id),
    )
    return [row["draft_name"] for row in cur.fetchall()]

def ref_counts_by_draft(self) -> list[tuple[str, int, int, int]]:
    """Return (draft_name, rfc_count, draft_count, bcp_count) for all drafts with refs."""
    cur = self.conn.execute(
        """SELECT draft_name,
        SUM(CASE WHEN ref_type = 'rfc' THEN 1 ELSE 0 END) as rfcs,
        SUM(CASE WHEN ref_type = 'draft' THEN 1 ELSE 0 END) as drafts,
        SUM(CASE WHEN ref_type = 'bcp' THEN 1 ELSE 0 END) as bcps
        FROM draft_refs
        GROUP BY draft_name
        ORDER BY rfcs DESC"""
    )
    return [(row["draft_name"], row["rfcs"], row["drafts"], row["bcps"]) for row in cur.fetchall()]
def drafts_without_refs(self, limit: int = 500) -> list[str]:
    """Return draft names that have full_text but no refs extracted yet."""
    cur = self.conn.execute(
        """SELECT d.name FROM drafts d
        LEFT JOIN draft_refs dr ON d.name = dr.draft_name
        WHERE d.full_text IS NOT NULL AND dr.draft_name IS NULL
        LIMIT ?""",
        (limit,),
    )
    return [row["name"] for row in cur.fetchall()]

def ref_stats(self) -> dict:
    """Return summary stats for refs table."""
    summary = self.conn.execute(
        """SELECT COUNT(DISTINCT draft_name) as drafts_with_refs,
        COUNT(*) as total_refs,
        SUM(CASE WHEN ref_type = 'rfc' THEN 1 ELSE 0 END) as rfc_refs,
        SUM(CASE WHEN ref_type = 'draft' THEN 1 ELSE 0 END) as draft_refs,
        SUM(CASE WHEN ref_type = 'bcp' THEN 1 ELSE 0 END) as bcp_refs,
        COUNT(DISTINCT ref_id) as unique_refs
        FROM draft_refs"""
    ).fetchone()
    return dict(summary)
# --- Generated Drafts ---
def upsert_generated_draft(self, data: dict) -> int:
    """Insert or update a generated draft. Returns row id.

    Identity key is (draft_name, version): if a row already exists for that
    pair it is updated in place (created_at and version are deliberately
    left unchanged); otherwise a new row is inserted with created_at = now
    (UTC). JSON-valued inputs (outline, sections, rating) are serialized
    before storage.
    """
    now = datetime.now(timezone.utc).isoformat()
    existing = self.conn.execute(
        "SELECT id FROM generated_drafts WHERE draft_name = ? AND version = ?",
        (data["draft_name"], data.get("version", 0)),
    ).fetchone()
    if existing:
        # Update path: refresh every content field of the existing row.
        self.conn.execute(
            """UPDATE generated_drafts SET
            gap_topic=?, title=?, abstract=?, outline_json=?,
            sections_json=?, full_text=?, family_name=?, family_role=?,
            rating_json=?, novelty_score=?, quality_score=?, status=?
            WHERE id=?""",
            (data["gap_topic"], data["title"], data.get("abstract", ""),
            json.dumps(data.get("outline", {})), json.dumps(data.get("sections", [])),
            data.get("full_text"), data.get("family_name", ""),
            data.get("family_role", ""), json.dumps(data.get("rating", {})),
            data.get("novelty_score", 0.0), data.get("quality_score", 0.0),
            data.get("status", "draft"), existing["id"]),
        )
        self.conn.commit()
        return existing["id"]
    else:
        # Insert path: gap_topic, draft_name and title are required keys.
        cur = self.conn.execute(
            """INSERT INTO generated_drafts
            (gap_topic, draft_name, title, abstract, outline_json, sections_json,
            full_text, family_name, family_role, version, rating_json,
            novelty_score, quality_score, status, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (data["gap_topic"], data["draft_name"], data["title"],
            data.get("abstract", ""), json.dumps(data.get("outline", {})),
            json.dumps(data.get("sections", [])), data.get("full_text"),
            data.get("family_name", ""), data.get("family_role", ""),
            data.get("version", 0), json.dumps(data.get("rating", {})),
            data.get("novelty_score", 0.0), data.get("quality_score", 0.0),
            data.get("status", "draft"), now),
        )
        self.conn.commit()
        return cur.lastrowid
def get_generated_drafts(self, status: str | None = None) -> list[dict]:
    """Return all generated drafts, newest first, optionally filtered by status."""
    sql = "SELECT * FROM generated_drafts"
    args: list = []
    if status:
        sql += " WHERE status = ?"
        args.append(status)
    sql += " ORDER BY created_at DESC"
    return [dict(row) for row in self.conn.execute(sql, args).fetchall()]

def get_generated_draft(self, draft_id: int) -> dict | None:
    """Return a single generated draft by id, or None when absent."""
    row = self.conn.execute(
        "SELECT * FROM generated_drafts WHERE id = ?", (draft_id,)
    ).fetchone()
    if row is None:
        return None
    return dict(row)

def get_family_drafts(self, family_name: str) -> list[dict]:
    """Return all generated drafts of a family, ordered by family_role."""
    cur = self.conn.execute(
        "SELECT * FROM generated_drafts WHERE family_name = ? ORDER BY family_role",
        (family_name,),
    )
    return [dict(row) for row in cur.fetchall()]
def log_generation_run(self, data: dict) -> int:
    """Insert a new generation-run record; returns its row id.

    Missing keys fall back to defaults; started_at is stamped now (UTC)
    and gap_ids is stored as JSON.
    """
    now = datetime.now(timezone.utc).isoformat()
    cur = self.conn.execute(
        """INSERT INTO generation_runs
        (family_name, gap_ids, total_input_tokens, total_output_tokens,
        model_used, status, started_at)
        VALUES (?, ?, ?, ?, ?, ?, ?)""",
        (data.get("family_name", ""), json.dumps(data.get("gap_ids", [])),
        data.get("total_input_tokens", 0), data.get("total_output_tokens", 0),
        data.get("model_used", ""), data.get("status", "running"), now),
    )
    self.conn.commit()
    return cur.lastrowid

def update_generation_run(self, run_id: int, **kwargs) -> None:
    """Update arbitrary columns of a generation run.

    Column names come from the keyword-argument names. Because they are
    interpolated into the SQL text, each name is validated as a plain
    Python identifier first — this closes the SQL-injection/typo hole the
    values-only placeholders cannot cover.

    Raises:
        ValueError: if a keyword name is not a valid identifier.
    """
    if not kwargs:
        return
    sets = []
    params: list = []
    for col, value in kwargs.items():
        if not col.isidentifier():
            raise ValueError(f"invalid column name: {col!r}")
        sets.append(f"{col} = ?")
        params.append(value)
    params.append(run_id)
    self.conn.execute(
        f"UPDATE generation_runs SET {', '.join(sets)} WHERE id = ?", params
    )
    self.conn.commit()
# --- Observatory ---
def upsert_source(self, name: str, doc_count: int = 0) -> None:
    """Create or refresh a source record, stamping last_fetch with now (UTC)."""
    stamp = datetime.now(timezone.utc).isoformat()
    self.conn.execute(
        """INSERT INTO sources (name, last_fetch, doc_count)
        VALUES (?, ?, ?)
        ON CONFLICT(name) DO UPDATE SET last_fetch=excluded.last_fetch, doc_count=excluded.doc_count""",
        (name, stamp, doc_count),
    )
    self.conn.commit()

def get_source(self, name: str) -> dict | None:
    """Return one source record as a dict, or None if unknown."""
    row = self.conn.execute("SELECT * FROM sources WHERE name = ?", (name,)).fetchone()
    if row is None:
        return None
    return dict(row)

def all_sources(self) -> list[dict]:
    """Return every source record, ordered by name."""
    cur = self.conn.execute("SELECT * FROM sources ORDER BY name")
    return [dict(row) for row in cur.fetchall()]
def create_snapshot(self) -> int:
    """Record an observatory snapshot row and return its id.

    new_since_last counts drafts fetched after the previous snapshot's
    timestamp; on the very first snapshot it equals the total doc count.
    """
    now = datetime.now(timezone.utc).isoformat()
    total = self.count_drafts()
    prev = self.conn.execute(
        "SELECT snapshot_at FROM observatory_snapshots ORDER BY id DESC LIMIT 1"
    ).fetchone()
    if prev:
        new_count = self.conn.execute(
            "SELECT COUNT(*) FROM drafts WHERE fetched_at > ?", (prev["snapshot_at"],)
        ).fetchone()[0]
    else:
        new_count = total
    cur = self.conn.execute(
        """INSERT INTO observatory_snapshots (snapshot_at, total_docs, new_since_last, changed_gaps)
        VALUES (?, ?, ?, 0)""",
        (now, total, new_count),
    )
    self.conn.commit()
    return cur.lastrowid
def record_gap_history(self, snapshot_id: int, gaps: list[dict]) -> None:
    """Append one gap_history row per gap, tied to the given snapshot."""
    now = datetime.now(timezone.utc).isoformat()
    self.conn.executemany(
        """INSERT INTO gap_history (snapshot_id, gap_topic, gap_description, severity, status, recorded_at)
        VALUES (?, ?, ?, ?, ?, ?)""",
        [
            (snapshot_id, gap["topic"], gap["description"],
             gap.get("severity", "medium"), gap.get("status", "open"), now)
            for gap in gaps
        ],
    )
    self.conn.commit()

def gap_history_timeline(self) -> list[dict]:
    """Return all gap-history rows joined with their snapshot timestamps."""
    cur = self.conn.execute(
        """SELECT gh.*, os.snapshot_at FROM gap_history gh
        JOIN observatory_snapshots os ON gh.snapshot_id = os.id
        ORDER BY os.snapshot_at, gh.gap_topic"""
    )
    return [dict(row) for row in cur.fetchall()]

def get_snapshots(self, limit: int = 20) -> list[dict]:
    """Return the most recent snapshots, newest first."""
    cur = self.conn.execute(
        "SELECT * FROM observatory_snapshots ORDER BY id DESC LIMIT ?", (limit,)
    )
    return [dict(row) for row in cur.fetchall()]

def drafts_by_source(self, source: str, limit: int = 500) -> list[Draft]:
    """Return drafts from one source, newest first."""
    cur = self.conn.execute(
        "SELECT * FROM drafts WHERE source = ? ORDER BY time DESC LIMIT ?",
        (source, limit),
    )
    return [self._row_to_draft(row) for row in cur.fetchall()]
# --- WG/Status ---
def draft_adoption_status(self) -> list[dict]:
    """Return adoption status for all drafts based on naming convention.

    Returns list of dicts: {name, title, time, wg_adopted, wg_name, stream}.

    Heuristic: draft-ietf-{wg}-* means WG-adopted on the IETF stream;
    draft-irtf-{rg}-* is an IRTF research-group document (not flagged as
    WG-adopted here); anything else is treated as an individual submission.
    """
    import re
    # Compile once, outside the per-draft loop (the original recompiled
    # the same patterns for every row).
    ietf_re = re.compile(r'^draft-ietf-(\w+)-')
    irtf_re = re.compile(r'^draft-irtf-(\w+)-')
    rows = self.conn.execute(
        'SELECT name, title, time FROM drafts'
    ).fetchall()
    results = []
    for r in rows:
        name = r["name"]
        wg_adopted = False
        wg_name = ""
        stream = "individual"
        # Primary signal: draft-ietf-{wg}-* naming convention
        m = ietf_re.match(name)
        if m:
            wg_adopted = True
            wg_name = m.group(1)
            stream = "ietf"
        elif name.startswith("draft-irtf-"):
            m2 = irtf_re.match(name)
            wg_name = m2.group(1) if m2 else ""
            stream = "irtf"
        results.append({
            "name": name,
            "title": r["title"],
            "time": r["time"],
            "wg_adopted": wg_adopted,
            "wg_name": wg_name,
            "stream": stream,
        })
    return results
def revision_velocity(self) -> list[dict]:
    """Return revision data for all drafts.

    Returns list of dicts: {name, title, time, rev, rev_int}.
    rev_int is the revision parsed as an int; non-numeric or NULL revs
    map to 0 (the original raised AttributeError on a NULL rev).
    """
    rows = self.conn.execute(
        "SELECT name, title, time, rev FROM drafts"
    ).fetchall()
    results = []
    for r in rows:
        rev = r["rev"]
        results.append({
            "name": r["name"],
            "title": r["title"],
            "time": r["time"],
            "rev": rev,
            # Guard: rev may be NULL in the DB; treat it like non-numeric.
            "rev_int": int(rev) if rev and rev.isdigit() else 0,
        })
    return results
# --- Helpers ---
@staticmethod
@@ -580,11 +996,16 @@ class Database:
categories=json.loads(d.get("categories") or "[]"),
tags=json.loads(d.get("tags") or "[]"),
fetched_at=d.get("fetched_at"),
source=d.get("source", "ietf"),
source_id=d.get("source_id", ""),
source_url=d.get("source_url", ""),
doc_status=d.get("doc_status", ""),
)
@staticmethod
def _row_to_rating(row: sqlite3.Row) -> Rating:
d = dict(row)
raw_cats = json.loads(d.get("categories") or "[]")
return Rating(
draft_name=d["draft_name"], novelty=d["novelty"], maturity=d["maturity"],
overlap=d["overlap"], momentum=d["momentum"], relevance=d["relevance"],
@@ -594,6 +1015,6 @@ class Database:
overlap_note=d.get("overlap_note", ""),
momentum_note=d.get("momentum_note", ""),
relevance_note=d.get("relevance_note", ""),
categories=json.loads(d.get("categories") or "[]"),
categories=[normalize_category(c) for c in raw_cats],
rated_at=d.get("rated_at"),
)

View File

@@ -5,6 +5,24 @@ from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
# Map old verbose category names to canonical short names
CATEGORY_NORMALIZE: dict[str, str] = {
    "Agent-to-agent communication protocols": "A2A protocols",
    "AI safety / guardrails / alignment": "AI safety/alignment",
    "ML-based traffic management / optimization": "ML traffic mgmt",
    "Autonomous network operations": "Autonomous netops",
    "Identity / authentication for AI agents": "Agent identity/auth",
    "Data formats / semantics for AI interop": "Data formats/interop",
    "Policy / governance / ethical frameworks": "Policy/governance",
    "AI model serving / inference protocols": "Model serving/inference",
    "Agent discovery / registration": "Agent discovery/reg",
}


def normalize_category(cat: str) -> str:
    """Normalize a category name to its canonical short form.

    Categories without a mapping pass through unchanged.
    """
    try:
        return CATEGORY_NORMALIZE[cat]
    except KeyError:
        return cat
@dataclass
class Author:
@@ -36,6 +54,10 @@ class Draft:
categories: list[str] = field(default_factory=list)
tags: list[str] = field(default_factory=list)
fetched_at: str | None = None
source: str = "ietf"
source_id: str = ""
source_url: str = ""
doc_status: str = ""
@property
def text_url(self) -> str:

View File

@@ -0,0 +1,286 @@
"""Observatory — orchestrates periodic update cycles across sources."""
from __future__ import annotations
import json
from datetime import datetime, timezone
from rich.console import Console
from rich.progress import (
BarColumn,
MofNCompleteColumn,
Progress,
SpinnerColumn,
TextColumn,
)
from .config import Config
from .db import Database
from .models import Draft
from .sources import get_fetcher
from .sources.base import SourceDocument
console = Console()
def _doc_to_draft(doc: SourceDocument) -> Draft:
    """Convert a SourceDocument to a Draft for DB storage."""
    extra = doc.extra or {}
    sid = doc.source_id
    # dt_id is only meaningful when the source id is purely numeric
    # (IETF datatracker ids); otherwise leave it unset.
    dt_id = int(sid) if sid and sid.isdigit() else None
    return Draft(
        name=doc.name,
        rev=extra.get("rev", "00"),
        title=doc.title,
        abstract=doc.abstract,
        time=doc.time,
        dt_id=dt_id,
        pages=extra.get("pages"),
        words=extra.get("words"),
        group=extra.get("group"),
        group_uri=extra.get("group_uri"),
        expires=extra.get("expires"),
        ad=extra.get("ad"),
        shepherd=extra.get("shepherd"),
        states=extra.get("states", []),
        full_text=doc.full_text,
        fetched_at=datetime.now(timezone.utc).isoformat(),
        source=doc.source,
        source_id=sid,
        source_url=doc.source_url,
        doc_status=doc.doc_status,
    )
class Observatory:
    """Orchestrates the full observatory update cycle.

    Wires together the project Database, the Claude-backed Analyzer
    (lazy-loaded), the Ollama embedder and the per-source fetchers, and
    prints progress to the shared rich console as it goes.
    """

    def __init__(
        self,
        config: Config | None = None,
        db: Database | None = None,
        analyzer=None,
    ):
        # All collaborators are injectable (tests); defaults come from the
        # project Config/Database.
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self._analyzer = analyzer

    @property
    def analyzer(self):
        """Lazy-load analyzer to avoid Anthropic key requirement for status/diff."""
        if self._analyzer is None:
            from .analyzer import Analyzer
            self._analyzer = Analyzer(self.config, self.db)
        return self._analyzer

    def update(
        self,
        sources: list[str] | None = None,
        full: bool = False,
    ) -> dict:
        """Full update cycle.

        1. Snapshot current state
        2. Fetch from enabled sources (delta by default)
        3. Analyze unrated docs (Claude, with caching)
        4. Embed missing docs (Ollama)
        5. Extract ideas from new docs
        6. Re-run gap analysis if >= 5 new docs
        7. Record gap changes in gap_history
        8. Return summary stats
        """
        sources = sources or self.config.observatory_sources
        stats: dict = {"sources": {}, "new_docs": 0, "analyzed": 0, "embedded": 0, "ideas": 0, "gaps_changed": False}
        # 1. Snapshot current state — done first so "new since last" in the
        # NEXT snapshot is measured against this run.
        console.print("[bold]1/7[/] Creating snapshot...")
        snapshot_id = self.db.create_snapshot()
        # 2. Fetch from enabled sources
        console.print("[bold]2/7[/] Fetching from sources...")
        total_new = 0
        for src_name in sources:
            new_count = self._fetch_source(src_name, full=full)
            stats["sources"][src_name] = new_count
            total_new += new_count
        stats["new_docs"] = total_new
        console.print(f" Fetched [bold green]{total_new}[/] new documents total")
        # 3. Analyze unrated docs
        console.print("[bold]3/7[/] Analyzing unrated documents...")
        analyzed = self.analyzer.rate_all_unrated(limit=200, batch_size=5)
        stats["analyzed"] = analyzed
        # 4. Embed missing docs
        console.print("[bold]4/7[/] Embedding missing documents...")
        embedded = self._embed_missing()
        stats["embedded"] = embedded
        # 5. Extract ideas from new docs
        console.print("[bold]5/7[/] Extracting ideas...")
        ideas = self.analyzer.extract_all_ideas(limit=200, batch_size=5, cheap=True)
        stats["ideas"] = ideas
        # 6. Re-run gap analysis if enough new docs
        if total_new >= 5:
            console.print("[bold]6/7[/] Re-running gap analysis...")
            gaps = self.analyzer.gap_analysis()
            if gaps:
                self.db.record_gap_history(snapshot_id, gaps)
                stats["gaps_changed"] = True
                console.print(f" Found [bold]{len(gaps)}[/] gaps")
        else:
            console.print(f"[bold]6/7[/] Skipping gap analysis ({total_new} < 5 new docs)")
            # Record current gaps unchanged so the history stays continuous.
            current_gaps = self.db.all_gaps()
            if current_gaps:
                self.db.record_gap_history(snapshot_id, current_gaps)
        # 7. Update source records
        console.print("[bold]7/7[/] Updating source records...")
        for src_name in sources:
            count = len(self.db.drafts_by_source(src_name, limit=10000))
            self.db.upsert_source(src_name, doc_count=count)
        console.print("\n[bold green]Observatory update complete![/]")
        console.print(f" New docs: {total_new} | Analyzed: {analyzed} | Embedded: {embedded} | Ideas: {ideas}")
        return stats

    def _fetch_source(self, source_name: str, full: bool = False) -> int:
        """Fetch documents from a single source. Returns count of new docs."""
        fetcher = get_fetcher(source_name, self.config)
        try:
            # Delta fetch: only since last fetch unless full=True
            since = None
            if not full:
                src = self.db.get_source(source_name)
                if src and src.get("last_fetch"):
                    since = src["last_fetch"][:10]  # Date portion only
            docs = fetcher.search(self.config.search_keywords, since=since)
            new_count = 0
            for doc in docs:
                existing = self.db.get_draft(doc.name)
                if existing is None:
                    # NOTE(review): only previously-unseen docs are stored
                    # here so that a metadata-only refetch cannot clobber a
                    # stored draft's full_text — confirm against the
                    # original file's indentation.
                    new_count += 1
                    draft = _doc_to_draft(doc)
                    self.db.upsert_draft(draft)
            # Download text for docs missing it
            missing_text = [
                d for d in docs
                if self.db.get_draft(d.name) and self.db.get_draft(d.name).full_text is None
            ]
            if missing_text:
                console.print(f" Downloading text for {len(missing_text)} {source_name} docs...")
                with Progress(
                    SpinnerColumn(),
                    TextColumn("[progress.description]{task.description}"),
                    BarColumn(),
                    MofNCompleteColumn(),
                    console=console,
                ) as progress:
                    task = progress.add_task(f"Downloading {source_name} texts...", total=len(missing_text))
                    for doc in missing_text:
                        text = fetcher.download_text(doc)
                        if text:
                            draft = self.db.get_draft(doc.name)
                            if draft:
                                draft.full_text = text
                                self.db.upsert_draft(draft)
                        progress.advance(task)
            return new_count
        finally:
            # Always release the fetcher's network resources.
            fetcher.close()

    def _embed_missing(self) -> int:
        """Embed documents that don't have embeddings yet."""
        missing = self.db.drafts_without_embeddings(limit=500)
        if not missing:
            console.print(" All documents already embedded.")
            return 0
        try:
            from .embeddings import Embedder
            embedder = Embedder(self.config, self.db)
        except Exception as e:
            # Embeddings are best-effort: a missing Ollama backend degrades
            # gracefully instead of failing the whole update.
            console.print(f" [yellow]Skipping embeddings (Ollama unavailable): {e}[/]")
            return 0
        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Embedding...", total=len(missing))
            for name in missing:
                try:
                    vec = embedder.embed_draft(name)
                    if vec is not None:
                        count += 1
                except Exception:
                    # Per-document failures are swallowed so one bad doc
                    # cannot abort the batch.
                    pass
                progress.advance(task)
        console.print(f" Embedded [bold green]{count}[/] documents")
        return count

    def status(self) -> dict:
        """Current observatory state -- doc counts, sources, last update."""
        total = self.db.count_drafts()
        sources = self.db.all_sources()
        snapshots = self.db.get_snapshots(limit=1)
        gaps = self.db.all_gaps()
        # Count by source
        source_counts = {}
        for src in sources:
            source_counts[src["name"]] = src["doc_count"]
        # Unrated / unembedded
        unrated = len(self.db.unrated_drafts(limit=10000))
        unembedded = len(self.db.drafts_without_embeddings(limit=10000))
        last_update = snapshots[0]["snapshot_at"] if snapshots else None
        return {
            "total_docs": total,
            "sources": source_counts,
            "unrated": unrated,
            "unembedded": unembedded,
            "gaps": len(gaps),
            "last_update": last_update,
            "snapshots": len(self.db.get_snapshots(limit=100)),
        }

    def diff(self, since: str | None = None) -> dict:
        """What changed since a date -- new docs, gap changes."""
        if since is None:
            # Default to last snapshot
            snapshots = self.db.get_snapshots(limit=2)
            if len(snapshots) >= 2:
                since = snapshots[1]["snapshot_at"]
            else:
                # No previous snapshot to diff against: use a floor date so
                # everything counts as new.
                since = "2000-01-01"
        # New docs since date — comparison relies on ISO-8601 strings
        # sorting lexicographically.
        new_docs = self.db.conn.execute(
            "SELECT name, title, source, time FROM drafts WHERE fetched_at > ? ORDER BY time DESC",
            (since,),
        ).fetchall()
        # Gap changes
        gap_timeline = self.db.gap_history_timeline()
        recent_gaps = [g for g in gap_timeline if g.get("recorded_at", "") > since]
        return {
            "since": since,
            "new_docs": [dict(r) for r in new_docs],
            "new_doc_count": len(new_docs),
            "gap_changes": recent_gaps,
        }

291
src/ietf_analyzer/orgs.py Normal file
View File

@@ -0,0 +1,291 @@
"""Organization normalization and team bloc detection."""
from __future__ import annotations
from collections import defaultdict
from dataclasses import dataclass, field
from .db import Database
# Maps raw affiliation strings to canonical org names.
# Built from SELECT DISTINCT affiliation FROM draft_authors.
ORG_ALIASES: dict[str, str] = {
    # Huawei
    "Huawei Technologies": "Huawei",
    "Huawei Technologies Co., Ltd.": "Huawei",
    "Huawei Technologies, Co., Ltd": "Huawei",
    "Huawei Tech": "Huawei",
    "Huawei Canada": "Huawei",
    "Huawei R&D": "Huawei",
    "Huawei Singapore": "Huawei",
    # Cisco
    "Cisco Systems": "Cisco",
    "Cisco Systems, Inc.": "Cisco",
    # Ericsson
    "Ericsson AB": "Ericsson",
    # RISE
    "RISE AB": "RISE",
    "RISE": "RISE",
    # Independent
    "Independent Researcher": "Independent",
    "Unaffiliated": "Independent",
    "Individual Contributor": "Independent",
    # Inria
    "INRIA": "Inria",
    # Google
    "Google LLC": "Google",
    "Google": "Google",
    # Apple
    "Apple Inc": "Apple",
    "Apple, Inc": "Apple",
    "Apple": "Apple",
    # Amazon
    "Amazon Web Services": "Amazon",
    "AWS": "Amazon",
    "Amazon": "Amazon",
    # Siemens
    "Siemens AG": "Siemens",
    # ZTE
    "ZTE": "ZTE Corporation",
    # Telefonica
    "Telefonica I+D": "Telefonica",
    # Deutsche Telekom
    "Deutsche Telecom": "Deutsche Telekom",
    # InterDigital
    "InterDigital Europe Ltd.": "InterDigital Europe",
    # Boeing
    "Boeing Technology Innovation": "Boeing",
    "Boeing Research & Technology": "Boeing",
    # Futurewei
    "Futurewei Technologies USA": "Futurewei",
    "Futurewei": "Futurewei",
    # IBM
    "IBM Research": "IBM",
    "IBM": "IBM",
    # China Telecom
    "China Telecom Research Institute": "China Telecom",
    # Beijing University (multiline variant from Datatracker)
    "Beijing University of Posts and\n Telecommunications": "BUPT",
    "Beijing University of Posts and Telecommunications": "BUPT",
    # AsiaInfo
    "AsiaInfo Technologies (China) Inc.": "AsiaInfo",
    "AsiaInfo Technologies (China) Inc": "AsiaInfo",
    # Dept of CS
    "Department of Computer Science and Engineering": "Department of Computer Science & Engineering",
}

# Common suffixes to strip for fuzzy matching. normalize_org() always tries
# the LONGEST matching suffix first, so compound suffixes such as
# " Co., Ltd." win over their shorter tails (" Ltd.", " Ltd").
_SUFFIXES = [
    ", Inc.", ", Inc", " Inc.", " Inc",
    " LLC", " Ltd.", " Ltd",
    " AB", " GmbH", " Corp",
    " Co., Ltd.", " Co., Ltd",
    " Technologies",
]


def normalize_org(raw: str) -> str:
    """Normalize an affiliation string to a canonical org name.

    Resolution order:
      1. exact match in ORG_ALIASES;
      2. strip one corporate suffix (longest match first) and retry the map;
      3. case-insensitive match of the stripped form against canonical names.

    Unknown affiliations are returned unchanged (whitespace-stripped); an
    empty/blank input yields "".
    """
    raw = raw.strip()
    if not raw:
        return ""
    # Exact match
    if raw in ORG_ALIASES:
        return ORG_ALIASES[raw]
    # Fuzzy: strip one suffix and check again. Longest suffix first --
    # otherwise " Ltd." would fire on "Acme Co., Ltd." and leave a dangling
    # "Acme Co.," that matches nothing.
    stripped = raw
    for suffix in sorted(_SUFFIXES, key=len, reverse=True):
        if stripped.endswith(suffix):
            stripped = stripped[: -len(suffix)].strip()
            break
    if stripped in ORG_ALIASES:
        return ORG_ALIASES[stripped]
    if stripped != raw and stripped:
        # Check if the stripped form matches a canonical name directly
        for canonical in set(ORG_ALIASES.values()):
            if stripped.lower() == canonical.lower():
                return canonical
    return raw
@dataclass
class Bloc:
    """A team of authors who consistently co-author together."""

    members: list[tuple[int, str, str]]  # (person_id, name, normalized_org)
    shared_drafts: int  # drafts where >= 2 members co-author
    primary_org: str
    cohesion: float  # avg pairwise cohesion

    @property
    def member_pids(self) -> set[int]:
        """Person ids of every member, as a set."""
        return {member[0] for member in self.members}

    @property
    def label(self) -> str:
        """Human-readable tag, e.g. "Huawei team (4)"."""
        return f"{self.primary_org} team ({len(self.members)})"
def detect_blocs(
    db: Database,
    cohesion_threshold: float = 0.70,
    min_size: int = 2,
    min_shared_drafts: int = 2,
) -> list[Bloc]:
    """Detect team blocs where all member pairs share >= threshold of drafts.

    Uses connected components on a cohesion-filtered co-author graph,
    then merges overlapping groups into single blocs.

    Args:
        db: open Database with authors / draft_authors populated.
        cohesion_threshold: minimum shared/min(draft_count) ratio for a pair
            to count as an edge in the graph.
        min_size: smallest connected component kept as a bloc.
        min_shared_drafts: minimum co-authored drafts for a pair to be
            considered at all (applied in SQL via HAVING).

    Returns:
        Blocs sorted by shared-draft count desc, then member count desc.
    """
    draft_counts = db.author_draft_counts()
    draft_sets = db.author_draft_sets()
    # Get enriched pair data with person_ids. The da1.person_id < da2.person_id
    # join condition yields each unordered pair exactly once.
    rows = db.conn.execute(
        """SELECT a1.name, da1.person_id, a2.name, da2.person_id, COUNT(*) as shared
        FROM draft_authors da1
        JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
        AND da1.person_id < da2.person_id
        JOIN authors a1 ON da1.person_id = a1.person_id
        JOIN authors a2 ON da2.person_id = a2.person_id
        GROUP BY da1.person_id, da2.person_id
        HAVING shared >= ?
        ORDER BY shared DESC""",
        (min_shared_drafts,),
    ).fetchall()
    # Get affiliations per person (normalized to canonical org names).
    aff_rows = db.conn.execute(
        "SELECT person_id, affiliation FROM authors"
    ).fetchall()
    person_aff = {r[0]: normalize_org(r[1]) for r in aff_rows}
    person_name: dict[int, str] = {}
    # Build cohesion-filtered adjacency: only keep edges with high overlap.
    adj: dict[int, set[int]] = defaultdict(set)
    pair_shared: dict[tuple[int, int], int] = {}
    pair_cohesion: dict[tuple[int, int], float] = {}
    for r in rows:
        name_a, pid_a, name_b, pid_b, shared = r[0], r[1], r[2], r[3], r[4]
        person_name[pid_a] = name_a
        person_name[pid_b] = name_b
        # Cohesion is relative to the LESS prolific author of the pair, so a
        # junior member who only ever co-authors with the team scores high.
        min_d = min(draft_counts.get(pid_a, 1), draft_counts.get(pid_b, 1))
        cohesion = shared / min_d
        if cohesion >= cohesion_threshold:
            adj[pid_a].add(pid_b)
            adj[pid_b].add(pid_a)
            key = (min(pid_a, pid_b), max(pid_a, pid_b))
            pair_shared[key] = shared
            pair_cohesion[key] = cohesion
    # Find connected components (each component = one merged bloc) via an
    # iterative DFS over the filtered adjacency.
    visited: set[int] = set()
    components: list[set[int]] = []
    for pid in adj:
        if pid in visited:
            continue
        component: set[int] = set()
        stack = [pid]
        while stack:
            node = stack.pop()
            if node in visited:
                continue
            visited.add(node)
            component.add(node)
            stack.extend(adj[node] - visited)
        if len(component) >= min_size:
            components.append(component)
    # Build Bloc objects from components
    blocs = []
    for comp in components:
        members = [
            (pid, person_name.get(pid, "?"), person_aff.get(pid, ""))
            for pid in comp
        ]
        # Shared drafts = drafts where >= 2 bloc members appear
        all_drafts: dict[str, int] = defaultdict(int)
        for pid in comp:
            for d in draft_sets.get(pid, set()):
                all_drafts[d] += 1
        shared_count = sum(1 for cnt in all_drafts.values() if cnt >= 2)
        # Primary org = most common among members (ties break arbitrarily)
        org_counts: dict[str, int] = defaultdict(int)
        for _, _, org in members:
            if org:
                org_counts[org] += 1
        primary = max(org_counts, key=org_counts.get) if org_counts else ""
        # Average pairwise cohesion -- only pairs that passed the threshold
        # are in pair_cohesion, so this averages over connected pairs only.
        edges = [
            pair_cohesion[key]
            for a in comp for b in comp if a < b
            for key in [(a, b)] if key in pair_cohesion
        ]
        avg_coh = sum(edges) / len(edges) if edges else 0
        blocs.append(Bloc(
            # Most prolific members first within each bloc.
            members=sorted(members, key=lambda m: -len(draft_sets.get(m[0], set()))),
            shared_drafts=shared_count,
            primary_org=primary,
            cohesion=avg_coh,
        ))
    # Sort: most shared drafts first (the interesting ones)
    blocs.sort(key=lambda b: (-b.shared_drafts, -len(b.members)))
    return blocs
def top_orgs_normalized(
    db: Database, limit: int = 20
) -> list[tuple[str, int, int]]:
    """Return (canonical_org, unique_authors, unique_drafts) with merged orgs."""
    people_by_org: dict[str, set[int]] = defaultdict(set)
    drafts_by_org: dict[str, set[str]] = defaultdict(set)
    for affiliation, person_id, draft_name in db.org_data_raw():
        org = normalize_org(affiliation)
        if not org:
            continue
        people_by_org[org].add(person_id)
        drafts_by_org[org].add(draft_name)
    # Rank by unique draft count, descending (stable for ties).
    ranked = sorted(
        (
            (org, len(pids), len(drafts_by_org[org]))
            for org, pids in people_by_org.items()
        ),
        key=lambda row: -row[2],
    )
    return ranked[:limit]
def cross_org_normalized(
    db: Database, limit: int = 20
) -> list[tuple[str, str, int]]:
    """Return (org_a, org_b, shared_drafts) with normalized org names."""
    # All (aff_a, aff_b, draft_name) triples for co-author pairs where both
    # sides have a non-empty affiliation.
    rows = db.conn.execute(
        """SELECT da1.affiliation, da2.affiliation, da1.draft_name
        FROM draft_authors da1
        JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
        AND da1.person_id < da2.person_id
        WHERE da1.affiliation != '' AND da2.affiliation != ''"""
    ).fetchall()
    shared: dict[tuple[str, str], set[str]] = defaultdict(set)
    for raw_a, raw_b, draft_name in rows:
        org_a = normalize_org(raw_a)
        org_b = normalize_org(raw_b)
        # Only genuine cross-org pairs count; sort so (A, B) == (B, A).
        if not org_a or not org_b or org_a == org_b:
            continue
        shared[tuple(sorted((org_a, org_b)))].add(draft_name)
    ranked = sorted(
        ((a, b, len(drafts)) for (a, b), drafts in shared.items()),
        key=lambda row: -row[2],
    )
    return ranked[:limit]

View File

@@ -0,0 +1,6 @@
"""Gap-to-Draft generation pipeline."""
from .context import ContextBuilder
from .generator import PipelineGenerator
from .quality import QualityGates
from .family import FamilyCoordinator
from .formatter import DraftFormatter

View File

@@ -0,0 +1,259 @@
"""Context builder — assembles rich context for draft generation from DB queries."""
from __future__ import annotations
import json
from pathlib import Path
import numpy as np
from rich.console import Console
from ..config import Config
from ..db import Database
console = Console()
def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
dot = np.dot(a, b)
norm = np.linalg.norm(a) * np.linalg.norm(b)
if norm == 0:
return 0.0
return float(dot / norm)
class ContextBuilder:
    """Assembles rich generation context for a gap topic.

    All methods are DB retrieval only, with one exception: _similar_drafts
    calls Ollama to embed the gap description before comparing against
    stored draft embeddings.
    """

    def __init__(self, config: Config, db: Database):
        self.config = config
        self.db = db

    def build_context(self, gap_topic: str) -> dict:
        """Assemble full context for a gap topic. All DB queries, zero Claude calls."""
        gap = self._find_gap(gap_topic)
        if not gap:
            console.print(f"[yellow]No gap found matching '{gap_topic}', using topic as-is[/]")
            # Synthetic gap record (id 0) so downstream formatting still works.
            gap = {
                "id": 0,
                "topic": gap_topic,
                "description": gap_topic,
                "category": "",
                "evidence": "",
                "severity": "medium",
            }
        ideas = self._convergent_ideas(gap)
        rfcs = self._rfc_foundations(gap.get("category", ""))
        similar = self._similar_drafts(gap["description"])
        top_rated = self._top_rated_in_category(gap.get("category", ""))
        wg_context = self._wg_context()
        ecosystem = self._ecosystem_vision()
        siblings = self._sibling_context(gap_topic)
        return {
            "gap": gap,
            "convergent_ideas": ideas,
            "rfc_foundations": rfcs,
            "similar_drafts": similar,
            "top_rated": top_rated,
            "wg_context": wg_context,
            "ecosystem_vision": ecosystem,
            "sibling_context": siblings,
        }

    def _find_gap(self, topic: str) -> dict | None:
        """Find a gap by topic string (fuzzy match).

        Tries, in order: exact topic match, substring match (either
        direction), then word-overlap scoring requiring >= 2 overlapping
        words. Returns None when nothing clears that bar.
        """
        gaps = self.db.all_gaps()
        topic_lower = topic.lower()
        # Exact match first
        for g in gaps:
            if g["topic"].lower() == topic_lower:
                return g
        # Substring match
        for g in gaps:
            if topic_lower in g["topic"].lower() or topic_lower in g["description"].lower():
                return g
        # Word overlap match
        topic_words = set(topic_lower.split())
        best = None
        best_score = 0
        for g in gaps:
            gap_words = set(g["topic"].lower().split()) | set(g["description"].lower().split())
            overlap = len(topic_words & gap_words)
            if overlap > best_score:
                best_score = overlap
                best = g
        return best if best_score >= 2 else None

    def _convergent_ideas(self, gap: dict, limit: int = 20) -> list[dict]:
        """Find ideas that converge on this gap topic via keyword matching."""
        all_ideas = self.db.all_ideas()
        if not all_ideas:
            return []
        # Build search terms from gap topic + description
        search_text = (gap["topic"] + " " + gap["description"]).lower()
        search_words = set(search_text.split())
        # Remove common words so matches reflect topical overlap, not grammar.
        stop_words = {"the", "a", "an", "and", "or", "in", "of", "for", "to", "is",
                      "are", "that", "this", "with", "not", "by", "on", "at", "from",
                      "as", "be", "it", "no", "but", "has", "have", "do", "does"}
        search_words -= stop_words
        scored = []
        for idea in all_ideas:
            idea_text = (idea["title"] + " " + idea["description"]).lower()
            idea_words = set(idea_text.split())
            overlap = len(search_words & idea_words)
            if overlap >= 1:
                scored.append((overlap, idea))
        # Highest-overlap ideas first.
        scored.sort(key=lambda x: x[0], reverse=True)
        return [item for _, item in scored[:limit]]

    def _rfc_foundations(self, category: str, limit: int = 10) -> list[tuple[str, int]]:
        """Get most-referenced RFCs, optionally filtered by category.

        With a category, the count returned is the number of citing drafts
        WITHIN that category, not the global citation count.
        """
        top_refs = self.db.top_referenced(ref_type="rfc", limit=limit * 2)
        if not category:
            return [(ref_id, count) for ref_id, count, _ in top_refs[:limit]]
        # Filter to RFCs referenced by drafts in this category
        category_lower = category.lower()
        pairs = self.db.drafts_with_ratings(limit=500)
        category_drafts = set()
        for draft, rating in pairs:
            for cat in rating.categories:
                if category_lower in cat.lower():
                    category_drafts.add(draft.name)
        if not category_drafts:
            # Unknown category: fall back to the global top list.
            return [(ref_id, count) for ref_id, count, _ in top_refs[:limit]]
        filtered = []
        for ref_id, count, draft_names in top_refs:
            cat_count = sum(1 for d in draft_names if d in category_drafts)
            if cat_count > 0:
                filtered.append((ref_id, cat_count))
        filtered.sort(key=lambda x: x[1], reverse=True)
        return filtered[:limit]

    def _similar_drafts(self, gap_desc: str, limit: int = 8) -> list[tuple[str, float]]:
        """Find semantically similar existing drafts via embeddings.

        Best-effort: returns [] when no embeddings are stored or the Ollama
        embed call fails (network/model unavailable).
        """
        all_embeddings = self.db.all_embeddings()
        if not all_embeddings:
            return []
        # Try to embed the gap description via Ollama
        try:
            import ollama as ollama_lib
            client = ollama_lib.Client(host=self.config.ollama_url)
            resp = client.embed(
                model=self.config.ollama_embed_model,
                input=gap_desc[:8000],
            )
            gap_vec = np.array(resp["embeddings"][0], dtype=np.float32)
        except Exception as e:
            console.print(f"[yellow]Ollama embedding failed, skipping similarity: {e}[/]")
            return []
        similarities = []
        for name, vec in all_embeddings.items():
            sim = _cosine_similarity(gap_vec, vec)
            similarities.append((name, sim))
        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:limit]

    def _top_rated_in_category(self, category: str, limit: int = 5) -> list[tuple]:
        """Get top-rated drafts in a category as (name, title, composite_score).

        Relies on drafts_with_ratings returning its best-first ordering;
        with no category, the first `limit` pairs are taken as-is.
        """
        pairs = self.db.drafts_with_ratings(limit=500)
        if not category:
            return [
                (draft.name, draft.title, rating.composite_score)
                for draft, rating in pairs[:limit]
            ]
        category_lower = category.lower()
        matching = []
        for draft, rating in pairs:
            for cat in rating.categories:
                if category_lower in cat.lower():
                    matching.append((draft.name, draft.title, rating.composite_score))
                    break
        return matching[:limit]

    def _wg_context(self) -> str:
        """Summarize WG adoption status as a single prompt-ready sentence."""
        adoption = self.db.draft_adoption_status()
        wg_counts: dict[str, int] = {}
        adopted_count = 0
        for d in adoption:
            if d["wg_adopted"]:
                adopted_count += 1
                wg = d["wg_name"]
                wg_counts[wg] = wg_counts.get(wg, 0) + 1
        total = len(adoption)
        if not wg_counts:
            return f"{total} drafts, none WG-adopted yet."
        top_wgs = sorted(wg_counts.items(), key=lambda x: x[1], reverse=True)[:5]
        wg_lines = ", ".join(f"{wg} ({n})" for wg, n in top_wgs)
        return f"{total} drafts, {adopted_count} WG-adopted. Top WGs: {wg_lines}"

    def _ecosystem_vision(self) -> str:
        """Load ecosystem vision document if it exists.

        Prefers the one-page pitch section, then the vision summary,
        then the raw head of the file -- always capped at 2000 chars.
        """
        vision_path = Path(self.config.data_dir) / "reports" / "holistic-agent-ecosystem-draft-outlines.md"
        if not vision_path.exists():
            return "(No ecosystem vision document found)"
        text = vision_path.read_text()
        # Return the pitch section (compact) rather than the full document
        if "## 8. One-Page Pitch" in text:
            pitch = text.split("## 8. One-Page Pitch")[1].strip()
            return pitch[:2000]
        # Fallback: return the vision summary
        if "## 1. Vision Summary" in text:
            parts = text.split("## 1. Vision Summary")[1]
            if "## 2." in parts:
                parts = parts.split("## 2.")[0]
            return parts.strip()[:2000]
        return text[:2000]

    def _sibling_context(self, gap_topic: str) -> list[dict]:
        """Get outlines of sibling drafts from the same family.

        Returns [] when the topic maps to no previously generated family.
        Each entry carries role/title/abstract plus the parsed outline
        (empty dict when outline_json is missing or unparseable).
        """
        # Check all family drafts
        families = self.db.get_generated_drafts()
        if not families:
            return []
        # Find which family this gap_topic belongs to
        topic_lower = gap_topic.lower()
        family_name = ""
        for gd in families:
            if topic_lower in gd.get("gap_topic", "").lower():
                family_name = gd.get("family_name", "")
                break
        if not family_name:
            return []
        siblings = self.db.get_family_drafts(family_name)
        result = []
        for s in siblings:
            if s.get("gap_topic", "").lower() == topic_lower:
                continue  # Skip self
            outline = {}
            if s.get("outline_json"):
                try:
                    outline = json.loads(s["outline_json"]) if isinstance(s["outline_json"], str) else s["outline_json"]
                except (json.JSONDecodeError, TypeError):
                    pass  # keep the empty outline rather than failing the build
            result.append({
                "role": s.get("family_role", ""),
                "title": s.get("title", ""),
                "abstract": s.get("abstract", ""),
                "outline": outline,
            })
        return result

View File

@@ -0,0 +1,219 @@
"""Family coordinator — orchestrates generation of the 5-draft ecosystem."""
from __future__ import annotations
import json
from rich.console import Console
from ..config import Config
from ..db import Database
from .generator import PipelineGenerator
from .quality import QualityGates
console = Console()
# The five-draft ecosystem, in generation order. AEM (the architecture /
# terminology foundation) is generated first so the later drafts can be
# produced with sibling context from it.
FAMILY_DRAFTS = [
    {
        "role": "AEM",
        "topic": "Agent Ecosystem Model",
        "description": (
            "Core architecture and terminology for the agent ecosystem. "
            "Defines shared concepts: DAG execution model, HITL points, "
            "assurance levels, protocol agnosticism. Foundation for all "
            "companion drafts."
        ),
    },
    {
        "role": "ATD",
        "topic": "Agent Task DAG",
        "description": (
            "Execution model using DAG structure with checkpoints and rollback. "
            "Defines node semantics (pending/running/done/failed/rolled-back), "
            "resource hints, circuit breakers, and rollback protocol. "
            "Uses ECT as token and DAG format."
        ),
    },
    {
        "role": "HITL",
        "topic": "Human-in-the-Loop",
        "description": (
            "Human oversight as first-class primitive. Approval gates, "
            "escalation paths, emergency override (PAUSE/CONSTRAIN/STOP/TAKEOVER), "
            "and explainability hooks. Integrates with DAG as HITL nodes."
        ),
    },
    {
        "role": "AEPB",
        "topic": "Agent Ecosystem Protocol Bindings",
        "description": (
            "Cross-protocol interoperability layer. Capability advertisement, "
            "protocol binding requirements, translation gateways, negotiation. "
            "Makes ecosystem semantics available over any A2A protocol."
        ),
    },
    {
        "role": "APAE",
        "topic": "Agent Provenance Assurance Ecosystem",
        "description": (
            "Trust, verification, and provenance for dual-regime operation. "
            "Assurance profiles (relaxed/standard/regulated), behavior verification, "
            "dynamic trust scoring (AIMD model), provenance chains. "
            "Same stack from K8s to fully proven."
        ),
    },
]
class FamilyCoordinator:
    """Orchestrates generation of the 5-draft ecosystem defined in FAMILY_DRAFTS."""

    def __init__(self, config: Config, db: Database, analyzer):
        self.config = config
        self.db = db
        self.analyzer = analyzer
        self.generator = PipelineGenerator(config, db, analyzer)
        self.quality = QualityGates(config, db, analyzer)

    def generate_family(self, family_name: str = "agent-ecosystem", cheap: bool = False) -> list[dict]:
        """Generate all 5 drafts in order. AEM first, then B-E with sibling context.

        Each draft is generated then run through the quality gates. A failure
        on one draft is logged and does not abort the remaining ones; the
        failed entry carries an "error" key in the returned list.
        """
        console.print(f"\n[bold cyan]Generating draft family: {family_name}[/]")
        console.print(f"Drafts: {len(FAMILY_DRAFTS)}, cheap={cheap}")
        # Log the generation run
        run_id = self.db.log_generation_run({
            "family_name": family_name,
            "gap_ids": [d["role"] for d in FAMILY_DRAFTS],
            "model_used": self.config.claude_model_cheap if cheap else self.config.claude_model,
            "status": "running",
        })
        results = []
        # NOTE(review): these token accumulators are never updated or read
        # in this method -- presumably leftover from an earlier revision.
        total_in = 0
        total_out = 0
        for i, draft_spec in enumerate(FAMILY_DRAFTS):
            console.print(
                f"\n[bold]{'='*60}[/]"
                f"\n[bold]Draft {i+1}/{len(FAMILY_DRAFTS)}: "
                f"[cyan]{draft_spec['role']}[/] — {draft_spec['topic']}[/]"
                f"\n[bold]{'='*60}[/]"
            )
            try:
                result = self.generator.generate_full(
                    gap_topic=draft_spec["topic"],
                    cheap=cheap,
                    family_name=family_name,
                    family_role=draft_spec["role"],
                )
                results.append(result)
                # Run quality gates
                draft_id = result.get("id")
                if draft_id:
                    console.print(f"\n[dim]Running quality gates for {draft_spec['role']}...[/]")
                    qr = self.quality.run_all(draft_id)
                    result["quality_results"] = qr
            except Exception as e:
                # Best-effort per draft: record the failure and keep going.
                console.print(f"[red]Failed to generate {draft_spec['role']}: {e}[/]")
                results.append({
                    "role": draft_spec["role"],
                    "topic": draft_spec["topic"],
                    "error": str(e),
                })
        # Update run (marked completed even with partial failures; per-draft
        # errors are reflected in the results list).
        self.db.update_generation_run(
            run_id,
            status="completed",
            completed_at=_now_iso(),
        )
        # Summary
        console.print(f"\n[bold cyan]{'='*60}[/]")
        console.print(f"[bold]Family generation complete: {family_name}[/]")
        successful = [r for r in results if "error" not in r]
        console.print(f"  Generated: {len(successful)}/{len(FAMILY_DRAFTS)} drafts")
        for r in results:
            if "error" in r:
                console.print(f"  [red]FAIL[/] {r['role']}: {r['error']}")
            else:
                console.print(f"  [green]OK[/] {r.get('family_role', '?')}: {r.get('title', '?')}")
        return results

    def check_consistency(self, family_name: str) -> dict:
        """Check terminology consistency across family drafts.

        Collects each draft's outline "terminology" dict and flags any term
        whose definitions differ (after lowercase/strip normalization)
        between drafts that share it.
        """
        drafts = self.db.get_family_drafts(family_name)
        if not drafts:
            return {"consistent": False, "details": "No drafts found for family"}
        # Collect terminology from all outlines
        all_terms: dict[str, dict[str, str]] = {}  # term -> {role: definition}
        for gd in drafts:
            role = gd.get("family_role", "?")
            outline_raw = gd.get("outline_json", "{}")
            try:
                outline = json.loads(outline_raw) if isinstance(outline_raw, str) else outline_raw
            except (json.JSONDecodeError, TypeError):
                continue  # unparseable outline: skip this draft's terms
            terms = outline.get("terminology", {})
            if not isinstance(terms, dict):
                continue
            for term, defn in terms.items():
                term_lower = term.lower()
                if term_lower not in all_terms:
                    all_terms[term_lower] = {}
                all_terms[term_lower][role] = defn
        # Find terms used in multiple drafts
        shared_terms = {t: roles for t, roles in all_terms.items() if len(roles) > 1}
        if not shared_terms:
            return {
                "consistent": True,
                "shared_terms": 0,
                "details": "No shared terminology found across drafts",
            }
        # Check for inconsistencies (simple: different definitions for same term)
        inconsistencies = []
        for term, roles in shared_terms.items():
            definitions = list(roles.values())
            # Rough check: normalize case/whitespace/trailing dot before comparing
            unique_defs = set(d.lower().strip().rstrip(".") for d in definitions)
            if len(unique_defs) > 1:
                inconsistencies.append({
                    "term": term,
                    "definitions": roles,
                })
        consistent = len(inconsistencies) == 0
        details_parts = [f"{len(shared_terms)} shared terms across drafts"]
        if inconsistencies:
            details_parts.append(f"{len(inconsistencies)} inconsistencies found:")
            for inc in inconsistencies:
                details_parts.append(f"  '{inc['term']}': {inc['definitions']}")
        console.print(f"\n[bold]Consistency check: {family_name}[/]")
        console.print(f"  Shared terms: {len(shared_terms)}")
        console.print(f"  Inconsistencies: {len(inconsistencies)}")
        if consistent:
            console.print("  [green]All terminology consistent[/]")
        else:
            for inc in inconsistencies:
                console.print(f"  [yellow]Inconsistent: '{inc['term']}'[/]")
                for role, defn in inc["definitions"].items():
                    console.print(f"    {role}: {defn[:80]}")
        return {
            "consistent": consistent,
            "shared_terms": len(shared_terms),
            "inconsistencies": inconsistencies,
            "details": "; ".join(details_parts),
        }
def _now_iso() -> str:
from datetime import datetime, timezone
return datetime.now(timezone.utc).isoformat()

View File

@@ -0,0 +1,203 @@
"""Draft formatter — assembles outline + sections into I-D text format."""
from __future__ import annotations
import textwrap
from datetime import datetime, timezone, timedelta
class DraftFormatter:
    """Assembles a generated outline + section texts into plain-text I-D format.

    All methods are static; the class is a namespace for the formatting steps
    (header, abstract, ToC, sections, references, author address).
    """

    @staticmethod
    def format_draft(outline: dict, sections: list[str], family_name: str = "") -> str:
        """Assemble outline + sections into I-D text format.

        `sections` is zipped against outline["sections"], so extra entries on
        either side are silently dropped. Raises KeyError if outline lacks
        a "title".
        """
        title = outline["title"]
        draft_name = DraftFormatter._make_draft_name(title, family_name)
        parts = []
        parts.append(DraftFormatter._header_block(outline, draft_name))
        parts.append("")
        parts.append("Abstract")
        parts.append("")
        parts.append(DraftFormatter._wrap_text(outline.get("abstract", "")))
        parts.append("")
        parts.append(DraftFormatter._status_memo(outline))
        parts.append("")
        # Terminology section (if outline has terminology)
        terms = outline.get("terminology", {})
        if terms:
            parts.append(DraftFormatter._terminology_section(outline))
            parts.append("")
        # Table of Contents (page numbers are rough placeholders: i + 2)
        parts.append("Table of Contents")
        parts.append("")
        section_list = outline.get("sections", [])
        for i, section in enumerate(section_list, 1):
            stitle = section.get("title", f"Section {i}")
            dots = "." * max(1, 60 - len(stitle))
            parts.append(f"   {i}.  {stitle} {dots} {i + 2}")
        ref_num = len(section_list) + 1
        parts.append(f"   {ref_num}.  References {'.' * (60 - len('References'))} {ref_num + 2}")
        parts.append("")
        # Sections
        for i, (section_info, section_text) in enumerate(
            zip(section_list, sections), 1
        ):
            stitle = section_info.get("title", f"Section {i}")
            parts.append(f"{i}.  {stitle}")
            parts.append("")
            parts.append(DraftFormatter._wrap_text(section_text))
            parts.append("")
        # References section
        parts.append(DraftFormatter._references_section(outline))
        parts.append("")
        # Author's Address
        parts.append("Author's Address")
        parts.append("")
        parts.append("   Generated by IETF Draft Analyzer")
        if family_name:
            parts.append(f"   Family: {family_name}")
        parts.append(f"   {datetime.now(timezone.utc).strftime('%Y-%m-%d')}")
        parts.append("")
        return "\n".join(parts)

    @staticmethod
    def _make_draft_name(title: str, family_name: str = "") -> str:
        """Generate a draft name from title.

        Slug = first 4 title words, lowercased, keeping only fully
        alphanumeric words (hyphenated/punctuated words are dropped).
        """
        words = title.lower().split()
        slug = "-".join(w for w in words[:4] if w.isalnum())
        if family_name:
            return f"draft-{family_name}-{slug}-00"
        return f"draft-ai-{slug}-00"

    @staticmethod
    def _header_block(outline: dict, draft_name: str) -> str:
        """Proper I-D header: status/date/expiry lines plus centered title.

        Expiry is ~6 months out (185 days), matching I-D convention.
        """
        now = datetime.now(timezone.utc)
        expires = now + timedelta(days=185)
        date_str = now.strftime("%B %Y")
        exp_str = expires.strftime("%B %d, %Y")
        status = outline.get("intended_status", "Informational")
        wg = outline.get("target_wg", "individual")
        title = outline["title"]
        lines = []
        lines.append(f"Internet-Draft{' ' * 45}{wg}")
        lines.append(f"Intended status: {status:<44s}{date_str}")
        lines.append(f"Expires: {exp_str}")
        lines.append("")
        lines.append("")
        lines.append(f"          {title}")
        lines.append(f"          {draft_name}")
        return "\n".join(lines)

    @staticmethod
    def _status_memo(outline: dict) -> str:
        """Status of This Memo boilerplate."""
        status = outline.get("intended_status", "Informational")
        lines = []
        lines.append("Status of This Memo")
        lines.append("")
        lines.append(DraftFormatter._wrap_text(
            "This Internet-Draft is submitted in full conformance with the "
            "provisions of BCP 78 and BCP 79."
        ))
        lines.append("")
        lines.append(DraftFormatter._wrap_text(
            f"This document is intended to have {status} status. "
            "Distribution of this memo is unlimited."
        ))
        return "\n".join(lines)

    @staticmethod
    def _references_section(outline: dict) -> str:
        """Normative + Informative References from outline data.

        Reference entries are bare identifiers (no full citation text);
        the same string is used as anchor and body.
        """
        lines = []
        norm_refs = outline.get("normative_refs", [])
        info_refs = outline.get("informative_refs", [])
        ref_num = len(outline.get("sections", [])) + 1
        lines.append(f"{ref_num}.  References")
        lines.append("")
        if norm_refs:
            lines.append(f"{ref_num}.1.  Normative References")
            lines.append("")
            for ref in norm_refs:
                lines.append(f"   [{ref}]")
                lines.append(f"              {ref}")
                lines.append("")
        if info_refs:
            # Subsection number depends on whether a normative list preceded.
            sub = "2" if norm_refs else "1"
            lines.append(f"{ref_num}.{sub}.  Informative References")
            lines.append("")
            for ref in info_refs:
                lines.append(f"   [{ref}]")
                lines.append(f"              {ref}")
                lines.append("")
        if not norm_refs and not info_refs:
            lines.append("   (No references specified)")
            lines.append("")
        return "\n".join(lines)

    @staticmethod
    def _terminology_section(outline: dict) -> str:
        """Terminology section: BCP 14 boilerplate plus term definitions."""
        terms = outline.get("terminology", {})
        if not terms:
            return ""
        lines = []
        lines.append("Terminology")
        lines.append("")
        lines.append(DraftFormatter._wrap_text(
            'The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL '
            'NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", '
            '"MAY", and "OPTIONAL" in this document are to be interpreted as '
            'described in BCP 14 [RFC2119] [RFC8174] when, and only when, they '
            'appear in all capitals, as shown here.'
        ))
        lines.append("")
        for term, definition in terms.items():
            lines.append(f"   {term}")
            lines.append(DraftFormatter._wrap_text(definition, indent=6))
            lines.append("")
        return "\n".join(lines)

    @staticmethod
    def _wrap_text(text: str, indent: int = 3, width: int = 69) -> str:
        """Wrap paragraphs for I-D layout (width includes the indent, so
        lines stay within the ~72-column convention).

        Paragraphs are split on blank lines; list items (lines starting
        with "-" or "*") are wrapped individually with a hanging indent.
        """
        prefix = " " * indent
        paragraphs = text.strip().split("\n\n")
        wrapped = []
        for para in paragraphs:
            # Preserve list items
            if para.strip().startswith("-") or para.strip().startswith("*"):
                inner_lines = para.strip().split("\n")
                for line in inner_lines:
                    line = line.strip()
                    sub_lines = textwrap.wrap(
                        line, width=width,
                        initial_indent=prefix,
                        subsequent_indent=prefix + "  ",
                    )
                    wrapped.append("\n".join(sub_lines))
            else:
                para = " ".join(para.split())  # Normalize whitespace
                lines = textwrap.wrap(
                    para, width=width,
                    initial_indent=prefix,
                    subsequent_indent=prefix,
                )
                wrapped.append("\n".join(lines))
        return "\n\n".join(wrapped)

View File

@@ -0,0 +1,269 @@
"""Pipeline generator — enhanced outline + section generation with rich context."""
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timezone
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
from ..config import Config
from ..db import Database
from .context import ContextBuilder
from .prompts import OUTLINE_PROMPT_V2, SECTION_PROMPT_V2
from .formatter import DraftFormatter
console = Console()
def _prompt_hash(text: str) -> str:
return hashlib.sha256(text.encode()).hexdigest()[:16]
class PipelineGenerator:
    def __init__(self, config: Config, db: Database, analyzer):
        """Store config/DB handles and build the context assembler.

        `analyzer` supplies the Claude access used later by generation
        (its _call_claude / _extract_json methods); its exact type is not
        constrained here.
        """
        self.config = config
        self.db = db
        self.analyzer = analyzer
        self.context_builder = ContextBuilder(config, db)
def _format_ideas_for_prompt(self, ideas: list[dict]) -> str:
if not ideas:
return "(none found)"
lines = []
for idea in ideas:
lines.append(
f"- [{idea.get('type', '?')}] {idea['title']}: "
f"{idea['description']} (from {idea.get('draft_name', '?')})"
)
return "\n".join(lines)
def _format_rfcs_for_prompt(self, rfcs: list[tuple[str, int]]) -> str:
if not rfcs:
return "(none found)"
return "\n".join(f"- RFC {ref_id} (cited by {count} drafts)" for ref_id, count in rfcs)
def _format_similar_for_prompt(self, similar: list[tuple[str, float]]) -> str:
if not similar:
return "(none found)"
lines = []
for name, sim in similar:
draft = self.db.get_draft(name)
title = draft.title if draft else name
lines.append(f"- {name}: {title} (similarity: {sim:.2f})")
return "\n".join(lines)
def _format_top_rated_for_prompt(self, top_rated: list[tuple]) -> str:
if not top_rated:
return "(none found)"
return "\n".join(
f"- {name}: {title} (score: {score:.1f})"
for name, title, score in top_rated
)
def _format_siblings_for_prompt(self, siblings: list[dict]) -> str:
if not siblings:
return "(none — this is the first draft in the family)"
lines = []
for s in siblings:
role = s.get("role", "?")
title = s.get("title", "?")
abstract = s.get("abstract", "")[:200]
outline = s.get("outline", {})
sections = outline.get("sections", [])
section_titles = [sec.get("title", "") for sec in sections]
lines.append(
f"- [{role}] {title}\n"
f" Abstract: {abstract}\n"
f" Sections: {', '.join(section_titles)}"
)
return "\n".join(lines)
def _format_terminology_for_prompt(self, outline: dict) -> str:
terms = outline.get("terminology", {})
if not terms:
return "(none defined yet)"
return "\n".join(f"- **{term}**: {defn}" for term, defn in terms.items())
    def generate_outline(self, context: dict, cheap: bool = False) -> dict:
        """Generate outline from assembled context. Returns outline dict.

        Responses are cached by (cache_key, prompt hash); a valid cached JSON
        response short-circuits the Claude call. Raises json.JSONDecodeError
        if a fresh model response is not valid JSON after extraction.
        """
        gap = context["gap"]
        prompt = OUTLINE_PROMPT_V2.format(
            gap_topic=gap["topic"],
            gap_description=gap["description"],
            gap_category=gap.get("category", ""),
            gap_evidence=gap.get("evidence", ""),
            gap_severity=gap.get("severity", "medium"),
            convergent_ideas=self._format_ideas_for_prompt(context["convergent_ideas"]),
            rfc_foundations=self._format_rfcs_for_prompt(context["rfc_foundations"]),
            similar_drafts=self._format_similar_for_prompt(context["similar_drafts"]),
            top_rated=self._format_top_rated_for_prompt(context["top_rated"]),
            wg_context=context["wg_context"],
            ecosystem_vision=context["ecosystem_vision"],
            sibling_context=self._format_siblings_for_prompt(context["sibling_context"]),
        )
        # Hash includes a stage tag so outline/section prompts never collide.
        phash = _prompt_hash("pipeline-outline-" + prompt)
        cache_key = f"_pipeline_{gap['topic']}_"
        # Check cache
        cached = self.db.get_cached_response(cache_key, phash)
        if cached:
            try:
                return json.loads(cached)
            except (json.JSONDecodeError, KeyError):
                pass  # corrupt cache entry: fall through to a fresh call
        text, in_tok, out_tok = self.analyzer._call_claude(
            prompt, max_tokens=4096, cheap=cheap
        )
        text = self.analyzer._extract_json(text)
        outline = json.loads(text)
        self.db.cache_response(
            cache_key, phash,
            self.config.claude_model_cheap if cheap else self.config.claude_model,
            prompt, text, in_tok, out_tok,
        )
        return outline
def generate_section(self, outline: dict, section_idx: int, context: dict, cheap: bool = False) -> str:
    """Generate a single section with relevant ideas and refs.

    Selects convergent ideas whose title/description mention any of the
    section's key_ideas (falling back to the top 3), gathers RFC refs and
    sibling cross-refs, then calls Claude through the same response cache
    used by generate_outline(). Returns the section body as plain text.
    """
    sections = outline["sections"]
    section = sections[section_idx]
    outline_text = "\n".join(
        f"{i+1}. {s['title']}: {s.get('summary', '')}"
        for i, s in enumerate(sections)
    )
    # Find ideas relevant to this section by substring match against the
    # section's key_ideas (case-insensitive); each idea is added at most once.
    key_ideas = section.get("key_ideas", [])
    relevant_ideas = []
    if key_ideas and context["convergent_ideas"]:
        for idea in context["convergent_ideas"]:
            for key in key_ideas:
                if key.lower() in idea["title"].lower() or key.lower() in idea["description"].lower():
                    relevant_ideas.append(idea)
                    break
    if not relevant_ideas:
        # Use top 3 convergent ideas as fallback
        relevant_ideas = context["convergent_ideas"][:3]
    # Format RFC refs: prefer the refs the outline itself chose (capped at
    # 10); otherwise fall back to the top RFC foundations from context.
    rfc_refs = ""
    norm_refs = outline.get("normative_refs", [])
    info_refs = outline.get("informative_refs", [])
    all_refs = norm_refs + info_refs
    if all_refs:
        rfc_refs = "\n".join(f"- {ref}" for ref in all_refs[:10])
    else:
        rfc_refs = self._format_rfcs_for_prompt(context["rfc_foundations"][:5])
    # Format cross-references to siblings
    cross_refs = self._format_siblings_for_prompt(context["sibling_context"])
    prompt = SECTION_PROMPT_V2.format(
        draft_title=outline["title"],
        abstract=outline["abstract"],
        outline_text=outline_text,
        section_num=section_idx + 1,
        section_title=section["title"],
        section_summary=section.get("summary", ""),
        relevant_ideas=self._format_ideas_for_prompt(relevant_ideas),
        rfc_refs=rfc_refs,
        cross_refs=cross_refs,
        terminology=self._format_terminology_for_prompt(outline),
    )
    phash = _prompt_hash("pipeline-section-" + prompt)
    cache_key = f"_pipeline_{outline['title']}_s{section_idx}_"
    # Check cache (keyed by draft title + section index + prompt hash).
    cached = self.db.get_cached_response(cache_key, phash)
    if cached:
        return cached
    text, in_tok, out_tok = self.analyzer._call_claude(
        prompt, max_tokens=2048, cheap=cheap
    )
    self.db.cache_response(
        cache_key, phash,
        self.config.claude_model_cheap if cheap else self.config.claude_model,
        prompt, text, in_tok, out_tok,
    )
    return text
def generate_full(self, gap_topic: str, cheap: bool = False,
                  family_name: str = "", family_role: str = "") -> dict:
    """Full pipeline: context -> outline -> sections -> assemble -> store in DB.

    Args:
        gap_topic: Topic string identifying the gap to address.
        cheap: Use the cheaper Claude model for all generation calls.
        family_name: Optional draft-family name passed to the formatter.
        family_role: Optional role of this draft within its family.

    Returns:
        The stored draft record (dict), including its DB "id".
    """
    console.print(f"\n[bold]Pipeline: {gap_topic}[/]")
    # Step 1: assemble ideas / RFCs / similar drafts / sibling context.
    console.print("[dim]Step 1/4:[/] Building context...")
    context = self.context_builder.build_context(gap_topic)
    console.print(
        f" Ideas: {len(context['convergent_ideas'])}, "
        f"RFCs: {len(context['rfc_foundations'])}, "
        f"Similar: {len(context['similar_drafts'])}, "
        f"Siblings: {len(context['sibling_context'])}"
    )
    # Step 2: Generate outline
    console.print("[dim]Step 2/4:[/] Generating outline...")
    outline = self.generate_outline(context, cheap=cheap)
    console.print(f" Title: [cyan]{outline['title']}[/]")
    console.print(f" Sections: {len(outline['sections'])}")
    console.print(f" Status: {outline.get('intended_status', '?')}")
    # Step 3: one Claude call per section, with a progress bar.
    console.print("[dim]Step 3/4:[/] Generating sections...")
    sections = []
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        console=console,
    ) as progress:
        task = progress.add_task("Writing...", total=len(outline["sections"]))
        for i, s in enumerate(outline["sections"]):
            progress.update(task, description=f"Section: {s['title'][:30]}")
            text = self.generate_section(outline, i, context, cheap=cheap)
            sections.append(text)
            progress.advance(task)
    # Step 4: Assemble and store
    console.print("[dim]Step 4/4:[/] Assembling draft...")
    full_text = DraftFormatter.format_draft(outline, sections, family_name=family_name)
    # Derive a slug from the title. Fix: strip punctuation from each word
    # instead of discarding any word containing it — the previous
    # `w.isalnum()` filter dropped words like "agent-to-agent" or "HTTP/2"
    # entirely and could produce an empty slug ("draft-ai--00").
    slug_words = []
    for word in outline["title"].lower().split():
        cleaned = "".join(ch for ch in word if ch.isalnum())
        if cleaned:
            slug_words.append(cleaned)
        if len(slug_words) == 4:
            break
    slug = "-".join(slug_words) or "untitled"
    draft_name = f"draft-ai-{slug}-00"
    data = {
        "gap_topic": gap_topic,
        "draft_name": draft_name,
        "title": outline["title"],
        "abstract": outline.get("abstract", ""),
        "outline": outline,
        "sections": sections,
        "full_text": full_text,
        "family_name": family_name,
        "family_role": family_role,
        "version": 0,
        "status": "draft",
    }
    draft_id = self.db.upsert_generated_draft(data)
    console.print(f" Stored as generated_draft id={draft_id}, name={draft_name}")
    data["id"] = draft_id
    return data

View File

@@ -0,0 +1,92 @@
"""Prompt templates for the gap-to-draft generation pipeline."""
from __future__ import annotations
# Prompt for GenerationPipeline.generate_outline(). All {placeholders} are
# filled via str.format(); literal braces in the JSON schema are doubled.
# The model must reply with a single JSON object and no code fences.
OUTLINE_PROMPT_V2 = """\
You are writing an IETF Internet-Draft to address a gap in the AI/agent standardization landscape.
## Gap to Address
Topic: {gap_topic}
Description: {gap_description}
Category: {gap_category}
Evidence: {gap_evidence}
Severity: {gap_severity}
## Convergent Ideas from Existing Drafts
These ideas from the current landscape converge on this topic — build on them, don't duplicate:
{convergent_ideas}
## RFC Foundations
Most-referenced RFCs in this space — cite where relevant:
{rfc_foundations}
## Similar Existing Drafts
These drafts are closest to this gap — differentiate from them:
{similar_drafts}
## Top-Rated Drafts in Category
Drafts the community considers strong in this area:
{top_rated}
## Working Group Context
{wg_context}
## Ecosystem Vision
{ecosystem_vision}
## Sibling Drafts (same family)
{sibling_context}
Generate a detailed outline for an Internet-Draft that fills this gap.
Return JSON:
{{
"title": "full draft title",
"abstract": "150-250 word abstract",
"sections": [
{{"title": "section title", "summary": "2-3 sentence summary of content", "key_ideas": ["idea titles to incorporate"]}}
],
"normative_refs": ["RFC NNNN", "draft-name"],
"informative_refs": ["RFC NNNN", "draft-name"],
"terminology": {{"term": "definition"}},
"target_wg": "suggested IETF working group",
"intended_status": "informational|standards-track|experimental"
}}
Requirements:
- Include standard sections: Introduction, Terminology, Problem Statement, then 2-4 technical sections, Security Considerations, IANA Considerations
- Reference specific RFCs and drafts from the context above
- Use terminology consistent with sibling drafts if any
- Abstract should clearly state the problem, approach, and contribution
JSON only, no fences."""
# Prompt for GenerationPipeline.generate_section(). Filled once per section;
# the model returns one plain-text section body (no heading, no markdown).
SECTION_PROMPT_V2 = """\
Write the following section of an Internet-Draft titled "{draft_title}".
Abstract: {abstract}
Full outline:
{outline_text}
Write section {section_num}: {section_title}
Summary: {section_summary}
## Relevant Ideas to Incorporate
{relevant_ideas}
## RFC References to Cite
{rfc_refs}
## Cross-References to Sister Drafts
{cross_refs}
## Terminology
{terminology}
Follow IETF Internet-Draft conventions:
- Formal, precise technical language
- Use RFC 2119 keywords (MUST, SHOULD, MAY) where appropriate
- Reference existing RFCs and drafts where relevant (use [RFCNNNN] format)
- 3-6 paragraphs per section
- Use the terminology definitions provided above consistently
Write the section content only (no section number or title). Plain text."""

View File

@@ -0,0 +1,277 @@
"""Quality gates for generated drafts — novelty, references, format, self-rating."""
from __future__ import annotations
import json
import re
from datetime import datetime, timezone
import numpy as np
from rich.console import Console
from ..config import Config
from ..db import Database
console = Console()
def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
dot = np.dot(a, b)
norm = np.linalg.norm(a) * np.linalg.norm(b)
if norm == 0:
return 0.0
return float(dot / norm)
# Section titles (lowercased) every Internet-Draft must contain;
# QualityGates.check_format() searches for each as a substring of the
# lowercased draft text.
REQUIRED_SECTIONS = ["introduction", "security considerations", "iana considerations"]
class QualityGates:
    """Quality gates for generated drafts.

    Four independent checks, each returning a dict of the shape
    {"passed": bool, "score": float, "details": str}:

    - novelty: embedding similarity against all known drafts (via Ollama)
    - references: RFC/draft citations present and cross-checked in the DB
    - format: 72-char lines, required sections, no leaked markdown
    - self_rating: Claude-based rating reusing the analyzer's rubric
    """

    def __init__(self, config: Config, db: Database, analyzer):
        # analyzer is the project's Claude wrapper; only _call_claude and
        # _extract_json are used here (see check_self_rating).
        self.config = config
        self.db = db
        self.analyzer = analyzer

    def run_all(self, draft_id: int) -> dict:
        """Run all quality gates. Returns {gate_name: {passed: bool, score: float, details: str}}"""
        results = {}
        results["novelty"] = self.check_novelty(draft_id)
        results["references"] = self.check_references(draft_id)
        results["format"] = self.check_format(draft_id)
        results["self_rating"] = self.check_self_rating(draft_id)
        # Summary line is green only when every gate passed.
        passed = sum(1 for r in results.values() if r["passed"])
        total = len(results)
        console.print(
            f"Quality gates: [{'green' if passed == total else 'yellow'}]"
            f"{passed}/{total} passed[/]"
        )
        for name, result in results.items():
            status = "[green]PASS[/]" if result["passed"] else "[red]FAIL[/]"
            console.print(f" {status} {name}: {result['details']}")
        return results

    def check_novelty(self, draft_id: int) -> dict:
        """Embed generated abstract, compare against all existing drafts.
        Flag if max_similarity > 0.90."""
        gd = self.db.get_generated_draft(draft_id)
        if not gd:
            return {"passed": False, "score": 0.0, "details": "Draft not found"}
        abstract = gd.get("abstract", "")
        title = gd.get("title", "")
        text_to_embed = f"{title}\n\n{abstract}"
        if not text_to_embed.strip():
            return {"passed": False, "score": 0.0, "details": "No abstract to check"}
        # Embed via Ollama. Best-effort: if the local Ollama server is
        # unreachable the gate passes with score 0.0 instead of blocking
        # the pipeline.
        try:
            import ollama as ollama_lib
            client = ollama_lib.Client(host=self.config.ollama_url)
            resp = client.embed(
                model=self.config.ollama_embed_model,
                input=text_to_embed[:8000],
            )
            gen_vec = np.array(resp["embeddings"][0], dtype=np.float32)
        except Exception as e:
            return {"passed": True, "score": 0.0,
                    "details": f"Ollama unavailable, skipping novelty check: {e}"}
        all_embeddings = self.db.all_embeddings()
        if not all_embeddings:
            return {"passed": True, "score": 1.0, "details": "No existing embeddings to compare"}
        # Linear scan for the nearest existing draft by cosine similarity.
        max_sim = 0.0
        most_similar = ""
        for name, vec in all_embeddings.items():
            sim = _cosine_similarity(gen_vec, vec)
            if sim > max_sim:
                max_sim = sim
                most_similar = name
        passed = max_sim < 0.90
        return {
            "passed": passed,
            "score": 1.0 - max_sim,  # higher score == more novel
            "details": (
                f"Max similarity: {max_sim:.3f} with {most_similar}"
                + ("" if passed else " — too similar, needs differentiation")
            ),
        }

    def check_references(self, draft_id: int) -> dict:
        """Extract RFC/draft refs via regex, cross-check against draft_refs table."""
        gd = self.db.get_generated_draft(draft_id)
        if not gd:
            return {"passed": False, "score": 0.0, "details": "Draft not found"}
        full_text = gd.get("full_text", "")
        if not full_text:
            return {"passed": False, "score": 0.0, "details": "No full text"}
        # Extract references from generated text. Matches both "[RFC2119]"
        # and bare "RFC 2119" forms.
        rfc_pattern = re.compile(r'\[?RFC\s*(\d{3,5})\]?', re.IGNORECASE)
        draft_pattern = re.compile(r'(draft-[a-z0-9-]+)', re.IGNORECASE)
        found_rfcs = set(rfc_pattern.findall(full_text))
        found_drafts = set(draft_pattern.findall(full_text))
        total_refs = len(found_rfcs) + len(found_drafts)
        # Cross-check: how many of these RFCs are actually in our DB?
        known_rfcs = set()
        for ref_id in found_rfcs:
            drafts = self.db.drafts_referencing("rfc", ref_id)
            if drafts:
                known_rfcs.add(ref_id)
        # Cross-check: how many referenced drafts exist in our DB?
        known_drafts = set()
        for dname in found_drafts:
            if self.db.get_draft(dname):
                known_drafts.add(dname)
        # Pass requires at least 3 refs and >= 30% verifiable in the DB.
        verified = len(known_rfcs) + len(known_drafts)
        score = verified / total_refs if total_refs > 0 else 0.0
        passed = total_refs >= 3 and score >= 0.3
        return {
            "passed": passed,
            "score": score,
            "details": (
                f"{total_refs} refs found ({len(found_rfcs)} RFCs, {len(found_drafts)} drafts), "
                f"{verified} verified in DB ({score:.0%})"
            ),
        }

    def check_format(self, draft_id: int) -> dict:
        """Check line length <= 72, required sections present, no markdown leaked."""
        gd = self.db.get_generated_draft(draft_id)
        if not gd:
            return {"passed": False, "score": 0.0, "details": "Draft not found"}
        full_text = gd.get("full_text", "")
        if not full_text:
            return {"passed": False, "score": 0.0, "details": "No full text"}
        issues = []
        # Check line length (RFC plain-text convention: 72 columns max).
        lines = full_text.split("\n")
        long_lines = [i + 1 for i, line in enumerate(lines) if len(line) > 72]
        if long_lines:
            issues.append(f"{len(long_lines)} lines exceed 72 chars")
        # Check required sections (substring match on lowercased text).
        text_lower = full_text.lower()
        for section in REQUIRED_SECTIONS:
            if section not in text_lower:
                issues.append(f"Missing required section: {section}")
        # Check for leaked markdown (the model should emit plain text only).
        markdown_patterns = [
            (r'^#{1,3}\s', "markdown headers (# )"),
            (r'\*\*[^*]+\*\*', "bold markdown (**text**)"),
            (r'```', "code fences (```)"),
            (r'\[([^\]]+)\]\(http', "markdown links"),
        ]
        for pattern, desc in markdown_patterns:
            if re.search(pattern, full_text, re.MULTILINE):
                issues.append(f"Leaked markdown: {desc}")
        if not issues:
            return {"passed": True, "score": 1.0, "details": "All format checks pass"}
        # Each issue costs 0.25; score floors at 0.
        score = max(0.0, 1.0 - len(issues) * 0.25)
        return {
            "passed": len(issues) <= 1,  # Allow one minor issue
            "score": score,
            "details": "; ".join(issues),
        }

    def check_self_rating(self, draft_id: int) -> dict:
        """Feed through existing rate_draft() pipeline. Score on same 1-5 scale."""
        gd = self.db.get_generated_draft(draft_id)
        if not gd:
            return {"passed": False, "score": 0.0, "details": "Draft not found"}
        # Create a temporary prompt matching the analyzer's rating format
        title = gd.get("title", "")
        abstract = gd.get("abstract", "")
        draft_name = gd.get("draft_name", "")
        # Deferred import avoids a module-level cycle with the analyzer.
        from ..analyzer import RATE_PROMPT_COMPACT, CATEGORIES_SHORT, _prompt_hash
        prompt = RATE_PROMPT_COMPACT.format(
            name=draft_name,
            title=title,
            time=datetime.now(timezone.utc).strftime("%Y-%m-%d"),
            pages="?",
            abstract=abstract[:2000],
            categories=", ".join(CATEGORIES_SHORT),
        )
        phash = _prompt_hash("self-rate-" + prompt)
        cache_key = f"_selfrate_{draft_id}_"
        # Check cache; corrupt entries fall through to a fresh call.
        cached = self.db.get_cached_response(cache_key, phash)
        if cached:
            try:
                data = json.loads(cached)
                return self._parse_self_rating(data, draft_id)
            except (json.JSONDecodeError, KeyError):
                pass
        try:
            text, in_tok, out_tok = self.analyzer._call_claude(prompt, max_tokens=512, cheap=True)
            text = self.analyzer._extract_json(text)
            data = json.loads(text)
            self.db.cache_response(
                cache_key, phash,
                self.config.claude_model_cheap,
                prompt, text, in_tok, out_tok,
            )
            return self._parse_self_rating(data, draft_id)
        except Exception as e:
            # Any API/parse failure fails the gate but never raises to run_all.
            return {"passed": False, "score": 0.0,
                    "details": f"Self-rating failed: {e}"}

    def _parse_self_rating(self, data: dict, draft_id: int) -> dict:
        """Parse self-rating result and update the generated draft."""
        # Accept both the compact ("n", "m", ...) and long-form keys;
        # missing dimensions default to the midpoint 3.
        novelty = int(data.get("n", data.get("novelty", 3)))
        maturity = int(data.get("m", data.get("maturity", 3)))
        relevance = int(data.get("r", data.get("relevance", 3)))
        overlap = int(data.get("o", data.get("overlap", 3)))
        momentum = int(data.get("mo", data.get("momentum", 3)))
        # Weighted composite on the 1-5 scale; weights sum to 1.0.
        # Overlap is inverted (6 - overlap): less overlap scores higher.
        composite = (
            novelty * 0.30
            + relevance * 0.25
            + maturity * 0.20
            + momentum * 0.15
            + (6 - overlap) * 0.10
        )
        # Store rating on the generated draft
        gd = self.db.get_generated_draft(draft_id)
        if gd:
            self.db.conn.execute(
                "UPDATE generated_drafts SET rating_json = ?, quality_score = ? WHERE id = ?",
                (json.dumps(data), composite, draft_id),
            )
            self.db.conn.commit()
        passed = composite >= 2.5
        return {
            "passed": passed,
            "score": composite / 5.0,  # normalized to 0..1 like other gates
            "details": (
                f"Composite: {composite:.1f}/5 "
                f"(N:{novelty} M:{maturity} O:{overlap} Mo:{momentum} R:{relevance})"
            ),
        }

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,14 @@
"""Multi-source document fetcher registry."""
from .base import SourceDocument, SourceFetcher
from .ietf import IETFFetcher
from .w3c import W3CFetcher
FETCHERS = {"ietf": IETFFetcher, "w3c": W3CFetcher}
def get_fetcher(source_name: str, config=None):
    """Instantiate the fetcher registered for *source_name*.

    Args:
        source_name: Registry key, e.g. "ietf" or "w3c".
        config: Optional config object passed through to the fetcher
            constructor (each fetcher falls back to its own default).

    Raises:
        ValueError: If no fetcher is registered under *source_name*.
            The message now lists the known sources to aid debugging.
    """
    fetcher_cls = FETCHERS.get(source_name)
    if fetcher_cls is None:
        known = ", ".join(sorted(FETCHERS))
        raise ValueError(f"Unknown source: {source_name} (known sources: {known})")
    return fetcher_cls(config)

View File

@@ -0,0 +1,32 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Protocol
@dataclass
class SourceDocument:
    """Generic document from any standards body.

    Source-neutral envelope so IETF drafts, W3C specs, etc. can flow
    through the same observatory pipeline.
    """

    name: str  # Unique identifier (e.g. "draft-foo-bar", "webnn-api")
    title: str
    abstract: str
    source: str  # Originating body: "ietf", "w3c", etc.
    source_id: str = ""  # Body-specific ID (e.g. Datatracker id, W3C shortname)
    source_url: str = ""  # Canonical URL
    full_text: str | None = None  # None until downloaded via a fetcher
    time: str = ""  # ISO date
    doc_status: str = ""  # "active", "published", "expired", etc.
    extra: dict = field(default_factory=dict)  # Body-specific metadata
class SourceFetcher(Protocol):
    """Protocol for standards body fetchers.

    Structural (duck-typed) interface; implementations include the IETF
    and W3C fetchers registered in this package.
    """

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]:
        """Return documents matching *keywords*, optionally dated after *since*."""
        ...

    def download_text(self, doc: SourceDocument) -> str | None:
        """Fetch the plain-text body of *doc*, or None when unavailable."""
        ...

    def close(self) -> None:
        """Release any underlying network resources."""
        ...

View File

@@ -0,0 +1,82 @@
"""IETF Datatracker adapter — delegates to existing Fetcher."""
from __future__ import annotations
from ..config import Config
from ..fetcher import Fetcher
from ..models import Draft
from .base import SourceDocument
class IETFFetcher:
    """IETF Datatracker adapter wrapping the existing Fetcher class.

    Translates between the project's IETF-specific Draft model and the
    source-neutral SourceDocument used by the observatory pipeline.
    """

    def __init__(self, config: Config | None = None):
        self.config = config or Config.load()
        self._fetcher = Fetcher(self.config)

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]:
        """Search Datatracker and convert each Draft into a SourceDocument."""
        results = []
        for draft in self._fetcher.search_drafts(keywords=keywords, since=since):
            results.append(self._draft_to_doc(draft))
        return results

    def download_text(self, doc: SourceDocument) -> str | None:
        """Download the full text for *doc* via the wrapped Fetcher."""
        return self._fetcher.download_full_text(self._doc_to_draft(doc))

    def close(self) -> None:
        """Release the wrapped Fetcher's resources."""
        self._fetcher.close()

    @staticmethod
    def _draft_to_doc(draft: Draft) -> SourceDocument:
        """Map an IETF Draft onto the source-neutral SourceDocument."""
        # IETF-only fields ride along in the extra dict so that
        # _doc_to_draft can reconstruct the Draft losslessly.
        metadata = {
            "rev": draft.rev,
            "pages": draft.pages,
            "words": draft.words,
            "group": draft.group,
            "group_uri": draft.group_uri,
            "expires": draft.expires,
            "ad": draft.ad,
            "shepherd": draft.shepherd,
            "states": draft.states,
            "fetched_at": draft.fetched_at,
        }
        return SourceDocument(
            name=draft.name,
            title=draft.title,
            abstract=draft.abstract,
            source="ietf",
            source_id=str(draft.dt_id) if draft.dt_id else "",
            source_url=draft.datatracker_url,
            full_text=draft.full_text,
            time=draft.time or "",
            # NOTE(review): every search result is labeled "active" —
            # presumably search_drafts only returns live drafts; confirm.
            doc_status="active",
            extra=metadata,
        )

    @staticmethod
    def _doc_to_draft(doc: SourceDocument) -> Draft:
        """Reconstruct a Draft from a SourceDocument (inverse of _draft_to_doc)."""
        meta = doc.extra or {}
        return Draft(
            name=doc.name,
            rev=meta.get("rev", "00"),
            title=doc.title,
            abstract=doc.abstract,
            time=doc.time,
            dt_id=int(doc.source_id) if doc.source_id else None,
            pages=meta.get("pages"),
            words=meta.get("words"),
            group=meta.get("group"),
            group_uri=meta.get("group_uri"),
            expires=meta.get("expires"),
            ad=meta.get("ad"),
            shepherd=meta.get("shepherd"),
            states=meta.get("states", []),
            full_text=doc.full_text,
            fetched_at=meta.get("fetched_at"),
            source="ietf",
            source_id=doc.source_id,
            source_url=doc.source_url,
        )

View File

@@ -0,0 +1,187 @@
"""Fetch specs from W3C public API."""
from __future__ import annotations
import re
import time as time_mod
import httpx
from rich.console import Console
from rich.progress import (
BarColumn,
MofNCompleteColumn,
Progress,
SpinnerColumn,
TextColumn,
)
from ..config import Config
from .base import SourceDocument
W3C_API = "https://api.w3.org"
console = Console()
def _strip_html(html: str) -> str:
"""Minimal HTML tag stripper — no heavy dependencies."""
text = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL)
text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=re.DOTALL)
text = re.sub(r"<[^>]+>", " ", text)
text = re.sub(r"&nbsp;", " ", text)
text = re.sub(r"&amp;", "&", text)
text = re.sub(r"&lt;", "<", text)
text = re.sub(r"&gt;", ">", text)
text = re.sub(r"&#\d+;", "", text)
text = re.sub(r"\s+", " ", text)
return text.strip()
class W3CFetcher:
    """Fetch specs from the W3C public API (no auth needed).

    The API exposes no keyword search, so each configured group's full
    specification list is fetched and filtered client-side.
    """

    def __init__(self, config: Config | None = None):
        self.config = config or Config.load()
        # follow_redirects: spec URLs (e.g. /TR/ shortlinks) often redirect.
        self.client = httpx.Client(timeout=30, follow_redirects=True)
        # Group shortnames to scan, taken from project config.
        self.groups = self.config.w3c_groups

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]:
        """Fetch specs from AI-relevant W3C groups, filtered by keywords.

        Results are de-duplicated by spec name across groups; the first
        group to yield a spec wins.
        """
        seen: dict[str, SourceDocument] = {}
        kw_lower = [k.lower() for k in keywords]
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Fetching W3C specs...", total=len(self.groups))
            for group in self.groups:
                progress.update(task, description=f"W3C group: {group}")
                specs = self._fetch_group_specs(group)
                for spec in specs:
                    # Client-side keyword filter on title + description
                    haystack = (spec.title + " " + spec.abstract).lower()
                    if any(kw in haystack for kw in kw_lower):
                        # ISO date strings compare correctly as plain strings.
                        if since and spec.time and spec.time < since:
                            continue
                        if spec.name not in seen:
                            seen[spec.name] = spec
                progress.advance(task)
        console.print(f"Found [bold green]{len(seen)}[/] W3C specs matching keywords")
        return list(seen.values())

    def _fetch_group_specs(self, group_shortname: str) -> list[SourceDocument]:
        """Fetch all specifications for a W3C group.

        Handles both observed response shapes (bare list, or a dict with
        "_links.specifications" / "specifications") and page-count
        pagination. An HTTP error aborts this group only, returning
        whatever was collected so far.
        """
        url = f"{W3C_API}/groups/{group_shortname}/specifications"
        specs: list[SourceDocument] = []
        try:
            page = 1
            while True:
                resp = self.client.get(
                    url,
                    params={"format": "json", "page": page},
                    headers={"Accept": "application/json"},
                )
                resp.raise_for_status()
                data = resp.json()
                spec_list = data if isinstance(data, list) else data.get("_links", {}).get("specifications", [])
                if not spec_list:
                    # Try alternate response shape
                    spec_list = data.get("specifications", [])
                if not spec_list:
                    break
                for item in spec_list:
                    href = item.get("href", "")
                    shortname = item.get("shortname", "")
                    title = item.get("title", shortname)
                    if not shortname and href:
                        # Extract shortname from href like /specifications/webnn
                        parts = href.rstrip("/").split("/")
                        shortname = parts[-1] if parts else ""
                    if not shortname:
                        continue
                    # One extra API call per spec to get abstract/URL/status.
                    detail = self._fetch_spec_detail(shortname)
                    abstract = detail.get("description", title)
                    spec_url = detail.get("editor-draft", detail.get("url", f"https://www.w3.org/TR/{shortname}/"))
                    status = detail.get("status", "")
                    date = detail.get("date", "")
                    specs.append(
                        SourceDocument(
                            name=f"w3c-{shortname}",
                            title=title,
                            abstract=abstract,
                            source="w3c",
                            source_id=shortname,
                            source_url=spec_url,
                            time=date,
                            doc_status=status,
                            extra={"group": group_shortname},
                        )
                    )
                    time_mod.sleep(0.3)  # be polite to the public API
                # Check pagination
                pages = data.get("pages", 1) if isinstance(data, dict) else 1
                if page >= pages:
                    break
                page += 1
                time_mod.sleep(0.3)
        except httpx.HTTPError as e:
            console.print(f"[yellow]W3C API error for {group_shortname}: {e}[/]")
        return specs

    def _fetch_spec_detail(self, shortname: str) -> dict:
        """Fetch detail for a single spec; returns {} on any HTTP error."""
        try:
            resp = self.client.get(
                f"{W3C_API}/specifications/{shortname}",
                headers={"Accept": "application/json"},
            )
            resp.raise_for_status()
            data = resp.json()
            # Flatten the fields callers need from the HAL-style payload.
            return {
                "description": data.get("description", ""),
                "title": data.get("title", shortname),
                "editor-draft": data.get("editor-draft", ""),
                "url": data.get("_links", {}).get("latest-version", {}).get("href", ""),
                "status": data.get("_links", {}).get("latest-version", {}).get("status", ""),
                "date": data.get("_links", {}).get("latest-version", {}).get("date", ""),
            }
        except httpx.HTTPError:
            return {}

    def download_text(self, doc: SourceDocument) -> str | None:
        """Fetch spec URL content and strip HTML to plain text.

        Returns at most 50k characters; None when the spec has no URL or
        the request fails.
        """
        url = doc.source_url
        if not url:
            return None
        try:
            resp = self.client.get(url)
            resp.raise_for_status()
            content_type = resp.headers.get("content-type", "")
            if "html" in content_type:
                return _strip_html(resp.text)[:50000]
            return resp.text[:50000]
        except httpx.HTTPError as e:
            console.print(f"[dim]Could not download text for {doc.name}: {e}[/]")
            return None

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self.client.close()

View File

@@ -449,9 +449,10 @@ class Visualizer:
if len(G.nodes) == 0:
raise RuntimeError(f"No edges with min_shared={min_shared}.")
# Get affiliations for coloring
# Get affiliations for coloring (normalized)
from .orgs import normalize_org
top_authors = self.db.top_authors(limit=200)
author_aff = {name: aff for name, aff, _, _ in top_authors}
author_aff = {name: normalize_org(aff) for name, aff, _, _ in top_authors}
# Node sizing by degree
degrees = dict(G.degree())
@@ -650,7 +651,8 @@ class Visualizer:
"""
import plotly.express as px
orgs = self.db.top_orgs(limit=20)
from .orgs import top_orgs_normalized
orgs = top_orgs_normalized(self.db, limit=20)
if not orgs:
raise RuntimeError("No author data. Run `ietf authors --fetch` first.")