IETF Draft Analyzer v0.1.0 — track, categorize, and rate AI/agent drafts

Python CLI tool that fetches AI/agent-related Internet-Drafts from the IETF
Datatracker, rates them using Claude, generates embeddings via Ollama for
similarity/clustering, and produces markdown reports.

Features:
- Fetch drafts by keyword from Datatracker API with full text download
- Batch analysis with Claude (token-optimized, responses cached in SQLite)
- Embedding-based similarity search and overlap cluster detection
- Reports: overview, landscape by category, overlap clusters, weekly digest
- SQLite with FTS5 for full-text search across 260 tracked drafts

Initial analysis of 260 drafts reveals OAuth agent auth (13 drafts) and
agent gateway/collaboration (10 drafts) as the most crowded clusters,
while AI safety/alignment is underserved with the highest quality scores.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-28 00:36:45 +01:00
commit 6771a4c235
17 changed files with 2823 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""IETF Draft Analyzer — Track, categorize, and rate AI/agent-related Internet-Drafts."""

View File

@@ -0,0 +1,276 @@
"""Claude-based analysis — summarization, rating, categorization, overlap detection."""
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timezone
import anthropic
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
from .config import Config
from .db import Database
from .models import Draft, Rating
console = Console()
CATEGORIES_SHORT = [
"A2A protocols", # Agent-to-agent communication protocols
"AI safety/alignment", # AI safety / guardrails / alignment
"ML traffic mgmt", # ML-based traffic management / optimization
"Autonomous netops", # Autonomous network operations
"Agent identity/auth", # Identity / authentication for AI agents
"Data formats/interop",# Data formats / semantics for AI interop
"Policy/governance", # Policy / governance / ethical frameworks
"Model serving/inference", # AI model serving / inference protocols
"Agent discovery/reg", # Agent discovery / registration
"Human-agent interaction",
"Other AI/agent",
]
# Compact single-draft rating prompt — abstract only, saves ~10x tokens vs
# sending full draft text. The short JSON keys ("n", "nn", "m", ...) are
# mapped back to full field names by Analyzer._parse_rating.
RATE_PROMPT_COMPACT = """\
Rate this IETF draft. JSON only.
{name} | {title} | {time} | {pages}pg
Abstract: {abstract}
Return JSON: {{"s":"2-3 sentence summary","n":<1-5>,"nn":"novelty note","m":<1-5>,"mn":"maturity note","o":<1-5>,"on":"overlap note","mo":<1-5>,"mon":"momentum note","r":<1-5>,"rn":"relevance note","c":["categories"]}}
Scale: 1=very low..5=very high. Overlap: 1=unique,5=heavy overlap.
Categories: {categories}
JSON only, no fences."""
# Batch prompt — rates several drafts in one API call. Expects a JSON array
# with one object per draft, in the same order as {drafts_block}; each object
# also carries "name" so results can be matched back defensively.
BATCH_PROMPT = """\
Rate each IETF draft below. Return a JSON array with one object per draft, in order.
{drafts_block}
Per-draft JSON: {{"name":"draft-name","s":"2-3 sentence summary","n":<1-5>,"nn":"novelty note","m":<1-5>,"mn":"maturity note","o":<1-5>,"on":"overlap with known drafts","mo":<1-5>,"mon":"momentum note","r":<1-5>,"rn":"relevance note","c":["categories"]}}
Scale: 1=very low..5=very high. Overlap: 1=unique,5=heavy overlap.
Categories: {categories}
Return ONLY a JSON array, no fences."""
# Free-form comparison prompt used by Analyzer.compare_drafts.
COMPARE_PROMPT = """\
Compare these IETF drafts — overlaps, unique ideas, complementary vs competing vs redundant.
{drafts_section}
Be specific about concrete mechanisms and design choices."""
def _prompt_hash(text: str) -> str:
return hashlib.sha256(text.encode()).hexdigest()[:16]
class Analyzer:
    """Claude-backed analysis of IETF drafts: rating, categorization, comparison.

    Responsibilities:
    - build token-compact prompts (abstract-only, short JSON keys),
    - call Claude and cache raw responses in SQLite keyed by prompt hash,
    - parse responses into Rating objects and persist them via Database.
    """

    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        try:
            # anthropic.Anthropic() reads ANTHROPIC_API_KEY from the environment.
            self.client = anthropic.Anthropic()
        except Exception:
            console.print(
                "[red bold]No Anthropic API key found.[/]\n"
                "Set ANTHROPIC_API_KEY environment variable or run:\n"
                " export ANTHROPIC_API_KEY=sk-ant-..."
            )
            raise SystemExit(1)

    def _parse_rating(self, draft_name: str, data: dict) -> Rating:
        """Build a Rating from compact ("n") or long ("novelty") JSON keys.

        Missing scores default to 3 (the scale midpoint). May raise
        ValueError/TypeError when a score value is not int-coercible;
        callers are expected to handle that.
        """
        return Rating(
            draft_name=draft_name,
            novelty=int(data.get("n", data.get("novelty", 3))),
            maturity=int(data.get("m", data.get("maturity", 3))),
            overlap=int(data.get("o", data.get("overlap", 3))),
            momentum=int(data.get("mo", data.get("momentum", 3))),
            relevance=int(data.get("r", data.get("relevance", 3))),
            summary=data.get("s", data.get("summary", "")),
            novelty_note=data.get("nn", data.get("novelty_note", "")),
            maturity_note=data.get("mn", data.get("maturity_note", "")),
            overlap_note=data.get("on", data.get("overlap_note", "")),
            momentum_note=data.get("mon", data.get("momentum_note", "")),
            relevance_note=data.get("rn", data.get("relevance_note", "")),
            categories=data.get("c", data.get("categories", [])),
            rated_at=datetime.now(timezone.utc).isoformat(),
        )

    def _call_claude(self, prompt: str, max_tokens: int = 512) -> tuple[str, int, int]:
        """Call Claude and return (text, input_tokens, output_tokens)."""
        resp = self.client.messages.create(
            model=self.config.claude_model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        text = resp.content[0].text.strip()
        return text, resp.usage.input_tokens, resp.usage.output_tokens

    def _extract_json(self, text: str) -> str:
        """Strip surrounding markdown code fences, if the model added any."""
        if text.startswith("```"):
            # Drop the opening fence line (``` or ```json).
            text = text.split("\n", 1)[1]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def rate_draft(self, draft_name: str, use_cache: bool = True) -> Rating | None:
        """Analyze and rate a single draft; returns None on failure.

        Responses are cached in SQLite keyed by (draft, prompt hash), so
        re-running with an unchanged prompt costs no API tokens.
        """
        draft = self.db.get_draft(draft_name)
        if draft is None:
            console.print(f"[red]Draft not found: {draft_name}[/]")
            return None
        prompt = RATE_PROMPT_COMPACT.format(
            name=draft.name, title=draft.title, time=draft.date,
            pages=draft.pages or "?",
            abstract=draft.abstract[:2000],
            categories=", ".join(CATEGORIES_SHORT),
        )
        phash = _prompt_hash(prompt)
        # Check cache
        if use_cache:
            cached = self.db.get_cached_response(draft_name, phash)
            if cached:
                try:
                    data = json.loads(cached)
                    rating = self._parse_rating(draft_name, data)
                    self.db.upsert_rating(rating)
                    draft.categories = rating.categories
                    self.db.upsert_draft(draft)
                    return rating
                # BUGFIX: also catch ValueError/TypeError — int() on a
                # malformed cached score previously escaped this handler.
                except (json.JSONDecodeError, KeyError, ValueError, TypeError):
                    pass  # Re-analyze if cache is corrupt
        try:
            text, in_tok, out_tok = self._call_claude(prompt, max_tokens=512)
            text = self._extract_json(text)
            data = json.loads(text)
            # Cache the raw (fence-stripped) response
            self.db.cache_response(
                draft_name, phash, self.config.claude_model,
                prompt, text, in_tok, out_tok,
            )
        except (json.JSONDecodeError, anthropic.APIError, IndexError, KeyError) as e:
            console.print(f"[red]Failed {draft_name}: {e}[/]")
            return None
        # BUGFIX: guard rating construction — int() on malformed score values
        # raises ValueError/TypeError, which previously crashed the whole run.
        try:
            rating = self._parse_rating(draft_name, data)
        except (ValueError, TypeError, AttributeError) as e:
            console.print(f"[red]Failed {draft_name}: {e}[/]")
            return None
        self.db.upsert_rating(rating)
        draft.categories = rating.categories
        self.db.upsert_draft(draft)
        return rating

    def rate_batch(self, drafts: list[Draft], batch_size: int = 5) -> int:
        """Rate drafts in batched API calls to save tokens; returns count rated.

        A whole-batch failure (API error, unparseable JSON) falls back to
        per-draft rating. Individually malformed entries inside an otherwise
        good batch response are skipped with a warning instead of aborting.
        """
        count = 0
        for i in range(0, len(drafts), batch_size):
            batch = drafts[i:i + batch_size]
            # Build batch prompt: one compact header + truncated abstract each.
            drafts_block = ""
            for d in batch:
                drafts_block += f"\n---\n{d.name} | {d.title} | {d.date} | {d.pages or '?'}pg\nAbstract: {d.abstract[:1500]}\n"
            prompt = BATCH_PROMPT.format(
                drafts_block=drafts_block,
                categories=", ".join(CATEGORIES_SHORT),
            )
            phash = _prompt_hash(prompt)
            try:
                text, in_tok, out_tok = self._call_claude(
                    prompt, max_tokens=400 * len(batch)
                )
                text = self._extract_json(text)
                results = json.loads(text)
                if not isinstance(results, list):
                    results = [results]
                for j, data in enumerate(results):
                    # BUGFIX: the model can emit non-object array entries;
                    # data.get() then raised AttributeError and killed the run.
                    if not isinstance(data, dict):
                        console.print(f"[yellow]Skipping malformed batch entry {j}[/]")
                        continue
                    draft_name = data.get("name", batch[j].name if j < len(batch) else None)
                    if not draft_name:
                        continue
                    # BUGFIX: malformed score values raised uncaught
                    # ValueError/TypeError; skip just that entry instead.
                    try:
                        rating = self._parse_rating(draft_name, data)
                    except (ValueError, TypeError) as e:
                        console.print(f"[yellow]Skipping {draft_name}: {e}[/]")
                        continue
                    # Cache each result individually under a synthetic hash;
                    # token counts are split evenly across the batch.
                    self.db.cache_response(
                        draft_name, _prompt_hash(f"batch-{phash}-{draft_name}"),
                        self.config.claude_model, f"batch[{i}]", json.dumps(data),
                        in_tok // len(results), out_tok // len(results),
                    )
                    self.db.upsert_rating(rating)
                    draft = self.db.get_draft(draft_name)
                    if draft:
                        draft.categories = rating.categories
                        self.db.upsert_draft(draft)
                    count += 1
            except (json.JSONDecodeError, anthropic.APIError) as e:
                console.print(f"[red]Batch {i//batch_size+1} failed: {e}[/]")
                # Fallback: rate each draft of the failed batch individually.
                for d in batch:
                    r = self.rate_draft(d.name)
                    if r:
                        count += 1
        return count

    def rate_all_unrated(self, limit: int = 300, batch_size: int = 5) -> int:
        """Rate every not-yet-rated draft (up to *limit*), with a progress bar.

        Returns the number of drafts successfully rated and prints the
        cumulative token usage recorded in the LLM cache.
        """
        unrated = self.db.unrated_drafts(limit=limit)
        if not unrated:
            console.print("All drafts already rated.")
            return 0
        console.print(f"Rating [bold]{len(unrated)}[/] drafts in batches of {batch_size}...")
        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Analyzing...", total=len(unrated))
            for i in range(0, len(unrated), batch_size):
                batch = unrated[i:i + batch_size]
                # Show compact draft identifiers in the progress description.
                names = ", ".join(d.name.split("-")[-1][:12] for d in batch)
                progress.update(task, description=f"Batch: {names}")
                count += self.rate_batch(batch, batch_size=batch_size)
                progress.advance(task, advance=len(batch))
        in_tok, out_tok = self.db.total_tokens_used()
        console.print(
            f"Rated [bold green]{count}[/] drafts "
            f"| Total tokens used: {in_tok:,} in + {out_tok:,} out"
        )
        return count

    def compare_drafts(self, draft_names: list[str]) -> str:
        """Compare drafts by abstract; returns Claude's free-form analysis.

        Unknown names are skipped with a warning; at least two known drafts
        are required.
        """
        parts = []
        for name in draft_names:
            draft = self.db.get_draft(name)
            if draft is None:
                console.print(f"[yellow]Skipping unknown draft: {name}[/]")
                continue
            parts.append(f"### {draft.title}\n**{name}**\n{draft.abstract}")
        if len(parts) < 2:
            return "Need at least 2 valid drafts to compare."
        prompt = COMPARE_PROMPT.format(
            drafts_section="\n\n---\n\n".join(parts)
        )
        try:
            text, _, _ = self._call_claude(prompt, max_tokens=2048)
            return text
        except anthropic.APIError as e:
            return f"Error: {e}"

405
src/ietf_analyzer/cli.py Normal file
View File

@@ -0,0 +1,405 @@
"""CLI entry point — all user-facing commands."""
from __future__ import annotations
import click
from rich.console import Console
from rich.table import Table
from .config import Config
from .db import Database
console = Console()
def _get_config() -> Config:
    """Load the persisted configuration (or defaults when none is saved)."""
    return Config.load()
# Root Click group; every subcommand below registers itself via
# @main.command() / @main.group().
@click.group()
@click.version_option(version="0.1.0")
def main():
    """IETF Draft Analyzer — track, categorize, and rate AI/agent Internet-Drafts."""
    pass
# ── fetch ────────────────────────────────────────────────────────────────────
@main.command()
@click.option("--keywords", "-k", multiple=True, help="Extra keywords to search for")
@click.option("--since", "-s", help="Only fetch drafts newer than this date (YYYY-MM-DD)")
@click.option("--download-text/--no-download-text", default=True, help="Download full text of drafts")
def fetch(keywords: tuple[str, ...], since: str | None, download_text: bool):
    """Fetch AI/agent drafts from IETF Datatracker.

    Searches with the configured keyword list plus any extra -k terms,
    upserts metadata into SQLite, then (optionally) downloads full text
    for any draft that doesn't have it yet.
    """
    from .fetcher import Fetcher  # imported lazily inside the command
    cfg = _get_config()
    db = Database(cfg)
    fetcher = Fetcher(cfg)
    # Configured keywords plus any extra -k terms from the command line.
    kw_list = list(cfg.search_keywords)
    if keywords:
        kw_list.extend(keywords)
    try:
        drafts = fetcher.search_drafts(keywords=kw_list, since=since)
        for draft in drafts:
            db.upsert_draft(draft)
        console.print(f"Stored [bold green]{len(drafts)}[/] drafts in database")
        if download_text:
            # Only fetch text for drafts we don't already hold text for.
            missing = db.drafts_without_text()
            if missing:
                console.print(f"Downloading text for [bold]{len(missing)}[/] drafts...")
                texts = fetcher.download_texts(missing)
                for name, text in texts.items():
                    draft = db.get_draft(name)
                    if draft:
                        draft.full_text = text
                        db.upsert_draft(draft)
    finally:
        fetcher.close()
        db.close()
# ── list ─────────────────────────────────────────────────────────────────────
@main.command("list")
@click.option("--limit", "-n", default=30, help="Number of drafts to show")
@click.option("--sort", "-s", default="time DESC", help="Sort order (e.g. 'time DESC', 'name ASC')")
def list_drafts(limit: int, sort: str):
"""List tracked drafts."""
cfg = _get_config()
db = Database(cfg)
try:
drafts = db.list_drafts(limit=limit, order_by=sort)
total = db.count_drafts()
table = Table(title=f"Tracked Drafts ({total} total, showing {len(drafts)})")
table.add_column("Date", style="dim", width=10)
table.add_column("Name", style="cyan", max_width=55)
table.add_column("Title", max_width=50)
table.add_column("Pg", justify="right", width=4)
table.add_column("Text", justify="center", width=4)
table.add_column("Rated", justify="center", width=5)
for d in drafts:
has_text = "\u2713" if d.full_text else ""
rated = "\u2713" if db.get_rating(d.name) else ""
table.add_row(d.date, d.name, d.title[:50], str(d.pages or ""), has_text, rated)
console.print(table)
finally:
db.close()
# ── search ───────────────────────────────────────────────────────────────────
@main.command()
@click.argument("query")
@click.option("--limit", "-n", default=20, help="Max results")
def search(query: str, limit: int):
    """Full-text search across stored drafts."""
    cfg = _get_config()
    db = Database(cfg)
    try:
        hits = db.search_drafts(query, limit=limit)
        if not hits:
            console.print(f"No results for [bold]{query}[/]")
            return
        table = Table(title=f"Search: {query} ({len(hits)} results)")
        table.add_column("Date", style="dim", width=10)
        table.add_column("Name", style="cyan")
        table.add_column("Title")
        for hit in hits:
            table.add_row(hit.date, hit.name, hit.title[:60])
        console.print(table)
    finally:
        db.close()
# ── show ─────────────────────────────────────────────────────────────────────
@main.command()
@click.argument("name")
def show(name: str):
    """Show detailed info for a draft, including its AI rating if present.

    Also writes a per-draft markdown report to disk via Reporter.
    """
    from .reports import Reporter
    cfg = _get_config()
    db = Database(cfg)
    reporter = Reporter(cfg, db)
    try:
        draft = db.get_draft(name)
        if draft is None:
            console.print(f"[red]Draft not found: {name}[/]")
            return
        rating = db.get_rating(name)
        console.print(f"\n[bold]{draft.title}[/]")
        console.print(f"[dim]{draft.name}[/] rev {draft.rev} | {draft.date} | {draft.pages or '?'} pages")
        console.print(f"Group: {draft.group or 'individual'} | {draft.datatracker_url}")
        console.print(f"\n[italic]{draft.abstract}[/]\n")
        if rating:
            console.print("[bold]AI Assessment[/]")
            console.print(f" Score: [bold green]{rating.composite_score:.1f}[/]")
            console.print(f" Summary: {rating.summary}\n")
            table = Table(show_header=True)
            table.add_column("Dimension", width=12)
            table.add_column("Score", justify="center", width=7)
            table.add_column("Notes")
            table.add_row("Novelty", f"{rating.novelty}/5", rating.novelty_note)
            table.add_row("Maturity", f"{rating.maturity}/5", rating.maturity_note)
            table.add_row("Overlap", f"{rating.overlap}/5", rating.overlap_note)
            table.add_row("Momentum", f"{rating.momentum}/5", rating.momentum_note)
            table.add_row("Relevance", f"{rating.relevance}/5", rating.relevance_note)
            console.print(table)
            if rating.categories:
                console.print(f"\nCategories: {', '.join(rating.categories)}")
        else:
            # BUGFIX: this hint was a plain string (no f prefix), so the
            # literal text "{name}" was printed instead of the draft name.
            console.print(f"[dim]Not yet rated — run: ietf analyze {name}[/]")
        # Save detailed report too
        path = reporter.draft_detail(name)
        if path:
            console.print(f"\n[dim]Report saved: {path}[/]")
    finally:
        db.close()
# ── analyze ──────────────────────────────────────────────────────────────────
@main.command()
@click.argument("name", required=False)
@click.option("--all", "analyze_all", is_flag=True, help="Analyze all unrated drafts")
@click.option("--limit", "-n", default=50, help="Max drafts to analyze (with --all)")
def analyze(name: str | None, analyze_all: bool, limit: int):
    """Analyze and rate drafts using Claude.

    With --all, rates up to --limit unrated drafts in batches; with a NAME,
    rates that single draft and prints its per-dimension scores.
    """
    from .analyzer import Analyzer  # imported lazily inside the command
    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)
    try:
        if analyze_all:
            count = analyzer.rate_all_unrated(limit=limit)
            console.print(f"Analyzed [bold green]{count}[/] drafts")
        elif name:
            rating = analyzer.rate_draft(name)
            if rating:
                console.print(f"\n[bold green]Rating for {name}:[/]")
                console.print(f" Score: {rating.composite_score:.1f}")
                console.print(f" Summary: {rating.summary}")
                console.print(f" Novelty={rating.novelty} Maturity={rating.maturity} "
                              f"Overlap={rating.overlap} Momentum={rating.momentum} "
                              f"Relevance={rating.relevance}")
            else:
                console.print("[red]Analysis failed[/]")
        else:
            # Neither a NAME argument nor --all was given.
            console.print("Provide a draft name or use --all")
    finally:
        db.close()
# ── compare ──────────────────────────────────────────────────────────────────
@main.command()
@click.argument("names", nargs=-1, required=True)
def compare(names: tuple[str, ...]):
    """Compare multiple drafts for overlap and unique contributions."""
    from .analyzer import Analyzer
    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)
    try:
        console.print(analyzer.compare_drafts(list(names)))
    finally:
        db.close()
# ── embed ────────────────────────────────────────────────────────────────────
@main.command()
def embed():
    """Generate embeddings for all drafts (requires Ollama)."""
    from .embeddings import Embedder
    cfg = _get_config()
    db = Database(cfg)
    try:
        done = Embedder(cfg, db).embed_all_missing()
        console.print(f"Embedded [bold green]{done}[/] drafts")
    finally:
        db.close()
# ── similar ──────────────────────────────────────────────────────────────────
@main.command()
@click.argument("name")
@click.option("--top", "-n", default=10, help="Number of similar drafts to show")
def similar(name: str, top: int):
    """Find drafts most similar to a given draft (embedding similarity)."""
    from .embeddings import Embedder
    cfg = _get_config()
    db = Database(cfg)
    embedder = Embedder(cfg, db)
    try:
        results = embedder.find_similar(name, top_n=top)
        if not results:
            # FIX: was an f-string with no placeholders (lint F541).
            console.print("[yellow]No similar drafts found (need embeddings — run `ietf embed` first)[/]")
            return
        table = Table(title=f"Drafts similar to {name}")
        table.add_column("Similarity", justify="right", width=10)
        table.add_column("Draft", style="cyan")
        table.add_column("Title")
        # results is a sequence of (draft_name, score) pairs.
        for sim_name, score in results:
            draft = db.get_draft(sim_name)
            title = draft.title[:60] if draft else ""
            table.add_row(f"{score:.3f}", sim_name, title)
        console.print(table)
    finally:
        db.close()
# ── clusters ─────────────────────────────────────────────────────────────────
@main.command()
@click.option("--threshold", "-t", default=0.85, help="Similarity threshold for clustering")
def clusters(threshold: float):
    """Find clusters of highly similar (potentially overlapping) drafts."""
    from .embeddings import Embedder
    cfg = _get_config()
    db = Database(cfg)
    embedder = Embedder(cfg, db)
    try:
        found = embedder.find_clusters(threshold=threshold)
        if not found:
            console.print("No clusters found at this threshold.")
            return
        console.print(f"\n[bold]Found {len(found)} clusters[/] (threshold={threshold})\n")
        for idx, members in enumerate(found, 1):
            console.print(f"[bold cyan]Cluster {idx}[/] ({len(members)} drafts):")
            for member in members:
                draft = db.get_draft(member)
                title = draft.title[:60] if draft else ""
                console.print(f" - {member} [dim]{title}[/]")
            console.print()
    finally:
        db.close()
# ── report ───────────────────────────────────────────────────────────────────
# `ietf report ...` subgroup — each subcommand writes one markdown file.
@main.group()
def report():
    """Generate markdown reports."""
    pass
@report.command()
def overview():
    """Overview table of all rated drafts."""
    from .reports import Reporter
    cfg = _get_config()
    db = Database(cfg)
    try:
        saved = Reporter(cfg, db).overview()
        console.print(f"Report saved: [bold]{saved}[/]")
    finally:
        db.close()
@report.command()
def landscape():
    """Category-grouped landscape view."""
    from .reports import Reporter
    cfg = _get_config()
    db = Database(cfg)
    try:
        saved = Reporter(cfg, db).landscape()
        console.print(f"Report saved: [bold]{saved}[/]")
    finally:
        db.close()
@report.command()
@click.option("--days", "-d", default=7, help="Look back N days")
def digest(days: int):
    """What's new digest."""
    from .reports import Reporter
    cfg = _get_config()
    db = Database(cfg)
    try:
        saved = Reporter(cfg, db).digest(since_days=days)
        console.print(f"Report saved: [bold]{saved}[/]")
    finally:
        db.close()
# ── config ───────────────────────────────────────────────────────────────────
@main.command("config")
@click.option("--set", "set_key", nargs=2, help="Set a config key (e.g. --set claude_model claude-opus-4-20250514)")
def config_cmd(set_key: tuple[str, str] | None):
"""Show or modify configuration."""
from dataclasses import asdict
cfg = _get_config()
if set_key:
key, value = set_key
if hasattr(cfg, key):
# Coerce types
current = getattr(cfg, key)
if isinstance(current, float):
value = float(value)
elif isinstance(current, int):
value = int(value)
elif isinstance(current, list):
import json
value = json.loads(value)
setattr(cfg, key, value)
cfg.save()
console.print(f"Set [bold]{key}[/] = {value}")
else:
console.print(f"[red]Unknown config key: {key}[/]")
else:
from dataclasses import asdict
for key, val in asdict(cfg).items():
console.print(f" [bold]{key}:[/] {val}")

View File

@@ -0,0 +1,44 @@
"""Configuration management."""
from __future__ import annotations
import json
from dataclasses import dataclass, field, asdict
from pathlib import Path
# Data lives in <repo-root>/data by default (three levels up from this file).
DEFAULT_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
CONFIG_FILE = DEFAULT_DATA_DIR / "config.json"
# Datatracker search terms used when `fetch` runs without extra -k keywords.
DEFAULT_KEYWORDS = [
    "agent",
    "ai-agent",
    "llm",
    "autonomous",
    "machine-learning",
    "artificial-intelligence",
]
@dataclass
class Config:
    """User-tunable settings, persisted as JSON at CONFIG_FILE."""

    data_dir: str = str(DEFAULT_DATA_DIR)
    db_path: str = str(DEFAULT_DATA_DIR / "drafts.db")
    ollama_url: str = "http://localhost:11434"
    ollama_embed_model: str = "nomic-embed-text"
    claude_model: str = "claude-sonnet-4-20250514"
    search_keywords: list[str] = field(default_factory=lambda: list(DEFAULT_KEYWORDS))
    # Only fetch drafts newer than this (ISO date string)
    fetch_since: str = "2024-01-01"
    # Polite delay between API requests (seconds)
    fetch_delay: float = 0.5

    def save(self) -> None:
        """Write the current settings to CONFIG_FILE, creating data_dir first."""
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.write_text(json.dumps(asdict(self), indent=2))

    @classmethod
    def load(cls) -> Config:
        """Load saved settings (ignoring unknown keys); defaults when no file exists."""
        if not CONFIG_FILE.exists():
            return cls()
        raw = json.loads(CONFIG_FILE.read_text())
        known = {k: v for k, v in raw.items() if k in cls.__dataclass_fields__}
        return cls(**known)

375
src/ietf_analyzer/db.py Normal file
View File

@@ -0,0 +1,375 @@
"""SQLite database layer with FTS5 full-text search."""
from __future__ import annotations
import json
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
import numpy as np
from .config import Config
from .models import Draft, Rating
SCHEMA = """
CREATE TABLE IF NOT EXISTS drafts (
name TEXT PRIMARY KEY,
rev TEXT NOT NULL,
title TEXT NOT NULL,
abstract TEXT NOT NULL DEFAULT '',
time TEXT,
dt_id INTEGER,
pages INTEGER,
words INTEGER,
"group" TEXT,
group_uri TEXT,
expires TEXT,
ad TEXT,
shepherd TEXT,
states TEXT DEFAULT '[]', -- JSON array
full_text TEXT,
categories TEXT DEFAULT '[]', -- JSON array
tags TEXT DEFAULT '[]', -- JSON array
fetched_at TEXT
);
CREATE TABLE IF NOT EXISTS ratings (
draft_name TEXT PRIMARY KEY REFERENCES drafts(name),
novelty INTEGER NOT NULL,
maturity INTEGER NOT NULL,
overlap INTEGER NOT NULL,
momentum INTEGER NOT NULL,
relevance INTEGER NOT NULL,
summary TEXT NOT NULL DEFAULT '',
novelty_note TEXT DEFAULT '',
maturity_note TEXT DEFAULT '',
overlap_note TEXT DEFAULT '',
momentum_note TEXT DEFAULT '',
relevance_note TEXT DEFAULT '',
categories TEXT DEFAULT '[]', -- JSON array
rated_at TEXT
);
CREATE TABLE IF NOT EXISTS embeddings (
draft_name TEXT PRIMARY KEY REFERENCES drafts(name),
model TEXT NOT NULL,
vector BLOB NOT NULL, -- numpy float32 array as bytes
created_at TEXT
);
CREATE TABLE IF NOT EXISTS llm_cache (
draft_name TEXT NOT NULL,
prompt_hash TEXT NOT NULL,
model TEXT NOT NULL,
request_json TEXT NOT NULL, -- full prompt sent
response_json TEXT NOT NULL, -- raw Claude response
input_tokens INTEGER,
output_tokens INTEGER,
created_at TEXT,
PRIMARY KEY (draft_name, prompt_hash)
);
CREATE VIRTUAL TABLE IF NOT EXISTS drafts_fts USING fts5(
name, title, abstract, full_text,
content='drafts',
content_rowid='rowid'
);
-- Triggers to keep FTS index in sync
CREATE TRIGGER IF NOT EXISTS drafts_ai AFTER INSERT ON drafts BEGIN
INSERT INTO drafts_fts(rowid, name, title, abstract, full_text)
VALUES (new.rowid, new.name, new.title, new.abstract, new.full_text);
END;
CREATE TRIGGER IF NOT EXISTS drafts_ad AFTER DELETE ON drafts BEGIN
INSERT INTO drafts_fts(drafts_fts, rowid, name, title, abstract, full_text)
VALUES ('delete', old.rowid, old.name, old.title, old.abstract, old.full_text);
END;
CREATE TRIGGER IF NOT EXISTS drafts_au AFTER UPDATE ON drafts BEGIN
INSERT INTO drafts_fts(drafts_fts, rowid, name, title, abstract, full_text)
VALUES ('delete', old.rowid, old.name, old.title, old.abstract, old.full_text);
INSERT INTO drafts_fts(rowid, name, title, abstract, full_text)
VALUES (new.rowid, new.name, new.title, new.abstract, new.full_text);
END;
"""
class Database:
    """SQLite persistence layer: drafts, ratings, embeddings, and LLM cache."""

    def __init__(self, config: Config | None = None):
        # The connection is opened lazily by the `conn` property; constructing
        # a Database only resolves the path and ensures its directory exists.
        self.config = config or Config.load()
        self.db_path = self.config.db_path
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
        self._conn: sqlite3.Connection | None = None
    @property
    def conn(self) -> sqlite3.Connection:
        """Lazily open the SQLite connection (WAL mode, FKs on, schema applied)."""
        if self._conn is None:
            self._conn = sqlite3.connect(self.db_path)
            self._conn.row_factory = sqlite3.Row  # name-based column access
            self._conn.execute("PRAGMA journal_mode=WAL")
            self._conn.execute("PRAGMA foreign_keys=ON")
            self._conn.executescript(SCHEMA)  # idempotent (IF NOT EXISTS)
        return self._conn
def close(self) -> None:
if self._conn:
self._conn.close()
self._conn = None
# --- Drafts ---
    def upsert_draft(self, draft: Draft) -> None:
        """Insert or update a draft row keyed by name.

        List-valued fields are serialized as JSON arrays. full_text uses
        COALESCE so a metadata-only refresh (full_text=None) never clobbers
        previously downloaded text.
        """
        self.conn.execute(
            """INSERT INTO drafts (name, rev, title, abstract, time, dt_id, pages, words,
            "group", group_uri, expires, ad, shepherd, states, full_text, categories, tags, fetched_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(name) DO UPDATE SET
            rev=excluded.rev, title=excluded.title, abstract=excluded.abstract,
            time=excluded.time, dt_id=excluded.dt_id, pages=excluded.pages,
            words=excluded.words, "group"=excluded."group", group_uri=excluded.group_uri,
            expires=excluded.expires, ad=excluded.ad, shepherd=excluded.shepherd,
            states=excluded.states,
            full_text=COALESCE(excluded.full_text, full_text),
            categories=excluded.categories, tags=excluded.tags,
            fetched_at=excluded.fetched_at
            """,
            (
                draft.name, draft.rev, draft.title, draft.abstract, draft.time,
                draft.dt_id, draft.pages, draft.words, draft.group, draft.group_uri,
                draft.expires, draft.ad, draft.shepherd,
                json.dumps(draft.states), draft.full_text,
                json.dumps(draft.categories), json.dumps(draft.tags),
                draft.fetched_at or datetime.now(timezone.utc).isoformat(),
            ),
        )
        self.conn.commit()
def get_draft(self, name: str) -> Draft | None:
row = self.conn.execute("SELECT * FROM drafts WHERE name = ?", (name,)).fetchone()
if row is None:
return None
return self._row_to_draft(row)
def list_drafts(
self,
limit: int = 100,
offset: int = 0,
order_by: str = "time DESC",
) -> list[Draft]:
# Sanitize order_by to prevent injection
allowed = {"time", "name", "title", "pages", "words", "fetched_at"}
parts = order_by.split()
col = parts[0] if parts else "time"
direction = parts[1].upper() if len(parts) > 1 else "DESC"
if col not in allowed:
col = "time"
if direction not in ("ASC", "DESC"):
direction = "DESC"
safe_order = f'"{col}" {direction}' if col == "group" else f"{col} {direction}"
rows = self.conn.execute(
f"SELECT * FROM drafts ORDER BY {safe_order} LIMIT ? OFFSET ?",
(limit, offset),
).fetchall()
return [self._row_to_draft(r) for r in rows]
def count_drafts(self) -> int:
return self.conn.execute("SELECT COUNT(*) FROM drafts").fetchone()[0]
def search_drafts(self, query: str, limit: int = 50) -> list[Draft]:
rows = self.conn.execute(
"""SELECT d.* FROM drafts d
JOIN drafts_fts f ON d.rowid = f.rowid
WHERE drafts_fts MATCH ?
ORDER BY rank
LIMIT ?""",
(query, limit),
).fetchall()
return [self._row_to_draft(r) for r in rows]
def drafts_without_text(self, limit: int = 100) -> list[Draft]:
rows = self.conn.execute(
"SELECT * FROM drafts WHERE full_text IS NULL LIMIT ?", (limit,)
).fetchall()
return [self._row_to_draft(r) for r in rows]
# --- Ratings ---
    def upsert_rating(self, rating: Rating) -> None:
        """Insert or replace the rating for a draft (one rating per draft).

        categories is serialized as a JSON array; rated_at defaults to the
        current UTC timestamp when not set on the Rating.
        """
        self.conn.execute(
            """INSERT INTO ratings (draft_name, novelty, maturity, overlap, momentum, relevance,
            summary, novelty_note, maturity_note, overlap_note, momentum_note, relevance_note,
            categories, rated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(draft_name) DO UPDATE SET
            novelty=excluded.novelty, maturity=excluded.maturity, overlap=excluded.overlap,
            momentum=excluded.momentum, relevance=excluded.relevance, summary=excluded.summary,
            novelty_note=excluded.novelty_note, maturity_note=excluded.maturity_note,
            overlap_note=excluded.overlap_note, momentum_note=excluded.momentum_note,
            relevance_note=excluded.relevance_note, categories=excluded.categories,
            rated_at=excluded.rated_at
            """,
            (
                rating.draft_name, rating.novelty, rating.maturity, rating.overlap,
                rating.momentum, rating.relevance, rating.summary,
                rating.novelty_note, rating.maturity_note, rating.overlap_note,
                rating.momentum_note, rating.relevance_note,
                json.dumps(rating.categories),
                rating.rated_at or datetime.now(timezone.utc).isoformat(),
            ),
        )
        self.conn.commit()
def get_rating(self, draft_name: str) -> Rating | None:
row = self.conn.execute(
"SELECT * FROM ratings WHERE draft_name = ?", (draft_name,)
).fetchone()
if row is None:
return None
return self._row_to_rating(row)
def unrated_drafts(self, limit: int = 100) -> list[Draft]:
rows = self.conn.execute(
"""SELECT d.* FROM drafts d
LEFT JOIN ratings r ON d.name = r.draft_name
WHERE r.draft_name IS NULL
LIMIT ?""",
(limit,),
).fetchall()
return [self._row_to_draft(r) for r in rows]
    def drafts_with_ratings(self, limit: int = 200) -> list[tuple[Draft, Rating]]:
        """Return (draft, rating) pairs, best composite score first.

        NOTE(review): the weight formula in ORDER BY presumably mirrors
        Rating.composite_score — keep the two in sync if either changes.
        Overlap is inverted (6 - overlap) so that low overlap scores higher.
        """
        rows = self.conn.execute(
            """SELECT d.*, r.novelty, r.maturity, r.overlap, r.momentum, r.relevance,
            r.summary, r.novelty_note, r.maturity_note, r.overlap_note,
            r.momentum_note, r.relevance_note, r.categories as r_categories, r.rated_at
            FROM drafts d
            JOIN ratings r ON d.name = r.draft_name
            ORDER BY (r.novelty * 0.30 + r.relevance * 0.25 + r.maturity * 0.20
            + r.momentum * 0.15 + (6 - r.overlap) * 0.10) DESC
            LIMIT ?""",
            (limit,),
        ).fetchall()
        results = []
        for r in rows:
            draft = self._row_to_draft(r)
            # r.categories is aliased to r_categories to avoid colliding with
            # the drafts.categories column in the joined row.
            rating = Rating(
                draft_name=r["draft_name"] if "draft_name" in r.keys() else draft.name,
                novelty=r["novelty"], maturity=r["maturity"], overlap=r["overlap"],
                momentum=r["momentum"], relevance=r["relevance"], summary=r["summary"],
                novelty_note=r["novelty_note"], maturity_note=r["maturity_note"],
                overlap_note=r["overlap_note"], momentum_note=r["momentum_note"],
                relevance_note=r["relevance_note"],
                categories=json.loads(r["r_categories"]) if r["r_categories"] else [],
                rated_at=r["rated_at"],
            )
            results.append((draft, rating))
        return results
# --- Embeddings ---
    def store_embedding(self, draft_name: str, model: str, vector: np.ndarray) -> None:
        """Insert or replace the embedding for a draft.

        The vector is stored as raw float32 bytes; readers rebuild it with
        np.frombuffer(..., dtype=np.float32).
        """
        self.conn.execute(
            """INSERT INTO embeddings (draft_name, model, vector, created_at)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(draft_name) DO UPDATE SET
            model=excluded.model, vector=excluded.vector, created_at=excluded.created_at
            """,
            (draft_name, model, vector.astype(np.float32).tobytes(),
             datetime.now(timezone.utc).isoformat()),
        )
        self.conn.commit()
def get_embedding(self, draft_name: str) -> np.ndarray | None:
row = self.conn.execute(
"SELECT vector FROM embeddings WHERE draft_name = ?", (draft_name,)
).fetchone()
if row is None:
return None
return np.frombuffer(row["vector"], dtype=np.float32)
def all_embeddings(self) -> dict[str, np.ndarray]:
rows = self.conn.execute("SELECT draft_name, vector FROM embeddings").fetchall()
return {
r["draft_name"]: np.frombuffer(r["vector"], dtype=np.float32)
for r in rows
}
def drafts_without_embeddings(self, limit: int = 500) -> list[str]:
rows = self.conn.execute(
"""SELECT d.name FROM drafts d
LEFT JOIN embeddings e ON d.name = e.draft_name
WHERE e.draft_name IS NULL
LIMIT ?""",
(limit,),
).fetchall()
return [r["name"] for r in rows]
# --- LLM Cache ---
def cache_response(
self, draft_name: str, prompt_hash: str, model: str,
request_json: str, response_json: str,
input_tokens: int = 0, output_tokens: int = 0,
) -> None:
self.conn.execute(
"""INSERT INTO llm_cache (draft_name, prompt_hash, model, request_json,
response_json, input_tokens, output_tokens, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(draft_name, prompt_hash) DO UPDATE SET
model=excluded.model, response_json=excluded.response_json,
input_tokens=excluded.input_tokens, output_tokens=excluded.output_tokens,
created_at=excluded.created_at
""",
(draft_name, prompt_hash, model, request_json, response_json,
input_tokens, output_tokens, datetime.now(timezone.utc).isoformat()),
)
self.conn.commit()
def get_cached_response(self, draft_name: str, prompt_hash: str) -> str | None:
row = self.conn.execute(
"SELECT response_json FROM llm_cache WHERE draft_name = ? AND prompt_hash = ?",
(draft_name, prompt_hash),
).fetchone()
return row["response_json"] if row else None
def total_tokens_used(self) -> tuple[int, int]:
row = self.conn.execute(
"SELECT COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0) FROM llm_cache"
).fetchone()
return (row[0], row[1])
# --- Helpers ---
@staticmethod
def _row_to_draft(row: sqlite3.Row) -> Draft:
d = dict(row)
return Draft(
name=d["name"], rev=d["rev"], title=d["title"], abstract=d["abstract"],
time=d["time"], dt_id=d.get("dt_id"), pages=d.get("pages"),
words=d.get("words"), group=d.get("group"), group_uri=d.get("group_uri"),
expires=d.get("expires"), ad=d.get("ad"), shepherd=d.get("shepherd"),
states=json.loads(d.get("states") or "[]"),
full_text=d.get("full_text"),
categories=json.loads(d.get("categories") or "[]"),
tags=json.loads(d.get("tags") or "[]"),
fetched_at=d.get("fetched_at"),
)
@staticmethod
def _row_to_rating(row: sqlite3.Row) -> Rating:
d = dict(row)
return Rating(
draft_name=d["draft_name"], novelty=d["novelty"], maturity=d["maturity"],
overlap=d["overlap"], momentum=d["momentum"], relevance=d["relevance"],
summary=d["summary"],
novelty_note=d.get("novelty_note", ""),
maturity_note=d.get("maturity_note", ""),
overlap_note=d.get("overlap_note", ""),
momentum_note=d.get("momentum_note", ""),
relevance_note=d.get("relevance_note", ""),
categories=json.loads(d.get("categories") or "[]"),
rated_at=d.get("rated_at"),
)

View File

@@ -0,0 +1,136 @@
"""Embedding generation via Ollama and similarity computation."""
from __future__ import annotations
import numpy as np
import ollama as ollama_lib
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
from .config import Config
from .db import Database
console = Console()
def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
dot = np.dot(a, b)
norm = np.linalg.norm(a) * np.linalg.norm(b)
if norm == 0:
return 0.0
return float(dot / norm)
class Embedder:
    """Generates draft embeddings via Ollama and provides similarity/clustering utilities."""

    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self.client = ollama_lib.Client(host=self.config.ollama_url)

    def embed_text(self, text: str) -> np.ndarray:
        """Generate an embedding for a single text string as a float32 vector."""
        # Truncate to ~8k tokens worth of text (roughly 32k chars)
        truncated = text[:32000]
        resp = self.client.embed(model=self.config.ollama_embed_model, input=truncated)
        return np.array(resp["embeddings"][0], dtype=np.float32)

    def embed_draft(self, draft_name: str) -> np.ndarray | None:
        """Generate and store an embedding for a draft using its abstract + title.

        Returns the stored vector, or None when the draft is not in the database.
        """
        draft = self.db.get_draft(draft_name)
        if draft is None:
            console.print(f"[red]Draft not found: {draft_name}[/]")
            return None
        # Combine title + abstract + beginning of full text for richer embedding
        parts = [draft.title, draft.abstract]
        if draft.full_text:
            # Include first ~4k chars of body
            parts.append(draft.full_text[:4000])
        text = "\n\n".join(p for p in parts if p)
        vec = self.embed_text(text)
        self.db.store_embedding(draft_name, self.config.ollama_embed_model, vec)
        return vec

    def embed_all_missing(self) -> int:
        """Generate embeddings for all drafts that don't have one yet.

        Returns the number successfully generated; individual failures are
        logged and skipped so one bad draft cannot abort the batch.
        """
        missing = self.db.drafts_without_embeddings()
        if not missing:
            console.print("All drafts already have embeddings.")
            return 0
        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Generating embeddings...", total=len(missing))
            for name in missing:
                try:
                    self.embed_draft(name)
                    count += 1
                except Exception as e:
                    # Deliberate best-effort: log the failure and continue the batch.
                    console.print(f"[red]Failed to embed {name}: {e}[/]")
                progress.advance(task)
        console.print(f"Generated [bold green]{count}[/] embeddings")
        return count

    def find_similar(self, draft_name: str, top_n: int = 10) -> list[tuple[str, float]]:
        """Find the *top_n* most similar drafts to a given draft by cosine similarity."""
        target_vec = self.db.get_embedding(draft_name)
        if target_vec is None:
            # Try generating it on the fly
            target_vec = self.embed_draft(draft_name)
            if target_vec is None:
                return []
        all_embeddings = self.db.all_embeddings()
        similarities: list[tuple[str, float]] = []
        for name, vec in all_embeddings.items():
            if name == draft_name:
                continue
            sim = _cosine_similarity(target_vec, vec)
            similarities.append((name, sim))
        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:top_n]

    def similarity_matrix(self) -> tuple[list[str], np.ndarray]:
        """Compute the pairwise cosine-similarity matrix for all embedded drafts.

        Vectorized: rows are L2-normalized once, then the whole matrix is a
        single dot product — replacing the previous O(n^2) Python-level loop.
        Zero-norm vectors normalize to all-zero rows, matching the 0.0
        convention of `_cosine_similarity`.
        """
        all_embeddings = self.db.all_embeddings()
        names = sorted(all_embeddings.keys())
        if not names:
            return names, np.zeros((0, 0), dtype=np.float32)
        stacked = np.stack([all_embeddings[n] for n in names]).astype(np.float32)
        norms = np.linalg.norm(stacked, axis=1, keepdims=True)
        # Guarded division: rows with zero norm become zero rather than NaN.
        unit = np.divide(stacked, norms, out=np.zeros_like(stacked), where=norms != 0)
        matrix = (unit @ unit.T).astype(np.float32)
        return names, matrix

    def find_clusters(self, threshold: float = 0.85) -> list[list[str]]:
        """Find clusters of highly similar drafts using simple greedy clustering.

        Each draft seeds at most one cluster; singleton clusters are dropped.
        """
        names, matrix = self.similarity_matrix()
        if len(names) == 0:
            return []
        visited = set()
        clusters: list[list[str]] = []
        for i, name in enumerate(names):
            if name in visited:
                continue
            cluster = [name]
            visited.add(name)
            for j in range(len(names)):
                if names[j] not in visited and matrix[i, j] >= threshold:
                    cluster.append(names[j])
                    visited.add(names[j])
            if len(cluster) > 1:
                clusters.append(cluster)
        return clusters

View File

@@ -0,0 +1,204 @@
"""Datatracker API client — search, fetch metadata, download full text."""
from __future__ import annotations
import time as time_mod
from datetime import datetime, timezone
import httpx
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
from .config import Config
from .models import Draft
API_BASE = "https://datatracker.ietf.org/api/v1"
TEXT_BASE = "https://www.ietf.org/archive/id"
SEARCH_FIELDS = ("name__contains", "abstract__contains")
console = Console()
class Fetcher:
    """Datatracker API client — keyword search, metadata fetch, full-text download."""

    def __init__(self, config: Config | None = None):
        self.config = config or Config.load()
        self.client = httpx.Client(timeout=30, follow_redirects=True)
        # group URI -> acronym cache so repeated drafts in one group cost one request
        self._group_cache: dict[str, str] = {}

    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self.client.close()

    # --- Search & fetch metadata ---
    def search_drafts(
        self,
        keywords: list[str] | None = None,
        since: str | None = None,
        limit_per_keyword: int = 200,
    ) -> list[Draft]:
        """Search for drafts matching keywords. Deduplicates by name.

        *keywords* and *since* default to the configured values. Each keyword
        is searched against both the draft name and the abstract.
        """
        keywords = keywords or self.config.search_keywords
        since = since or self.config.fetch_since
        seen: dict[str, Draft] = {}
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            # Search both name and abstract for each keyword
            searches = [(kw, field) for kw in keywords for field in SEARCH_FIELDS]
            task = progress.add_task("Searching Datatracker...", total=len(searches))
            for kw, search_field in searches:
                progress.update(task, description=f"Searching {search_field.split('__')[0]}: {kw}")
                drafts = self._paginated_search(search_field, kw, since, limit_per_keyword)
                for d in drafts:
                    if d.name not in seen:
                        seen[d.name] = d
                progress.advance(task)
        console.print(f"Found [bold green]{len(seen)}[/] unique drafts")
        return list(seen.values())

    def _paginated_search(
        self,
        search_field: str,
        keyword: str,
        since: str,
        max_results: int,
    ) -> list[Draft]:
        """Page through the document endpoint until *max_results* or the last page.

        Stops early (returning what was collected so far) on any HTTP error.
        """
        results: list[Draft] = []
        offset = 0
        page_size = 100
        while offset < max_results:
            params = {
                "format": "json",
                search_field: keyword,
                "time__gte": since,
                "type__slug": "draft",
                "limit": min(page_size, max_results - offset),
                "offset": offset,
            }
            try:
                resp = self.client.get(f"{API_BASE}/doc/document/", params=params)
                resp.raise_for_status()
            except httpx.HTTPError as e:
                console.print(f"[red]API error: {e}[/]")
                break
            data = resp.json()
            objects = data.get("objects", [])
            if not objects:
                break
            for obj in objects:
                results.append(self._api_obj_to_draft(obj))
            offset += len(objects)
            if not data.get("meta", {}).get("next"):
                break
            # Be polite between pages
            time_mod.sleep(self.config.fetch_delay)
        return results

    def fetch_draft(self, name: str) -> Draft | None:
        """Fetch a single draft by name. Returns None on any HTTP error."""
        try:
            resp = self.client.get(
                f"{API_BASE}/doc/document/{name}/", params={"format": "json"}
            )
            resp.raise_for_status()
            return self._api_obj_to_draft(resp.json())
        except httpx.HTTPError as e:
            console.print(f"[red]Error fetching {name}: {e}[/]")
            return None

    # --- Full text ---
    def download_full_text(self, draft: Draft) -> str | None:
        """Download the plain text of a draft.

        Falls back to the revision-less archive URL when the revisioned URL
        fails; returns None when both attempts fail.
        """
        url = draft.text_url
        try:
            resp = self.client.get(url)
            resp.raise_for_status()
            return resp.text
        except httpx.HTTPError:
            # Try without revision if it fails
            try:
                alt_url = f"{TEXT_BASE}/{draft.name}.txt"
                resp = self.client.get(alt_url)
                resp.raise_for_status()
                return resp.text
            except httpx.HTTPError as e:
                console.print(f"[dim]Could not download text for {draft.name}: {e}[/]")
                return None

    def download_texts(self, drafts: list[Draft]) -> dict[str, str]:
        """Download full text for multiple drafts. Returns {name: text}."""
        results: dict[str, str] = {}
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Downloading draft texts...", total=len(drafts))
            for draft in drafts:
                text = self.download_full_text(draft)
                if text:
                    results[draft.name] = text
                progress.advance(task)
                time_mod.sleep(self.config.fetch_delay)
        console.print(f"Downloaded [bold green]{len(results)}[/] / {len(drafts)} texts")
        return results

    # --- Group resolution ---
    def resolve_group(self, group_uri: str) -> str:
        """Resolve a group API URI to a group acronym/name (cached).

        Returns the empty string for a blank URI or on HTTP failure.
        """
        if not group_uri:
            return ""
        if group_uri in self._group_cache:
            return self._group_cache[group_uri]
        try:
            resp = self.client.get(
                f"https://datatracker.ietf.org{group_uri}", params={"format": "json"}
            )
            resp.raise_for_status()
            # Parse the body once (previously resp.json() was called twice).
            data = resp.json()
            name = data.get("acronym", data.get("name", ""))
            self._group_cache[group_uri] = name
            time_mod.sleep(self.config.fetch_delay)
            return name
        except httpx.HTTPError:
            return ""

    # --- Helpers ---
    def _api_obj_to_draft(self, obj: dict) -> Draft:
        """Map a raw Datatracker document JSON object to a Draft.

        The API may return null for string fields, so they are coerced with
        `or` before any string method is called — previously a null abstract
        raised AttributeError on .strip().
        """
        return Draft(
            name=obj.get("name") or "",
            rev=obj.get("rev") or "00",
            title=obj.get("title") or "",
            abstract=(obj.get("abstract") or "").strip(),
            time=obj.get("time") or "",
            dt_id=obj.get("id"),
            pages=obj.get("pages"),
            words=obj.get("words"),
            group=None,  # Resolved lazily
            group_uri=obj.get("group", ""),
            expires=obj.get("expires"),
            ad=obj.get("ad"),
            shepherd=obj.get("shepherd"),
            states=[s for s in (obj.get("states") or []) if isinstance(s, str)],
            fetched_at=datetime.now(timezone.utc).isoformat(),
        )

View File

@@ -0,0 +1,72 @@
"""Data models for drafts, ratings, and categories."""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
@dataclass
class Draft:
    """A tracked Internet-Draft plus locally attached analysis metadata."""

    name: str  # e.g. "draft-zheng-dispatch-agent-identity-management"
    rev: str  # e.g. "00"
    title: str
    abstract: str
    time: str  # ISO datetime from API
    dt_id: int | None = None  # Datatracker document ID
    pages: int | None = None
    words: int | None = None
    group: str | None = None  # Working group name (resolved)
    group_uri: str | None = None  # Raw API URI
    expires: str | None = None
    ad: str | None = None  # Area director URI
    shepherd: str | None = None
    states: list[str] = field(default_factory=list)
    full_text: str | None = None
    categories: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    fetched_at: str | None = None

    @property
    def text_url(self) -> str:
        """Archive URL for this revision's plain-text rendering."""
        return f"https://www.ietf.org/archive/id/{self.name}-{self.rev}.txt"

    @property
    def datatracker_url(self) -> str:
        """Landing page for this draft on the IETF Datatracker."""
        return f"https://datatracker.ietf.org/doc/{self.name}/"

    @property
    def date(self) -> str:
        """Return just the date portion of time."""
        return self.time[:10] if self.time else ""
@dataclass
class Rating:
    """Claude-produced assessment of one draft across five 1-5 dimensions."""

    draft_name: str
    novelty: int  # 1-5
    maturity: int  # 1-5
    overlap: int  # 1-5 (5 = highly overlapping with others)
    momentum: int  # 1-5
    relevance: int  # 1-5
    summary: str  # 2-4 sentence AI summary
    novelty_note: str = ""
    maturity_note: str = ""
    overlap_note: str = ""
    momentum_note: str = ""
    relevance_note: str = ""
    categories: list[str] = field(default_factory=list)
    rated_at: str | None = None

    @property
    def composite_score(self) -> float:
        """Weighted composite: novelty and relevance matter most."""
        weighted = (
            (self.novelty, 0.30),
            (self.relevance, 0.25),
            (self.maturity, 0.20),
            (self.momentum, 0.15),
            (6 - self.overlap, 0.10),  # Invert: less overlap = better
        )
        return sum(score * weight for score, weight in weighted)

View File

@@ -0,0 +1,177 @@
"""Markdown report generation."""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from .config import Config
from .db import Database
from .models import Draft, Rating
STAR = {1: "\u2581", 2: "\u2583", 3: "\u2585", 4: "\u2587", 5: "\u2588"}
def _bar(score: int) -> str:
return STAR.get(score, "?")
def _score_str(rating: Rating) -> str:
return f"{rating.composite_score:.1f}"
class Reporter:
    """Renders markdown reports (overview, landscape, draft detail, digest).

    Every report is written as UTF-8 into <data_dir>/reports and each method
    returns the path of the written file.
    """

    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self.output_dir = Path(self.config.data_dir) / "reports"
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def _write(self, filename: str, report: str) -> str:
        """Write *report* under the reports dir and return its path.

        Explicit UTF-8 matters: reports contain non-ASCII glyphs (block chars,
        em-dashes) that can break write_text's locale-default encoding.
        """
        path = self.output_dir / filename
        path.write_text(report, encoding="utf-8")
        return str(path)

    def overview(self) -> str:
        """Generate a sortable overview table of all rated drafts."""
        pairs = self.db.drafts_with_ratings()
        total = self.db.count_drafts()
        now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        lines = [
            "# IETF AI/Agent Draft Overview",
            # " — " separator restored to match the other report headers.
            f"*Generated {now} — {len(pairs)} rated / {total} tracked drafts*\n",
            "| Score | Draft | Date | N | M | O | Mom | R | Summary |",
            "|------:|-------|------|:-:|:-:|:-:|:---:|:-:|---------|",
        ]
        for draft, rating in pairs:
            name_link = f"[{draft.name}]({draft.datatracker_url})"
            lines.append(
                f"| {_score_str(rating)} | {name_link} | {draft.date} "
                f"| {_bar(rating.novelty)} | {_bar(rating.maturity)} "
                f"| {_bar(rating.overlap)} | {_bar(rating.momentum)} "
                f"| {_bar(rating.relevance)} | {rating.summary[:80]}... |"
            )
        lines.append("\n*N=Novelty, M=Maturity, O=Overlap, Mom=Momentum, R=Relevance (block height = score 1-5)*")
        return self._write("overview.md", "\n".join(lines))

    def landscape(self) -> str:
        """Generate a category-grouped landscape report."""
        pairs = self.db.drafts_with_ratings()
        now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        # Group by category; a draft appears once per category it belongs to
        by_cat: dict[str, list[tuple[Draft, Rating]]] = {}
        for draft, rating in pairs:
            cats = rating.categories or ["Uncategorized"]
            for cat in cats:
                by_cat.setdefault(cat, []).append((draft, rating))
        lines = [
            "# IETF AI/Agent Draft Landscape",
            f"*Generated {now}*\n",
        ]
        for cat in sorted(by_cat.keys()):
            items = by_cat[cat]
            items.sort(key=lambda x: x[1].composite_score, reverse=True)
            lines.append(f"\n## {cat} ({len(items)} drafts)\n")
            for draft, rating in items:
                lines.append(
                    f"- **[{draft.name}]({draft.datatracker_url})** "
                    f"(score: {_score_str(rating)}) — {rating.summary[:100]}"
                )
        return self._write("landscape.md", "\n".join(lines))

    def draft_detail(self, draft_name: str) -> str:
        """Generate a detailed report for a single draft.

        Returns the empty string (and writes nothing) for an unknown draft.
        """
        draft = self.db.get_draft(draft_name)
        if draft is None:
            return ""
        rating = self.db.get_rating(draft_name)
        lines = [
            f"# {draft.title}",
            f"**{draft.name}** rev {draft.rev}\n",
            f"- **Date:** {draft.date}",
            f"- **Pages:** {draft.pages or '?'}",
            f"- **Group:** {draft.group or 'individual'}",
            f"- **Datatracker:** {draft.datatracker_url}",
            f"- **Text:** {draft.text_url}\n",
            f"## Abstract\n{draft.abstract}\n",
        ]
        if rating:
            lines.extend([
                f"## AI Assessment (score: {_score_str(rating)})\n",
                f"**Summary:** {rating.summary}\n",
                "| Dimension | Score | Notes |",
                "|-----------|:-----:|-------|",
                f"| Novelty | {rating.novelty}/5 | {rating.novelty_note} |",
                f"| Maturity | {rating.maturity}/5 | {rating.maturity_note} |",
                f"| Overlap | {rating.overlap}/5 | {rating.overlap_note} |",
                f"| Momentum | {rating.momentum}/5 | {rating.momentum_note} |",
                f"| Relevance | {rating.relevance}/5 | {rating.relevance_note} |",
                f"\n**Categories:** {', '.join(rating.categories) if rating.categories else 'none'}",
            ])
        else:
            lines.append("*Not yet rated — run `ietf analyze` to generate a rating.*")
        return self._write(f"{draft_name}.md", "\n".join(lines))

    def digest(self, since_days: int = 7) -> str:
        """Generate a digest of drafts fetched within the last *since_days* days."""
        from datetime import timedelta
        cutoff = (datetime.now(timezone.utc) - timedelta(days=since_days)).isoformat()
        now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        # Get recent drafts by fetched_at
        all_drafts = self.db.list_drafts(limit=500, order_by="fetched_at DESC")
        recent = [d for d in all_drafts if d.fetched_at and d.fetched_at >= cutoff]
        lines = [
            "# Weekly Digest — IETF AI/Agent Drafts",
            f"*Generated {now} — showing drafts fetched in last {since_days} days*\n",
            f"**{len(recent)} drafts** in this period\n",
        ]
        if not recent:
            lines.append("No new drafts found. Run `ietf fetch` to update.")
        else:
            # Split into rated and unrated
            rated = []
            unrated = []
            for d in recent:
                r = self.db.get_rating(d.name)
                if r:
                    rated.append((d, r))
                else:
                    unrated.append(d)
            if rated:
                rated.sort(key=lambda x: x[1].composite_score, reverse=True)
                lines.append("## Top Rated New Drafts\n")
                for draft, rating in rated[:10]:
                    lines.append(
                        f"1. **[{draft.name}]({draft.datatracker_url})** "
                        f"(score: {_score_str(rating)}) — {rating.summary[:120]}"
                    )
            if unrated:
                lines.append(f"\n## Awaiting Analysis ({len(unrated)} drafts)\n")
                for d in unrated[:20]:
                    lines.append(f"- [{d.name}]({d.datatracker_url}) — {d.title}")
        return self._write("digest.md", "\n".join(lines))