IETF Draft Analyzer v0.1.0 — track, categorize, and rate AI/agent drafts

Python CLI tool that fetches AI/agent-related Internet-Drafts from the IETF
Datatracker, rates them using Claude, generates embeddings via Ollama for
similarity/clustering, and produces markdown reports.

Features:
- Fetch drafts by keyword from the Datatracker API, with full-text download
- Batch analysis with Claude (token-optimized, responses cached in SQLite)
- Embedding-based similarity search and overlap cluster detection
- Reports: overview, landscape by category, overlap clusters, weekly digest
- SQLite with FTS5 for full-text search across the 260 tracked drafts (query sketch below)
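
A minimal sketch of the kind of FTS5 query the full-text search runs (the table and column names here are illustrative, not the tool's actual schema):

```python
import sqlite3

con = sqlite3.connect("drafts.db")  # hypothetical database path
# Hypothetical FTS5 table over draft metadata and abstracts.
con.execute(
    "CREATE VIRTUAL TABLE IF NOT EXISTS drafts_fts "
    "USING fts5(name, title, abstract)"
)
rows = con.execute(
    "SELECT name, title FROM drafts_fts "
    "WHERE drafts_fts MATCH ? ORDER BY rank LIMIT 10",
    ("oauth AND agent",),
).fetchall()
for name, title in rows:
    print(f"{name}  {title}")
```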

Initial analysis of the 260 drafts shows OAuth agent auth (13 drafts) and
agent gateway/collaboration (10 drafts) as the most crowded clusters, while
AI safety/alignment is underserved despite earning the highest quality scores.
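
The overlap clusters fall out of pairwise embedding similarity. A minimal sketch of that computation against Ollama's embeddings endpoint (the model name, threshold, and sample texts are illustrative):

```python
import math
import requests

def embed(text: str) -> list[float]:
    # Ollama's embeddings endpoint; the model name is an assumption.
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": "nomic-embed-text", "prompt": text},
        timeout=60,
    )
    return resp.json()["embedding"]

def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(y * y for y in b))
    return dot / (na * nb)

# Two drafts are overlap candidates when their abstract embeddings are
# close; 0.8 is an illustrative threshold, not the tool's actual cutoff.
sim = cosine(embed("OAuth extension for agent tokens..."),
             embed("Authorization framework for AI agents..."))
print("overlap candidate" if sim > 0.8 else "distinct")
```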

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
commit 6771a4c235 (2026-02-28 00:36:45 +01:00)
17 changed files with 2823 additions and 0 deletions


@@ -0,0 +1,276 @@
"""Claude-based analysis — summarization, rating, categorization, overlap detection."""
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timezone
import anthropic
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
from .config import Config
from .db import Database
from .models import Draft, Rating
console = Console()
CATEGORIES_SHORT = [
    "A2A protocols",            # Agent-to-agent communication protocols
    "AI safety/alignment",      # AI safety / guardrails / alignment
    "ML traffic mgmt",          # ML-based traffic management / optimization
    "Autonomous netops",        # Autonomous network operations
    "Agent identity/auth",      # Identity / authentication for AI agents
    "Data formats/interop",     # Data formats / semantics for AI interop
    "Policy/governance",        # Policy / governance / ethical frameworks
    "Model serving/inference",  # AI model serving / inference protocols
    "Agent discovery/reg",      # Agent discovery / registration
    "Human-agent interaction",
    "Other AI/agent",
]

# Compact prompt — abstract only, saves ~10x tokens vs full-text
RATE_PROMPT_COMPACT = """\
Rate this IETF draft. JSON only.
{name} | {title} | {time} | {pages}pg
Abstract: {abstract}
Return JSON: {{"s":"2-3 sentence summary","n":<1-5>,"nn":"novelty note","m":<1-5>,"mn":"maturity note","o":<1-5>,"on":"overlap note","mo":<1-5>,"mon":"momentum note","r":<1-5>,"rn":"relevance note","c":["categories"]}}
Scale: 1=very low..5=very high. Overlap: 1=unique,5=heavy overlap.
Categories: {categories}
JSON only, no fences."""
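# Illustrative reply shape for the compact prompt (invented values, not a
# recorded response):
#   {"s": "Defines an agent-to-agent handshake.", "n": 4, "nn": "new framing",
#    "m": 2, "mn": "early -00 draft", "o": 3, "on": "overlaps A2A drafts",
#    "mo": 2, "mon": "single author", "r": 4, "rn": "in scope",
#    "c": ["A2A protocols"]}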
# Batch prompt — rate multiple drafts in one call
BATCH_PROMPT = """\
Rate each IETF draft below. Return a JSON array with one object per draft, in order.
{drafts_block}
Per-draft JSON: {{"name":"draft-name","s":"2-3 sentence summary","n":<1-5>,"nn":"novelty note","m":<1-5>,"mn":"maturity note","o":<1-5>,"on":"overlap with known drafts","mo":<1-5>,"mon":"momentum note","r":<1-5>,"rn":"relevance note","c":["categories"]}}
Scale: 1=very low..5=very high. Overlap: 1=unique,5=heavy overlap.
Categories: {categories}
Return ONLY a JSON array, no fences."""
COMPARE_PROMPT = """\
Compare these IETF drafts — overlaps, unique ideas, complementary vs competing vs redundant.
{drafts_section}
Be specific about concrete mechanisms and design choices."""
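# Cache keys are the first 16 hex chars of a SHA-256 over the full prompt
# text, so any change to the template, truncation limits, or category list
# produces a new hash and bypasses older cached responses.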
def _prompt_hash(text: str) -> str:
    return hashlib.sha256(text.encode()).hexdigest()[:16]

class Analyzer:
    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        try:
            self.client = anthropic.Anthropic()
        except Exception:
            console.print(
                "[red bold]No Anthropic API key found.[/]\n"
                "Set ANTHROPIC_API_KEY environment variable or run:\n"
                "  export ANTHROPIC_API_KEY=sk-ant-..."
            )
            raise SystemExit(1)

    def _parse_rating(self, draft_name: str, data: dict) -> Rating:
        """Parse a rating from compact JSON keys."""
        return Rating(
            draft_name=draft_name,
            novelty=int(data.get("n", data.get("novelty", 3))),
            maturity=int(data.get("m", data.get("maturity", 3))),
            overlap=int(data.get("o", data.get("overlap", 3))),
            momentum=int(data.get("mo", data.get("momentum", 3))),
            relevance=int(data.get("r", data.get("relevance", 3))),
            summary=data.get("s", data.get("summary", "")),
            novelty_note=data.get("nn", data.get("novelty_note", "")),
            maturity_note=data.get("mn", data.get("maturity_note", "")),
            overlap_note=data.get("on", data.get("overlap_note", "")),
            momentum_note=data.get("mon", data.get("momentum_note", "")),
            relevance_note=data.get("rn", data.get("relevance_note", "")),
            categories=data.get("c", data.get("categories", [])),
            rated_at=datetime.now(timezone.utc).isoformat(),
        )

    def _call_claude(self, prompt: str, max_tokens: int = 512) -> tuple[str, int, int]:
        """Call Claude and return (text, input_tokens, output_tokens)."""
        resp = self.client.messages.create(
            model=self.config.claude_model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        text = resp.content[0].text.strip()
        return text, resp.usage.input_tokens, resp.usage.output_tokens

    def _extract_json(self, text: str) -> str:
        """Strip markdown fences if present."""
        if text.startswith("```"):
            text = text.split("\n", 1)[1]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def rate_draft(self, draft_name: str, use_cache: bool = True) -> Rating | None:
        """Analyze and rate a single draft."""
        draft = self.db.get_draft(draft_name)
        if draft is None:
            console.print(f"[red]Draft not found: {draft_name}[/]")
            return None
        prompt = RATE_PROMPT_COMPACT.format(
            name=draft.name, title=draft.title, time=draft.date,
            pages=draft.pages or "?",
            abstract=draft.abstract[:2000],
            categories=", ".join(CATEGORIES_SHORT),
        )
        phash = _prompt_hash(prompt)
        # Check cache
        if use_cache:
            cached = self.db.get_cached_response(draft_name, phash)
            if cached:
                try:
                    data = json.loads(cached)
                    rating = self._parse_rating(draft_name, data)
                    self.db.upsert_rating(rating)
                    draft.categories = rating.categories
                    self.db.upsert_draft(draft)
                    return rating
                except (json.JSONDecodeError, KeyError):
                    pass  # Re-analyze if cache is corrupt
        try:
            text, in_tok, out_tok = self._call_claude(prompt, max_tokens=512)
            text = self._extract_json(text)
            data = json.loads(text)
            # Cache the raw response
            self.db.cache_response(
                draft_name, phash, self.config.claude_model,
                prompt, text, in_tok, out_tok,
            )
        except (json.JSONDecodeError, anthropic.APIError, IndexError, KeyError) as e:
            console.print(f"[red]Failed {draft_name}: {e}[/]")
            return None
        rating = self._parse_rating(draft_name, data)
        self.db.upsert_rating(rating)
        draft.categories = rating.categories
        self.db.upsert_draft(draft)
        return rating

    def rate_batch(self, drafts: list[Draft], batch_size: int = 5) -> int:
        """Rate multiple drafts in batched API calls to save tokens."""
        count = 0
        for i in range(0, len(drafts), batch_size):
            batch = drafts[i:i + batch_size]
            # Build batch prompt
            drafts_block = ""
            for d in batch:
                drafts_block += f"\n---\n{d.name} | {d.title} | {d.date} | {d.pages or '?'}pg\nAbstract: {d.abstract[:1500]}\n"
            prompt = BATCH_PROMPT.format(
                drafts_block=drafts_block,
                categories=", ".join(CATEGORIES_SHORT),
            )
            phash = _prompt_hash(prompt)
            try:
                text, in_tok, out_tok = self._call_claude(
                    prompt, max_tokens=400 * len(batch)
                )
                text = self._extract_json(text)
                results = json.loads(text)
                if not isinstance(results, list):
                    results = [results]
                for j, data in enumerate(results):
                    if not isinstance(data, dict):
                        continue  # Skip malformed entries in the array
                    draft_name = data.get("name", batch[j].name if j < len(batch) else None)
                    if not draft_name:
                        continue
                    # Cache each result individually; token usage is split
                    # evenly across the batch as an approximation.
                    self.db.cache_response(
                        draft_name, _prompt_hash(f"batch-{phash}-{draft_name}"),
                        self.config.claude_model, f"batch[{i}]", json.dumps(data),
                        in_tok // len(results), out_tok // len(results),
                    )
                    rating = self._parse_rating(draft_name, data)
                    self.db.upsert_rating(rating)
                    draft = self.db.get_draft(draft_name)
                    if draft:
                        draft.categories = rating.categories
                        self.db.upsert_draft(draft)
                    count += 1
            except (json.JSONDecodeError, anthropic.APIError) as e:
                console.print(f"[red]Batch {i//batch_size+1} failed: {e}[/]")
                # Fallback: rate individually
                for d in batch:
                    r = self.rate_draft(d.name)
                    if r:
                        count += 1
        return count

    def rate_all_unrated(self, limit: int = 300, batch_size: int = 5) -> int:
        """Rate all drafts that haven't been rated yet, using batching."""
        unrated = self.db.unrated_drafts(limit=limit)
        if not unrated:
            console.print("All drafts already rated.")
            return 0
        console.print(f"Rating [bold]{len(unrated)}[/] drafts in batches of {batch_size}...")
        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Analyzing...", total=len(unrated))
            for i in range(0, len(unrated), batch_size):
                batch = unrated[i:i + batch_size]
                names = ", ".join(d.name.split("-")[-1][:12] for d in batch)
                progress.update(task, description=f"Batch: {names}")
                n = self.rate_batch(batch, batch_size=batch_size)
                count += n
                progress.advance(task, advance=len(batch))
        in_tok, out_tok = self.db.total_tokens_used()
        console.print(
            f"Rated [bold green]{count}[/] drafts "
            f"| Total tokens used: {in_tok:,} in + {out_tok:,} out"
        )
        return count

    def compare_drafts(self, draft_names: list[str]) -> str:
        """Compare multiple drafts and return analysis text."""
        parts = []
        for name in draft_names:
            draft = self.db.get_draft(name)
            if draft is None:
                console.print(f"[yellow]Skipping unknown draft: {name}[/]")
                continue
            parts.append(f"### {draft.title}\n**{name}**\n{draft.abstract}")
        if len(parts) < 2:
            return "Need at least 2 valid drafts to compare."
        prompt = COMPARE_PROMPT.format(
            drafts_section="\n\n---\n\n".join(parts)
        )
        try:
            text, _, _ = self._call_claude(prompt, max_tokens=2048)
            return text
        except anthropic.APIError as e:
            return f"Error: {e}"
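
For reference, a minimal usage sketch of the Analyzer above (the package import path and draft names are assumptions, since the diff doesn't show them):

```python
# Hypothetical import path; the package name isn't visible in this diff.
from draft_analyzer.analysis import Analyzer

analyzer = Analyzer()                # loads Config, opens the SQLite DB
analyzer.rate_all_unrated(limit=50)  # batch-rates up to 50 unrated drafts
print(analyzer.compare_drafts([
    "draft-example-agent-auth-00",   # illustrative draft names
    "draft-example-agent-gateway-01",
]))
```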