IETF Draft Analyzer v0.1.0 — track, categorize, and rate AI/agent drafts

Python CLI tool that fetches AI/agent-related Internet-Drafts from the IETF
Datatracker, rates them using Claude, generates embeddings via Ollama for
similarity/clustering, and produces markdown reports.

Features:
- Fetch drafts by keyword from the Datatracker API, with full-text download
- Batch analysis with Claude (token-optimized, responses cached in SQLite)
- Embedding-based similarity search and overlap cluster detection
- Reports: overview, landscape by category, overlap clusters, weekly digest
- SQLite with FTS5 for full-text search across the 260 tracked drafts (query sketch below)
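
A minimal sketch of the kind of FTS5 query the full-text search runs (the table and column names here are illustrative, not the tool's actual schema):

```python
import sqlite3

con = sqlite3.connect("drafts.db")  # hypothetical database path
# Hypothetical FTS5 table over draft metadata and abstracts.
con.execute(
    "CREATE VIRTUAL TABLE IF NOT EXISTS drafts_fts "
    "USING fts5(name, title, abstract)"
)
rows = con.execute(
    "SELECT name, title FROM drafts_fts "
    "WHERE drafts_fts MATCH ? ORDER BY rank LIMIT 10",
    ("oauth AND agent",),
).fetchall()
for name, title in rows:
    print(f"{name}  {title}")
```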

Initial analysis of the 260 drafts shows OAuth agent auth (13 drafts) and
agent gateway/collaboration (10 drafts) as the most crowded clusters, while
AI safety/alignment is underserved despite earning the highest quality scores.
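
The overlap clusters fall out of pairwise embedding similarity. A minimal sketch of that computation against Ollama's embeddings endpoint (the model name, threshold, and sample texts are illustrative):

```python
import math
import requests

def embed(text: str) -> list[float]:
    # Ollama's embeddings endpoint; the model name is an assumption.
    resp = requests.post(
        "http://localhost:11434/api/embeddings",
        json={"model": "nomic-embed-text", "prompt": text},
        timeout=60,
    )
    return resp.json()["embedding"]

def cosine(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(y * y for y in b))
    return dot / (na * nb)

# Two drafts are overlap candidates when their abstract embeddings are
# close; 0.8 is an illustrative threshold, not the tool's actual cutoff.
sim = cosine(embed("OAuth extension for agent tokens..."),
             embed("Authorization framework for AI agents..."))
print("overlap candidate" if sim > 0.8 else "distinct")
```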

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
commit 6771a4c235 (2026-02-28 00:36:45 +01:00)
17 changed files with 2823 additions and 0 deletions


@@ -0,0 +1,276 @@
"""Claude-based analysis — summarization, rating, categorization, overlap detection."""
from __future__ import annotations
import hashlib
import json
from datetime import datetime, timezone
import anthropic
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
from .config import Config
from .db import Database
from .models import Draft, Rating
console = Console()
CATEGORIES_SHORT = [
    "A2A protocols",            # Agent-to-agent communication protocols
    "AI safety/alignment",      # AI safety / guardrails / alignment
    "ML traffic mgmt",          # ML-based traffic management / optimization
    "Autonomous netops",        # Autonomous network operations
    "Agent identity/auth",      # Identity / authentication for AI agents
    "Data formats/interop",     # Data formats / semantics for AI interop
    "Policy/governance",        # Policy / governance / ethical frameworks
    "Model serving/inference",  # AI model serving / inference protocols
    "Agent discovery/reg",      # Agent discovery / registration
    "Human-agent interaction",
    "Other AI/agent",
]

# Compact prompt — abstract only, saves ~10x tokens vs full-text
RATE_PROMPT_COMPACT = """\
Rate this IETF draft. JSON only.
{name} | {title} | {time} | {pages}pg
Abstract: {abstract}
Return JSON: {{"s":"2-3 sentence summary","n":<1-5>,"nn":"novelty note","m":<1-5>,"mn":"maturity note","o":<1-5>,"on":"overlap note","mo":<1-5>,"mon":"momentum note","r":<1-5>,"rn":"relevance note","c":["categories"]}}
Scale: 1=very low..5=very high. Overlap: 1=unique,5=heavy overlap.
Categories: {categories}
JSON only, no fences."""
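# Illustrative reply shape for the compact prompt (invented values, not a
# recorded response):
#   {"s": "Defines an agent-to-agent handshake.", "n": 4, "nn": "new framing",
#    "m": 2, "mn": "early -00 draft", "o": 3, "on": "overlaps A2A drafts",
#    "mo": 2, "mon": "single author", "r": 4, "rn": "in scope",
#    "c": ["A2A protocols"]}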
# Batch prompt — rate multiple drafts in one call
BATCH_PROMPT = """\
Rate each IETF draft below. Return a JSON array with one object per draft, in order.
{drafts_block}
Per-draft JSON: {{"name":"draft-name","s":"2-3 sentence summary","n":<1-5>,"nn":"novelty note","m":<1-5>,"mn":"maturity note","o":<1-5>,"on":"overlap with known drafts","mo":<1-5>,"mon":"momentum note","r":<1-5>,"rn":"relevance note","c":["categories"]}}
Scale: 1=very low..5=very high. Overlap: 1=unique,5=heavy overlap.
Categories: {categories}
Return ONLY a JSON array, no fences."""
COMPARE_PROMPT = """\
Compare these IETF drafts — overlaps, unique ideas, complementary vs competing vs redundant.
{drafts_section}
Be specific about concrete mechanisms and design choices."""
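# Cache keys are the first 16 hex chars of a SHA-256 over the full prompt
# text, so any change to the template, truncation limits, or category list
# produces a new hash and bypasses older cached responses.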
def _prompt_hash(text: str) -> str:
    return hashlib.sha256(text.encode()).hexdigest()[:16]

class Analyzer:
    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        try:
            self.client = anthropic.Anthropic()
        except Exception:
            console.print(
                "[red bold]No Anthropic API key found.[/]\n"
                "Set ANTHROPIC_API_KEY environment variable or run:\n"
                "  export ANTHROPIC_API_KEY=sk-ant-..."
            )
            raise SystemExit(1)

    def _parse_rating(self, draft_name: str, data: dict) -> Rating:
        """Parse a rating from compact JSON keys."""
        return Rating(
            draft_name=draft_name,
            novelty=int(data.get("n", data.get("novelty", 3))),
            maturity=int(data.get("m", data.get("maturity", 3))),
            overlap=int(data.get("o", data.get("overlap", 3))),
            momentum=int(data.get("mo", data.get("momentum", 3))),
            relevance=int(data.get("r", data.get("relevance", 3))),
            summary=data.get("s", data.get("summary", "")),
            novelty_note=data.get("nn", data.get("novelty_note", "")),
            maturity_note=data.get("mn", data.get("maturity_note", "")),
            overlap_note=data.get("on", data.get("overlap_note", "")),
            momentum_note=data.get("mon", data.get("momentum_note", "")),
            relevance_note=data.get("rn", data.get("relevance_note", "")),
            categories=data.get("c", data.get("categories", [])),
            rated_at=datetime.now(timezone.utc).isoformat(),
        )

    def _call_claude(self, prompt: str, max_tokens: int = 512) -> tuple[str, int, int]:
        """Call Claude and return (text, input_tokens, output_tokens)."""
        resp = self.client.messages.create(
            model=self.config.claude_model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        text = resp.content[0].text.strip()
        return text, resp.usage.input_tokens, resp.usage.output_tokens

    def _extract_json(self, text: str) -> str:
        """Strip markdown fences if present."""
        if text.startswith("```"):
            text = text.split("\n", 1)[1]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def rate_draft(self, draft_name: str, use_cache: bool = True) -> Rating | None:
        """Analyze and rate a single draft."""
        draft = self.db.get_draft(draft_name)
        if draft is None:
            console.print(f"[red]Draft not found: {draft_name}[/]")
            return None
        prompt = RATE_PROMPT_COMPACT.format(
            name=draft.name, title=draft.title, time=draft.date,
            pages=draft.pages or "?",
            abstract=draft.abstract[:2000],
            categories=", ".join(CATEGORIES_SHORT),
        )
        phash = _prompt_hash(prompt)
        # Check cache
        if use_cache:
            cached = self.db.get_cached_response(draft_name, phash)
            if cached:
                try:
                    data = json.loads(cached)
                    rating = self._parse_rating(draft_name, data)
                    self.db.upsert_rating(rating)
                    draft.categories = rating.categories
                    self.db.upsert_draft(draft)
                    return rating
                except (json.JSONDecodeError, KeyError):
                    pass  # Re-analyze if cache is corrupt
        try:
            text, in_tok, out_tok = self._call_claude(prompt, max_tokens=512)
            text = self._extract_json(text)
            data = json.loads(text)
            # Cache the raw response
            self.db.cache_response(
                draft_name, phash, self.config.claude_model,
                prompt, text, in_tok, out_tok,
            )
        except (json.JSONDecodeError, anthropic.APIError, IndexError, KeyError) as e:
            console.print(f"[red]Failed {draft_name}: {e}[/]")
            return None
        rating = self._parse_rating(draft_name, data)
        self.db.upsert_rating(rating)
        draft.categories = rating.categories
        self.db.upsert_draft(draft)
        return rating

    def rate_batch(self, drafts: list[Draft], batch_size: int = 5) -> int:
        """Rate multiple drafts in batched API calls to save tokens."""
        count = 0
        for i in range(0, len(drafts), batch_size):
            batch = drafts[i:i + batch_size]
            # Build batch prompt
            drafts_block = ""
            for d in batch:
                drafts_block += f"\n---\n{d.name} | {d.title} | {d.date} | {d.pages or '?'}pg\nAbstract: {d.abstract[:1500]}\n"
            prompt = BATCH_PROMPT.format(
                drafts_block=drafts_block,
                categories=", ".join(CATEGORIES_SHORT),
            )
            phash = _prompt_hash(prompt)
            try:
                text, in_tok, out_tok = self._call_claude(
                    prompt, max_tokens=400 * len(batch)
                )
                text = self._extract_json(text)
                results = json.loads(text)
                if not isinstance(results, list):
                    results = [results]
                for j, data in enumerate(results):
                    if not isinstance(data, dict):
                        continue  # Skip malformed entries in the array
                    draft_name = data.get("name", batch[j].name if j < len(batch) else None)
                    if not draft_name:
                        continue
                    # Cache each result individually; token usage is split
                    # evenly across the batch as an approximation.
                    self.db.cache_response(
                        draft_name, _prompt_hash(f"batch-{phash}-{draft_name}"),
                        self.config.claude_model, f"batch[{i}]", json.dumps(data),
                        in_tok // len(results), out_tok // len(results),
                    )
                    rating = self._parse_rating(draft_name, data)
                    self.db.upsert_rating(rating)
                    draft = self.db.get_draft(draft_name)
                    if draft:
                        draft.categories = rating.categories
                        self.db.upsert_draft(draft)
                    count += 1
            except (json.JSONDecodeError, anthropic.APIError) as e:
                console.print(f"[red]Batch {i//batch_size+1} failed: {e}[/]")
                # Fallback: rate individually
                for d in batch:
                    r = self.rate_draft(d.name)
                    if r:
                        count += 1
        return count

    def rate_all_unrated(self, limit: int = 300, batch_size: int = 5) -> int:
        """Rate all drafts that haven't been rated yet, using batching."""
        unrated = self.db.unrated_drafts(limit=limit)
        if not unrated:
            console.print("All drafts already rated.")
            return 0
        console.print(f"Rating [bold]{len(unrated)}[/] drafts in batches of {batch_size}...")
        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Analyzing...", total=len(unrated))
            for i in range(0, len(unrated), batch_size):
                batch = unrated[i:i + batch_size]
                names = ", ".join(d.name.split("-")[-1][:12] for d in batch)
                progress.update(task, description=f"Batch: {names}")
                n = self.rate_batch(batch, batch_size=batch_size)
                count += n
                progress.advance(task, advance=len(batch))
        in_tok, out_tok = self.db.total_tokens_used()
        console.print(
            f"Rated [bold green]{count}[/] drafts "
            f"| Total tokens used: {in_tok:,} in + {out_tok:,} out"
        )
        return count

    def compare_drafts(self, draft_names: list[str]) -> str:
        """Compare multiple drafts and return analysis text."""
        parts = []
        for name in draft_names:
            draft = self.db.get_draft(name)
            if draft is None:
                console.print(f"[yellow]Skipping unknown draft: {name}[/]")
                continue
            parts.append(f"### {draft.title}\n**{name}**\n{draft.abstract}")
        if len(parts) < 2:
            return "Need at least 2 valid drafts to compare."
        prompt = COMPARE_PROMPT.format(
            drafts_section="\n\n---\n\n".join(parts)
        )
        try:
            text, _, _ = self._call_claude(prompt, max_tokens=2048)
            return text
        except anthropic.APIError as e:
            return f"Error: {e}"
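
For reference, a minimal usage sketch of the Analyzer above (the package import path and draft names are assumptions, since the diff doesn't show them):

```python
# Hypothetical import path; the package name isn't visible in this diff.
from draft_analyzer.analysis import Analyzer

analyzer = Analyzer()                # loads Config, opens the SQLite DB
analyzer.rate_all_unrated(limit=50)  # batch-rates up to 50 unrated drafts
print(analyzer.compare_drafts([
    "draft-example-agent-auth-00",   # illustrative draft names
    "draft-example-agent-gateway-01",
]))
```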