IETF Draft Analyzer v0.1.0 — track, categorize, and rate AI/agent drafts
Python CLI tool that fetches AI/agent-related Internet-Drafts from the IETF Datatracker, rates them using Claude, generates embeddings via Ollama for similarity/clustering, and produces markdown reports. Features: - Fetch drafts by keyword from Datatracker API with full text download - Batch analysis with Claude (token-optimized, responses cached in SQLite) - Embedding-based similarity search and overlap cluster detection - Reports: overview, landscape by category, overlap clusters, weekly digest - SQLite with FTS5 for full-text search across 260 tracked drafts Initial analysis of 260 drafts reveals OAuth agent auth (13 drafts) and agent gateway/collaboration (10 drafts) as the most crowded clusters, while AI safety/alignment is underserved with the highest quality scores. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
1
src/ietf_analyzer/__init__.py
Normal file
1
src/ietf_analyzer/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""IETF Draft Analyzer — Track, categorize, and rate AI/agent-related Internet-Drafts."""
|
||||
276
src/ietf_analyzer/analyzer.py
Normal file
276
src/ietf_analyzer/analyzer.py
Normal file
@@ -0,0 +1,276 @@
|
||||
"""Claude-based analysis — summarization, rating, categorization, overlap detection."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import anthropic
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
|
||||
|
||||
from .config import Config
|
||||
from .db import Database
|
||||
from .models import Draft, Rating
|
||||
|
||||
console = Console()
|
||||
|
||||
# Short category labels — kept terse because they are interpolated into every
# rating prompt (token cost) and stored verbatim on Rating.categories.
CATEGORIES_SHORT = [
    "A2A protocols",       # Agent-to-agent communication protocols
    "AI safety/alignment", # AI safety / guardrails / alignment
    "ML traffic mgmt",     # ML-based traffic management / optimization
    "Autonomous netops",   # Autonomous network operations
    "Agent identity/auth", # Identity / authentication for AI agents
    "Data formats/interop",# Data formats / semantics for AI interop
    "Policy/governance",   # Policy / governance / ethical frameworks
    "Model serving/inference", # AI model serving / inference protocols
    "Agent discovery/reg", # Agent discovery / registration
    "Human-agent interaction",  # Human <-> agent interaction patterns
    "Other AI/agent",           # Catch-all for drafts that fit nowhere above
]

# Compact prompt — abstract only, saves ~10x tokens vs full-text.
# The single-letter JSON keys ("n", "nn", ...) are decoded by
# Analyzer._parse_rating; keep the two in sync.
RATE_PROMPT_COMPACT = """\
Rate this IETF draft. JSON only.

{name} | {title} | {time} | {pages}pg
Abstract: {abstract}

Return JSON: {{"s":"2-3 sentence summary","n":<1-5>,"nn":"novelty note","m":<1-5>,"mn":"maturity note","o":<1-5>,"on":"overlap note","mo":<1-5>,"mon":"momentum note","r":<1-5>,"rn":"relevance note","c":["categories"]}}

Scale: 1=very low..5=very high. Overlap: 1=unique,5=heavy overlap.
Categories: {categories}
JSON only, no fences."""

# Batch prompt — rate multiple drafts in one call; the model must answer with
# a JSON array in the same order the drafts appear in {drafts_block}.
BATCH_PROMPT = """\
Rate each IETF draft below. Return a JSON array with one object per draft, in order.

{drafts_block}

Per-draft JSON: {{"name":"draft-name","s":"2-3 sentence summary","n":<1-5>,"nn":"novelty note","m":<1-5>,"mn":"maturity note","o":<1-5>,"on":"overlap with known drafts","mo":<1-5>,"mon":"momentum note","r":<1-5>,"rn":"relevance note","c":["categories"]}}

Scale: 1=very low..5=very high. Overlap: 1=unique,5=heavy overlap.
Categories: {categories}
Return ONLY a JSON array, no fences."""

# Free-form comparison prompt used by Analyzer.compare_drafts.
COMPARE_PROMPT = """\
Compare these IETF drafts — overlaps, unique ideas, complementary vs competing vs redundant.

{drafts_section}

Be specific about concrete mechanisms and design choices."""
|
||||
|
||||
|
||||
def _prompt_hash(text: str) -> str:
|
||||
return hashlib.sha256(text.encode()).hexdigest()[:16]
|
||||
|
||||
|
||||
class Analyzer:
    """Claude-backed analysis: summarize, rate, categorize, and compare drafts.

    Ratings are persisted through ``Database``; every raw model response is
    cached in SQLite keyed by (draft, prompt hash) so re-runs spend no tokens.
    """

    def __init__(self, config: Config | None = None, db: Database | None = None):
        """Create an analyzer; exits with code 1 when no Anthropic credentials exist."""
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        try:
            # The SDK reads ANTHROPIC_API_KEY from the environment.
            self.client = anthropic.Anthropic()
        except Exception:
            console.print(
                "[red bold]No Anthropic API key found.[/]\n"
                "Set ANTHROPIC_API_KEY environment variable or run:\n"
                " export ANTHROPIC_API_KEY=sk-ant-..."
            )
            raise SystemExit(1)

    def _parse_rating(self, draft_name: str, data: dict) -> Rating:
        """Build a Rating from compact ("n") or long-form ("novelty") JSON keys.

        Robustness fix: scores are coerced to int, clamped to the 1-5 scale,
        and fall back to the neutral value 3 on malformed model output.
        Previously a non-numeric value raised ValueError, which escaped the
        caller's except clauses and aborted a whole rating run.
        """

        def score(short: str, long: str) -> int:
            raw = data.get(short, data.get(long, 3))
            try:
                value = int(raw)
            except (TypeError, ValueError):
                return 3  # neutral fallback for malformed model output
            return max(1, min(5, value))  # clamp to the documented 1-5 scale

        def field(short: str, long: str, default=""):
            return data.get(short, data.get(long, default))

        return Rating(
            draft_name=draft_name,
            novelty=score("n", "novelty"),
            maturity=score("m", "maturity"),
            overlap=score("o", "overlap"),
            momentum=score("mo", "momentum"),
            relevance=score("r", "relevance"),
            summary=field("s", "summary"),
            novelty_note=field("nn", "novelty_note"),
            maturity_note=field("mn", "maturity_note"),
            overlap_note=field("on", "overlap_note"),
            momentum_note=field("mon", "momentum_note"),
            relevance_note=field("rn", "relevance_note"),
            categories=field("c", "categories", []),
            rated_at=datetime.now(timezone.utc).isoformat(),
        )

    def _call_claude(self, prompt: str, max_tokens: int = 512) -> tuple[str, int, int]:
        """Send one user prompt to Claude; return (text, input_tokens, output_tokens)."""
        resp = self.client.messages.create(
            model=self.config.claude_model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        text = resp.content[0].text.strip()
        return text, resp.usage.input_tokens, resp.usage.output_tokens

    def _extract_json(self, text: str) -> str:
        """Strip surrounding markdown code fences (``` or ```json) if present.

        Robustness fix: no longer raises IndexError when the opening fence has
        no trailing newline (e.g. the whole payload arrives on one line).
        """
        text = text.strip()
        if text.startswith("```"):
            newline = text.find("\n")
            # Drop the opening fence line, which may carry a language tag.
            text = text[newline + 1:] if newline != -1 else text[3:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    def rate_draft(self, draft_name: str, use_cache: bool = True) -> Rating | None:
        """Analyze and rate a single draft.

        Returns the stored Rating, or None when the draft is unknown or the
        model call / JSON parse fails.
        """
        draft = self.db.get_draft(draft_name)
        if draft is None:
            console.print(f"[red]Draft not found: {draft_name}[/]")
            return None

        prompt = RATE_PROMPT_COMPACT.format(
            name=draft.name, title=draft.title, time=draft.date,
            pages=draft.pages or "?",
            abstract=draft.abstract[:2000],  # cap abstract to bound token spend
            categories=", ".join(CATEGORIES_SHORT),
        )
        phash = _prompt_hash(prompt)

        # Serve from the response cache when this exact prompt was seen before.
        if use_cache:
            cached = self.db.get_cached_response(draft_name, phash)
            if cached:
                try:
                    data = json.loads(cached)
                    rating = self._parse_rating(draft_name, data)
                    self.db.upsert_rating(rating)
                    draft.categories = rating.categories
                    self.db.upsert_draft(draft)
                    return rating
                except (json.JSONDecodeError, KeyError):
                    pass  # Re-analyze if cache is corrupt

        try:
            text, in_tok, out_tok = self._call_claude(prompt, max_tokens=512)
            text = self._extract_json(text)
            data = json.loads(text)

            # Cache the raw response so future runs are free.
            self.db.cache_response(
                draft_name, phash, self.config.claude_model,
                prompt, text, in_tok, out_tok,
            )
        except (json.JSONDecodeError, anthropic.APIError, IndexError, KeyError) as e:
            console.print(f"[red]Failed {draft_name}: {e}[/]")
            return None

        rating = self._parse_rating(draft_name, data)
        self.db.upsert_rating(rating)
        draft.categories = rating.categories
        self.db.upsert_draft(draft)
        return rating

    def rate_batch(self, drafts: list[Draft], batch_size: int = 5) -> int:
        """Rate drafts in batched API calls (one prompt per *batch_size* drafts).

        Falls back to per-draft rating when a whole batch fails.
        Returns the number of drafts successfully rated.
        """
        count = 0
        for i in range(0, len(drafts), batch_size):
            batch = drafts[i:i + batch_size]

            # Build one prompt covering the whole batch (abstracts truncated).
            drafts_block = ""
            for d in batch:
                drafts_block += f"\n---\n{d.name} | {d.title} | {d.date} | {d.pages or '?'}pg\nAbstract: {d.abstract[:1500]}\n"

            prompt = BATCH_PROMPT.format(
                drafts_block=drafts_block,
                categories=", ".join(CATEGORIES_SHORT),
            )
            phash = _prompt_hash(prompt)

            try:
                text, in_tok, out_tok = self._call_claude(
                    prompt, max_tokens=400 * len(batch)
                )
                text = self._extract_json(text)
                results = json.loads(text)
                if not isinstance(results, list):
                    results = [results]

                for j, data in enumerate(results):
                    draft_name = data.get("name", batch[j].name if j < len(batch) else None)
                    if not draft_name:
                        continue
                    # Cache each per-draft result individually, apportioning
                    # the batch's token cost evenly across results.
                    self.db.cache_response(
                        draft_name, _prompt_hash(f"batch-{phash}-{draft_name}"),
                        self.config.claude_model, f"batch[{i}]", json.dumps(data),
                        in_tok // len(results), out_tok // len(results),
                    )
                    rating = self._parse_rating(draft_name, data)
                    self.db.upsert_rating(rating)
                    draft = self.db.get_draft(draft_name)
                    if draft:
                        draft.categories = rating.categories
                        self.db.upsert_draft(draft)
                    count += 1
            # Fix: broadened from (JSONDecodeError, APIError). A malformed array
            # element (e.g. a non-dict entry raising AttributeError/TypeError)
            # previously crashed the whole run instead of using the fallback.
            except (json.JSONDecodeError, anthropic.APIError,
                    IndexError, KeyError, AttributeError, TypeError) as e:
                console.print(f"[red]Batch {i//batch_size+1} failed: {e}[/]")
                # Fallback: rate individually (cached results are reused).
                for d in batch:
                    r = self.rate_draft(d.name)
                    if r:
                        count += 1

        return count

    def rate_all_unrated(self, limit: int = 300, batch_size: int = 5) -> int:
        """Rate every still-unrated draft (up to *limit*) with a progress bar.

        Returns the number of drafts rated; prints cumulative token usage.
        """
        unrated = self.db.unrated_drafts(limit=limit)
        if not unrated:
            console.print("All drafts already rated.")
            return 0

        console.print(f"Rating [bold]{len(unrated)}[/] drafts in batches of {batch_size}...")
        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Analyzing...", total=len(unrated))
            for i in range(0, len(unrated), batch_size):
                batch = unrated[i:i + batch_size]
                # Short tail of each draft name, purely for the progress line.
                names = ", ".join(d.name.split("-")[-1][:12] for d in batch)
                progress.update(task, description=f"Batch: {names}")
                count += self.rate_batch(batch, batch_size=batch_size)
                progress.advance(task, advance=len(batch))

        in_tok, out_tok = self.db.total_tokens_used()
        console.print(
            f"Rated [bold green]{count}[/] drafts "
            f"| Total tokens used: {in_tok:,} in + {out_tok:,} out"
        )
        return count

    def compare_drafts(self, draft_names: list[str]) -> str:
        """Compare multiple drafts; return Claude's free-form analysis text.

        Unknown names are skipped with a warning; at least two known drafts
        are required.
        """
        parts = []
        for name in draft_names:
            draft = self.db.get_draft(name)
            if draft is None:
                console.print(f"[yellow]Skipping unknown draft: {name}[/]")
                continue
            parts.append(f"### {draft.title}\n**{name}**\n{draft.abstract}")

        if len(parts) < 2:
            return "Need at least 2 valid drafts to compare."

        prompt = COMPARE_PROMPT.format(
            drafts_section="\n\n---\n\n".join(parts)
        )

        try:
            text, _, _ = self._call_claude(prompt, max_tokens=2048)
            return text
        except anthropic.APIError as e:
            return f"Error: {e}"
|
||||
405
src/ietf_analyzer/cli.py
Normal file
405
src/ietf_analyzer/cli.py
Normal file
@@ -0,0 +1,405 @@
|
||||
"""CLI entry point — all user-facing commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from .config import Config
|
||||
from .db import Database
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
def _get_config() -> Config:
    """Load the persisted configuration (or defaults when none exists)."""
    return Config.load()
|
||||
|
||||
|
||||
@click.group()
@click.version_option(version="0.1.0")
def main():
    """IETF Draft Analyzer — track, categorize, and rate AI/agent Internet-Drafts."""
    # Group entry point: subcommands do all the work.
||||
|
||||
|
||||
# ── fetch ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
@click.option("--keywords", "-k", multiple=True, help="Extra keywords to search for")
@click.option("--since", "-s", help="Only fetch drafts newer than this date (YYYY-MM-DD)")
@click.option("--download-text/--no-download-text", default=True, help="Download full text of drafts")
def fetch(keywords: tuple[str, ...], since: str | None, download_text: bool):
    """Fetch AI/agent drafts from IETF Datatracker."""
    from .fetcher import Fetcher

    cfg = _get_config()
    db = Database(cfg)
    fetcher = Fetcher(cfg)

    # Configured keywords plus any extras supplied on the command line.
    search_terms = list(cfg.search_keywords) + list(keywords)

    try:
        found = fetcher.search_drafts(keywords=search_terms, since=since)
        for item in found:
            db.upsert_draft(item)
        console.print(f"Stored [bold green]{len(found)}[/] drafts in database")

        if download_text:
            pending = db.drafts_without_text()
            if pending:
                console.print(f"Downloading text for [bold]{len(pending)}[/] drafts...")
                for draft_name, body in fetcher.download_texts(pending).items():
                    stored = db.get_draft(draft_name)
                    if stored:
                        stored.full_text = body
                        db.upsert_draft(stored)
    finally:
        fetcher.close()
        db.close()
||||
|
||||
|
||||
# ── list ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command("list")
|
||||
@click.option("--limit", "-n", default=30, help="Number of drafts to show")
|
||||
@click.option("--sort", "-s", default="time DESC", help="Sort order (e.g. 'time DESC', 'name ASC')")
|
||||
def list_drafts(limit: int, sort: str):
|
||||
"""List tracked drafts."""
|
||||
cfg = _get_config()
|
||||
db = Database(cfg)
|
||||
try:
|
||||
drafts = db.list_drafts(limit=limit, order_by=sort)
|
||||
total = db.count_drafts()
|
||||
|
||||
table = Table(title=f"Tracked Drafts ({total} total, showing {len(drafts)})")
|
||||
table.add_column("Date", style="dim", width=10)
|
||||
table.add_column("Name", style="cyan", max_width=55)
|
||||
table.add_column("Title", max_width=50)
|
||||
table.add_column("Pg", justify="right", width=4)
|
||||
table.add_column("Text", justify="center", width=4)
|
||||
table.add_column("Rated", justify="center", width=5)
|
||||
|
||||
for d in drafts:
|
||||
has_text = "\u2713" if d.full_text else ""
|
||||
rated = "\u2713" if db.get_rating(d.name) else ""
|
||||
table.add_row(d.date, d.name, d.title[:50], str(d.pages or ""), has_text, rated)
|
||||
|
||||
console.print(table)
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
# ── search ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
@click.argument("query")
@click.option("--limit", "-n", default=20, help="Max results")
def search(query: str, limit: int):
    """Full-text search across stored drafts."""
    cfg = _get_config()
    db = Database(cfg)
    try:
        hits = db.search_drafts(query, limit=limit)
        if not hits:
            console.print(f"No results for [bold]{query}[/]")
            return

        table = Table(title=f"Search: {query} ({len(hits)} results)")
        table.add_column("Date", style="dim", width=10)
        table.add_column("Name", style="cyan")
        table.add_column("Title")
        for hit in hits:
            table.add_row(hit.date, hit.name, hit.title[:60])

        console.print(table)
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── show ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
@click.argument("name")
def show(name: str):
    """Show detailed info for a draft (console view plus a saved markdown report)."""
    from .reports import Reporter

    cfg = _get_config()
    db = Database(cfg)
    reporter = Reporter(cfg, db)
    try:
        draft = db.get_draft(name)
        if draft is None:
            console.print(f"[red]Draft not found: {name}[/]")
            return

        rating = db.get_rating(name)

        # Header: title, identity line, group, link, abstract.
        console.print(f"\n[bold]{draft.title}[/]")
        console.print(f"[dim]{draft.name}[/] rev {draft.rev} | {draft.date} | {draft.pages or '?'} pages")
        console.print(f"Group: {draft.group or 'individual'} | {draft.datatracker_url}")
        console.print(f"\n[italic]{draft.abstract}[/]\n")

        if rating:
            console.print("[bold]AI Assessment[/]")
            console.print(f" Score: [bold green]{rating.composite_score:.1f}[/]")
            console.print(f" Summary: {rating.summary}\n")

            table = Table(show_header=True)
            table.add_column("Dimension", width=12)
            table.add_column("Score", justify="center", width=7)
            table.add_column("Notes")
            table.add_row("Novelty", f"{rating.novelty}/5", rating.novelty_note)
            table.add_row("Maturity", f"{rating.maturity}/5", rating.maturity_note)
            table.add_row("Overlap", f"{rating.overlap}/5", rating.overlap_note)
            table.add_row("Momentum", f"{rating.momentum}/5", rating.momentum_note)
            table.add_row("Relevance", f"{rating.relevance}/5", rating.relevance_note)
            console.print(table)

            if rating.categories:
                console.print(f"\nCategories: {', '.join(rating.categories)}")
        else:
            # Fix: this was a plain string, so the literal text "{name}" was
            # printed instead of the draft's name.
            console.print(f"[dim]Not yet rated — run: ietf analyze {name}[/]")

        # Save detailed report too
        path = reporter.draft_detail(name)
        if path:
            console.print(f"\n[dim]Report saved: {path}[/]")
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── analyze ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
@click.argument("name", required=False)
@click.option("--all", "analyze_all", is_flag=True, help="Analyze all unrated drafts")
@click.option("--limit", "-n", default=50, help="Max drafts to analyze (with --all)")
def analyze(name: str | None, analyze_all: bool, limit: int):
    """Analyze and rate drafts using Claude."""
    from .analyzer import Analyzer

    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)

    try:
        # Guard-clause flow: --all, then single name, then usage hint.
        if analyze_all:
            done = analyzer.rate_all_unrated(limit=limit)
            console.print(f"Analyzed [bold green]{done}[/] drafts")
            return
        if not name:
            console.print("Provide a draft name or use --all")
            return
        rating = analyzer.rate_draft(name)
        if rating is None:
            console.print("[red]Analysis failed[/]")
            return
        console.print(f"\n[bold green]Rating for {name}:[/]")
        console.print(f" Score: {rating.composite_score:.1f}")
        console.print(f" Summary: {rating.summary}")
        console.print(f" Novelty={rating.novelty} Maturity={rating.maturity} "
                      f"Overlap={rating.overlap} Momentum={rating.momentum} "
                      f"Relevance={rating.relevance}")
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── compare ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
@click.argument("names", nargs=-1, required=True)
def compare(names: tuple[str, ...]):
    """Compare multiple drafts for overlap and unique contributions."""
    from .analyzer import Analyzer

    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)

    try:
        # compare_drafts returns ready-to-print analysis text (or an error string).
        console.print(analyzer.compare_drafts(list(names)))
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── embed ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
def embed():
    """Generate embeddings for all drafts (requires Ollama)."""
    from .embeddings import Embedder

    cfg = _get_config()
    db = Database(cfg)
    embedder = Embedder(cfg, db)

    try:
        n_embedded = embedder.embed_all_missing()
        console.print(f"Embedded [bold green]{n_embedded}[/] drafts")
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── similar ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
@click.argument("name")
@click.option("--top", "-n", default=10, help="Number of similar drafts to show")
def similar(name: str, top: int):
    """Find drafts most similar to a given draft."""
    from .embeddings import Embedder

    cfg = _get_config()
    db = Database(cfg)
    embedder = Embedder(cfg, db)

    try:
        neighbours = embedder.find_similar(name, top_n=top)
        if not neighbours:
            console.print("[yellow]No similar drafts found (need embeddings — run `ietf embed` first)[/]")
            return

        table = Table(title=f"Drafts similar to {name}")
        table.add_column("Similarity", justify="right", width=10)
        table.add_column("Draft", style="cyan")
        table.add_column("Title")

        for other_name, similarity in neighbours:
            other = db.get_draft(other_name)
            table.add_row(f"{similarity:.3f}", other_name, other.title[:60] if other else "")

        console.print(table)
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── clusters ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
@click.option("--threshold", "-t", default=0.85, help="Similarity threshold for clustering")
def clusters(threshold: float):
    """Find clusters of highly similar (potentially overlapping) drafts."""
    from .embeddings import Embedder

    cfg = _get_config()
    db = Database(cfg)
    embedder = Embedder(cfg, db)

    try:
        found = embedder.find_clusters(threshold=threshold)
        if not found:
            console.print("No clusters found at this threshold.")
            return

        console.print(f"\n[bold]Found {len(found)} clusters[/] (threshold={threshold})\n")
        for idx, members in enumerate(found, 1):
            console.print(f"[bold cyan]Cluster {idx}[/] ({len(members)} drafts):")
            for member in members:
                entry = db.get_draft(member)
                member_title = entry.title[:60] if entry else ""
                console.print(f" - {member} [dim]{member_title}[/]")
            console.print()
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── report ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.group()
def report():
    """Generate markdown reports."""
    # Group entry point: see the overview / landscape / digest subcommands.
|
||||
|
||||
|
||||
@report.command()
def overview():
    """Overview table of all rated drafts."""
    from .reports import Reporter

    cfg = _get_config()
    db = Database(cfg)
    reporter = Reporter(cfg, db)
    try:
        saved_to = reporter.overview()
        console.print(f"Report saved: [bold]{saved_to}[/]")
    finally:
        db.close()
|
||||
|
||||
|
||||
@report.command()
def landscape():
    """Category-grouped landscape view."""
    from .reports import Reporter

    cfg = _get_config()
    db = Database(cfg)
    reporter = Reporter(cfg, db)
    try:
        saved_to = reporter.landscape()
        console.print(f"Report saved: [bold]{saved_to}[/]")
    finally:
        db.close()
|
||||
|
||||
|
||||
@report.command()
@click.option("--days", "-d", default=7, help="Look back N days")
def digest(days: int):
    """What's new digest."""
    from .reports import Reporter

    cfg = _get_config()
    db = Database(cfg)
    reporter = Reporter(cfg, db)
    try:
        saved_to = reporter.digest(since_days=days)
        console.print(f"Report saved: [bold]{saved_to}[/]")
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── config ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command("config")
|
||||
@click.option("--set", "set_key", nargs=2, help="Set a config key (e.g. --set claude_model claude-opus-4-20250514)")
|
||||
def config_cmd(set_key: tuple[str, str] | None):
|
||||
"""Show or modify configuration."""
|
||||
from dataclasses import asdict
|
||||
cfg = _get_config()
|
||||
|
||||
if set_key:
|
||||
key, value = set_key
|
||||
if hasattr(cfg, key):
|
||||
# Coerce types
|
||||
current = getattr(cfg, key)
|
||||
if isinstance(current, float):
|
||||
value = float(value)
|
||||
elif isinstance(current, int):
|
||||
value = int(value)
|
||||
elif isinstance(current, list):
|
||||
import json
|
||||
value = json.loads(value)
|
||||
setattr(cfg, key, value)
|
||||
cfg.save()
|
||||
console.print(f"Set [bold]{key}[/] = {value}")
|
||||
else:
|
||||
console.print(f"[red]Unknown config key: {key}[/]")
|
||||
else:
|
||||
from dataclasses import asdict
|
||||
for key, val in asdict(cfg).items():
|
||||
console.print(f" [bold]{key}:[/] {val}")
|
||||
44
src/ietf_analyzer/config.py
Normal file
44
src/ietf_analyzer/config.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Configuration management."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from pathlib import Path
|
||||
|
||||
DEFAULT_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
|
||||
CONFIG_FILE = DEFAULT_DATA_DIR / "config.json"
|
||||
|
||||
DEFAULT_KEYWORDS = [
|
||||
"agent",
|
||||
"ai-agent",
|
||||
"llm",
|
||||
"autonomous",
|
||||
"machine-learning",
|
||||
"artificial-intelligence",
|
||||
]
|
||||
|
||||
|
||||
@dataclass
class Config:
    """User-tunable settings, persisted as JSON in the data directory."""

    data_dir: str = str(DEFAULT_DATA_DIR)
    db_path: str = str(DEFAULT_DATA_DIR / "drafts.db")
    ollama_url: str = "http://localhost:11434"
    ollama_embed_model: str = "nomic-embed-text"
    claude_model: str = "claude-sonnet-4-20250514"
    search_keywords: list[str] = field(default_factory=lambda: list(DEFAULT_KEYWORDS))
    # Only fetch drafts newer than this (ISO date string)
    fetch_since: str = "2024-01-01"
    # Polite delay between API requests (seconds)
    fetch_delay: float = 0.5

    def save(self) -> None:
        """Write the current settings to CONFIG_FILE as pretty-printed JSON."""
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.write_text(json.dumps(asdict(self), indent=2))

    @classmethod
    def load(cls) -> Config:
        """Read settings from CONFIG_FILE if present (ignoring unknown keys); defaults otherwise."""
        if not CONFIG_FILE.exists():
            return cls()
        raw = json.loads(CONFIG_FILE.read_text())
        known = {k: v for k, v in raw.items() if k in cls.__dataclass_fields__}
        return cls(**known)
|
||||
375
src/ietf_analyzer/db.py
Normal file
375
src/ietf_analyzer/db.py
Normal file
@@ -0,0 +1,375 @@
|
||||
"""SQLite database layer with FTS5 full-text search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .config import Config
|
||||
from .models import Draft, Rating
|
||||
|
||||
SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS drafts (
|
||||
name TEXT PRIMARY KEY,
|
||||
rev TEXT NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
abstract TEXT NOT NULL DEFAULT '',
|
||||
time TEXT,
|
||||
dt_id INTEGER,
|
||||
pages INTEGER,
|
||||
words INTEGER,
|
||||
"group" TEXT,
|
||||
group_uri TEXT,
|
||||
expires TEXT,
|
||||
ad TEXT,
|
||||
shepherd TEXT,
|
||||
states TEXT DEFAULT '[]', -- JSON array
|
||||
full_text TEXT,
|
||||
categories TEXT DEFAULT '[]', -- JSON array
|
||||
tags TEXT DEFAULT '[]', -- JSON array
|
||||
fetched_at TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS ratings (
|
||||
draft_name TEXT PRIMARY KEY REFERENCES drafts(name),
|
||||
novelty INTEGER NOT NULL,
|
||||
maturity INTEGER NOT NULL,
|
||||
overlap INTEGER NOT NULL,
|
||||
momentum INTEGER NOT NULL,
|
||||
relevance INTEGER NOT NULL,
|
||||
summary TEXT NOT NULL DEFAULT '',
|
||||
novelty_note TEXT DEFAULT '',
|
||||
maturity_note TEXT DEFAULT '',
|
||||
overlap_note TEXT DEFAULT '',
|
||||
momentum_note TEXT DEFAULT '',
|
||||
relevance_note TEXT DEFAULT '',
|
||||
categories TEXT DEFAULT '[]', -- JSON array
|
||||
rated_at TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS embeddings (
|
||||
draft_name TEXT PRIMARY KEY REFERENCES drafts(name),
|
||||
model TEXT NOT NULL,
|
||||
vector BLOB NOT NULL, -- numpy float32 array as bytes
|
||||
created_at TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS llm_cache (
|
||||
draft_name TEXT NOT NULL,
|
||||
prompt_hash TEXT NOT NULL,
|
||||
model TEXT NOT NULL,
|
||||
request_json TEXT NOT NULL, -- full prompt sent
|
||||
response_json TEXT NOT NULL, -- raw Claude response
|
||||
input_tokens INTEGER,
|
||||
output_tokens INTEGER,
|
||||
created_at TEXT,
|
||||
PRIMARY KEY (draft_name, prompt_hash)
|
||||
);
|
||||
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS drafts_fts USING fts5(
|
||||
name, title, abstract, full_text,
|
||||
content='drafts',
|
||||
content_rowid='rowid'
|
||||
);
|
||||
|
||||
-- Triggers to keep FTS index in sync
|
||||
CREATE TRIGGER IF NOT EXISTS drafts_ai AFTER INSERT ON drafts BEGIN
|
||||
INSERT INTO drafts_fts(rowid, name, title, abstract, full_text)
|
||||
VALUES (new.rowid, new.name, new.title, new.abstract, new.full_text);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS drafts_ad AFTER DELETE ON drafts BEGIN
|
||||
INSERT INTO drafts_fts(drafts_fts, rowid, name, title, abstract, full_text)
|
||||
VALUES ('delete', old.rowid, old.name, old.title, old.abstract, old.full_text);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS drafts_au AFTER UPDATE ON drafts BEGIN
|
||||
INSERT INTO drafts_fts(drafts_fts, rowid, name, title, abstract, full_text)
|
||||
VALUES ('delete', old.rowid, old.name, old.title, old.abstract, old.full_text);
|
||||
INSERT INTO drafts_fts(rowid, name, title, abstract, full_text)
|
||||
VALUES (new.rowid, new.name, new.title, new.abstract, new.full_text);
|
||||
END;
|
||||
"""
|
||||
|
||||
|
||||
class Database:
|
||||
    def __init__(self, config: Config | None = None):
        """Bind to the configured SQLite path; the connection itself opens lazily."""
        self.config = config or Config.load()
        self.db_path = self.config.db_path
        # Ensure the data directory exists before any connect() happens.
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
        self._conn: sqlite3.Connection | None = None
|
||||
|
||||
    @property
    def conn(self) -> sqlite3.Connection:
        """Lazily open and memoize the SQLite connection, applying the schema on first use."""
        if self._conn is None:
            self._conn = sqlite3.connect(self.db_path)
            self._conn.row_factory = sqlite3.Row  # rows addressable by column name
            self._conn.execute("PRAGMA journal_mode=WAL")
            self._conn.execute("PRAGMA foreign_keys=ON")
            # SCHEMA is idempotent (IF NOT EXISTS throughout), so this is safe
            # on both fresh and existing databases.
            self._conn.executescript(SCHEMA)
        return self._conn
|
||||
|
||||
def close(self) -> None:
|
||||
if self._conn:
|
||||
self._conn.close()
|
||||
self._conn = None
|
||||
|
||||
# --- Drafts ---
|
||||
|
||||
def upsert_draft(self, draft: Draft) -> None:
|
||||
self.conn.execute(
|
||||
"""INSERT INTO drafts (name, rev, title, abstract, time, dt_id, pages, words,
|
||||
"group", group_uri, expires, ad, shepherd, states, full_text, categories, tags, fetched_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(name) DO UPDATE SET
|
||||
rev=excluded.rev, title=excluded.title, abstract=excluded.abstract,
|
||||
time=excluded.time, dt_id=excluded.dt_id, pages=excluded.pages,
|
||||
words=excluded.words, "group"=excluded."group", group_uri=excluded.group_uri,
|
||||
expires=excluded.expires, ad=excluded.ad, shepherd=excluded.shepherd,
|
||||
states=excluded.states,
|
||||
full_text=COALESCE(excluded.full_text, full_text),
|
||||
categories=excluded.categories, tags=excluded.tags,
|
||||
fetched_at=excluded.fetched_at
|
||||
""",
|
||||
(
|
||||
draft.name, draft.rev, draft.title, draft.abstract, draft.time,
|
||||
draft.dt_id, draft.pages, draft.words, draft.group, draft.group_uri,
|
||||
draft.expires, draft.ad, draft.shepherd,
|
||||
json.dumps(draft.states), draft.full_text,
|
||||
json.dumps(draft.categories), json.dumps(draft.tags),
|
||||
draft.fetched_at or datetime.now(timezone.utc).isoformat(),
|
||||
),
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
def get_draft(self, name: str) -> Draft | None:
|
||||
row = self.conn.execute("SELECT * FROM drafts WHERE name = ?", (name,)).fetchone()
|
||||
if row is None:
|
||||
return None
|
||||
return self._row_to_draft(row)
|
||||
|
||||
def list_drafts(
|
||||
self,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
order_by: str = "time DESC",
|
||||
) -> list[Draft]:
|
||||
# Sanitize order_by to prevent injection
|
||||
allowed = {"time", "name", "title", "pages", "words", "fetched_at"}
|
||||
parts = order_by.split()
|
||||
col = parts[0] if parts else "time"
|
||||
direction = parts[1].upper() if len(parts) > 1 else "DESC"
|
||||
if col not in allowed:
|
||||
col = "time"
|
||||
if direction not in ("ASC", "DESC"):
|
||||
direction = "DESC"
|
||||
safe_order = f'"{col}" {direction}' if col == "group" else f"{col} {direction}"
|
||||
|
||||
rows = self.conn.execute(
|
||||
f"SELECT * FROM drafts ORDER BY {safe_order} LIMIT ? OFFSET ?",
|
||||
(limit, offset),
|
||||
).fetchall()
|
||||
return [self._row_to_draft(r) for r in rows]
|
||||
|
||||
def count_drafts(self) -> int:
|
||||
return self.conn.execute("SELECT COUNT(*) FROM drafts").fetchone()[0]
|
||||
|
||||
def search_drafts(self, query: str, limit: int = 50) -> list[Draft]:
|
||||
rows = self.conn.execute(
|
||||
"""SELECT d.* FROM drafts d
|
||||
JOIN drafts_fts f ON d.rowid = f.rowid
|
||||
WHERE drafts_fts MATCH ?
|
||||
ORDER BY rank
|
||||
LIMIT ?""",
|
||||
(query, limit),
|
||||
).fetchall()
|
||||
return [self._row_to_draft(r) for r in rows]
|
||||
|
||||
def drafts_without_text(self, limit: int = 100) -> list[Draft]:
|
||||
rows = self.conn.execute(
|
||||
"SELECT * FROM drafts WHERE full_text IS NULL LIMIT ?", (limit,)
|
||||
).fetchall()
|
||||
return [self._row_to_draft(r) for r in rows]
|
||||
|
||||
# --- Ratings ---
|
||||
|
||||
def upsert_rating(self, rating: Rating) -> None:
|
||||
self.conn.execute(
|
||||
"""INSERT INTO ratings (draft_name, novelty, maturity, overlap, momentum, relevance,
|
||||
summary, novelty_note, maturity_note, overlap_note, momentum_note, relevance_note,
|
||||
categories, rated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(draft_name) DO UPDATE SET
|
||||
novelty=excluded.novelty, maturity=excluded.maturity, overlap=excluded.overlap,
|
||||
momentum=excluded.momentum, relevance=excluded.relevance, summary=excluded.summary,
|
||||
novelty_note=excluded.novelty_note, maturity_note=excluded.maturity_note,
|
||||
overlap_note=excluded.overlap_note, momentum_note=excluded.momentum_note,
|
||||
relevance_note=excluded.relevance_note, categories=excluded.categories,
|
||||
rated_at=excluded.rated_at
|
||||
""",
|
||||
(
|
||||
rating.draft_name, rating.novelty, rating.maturity, rating.overlap,
|
||||
rating.momentum, rating.relevance, rating.summary,
|
||||
rating.novelty_note, rating.maturity_note, rating.overlap_note,
|
||||
rating.momentum_note, rating.relevance_note,
|
||||
json.dumps(rating.categories),
|
||||
rating.rated_at or datetime.now(timezone.utc).isoformat(),
|
||||
),
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
def get_rating(self, draft_name: str) -> Rating | None:
|
||||
row = self.conn.execute(
|
||||
"SELECT * FROM ratings WHERE draft_name = ?", (draft_name,)
|
||||
).fetchone()
|
||||
if row is None:
|
||||
return None
|
||||
return self._row_to_rating(row)
|
||||
|
||||
def unrated_drafts(self, limit: int = 100) -> list[Draft]:
|
||||
rows = self.conn.execute(
|
||||
"""SELECT d.* FROM drafts d
|
||||
LEFT JOIN ratings r ON d.name = r.draft_name
|
||||
WHERE r.draft_name IS NULL
|
||||
LIMIT ?""",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [self._row_to_draft(r) for r in rows]
|
||||
|
||||
def drafts_with_ratings(self, limit: int = 200) -> list[tuple[Draft, Rating]]:
|
||||
rows = self.conn.execute(
|
||||
"""SELECT d.*, r.novelty, r.maturity, r.overlap, r.momentum, r.relevance,
|
||||
r.summary, r.novelty_note, r.maturity_note, r.overlap_note,
|
||||
r.momentum_note, r.relevance_note, r.categories as r_categories, r.rated_at
|
||||
FROM drafts d
|
||||
JOIN ratings r ON d.name = r.draft_name
|
||||
ORDER BY (r.novelty * 0.30 + r.relevance * 0.25 + r.maturity * 0.20
|
||||
+ r.momentum * 0.15 + (6 - r.overlap) * 0.10) DESC
|
||||
LIMIT ?""",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
results = []
|
||||
for r in rows:
|
||||
draft = self._row_to_draft(r)
|
||||
rating = Rating(
|
||||
draft_name=r["draft_name"] if "draft_name" in r.keys() else draft.name,
|
||||
novelty=r["novelty"], maturity=r["maturity"], overlap=r["overlap"],
|
||||
momentum=r["momentum"], relevance=r["relevance"], summary=r["summary"],
|
||||
novelty_note=r["novelty_note"], maturity_note=r["maturity_note"],
|
||||
overlap_note=r["overlap_note"], momentum_note=r["momentum_note"],
|
||||
relevance_note=r["relevance_note"],
|
||||
categories=json.loads(r["r_categories"]) if r["r_categories"] else [],
|
||||
rated_at=r["rated_at"],
|
||||
)
|
||||
results.append((draft, rating))
|
||||
return results
|
||||
|
||||
# --- Embeddings ---
|
||||
|
||||
def store_embedding(self, draft_name: str, model: str, vector: np.ndarray) -> None:
|
||||
self.conn.execute(
|
||||
"""INSERT INTO embeddings (draft_name, model, vector, created_at)
|
||||
VALUES (?, ?, ?, ?)
|
||||
ON CONFLICT(draft_name) DO UPDATE SET
|
||||
model=excluded.model, vector=excluded.vector, created_at=excluded.created_at
|
||||
""",
|
||||
(draft_name, model, vector.astype(np.float32).tobytes(),
|
||||
datetime.now(timezone.utc).isoformat()),
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
def get_embedding(self, draft_name: str) -> np.ndarray | None:
|
||||
row = self.conn.execute(
|
||||
"SELECT vector FROM embeddings WHERE draft_name = ?", (draft_name,)
|
||||
).fetchone()
|
||||
if row is None:
|
||||
return None
|
||||
return np.frombuffer(row["vector"], dtype=np.float32)
|
||||
|
||||
def all_embeddings(self) -> dict[str, np.ndarray]:
|
||||
rows = self.conn.execute("SELECT draft_name, vector FROM embeddings").fetchall()
|
||||
return {
|
||||
r["draft_name"]: np.frombuffer(r["vector"], dtype=np.float32)
|
||||
for r in rows
|
||||
}
|
||||
|
||||
def drafts_without_embeddings(self, limit: int = 500) -> list[str]:
|
||||
rows = self.conn.execute(
|
||||
"""SELECT d.name FROM drafts d
|
||||
LEFT JOIN embeddings e ON d.name = e.draft_name
|
||||
WHERE e.draft_name IS NULL
|
||||
LIMIT ?""",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [r["name"] for r in rows]
|
||||
|
||||
# --- LLM Cache ---
|
||||
|
||||
def cache_response(
|
||||
self, draft_name: str, prompt_hash: str, model: str,
|
||||
request_json: str, response_json: str,
|
||||
input_tokens: int = 0, output_tokens: int = 0,
|
||||
) -> None:
|
||||
self.conn.execute(
|
||||
"""INSERT INTO llm_cache (draft_name, prompt_hash, model, request_json,
|
||||
response_json, input_tokens, output_tokens, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(draft_name, prompt_hash) DO UPDATE SET
|
||||
model=excluded.model, response_json=excluded.response_json,
|
||||
input_tokens=excluded.input_tokens, output_tokens=excluded.output_tokens,
|
||||
created_at=excluded.created_at
|
||||
""",
|
||||
(draft_name, prompt_hash, model, request_json, response_json,
|
||||
input_tokens, output_tokens, datetime.now(timezone.utc).isoformat()),
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
def get_cached_response(self, draft_name: str, prompt_hash: str) -> str | None:
|
||||
row = self.conn.execute(
|
||||
"SELECT response_json FROM llm_cache WHERE draft_name = ? AND prompt_hash = ?",
|
||||
(draft_name, prompt_hash),
|
||||
).fetchone()
|
||||
return row["response_json"] if row else None
|
||||
|
||||
def total_tokens_used(self) -> tuple[int, int]:
|
||||
row = self.conn.execute(
|
||||
"SELECT COALESCE(SUM(input_tokens),0), COALESCE(SUM(output_tokens),0) FROM llm_cache"
|
||||
).fetchone()
|
||||
return (row[0], row[1])
|
||||
|
||||
# --- Helpers ---
|
||||
|
||||
@staticmethod
|
||||
def _row_to_draft(row: sqlite3.Row) -> Draft:
|
||||
d = dict(row)
|
||||
return Draft(
|
||||
name=d["name"], rev=d["rev"], title=d["title"], abstract=d["abstract"],
|
||||
time=d["time"], dt_id=d.get("dt_id"), pages=d.get("pages"),
|
||||
words=d.get("words"), group=d.get("group"), group_uri=d.get("group_uri"),
|
||||
expires=d.get("expires"), ad=d.get("ad"), shepherd=d.get("shepherd"),
|
||||
states=json.loads(d.get("states") or "[]"),
|
||||
full_text=d.get("full_text"),
|
||||
categories=json.loads(d.get("categories") or "[]"),
|
||||
tags=json.loads(d.get("tags") or "[]"),
|
||||
fetched_at=d.get("fetched_at"),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _row_to_rating(row: sqlite3.Row) -> Rating:
|
||||
d = dict(row)
|
||||
return Rating(
|
||||
draft_name=d["draft_name"], novelty=d["novelty"], maturity=d["maturity"],
|
||||
overlap=d["overlap"], momentum=d["momentum"], relevance=d["relevance"],
|
||||
summary=d["summary"],
|
||||
novelty_note=d.get("novelty_note", ""),
|
||||
maturity_note=d.get("maturity_note", ""),
|
||||
overlap_note=d.get("overlap_note", ""),
|
||||
momentum_note=d.get("momentum_note", ""),
|
||||
relevance_note=d.get("relevance_note", ""),
|
||||
categories=json.loads(d.get("categories") or "[]"),
|
||||
rated_at=d.get("rated_at"),
|
||||
)
|
||||
136
src/ietf_analyzer/embeddings.py
Normal file
136
src/ietf_analyzer/embeddings.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""Embedding generation via Ollama and similarity computation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import ollama as ollama_lib
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
|
||||
|
||||
from .config import Config
|
||||
from .db import Database
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
|
||||
dot = np.dot(a, b)
|
||||
norm = np.linalg.norm(a) * np.linalg.norm(b)
|
||||
if norm == 0:
|
||||
return 0.0
|
||||
return float(dot / norm)
|
||||
|
||||
|
||||
class Embedder:
    """Generates, stores, and queries draft embeddings via a local Ollama server."""

    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self.client = ollama_lib.Client(host=self.config.ollama_url)

    def embed_text(self, text: str) -> np.ndarray:
        """Generate an embedding for a single text string."""
        # Keep the request bounded: ~32k chars is roughly 8k tokens of input.
        resp = self.client.embed(
            model=self.config.ollama_embed_model, input=text[:32000]
        )
        return np.array(resp["embeddings"][0], dtype=np.float32)

    def embed_draft(self, draft_name: str) -> np.ndarray | None:
        """Embed one draft (title + abstract + text head) and persist the vector.

        Returns the vector, or None when the draft is not in the database.
        """
        draft = self.db.get_draft(draft_name)
        if draft is None:
            console.print(f"[red]Draft not found: {draft_name}[/]")
            return None

        # Title and abstract carry the most signal; append the first ~4k
        # chars of the body when full text is available.
        pieces = [draft.title, draft.abstract]
        if draft.full_text:
            pieces.append(draft.full_text[:4000])
        combined = "\n\n".join(piece for piece in pieces if piece)

        vec = self.embed_text(combined)
        self.db.store_embedding(draft_name, self.config.ollama_embed_model, vec)
        return vec

    def embed_all_missing(self) -> int:
        """Embed every draft lacking a stored vector; return the success count."""
        pending = self.db.drafts_without_embeddings()
        if not pending:
            console.print("All drafts already have embeddings.")
            return 0

        done = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Generating embeddings...", total=len(pending))
            for name in pending:
                try:
                    self.embed_draft(name)
                except Exception as e:
                    # Best-effort batch: report and move on to the next draft.
                    console.print(f"[red]Failed to embed {name}: {e}[/]")
                else:
                    done += 1
                progress.advance(task)

        console.print(f"Generated [bold green]{done}[/] embeddings")
        return done

    def find_similar(self, draft_name: str, top_n: int = 10) -> list[tuple[str, float]]:
        """Return the top_n (name, similarity) pairs closest to draft_name."""
        anchor = self.db.get_embedding(draft_name)
        if anchor is None:
            # Not embedded yet — try producing the vector on demand.
            anchor = self.embed_draft(draft_name)
            if anchor is None:
                return []

        scored = [
            (other, _cosine_similarity(anchor, vec))
            for other, vec in self.db.all_embeddings().items()
            if other != draft_name
        ]
        scored.sort(key=lambda pair: pair[1], reverse=True)
        return scored[:top_n]

    def similarity_matrix(self) -> tuple[list[str], np.ndarray]:
        """Compute the pairwise similarity matrix over all embedded drafts.

        Returns the sorted draft names and an NxN float32 matrix in that order.
        """
        vectors = self.db.all_embeddings()
        names = sorted(vectors)
        size = len(names)
        matrix = np.zeros((size, size), dtype=np.float32)
        # The metric is symmetric: compute the upper triangle and mirror it.
        for row in range(size):
            for col in range(row, size):
                score = _cosine_similarity(vectors[names[row]], vectors[names[col]])
                matrix[row, col] = score
                matrix[col, row] = score
        return names, matrix

    def find_clusters(self, threshold: float = 0.85) -> list[list[str]]:
        """Greedily group drafts whose pairwise similarity meets the threshold.

        Each draft joins at most one cluster; singleton groups are dropped.
        """
        names, matrix = self.similarity_matrix()
        if not names:
            return []

        assigned: set[str] = set()
        clusters: list[list[str]] = []

        for i, seed in enumerate(names):
            if seed in assigned:
                continue
            group = [seed]
            assigned.add(seed)
            for j, candidate in enumerate(names):
                if candidate not in assigned and matrix[i, j] >= threshold:
                    group.append(candidate)
                    assigned.add(candidate)
            if len(group) > 1:
                clusters.append(group)

        return clusters
|
||||
204
src/ietf_analyzer/fetcher.py
Normal file
204
src/ietf_analyzer/fetcher.py
Normal file
@@ -0,0 +1,204 @@
|
||||
"""Datatracker API client — search, fetch metadata, download full text."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time as time_mod
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import httpx
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
|
||||
|
||||
from .config import Config
|
||||
from .models import Draft
|
||||
|
||||
# Datatracker REST API root (JSON endpoints).
API_BASE = "https://datatracker.ietf.org/api/v1"
# Plain-text archive where published draft revisions live.
TEXT_BASE = "https://www.ietf.org/archive/id"
# Document fields searched per keyword: draft name and abstract substrings.
SEARCH_FIELDS = ("name__contains", "abstract__contains")

console = Console()
|
||||
|
||||
|
||||
class Fetcher:
    """HTTP client for the IETF Datatracker: search, metadata, and text download.

    Wraps a persistent httpx.Client; call close() (or reuse one instance)
    to release the connection pool. Requests are throttled by
    config.fetch_delay between pages/downloads.
    """

    def __init__(self, config: Config | None = None):
        self.config = config or Config.load()
        self.client = httpx.Client(timeout=30, follow_redirects=True)
        # group API URI -> acronym, so each working group is resolved at most once.
        self._group_cache: dict[str, str] = {}

    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self.client.close()

    # --- Search & fetch metadata ---

    def search_drafts(
        self,
        keywords: list[str] | None = None,
        since: str | None = None,
        limit_per_keyword: int = 200,
    ) -> list[Draft]:
        """Search for drafts matching keywords. Deduplicates by name.

        Each keyword is searched in both the draft name and the abstract
        (SEARCH_FIELDS); the first hit for a name wins.
        """
        keywords = keywords or self.config.search_keywords
        since = since or self.config.fetch_since
        seen: dict[str, Draft] = {}

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            # Search both name and abstract for each keyword
            searches = [(kw, field) for kw in keywords for field in SEARCH_FIELDS]

            task = progress.add_task("Searching Datatracker...", total=len(searches))

            for kw, search_field in searches:
                progress.update(task, description=f"Searching {search_field.split('__')[0]}: {kw}")
                drafts = self._paginated_search(search_field, kw, since, limit_per_keyword)
                for d in drafts:
                    if d.name not in seen:
                        seen[d.name] = d
                progress.advance(task)

        console.print(f"Found [bold green]{len(seen)}[/] unique drafts")
        return list(seen.values())

    def _paginated_search(
        self,
        search_field: str,
        keyword: str,
        since: str,
        max_results: int,
    ) -> list[Draft]:
        """Walk the paginated document endpoint until max_results or exhaustion.

        Stops early on HTTP errors (partial results are returned) and sleeps
        fetch_delay between pages to stay polite to the API.
        """
        results: list[Draft] = []
        offset = 0
        page_size = 100

        while offset < max_results:
            params = {
                "format": "json",
                search_field: keyword,
                "time__gte": since,
                "type__slug": "draft",
                # Never request more than the remaining budget.
                "limit": min(page_size, max_results - offset),
                "offset": offset,
            }
            try:
                resp = self.client.get(f"{API_BASE}/doc/document/", params=params)
                resp.raise_for_status()
            except httpx.HTTPError as e:
                console.print(f"[red]API error: {e}[/]")
                break

            data = resp.json()
            objects = data.get("objects", [])
            if not objects:
                break

            for obj in objects:
                results.append(self._api_obj_to_draft(obj))

            offset += len(objects)
            # The API signals the last page with a null "next" link.
            if not data.get("meta", {}).get("next"):
                break

            time_mod.sleep(self.config.fetch_delay)

        return results

    def fetch_draft(self, name: str) -> Draft | None:
        """Fetch a single draft by name; None on any HTTP failure."""
        try:
            resp = self.client.get(
                f"{API_BASE}/doc/document/{name}/", params={"format": "json"}
            )
            resp.raise_for_status()
            return self._api_obj_to_draft(resp.json())
        except httpx.HTTPError as e:
            console.print(f"[red]Error fetching {name}: {e}[/]")
            return None

    # --- Full text ---

    def download_full_text(self, draft: Draft) -> str | None:
        """Download the plain text of a draft.

        Falls back to the revision-less URL when the revision-specific one
        fails; returns None when both attempts fail.
        """
        url = draft.text_url
        try:
            resp = self.client.get(url)
            resp.raise_for_status()
            return resp.text
        except httpx.HTTPError:
            # Try without revision if it fails
            try:
                alt_url = f"{TEXT_BASE}/{draft.name}.txt"
                resp = self.client.get(alt_url)
                resp.raise_for_status()
                return resp.text
            except httpx.HTTPError as e:
                console.print(f"[dim]Could not download text for {draft.name}: {e}[/]")
                return None

    def download_texts(self, drafts: list[Draft]) -> dict[str, str]:
        """Download full text for multiple drafts. Returns {name: text}.

        Failed downloads are simply omitted from the result.
        """
        results: dict[str, str] = {}
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Downloading draft texts...", total=len(drafts))
            for draft in drafts:
                text = self.download_full_text(draft)
                if text:
                    results[draft.name] = text
                progress.advance(task)
                time_mod.sleep(self.config.fetch_delay)
        console.print(f"Downloaded [bold green]{len(results)}[/] / {len(drafts)} texts")
        return results

    # --- Group resolution ---

    def resolve_group(self, group_uri: str) -> str:
        """Resolve a group API URI to a group acronym/name. Returns "" on failure."""
        if not group_uri:
            return ""
        if group_uri in self._group_cache:
            return self._group_cache[group_uri]
        try:
            resp = self.client.get(
                f"https://datatracker.ietf.org{group_uri}", params={"format": "json"}
            )
            resp.raise_for_status()
            # Parse the body once (previously resp.json() was called twice,
            # decoding the same payload two times).
            data = resp.json()
            name = data.get("acronym", data.get("name", ""))
            self._group_cache[group_uri] = name
            time_mod.sleep(self.config.fetch_delay)
            return name
        except httpx.HTTPError:
            return ""

    # --- Helpers ---

    def _api_obj_to_draft(self, obj: dict) -> Draft:
        """Map a raw Datatracker document object onto a Draft.

        Field access is defensive: keys may be absent, and some (like
        "abstract") can be present but null.
        """
        return Draft(
            name=obj.get("name", ""),
            rev=obj.get("rev", "00"),
            title=obj.get("title", ""),
            # "abstract" can be explicitly null in the API response, which
            # would previously crash on .strip().
            abstract=(obj.get("abstract") or "").strip(),
            time=obj.get("time", ""),
            dt_id=obj.get("id"),
            pages=obj.get("pages"),
            words=obj.get("words"),
            group=None,  # Resolved lazily
            group_uri=obj.get("group", ""),
            expires=obj.get("expires"),
            ad=obj.get("ad"),
            shepherd=obj.get("shepherd"),
            states=[s for s in (obj.get("states") or []) if isinstance(s, str)],
            fetched_at=datetime.now(timezone.utc).isoformat(),
        )
|
||||
72
src/ietf_analyzer/models.py
Normal file
72
src/ietf_analyzer/models.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Data models for drafts, ratings, and categories."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@dataclass
|
||||
class Draft:
|
||||
name: str # e.g. "draft-zheng-dispatch-agent-identity-management"
|
||||
rev: str # e.g. "00"
|
||||
title: str
|
||||
abstract: str
|
||||
time: str # ISO datetime from API
|
||||
dt_id: int | None = None # Datatracker document ID
|
||||
pages: int | None = None
|
||||
words: int | None = None
|
||||
group: str | None = None # Working group name (resolved)
|
||||
group_uri: str | None = None # Raw API URI
|
||||
expires: str | None = None
|
||||
ad: str | None = None # Area director URI
|
||||
shepherd: str | None = None
|
||||
states: list[str] = field(default_factory=list)
|
||||
full_text: str | None = None
|
||||
categories: list[str] = field(default_factory=list)
|
||||
tags: list[str] = field(default_factory=list)
|
||||
fetched_at: str | None = None
|
||||
|
||||
@property
|
||||
def text_url(self) -> str:
|
||||
return f"https://www.ietf.org/archive/id/{self.name}-{self.rev}.txt"
|
||||
|
||||
@property
|
||||
def datatracker_url(self) -> str:
|
||||
return f"https://datatracker.ietf.org/doc/{self.name}/"
|
||||
|
||||
@property
|
||||
def date(self) -> str:
|
||||
"""Return just the date portion of time."""
|
||||
if self.time:
|
||||
return self.time[:10]
|
||||
return ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class Rating:
|
||||
draft_name: str
|
||||
novelty: int # 1-5
|
||||
maturity: int # 1-5
|
||||
overlap: int # 1-5 (5 = highly overlapping with others)
|
||||
momentum: int # 1-5
|
||||
relevance: int # 1-5
|
||||
summary: str # 2-4 sentence AI summary
|
||||
novelty_note: str = ""
|
||||
maturity_note: str = ""
|
||||
overlap_note: str = ""
|
||||
momentum_note: str = ""
|
||||
relevance_note: str = ""
|
||||
categories: list[str] = field(default_factory=list)
|
||||
rated_at: str | None = None
|
||||
|
||||
@property
|
||||
def composite_score(self) -> float:
|
||||
"""Weighted composite: novelty and relevance matter most."""
|
||||
return (
|
||||
self.novelty * 0.30
|
||||
+ self.relevance * 0.25
|
||||
+ self.maturity * 0.20
|
||||
+ self.momentum * 0.15
|
||||
+ (6 - self.overlap) * 0.10 # Invert: less overlap = better
|
||||
)
|
||||
177
src/ietf_analyzer/reports.py
Normal file
177
src/ietf_analyzer/reports.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""Markdown report generation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from .config import Config
|
||||
from .db import Database
|
||||
from .models import Draft, Rating
|
||||
|
||||
STAR = {1: "\u2581", 2: "\u2583", 3: "\u2585", 4: "\u2587", 5: "\u2588"}
|
||||
|
||||
|
||||
def _bar(score: int) -> str:
|
||||
return STAR.get(score, "?")
|
||||
|
||||
|
||||
def _score_str(rating: Rating) -> str:
|
||||
return f"{rating.composite_score:.1f}"
|
||||
|
||||
|
||||
class Reporter:
    """Renders markdown reports from the database into <data_dir>/reports/.

    Every report method writes a .md file and returns the written path
    as a string.
    """

    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self.output_dir = Path(self.config.data_dir) / "reports"
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def overview(self) -> str:
        """Generate a sortable overview table of all rated drafts.

        Rows arrive pre-sorted by composite score (drafts_with_ratings
        orders in SQL). Writes overview.md and returns its path.
        """
        pairs = self.db.drafts_with_ratings()
        total = self.db.count_drafts()
        now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

        lines = [
            f"# IETF AI/Agent Draft Overview",
            f"*Generated {now} — {len(pairs)} rated / {total} tracked drafts*\n",
            "| Score | Draft | Date | N | M | O | Mom | R | Summary |",
            "|------:|-------|------|:-:|:-:|:-:|:---:|:-:|---------|",
        ]

        for draft, rating in pairs:
            # Each dimension is rendered as a block glyph (see _bar); the
            # summary is truncated to keep table rows readable.
            name_link = f"[{draft.name}]({draft.datatracker_url})"
            lines.append(
                f"| {_score_str(rating)} | {name_link} | {draft.date} "
                f"| {_bar(rating.novelty)} | {_bar(rating.maturity)} "
                f"| {_bar(rating.overlap)} | {_bar(rating.momentum)} "
                f"| {_bar(rating.relevance)} | {rating.summary[:80]}... |"
            )

        lines.append("\n*N=Novelty, M=Maturity, O=Overlap, Mom=Momentum, R=Relevance (block height = score 1-5)*")

        report = "\n".join(lines)
        path = self.output_dir / "overview.md"
        path.write_text(report)
        return str(path)

    def landscape(self) -> str:
        """Generate a category-grouped landscape report.

        A draft appears once per category it belongs to; drafts with no
        categories go under "Uncategorized". Writes landscape.md.
        """
        pairs = self.db.drafts_with_ratings()
        now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

        # Group by category
        by_cat: dict[str, list[tuple[Draft, Rating]]] = {}
        for draft, rating in pairs:
            cats = rating.categories or ["Uncategorized"]
            for cat in cats:
                by_cat.setdefault(cat, []).append((draft, rating))

        lines = [
            f"# IETF AI/Agent Draft Landscape",
            f"*Generated {now}*\n",
        ]

        # Categories alphabetically; within each, best composite score first.
        for cat in sorted(by_cat.keys()):
            items = by_cat[cat]
            items.sort(key=lambda x: x[1].composite_score, reverse=True)
            lines.append(f"\n## {cat} ({len(items)} drafts)\n")
            for draft, rating in items:
                lines.append(
                    f"- **[{draft.name}]({draft.datatracker_url})** "
                    f"(score: {_score_str(rating)}) — {rating.summary[:100]}"
                )

        report = "\n".join(lines)
        path = self.output_dir / "landscape.md"
        path.write_text(report)
        return str(path)

    def draft_detail(self, draft_name: str) -> str:
        """Generate a detailed report for a single draft.

        Returns "" when the draft is unknown; otherwise writes
        <draft_name>.md (with the rating section when one exists).
        """
        draft = self.db.get_draft(draft_name)
        if draft is None:
            return ""

        rating = self.db.get_rating(draft_name)

        lines = [
            f"# {draft.title}",
            f"**{draft.name}** rev {draft.rev}\n",
            f"- **Date:** {draft.date}",
            f"- **Pages:** {draft.pages or '?'}",
            f"- **Group:** {draft.group or 'individual'}",
            f"- **Datatracker:** {draft.datatracker_url}",
            f"- **Text:** {draft.text_url}\n",
            f"## Abstract\n{draft.abstract}\n",
        ]

        if rating:
            lines.extend([
                f"## AI Assessment (score: {_score_str(rating)})\n",
                f"**Summary:** {rating.summary}\n",
                f"| Dimension | Score | Notes |",
                f"|-----------|:-----:|-------|",
                f"| Novelty | {rating.novelty}/5 | {rating.novelty_note} |",
                f"| Maturity | {rating.maturity}/5 | {rating.maturity_note} |",
                f"| Overlap | {rating.overlap}/5 | {rating.overlap_note} |",
                f"| Momentum | {rating.momentum}/5 | {rating.momentum_note} |",
                f"| Relevance | {rating.relevance}/5 | {rating.relevance_note} |",
                f"\n**Categories:** {', '.join(rating.categories) if rating.categories else 'none'}",
            ])
        else:
            lines.append("*Not yet rated — run `ietf analyze` to generate a rating.*")

        report = "\n".join(lines)
        path = self.output_dir / f"{draft_name}.md"
        path.write_text(report)
        return str(path)

    def digest(self, since_days: int = 7) -> str:
        """Generate a digest of recently fetched/updated drafts.

        "Recent" means fetched_at within the last since_days; the
        comparison is a lexicographic string compare, which is valid here
        because both timestamps come from datetime.isoformat() in UTC.
        Writes digest.md.
        """
        from datetime import timedelta
        cutoff = (datetime.now(timezone.utc) - timedelta(days=since_days)).isoformat()
        now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

        # Get recent drafts by fetched_at
        all_drafts = self.db.list_drafts(limit=500, order_by="fetched_at DESC")
        recent = [d for d in all_drafts if d.fetched_at and d.fetched_at >= cutoff]

        lines = [
            f"# Weekly Digest — IETF AI/Agent Drafts",
            f"*Generated {now} — showing drafts fetched in last {since_days} days*\n",
            f"**{len(recent)} drafts** in this period\n",
        ]

        if not recent:
            lines.append("No new drafts found. Run `ietf fetch` to update.")
        else:
            # Split into rated and unrated
            rated = []
            unrated = []
            for d in recent:
                r = self.db.get_rating(d.name)
                if r:
                    rated.append((d, r))
                else:
                    unrated.append(d)

            if rated:
                rated.sort(key=lambda x: x[1].composite_score, reverse=True)
                lines.append("## Top Rated New Drafts\n")
                for draft, rating in rated[:10]:
                    lines.append(
                        f"1. **[{draft.name}]({draft.datatracker_url})** "
                        f"(score: {_score_str(rating)}) — {rating.summary[:120]}"
                    )

            if unrated:
                lines.append(f"\n## Awaiting Analysis ({len(unrated)} drafts)\n")
                for d in unrated[:20]:
                    lines.append(f"- [{d.name}]({d.datatracker_url}) — {d.title}")

        report = "\n".join(lines)
        path = self.output_dir / "digest.md"
        path.write_text(report)
        return str(path)
|
||||
Reference in New Issue
Block a user