Run pipeline, write Post 08, commit untracked files

Pipeline: - Extract ideas for 38 new drafts → 462 ideas total - Convergence analysis: 132 cross-org convergent ideas (33% rate) - Fetch authors for 102 drafts → 709 authors (up from 403) - Refresh gap analysis: 12 gaps across full 474-draft corpus - Update verified counts with new totals Post 08: - Complete rewrite of "Agents Building the Agent Analysis" (2,953 words) - Covers 3 phases: writing team → review cycle → fix cycle - Meta-irony table mapping team coordination to IETF gap names - Specific examples from dev journal (SQL injection, consent conflation, ideas mismatch) Untracked files committed: - scripts/: backfill-wg-names, classify-unrated, compare-classifiers, download-relevant-text, run-webui - src/ietf_analyzer/classifier.py: two-stage Ollama classifier - src/webui/: analytics (GDPR-compliant), auth, obsidian_export - tests/test_obsidian_export.py (10 tests) - data/reports/: wg-analysis, generated draft for gap #37 Housekeeping: - .gitignore: exclude LaTeX artifacts, stale DBs, analytics.db Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 15:31:30 +01:00
parent 20c45a7eba
commit e247bfef8f
19 changed files with 2758 additions and 586 deletions
--- a/scripts/backfill-wg-names.py
+++ b/scripts/backfill-wg-names.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""Backfill working group names by resolving group_uri from Datatracker API."""
+
+import sqlite3
+import time
+import httpx
+
+DB_PATH = "data/drafts.db"
+
+conn = sqlite3.connect(DB_PATH)
+conn.row_factory = sqlite3.Row
+
+# Get distinct group_uris that don't have a group name yet
+rows = conn.execute("""
+    SELECT DISTINCT group_uri FROM drafts
+    WHERE group_uri IS NOT NULL AND group_uri != ''
+      AND ("group" IS NULL OR "group" = '')
+""").fetchall()
+
+uris = [r["group_uri"] for r in rows]
+print(f"Resolving {len(uris)} unique group URIs...")
+
+client = httpx.Client(timeout=30, follow_redirects=True)
+resolved = {}
+
+for uri in uris:
+    try:
+        resp = client.get(f"https://datatracker.ietf.org{uri}", params={"format": "json"})
+        resp.raise_for_status()
+        data = resp.json()
+        acronym = data.get("acronym", "")
+        name = data.get("name", "")
+        resolved[uri] = acronym or name or ""
+        print(f"  {uri} -> {resolved[uri]} ({name})")
+        time.sleep(0.3)
+    except Exception as e:
+        print(f"  {uri} -> ERROR: {e}")
+        resolved[uri] = ""
+
+client.close()
+
+# Update the database
+for uri, group_name in resolved.items():
+    if group_name:
+        conn.execute(
+            'UPDATE drafts SET "group" = ? WHERE group_uri = ?',
+            (group_name, uri),
+        )
+
+conn.commit()
+
+# Show summary
+rows = conn.execute("""
+    SELECT "group", COUNT(*) as cnt FROM drafts
+    WHERE "group" IS NOT NULL AND "group" != ''
+    GROUP BY "group" ORDER BY cnt DESC
+""").fetchall()
+
+print(f"\nWorking groups resolved ({len(rows)} groups):")
+for r in rows:
+    print(f"  {r[0]:30s} {r[1]} drafts")
+
+total = conn.execute('SELECT COUNT(*) FROM drafts WHERE "group" IS NOT NULL AND "group" != ""').fetchone()[0]
+none_count = conn.execute('SELECT COUNT(*) FROM drafts WHERE "group" IS NULL OR "group" = ""').fetchone()[0]
+print(f"\nTotal with WG: {total}, individual/unresolved: {none_count}")
+conn.close()
--- a/scripts/classify-unrated.py
+++ b/scripts/classify-unrated.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+"""Classify unrated drafts using Ollama two-stage filter."""
+
+import sqlite3
+import sys
+sys.path.insert(0, "src")
+
+from ietf_analyzer.classifier import Classifier
+from ietf_analyzer.config import Config
+
+cfg = Config.load()
+conn = sqlite3.connect(cfg.db_path)
+conn.row_factory = sqlite3.Row
+
+# Get unrated drafts
+rows = conn.execute("""
+    SELECT name, title, abstract, source FROM drafts
+    WHERE name NOT IN (SELECT draft_name FROM ratings)
+    ORDER BY source, name
+""").fetchall()
+
+drafts = [dict(r) for r in rows]
+print(f"Classifying {len(drafts)} unrated drafts...\n")
+
+with Classifier(cfg) as clf:
+    relevant, irrelevant = clf.classify_batch(drafts, verbose=True)
+
+print(f"\n--- RELEVANT ({len(relevant)}) ---")
+for d in relevant:
+    print(f"  [{d['source']}] {d['name']}")
+    print(f"    {d['title'][:100]}")
+
+print(f"\n--- IRRELEVANT ({len(irrelevant)}) ---")
+for d in irrelevant:
+    print(f"  [{d['source']}] {d['name']}")
+    print(f"    {d['title'][:100]}")
+
+print(f"\nSummary: {len(relevant)} relevant, {len(irrelevant)} irrelevant out of {len(drafts)}")
+conn.close()
--- a/scripts/compare-classifiers.py
+++ b/scripts/compare-classifiers.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""Compare Ollama classifier vs Claude ratings to find disagreements."""
+
+import sqlite3
+import sys
+sys.path.insert(0, "src")
+
+from ietf_analyzer.classifier import Classifier
+from ietf_analyzer.config import Config
+
+cfg = Config.load()
+conn = sqlite3.connect(cfg.db_path)
+conn.row_factory = sqlite3.Row
+
+# Get all rated drafts with their Claude ratings
+rows = conn.execute("""
+    SELECT d.name, d.title, d.abstract, r.relevance, r.false_positive,
+           r.novelty, r.maturity, r.overlap, r.momentum,
+           (r.novelty + r.maturity + (5 - r.overlap) + r.momentum + r.relevance) / 5.0 as composite
+    FROM drafts d JOIN ratings r ON d.name = r.draft_name
+    WHERE d.abstract IS NOT NULL AND d.abstract != ''
+    ORDER BY d.name
+""").fetchall()
+
+print(f"Comparing Ollama classifier vs Claude ratings on {len(rows)} drafts...\n")
+
+with Classifier(cfg) as clf:
+    agree = 0
+    disagree_ollama_yes_claude_no = []  # Ollama says relevant, Claude says FP
+    disagree_ollama_no_claude_yes = []  # Ollama says irrelevant, Claude says relevant
+
+    for i, r in enumerate(rows):
+        is_rel, sim, method = clf.classify(r["title"], r["abstract"])
+
+        # Claude's view: false_positive=1 OR relevance<=2 means "not really relevant"
+        claude_relevant = not r["false_positive"] and r["relevance"] >= 3
+
+        if is_rel == claude_relevant:
+            agree += 1
+        elif is_rel and not claude_relevant:
+            disagree_ollama_yes_claude_no.append({
+                "name": r["name"], "title": r["title"][:60],
+                "sim": sim, "method": method,
+                "relevance": r["relevance"], "fp": r["false_positive"],
+                "composite": r["composite"],
+            })
+        else:
+            disagree_ollama_no_claude_yes.append({
+                "name": r["name"], "title": r["title"][:60],
+                "sim": sim, "method": method,
+                "relevance": r["relevance"], "fp": r["false_positive"],
+                "composite": r["composite"],
+            })
+
+        if (i + 1) % 50 == 0:
+            print(f"  Processed {i+1}/{len(rows)}...")
+
+print(f"\n{'='*70}")
+print(f"AGREEMENT: {agree}/{len(rows)} ({100*agree/len(rows):.1f}%)")
+print(f"{'='*70}")
+
+print(f"\nOllama=RELEVANT but Claude=NOT relevant ({len(disagree_ollama_yes_claude_no)}):")
+print(f"  (These are cases where Ollama wastes Claude tokens on irrelevant drafts)")
+for d in sorted(disagree_ollama_yes_claude_no, key=lambda x: x["sim"], reverse=True)[:15]:
+    fp_label = " [FP]" if d["fp"] else ""
+    print(f"  sim={d['sim']:.3f} ({d['method']:18s}) rel={d['relevance']}{fp_label} | {d['name']}")
+    print(f"    {d['title']}")
+
+print(f"\nOllama=IRRELEVANT but Claude=RELEVANT ({len(disagree_ollama_no_claude_yes)}):")
+print(f"  (These are cases where Ollama would have incorrectly filtered out good drafts)")
+for d in sorted(disagree_ollama_no_claude_yes, key=lambda x: x["relevance"], reverse=True)[:15]:
+    print(f"  sim={d['sim']:.3f} ({d['method']:18s}) rel={d['relevance']} comp={d['composite']:.1f} | {d['name']}")
+    print(f"    {d['title']}")
+
+# Summary stats
+total_fp_by_claude = sum(1 for r in rows if r["false_positive"] or r["relevance"] <= 2)
+total_relevant_by_claude = len(rows) - total_fp_by_claude
+print(f"\n{'='*70}")
+print(f"Claude thinks: {total_relevant_by_claude} relevant, {total_fp_by_claude} not relevant")
+print(f"Ollama would let through: {agree + len(disagree_ollama_yes_claude_no) - len(disagree_ollama_no_claude_yes)} (saves {len(disagree_ollama_no_claude_yes) - len(disagree_ollama_yes_claude_no)} Claude calls)")
+print(f"\nToken savings if Ollama pre-filters:")
+print(f"  Correctly rejected: {agree - total_relevant_by_claude + len(rows) - agree - len(disagree_ollama_yes_claude_no)} drafts")
+print(f"  Incorrectly rejected (missed): {len(disagree_ollama_no_claude_yes)} drafts")
+print(f"  Incorrectly passed (wasted): {len(disagree_ollama_yes_claude_no)} drafts")
+
+conn.close()
--- a/scripts/download-relevant-text.py
+++ b/scripts/download-relevant-text.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+"""Download full text for the 9 classifier-relevant unrated drafts."""
+
+import sqlite3
+import time
+import sys
+sys.path.insert(0, "src")
+
+import httpx
+from ietf_analyzer.config import Config
+
+cfg = Config.load()
+conn = sqlite3.connect(cfg.db_path)
+conn.row_factory = sqlite3.Row
+
+# The 9 relevant drafts from classifier
+relevant_names = [
+    "draft-bondar-wca",
+    "draft-latour-pre-registration",
+    "draft-li-trustworthy-routing-discovery",
+    "draft-scrm-aiproto-usecases",
+    "draft-song-dmsc-problem-statement",
+    "draft-wiethuechter-drip-det-moc",
+    "draft-wiethuechter-drip-det-tada",
+    "draft-zzn-dvs",
+    "w3c-cuap",
+]
+
+client = httpx.Client(timeout=30, follow_redirects=True)
+
+for name in relevant_names:
+    row = conn.execute("SELECT name, rev, source, source_url, full_text FROM drafts WHERE name=?", (name,)).fetchone()
+    if not row:
+        print(f"  SKIP {name}: not in DB")
+        continue
+    if row["full_text"]:
+        print(f"  SKIP {name}: already has text")
+        continue
+
+    if row["source"] == "w3c":
+        url = row["source_url"] or ""
+        if not url:
+            print(f"  SKIP {name}: no source_url for W3C doc")
+            continue
+    else:
+        rev = row["rev"] or "00"
+        url = f"https://www.ietf.org/archive/id/{name}-{rev}.txt"
+
+    print(f"  Fetching {name} from {url}...")
+    try:
+        resp = client.get(url)
+        if resp.status_code == 200:
+            text = resp.text[:500000]  # cap at 500K
+            conn.execute("UPDATE drafts SET full_text=? WHERE name=?", (text, name))
+            conn.commit()
+            print(f"    OK ({len(text)} chars)")
+        else:
+            print(f"    FAIL: HTTP {resp.status_code}")
+    except Exception as e:
+        print(f"    ERROR: {e}")
+    time.sleep(0.5)
+
+client.close()
+conn.close()
+print("\nDone.")
--- a/scripts/run-webui.sh
+++ b/scripts/run-webui.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Start the IETF Draft Analyzer Web Dashboard
+#
+# Usage:
+#   ./scripts/run-webui.sh          # Production (admin disabled)
+#   ./scripts/run-webui.sh --dev    # Development (admin enabled)
+cd "$(dirname "$0")/.."
+python src/webui/app.py "$@"