Run pipeline, write Post 08, commit untracked files
Pipeline: - Extract ideas for 38 new drafts → 462 ideas total - Convergence analysis: 132 cross-org convergent ideas (33% rate) - Fetch authors for 102 drafts → 709 authors (up from 403) - Refresh gap analysis: 12 gaps across full 474-draft corpus - Update verified counts with new totals Post 08: - Complete rewrite of "Agents Building the Agent Analysis" (2,953 words) - Covers 3 phases: writing team → review cycle → fix cycle - Meta-irony table mapping team coordination to IETF gap names - Specific examples from dev journal (SQL injection, consent conflation, ideas mismatch) Untracked files committed: - scripts/: backfill-wg-names, classify-unrated, compare-classifiers, download-relevant-text, run-webui - src/ietf_analyzer/classifier.py: two-stage Ollama classifier - src/webui/: analytics (GDPR-compliant), auth, obsidian_export - tests/test_obsidian_export.py (10 tests) - data/reports/: wg-analysis, generated draft for gap #37 Housekeeping: - .gitignore: exclude LaTeX artifacts, stale DBs, analytics.db Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
66
scripts/backfill-wg-names.py
Normal file
66
scripts/backfill-wg-names.py
Normal file
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python3
"""Backfill working group names by resolving group_uri from Datatracker API."""

import sqlite3
import time

import httpx

DB_PATH = "data/drafts.db"

# Pause between Datatracker requests to stay polite to the API.
REQUEST_DELAY_S = 0.3


def _unresolved_group_uris(conn: sqlite3.Connection) -> list[str]:
    """Return distinct group_uris of drafts that still lack a group name."""
    rows = conn.execute("""
        SELECT DISTINCT group_uri FROM drafts
        WHERE group_uri IS NOT NULL AND group_uri != ''
          AND ("group" IS NULL OR "group" = '')
    """).fetchall()
    return [r["group_uri"] for r in rows]


def _resolve_uris(uris: list[str]) -> dict[str, str]:
    """Resolve each group URI against the Datatracker JSON API.

    Returns a mapping of uri -> acronym, falling back to the full group
    name, then "". Failed lookups map to "" so the updater skips them.
    """
    resolved: dict[str, str] = {}
    # Context manager guarantees the HTTP client is closed even on error.
    with httpx.Client(timeout=30, follow_redirects=True) as client:
        for uri in uris:
            try:
                resp = client.get(f"https://datatracker.ietf.org{uri}", params={"format": "json"})
                resp.raise_for_status()
                data = resp.json()
                acronym = data.get("acronym", "")
                name = data.get("name", "")
                resolved[uri] = acronym or name or ""
                print(f" {uri} -> {resolved[uri]} ({name})")
            except Exception as e:  # best-effort backfill: record and continue
                print(f" {uri} -> ERROR: {e}")
                resolved[uri] = ""
            finally:
                # Sleep on the error path too, so repeated failures don't
                # hammer the API (the original only slept after success).
                time.sleep(REQUEST_DELAY_S)
    return resolved


def _apply_updates(conn: sqlite3.Connection, resolved: dict[str, str]) -> None:
    """Write resolved group names back to the drafts table and commit."""
    for uri, group_name in resolved.items():
        if group_name:
            conn.execute(
                'UPDATE drafts SET "group" = ? WHERE group_uri = ?',
                (group_name, uri),
            )
    conn.commit()


def _print_summary(conn: sqlite3.Connection) -> None:
    """Print per-group draft counts and overall resolution totals."""
    rows = conn.execute("""
        SELECT "group", COUNT(*) as cnt FROM drafts
        WHERE "group" IS NOT NULL AND "group" != ''
        GROUP BY "group" ORDER BY cnt DESC
    """).fetchall()

    print(f"\nWorking groups resolved ({len(rows)} groups):")
    for r in rows:
        print(f" {r[0]:30s} {r[1]} drafts")

    # Use SQL single quotes for the empty-string literal: in SQLite, double
    # quotes denote identifiers and only fall back to string semantics via a
    # legacy compatibility quirk.
    total = conn.execute(
        "SELECT COUNT(*) FROM drafts WHERE \"group\" IS NOT NULL AND \"group\" != ''"
    ).fetchone()[0]
    none_count = conn.execute(
        "SELECT COUNT(*) FROM drafts WHERE \"group\" IS NULL OR \"group\" = ''"
    ).fetchone()[0]
    print(f"\nTotal with WG: {total}, individual/unresolved: {none_count}")


def main() -> None:
    """Resolve missing working-group names and report a summary."""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    try:
        uris = _unresolved_group_uris(conn)
        print(f"Resolving {len(uris)} unique group URIs...")
        resolved = _resolve_uris(uris)
        _apply_updates(conn, resolved)
        _print_summary(conn)
    finally:
        # Close the DB connection even if resolution or the summary fails.
        conn.close()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user