#!/usr/bin/env python3 """Backfill working group names by resolving group_uri from Datatracker API.""" import sqlite3 import time import httpx DB_PATH = "data/drafts.db" conn = sqlite3.connect(DB_PATH) conn.row_factory = sqlite3.Row # Get distinct group_uris that don't have a group name yet rows = conn.execute(""" SELECT DISTINCT group_uri FROM drafts WHERE group_uri IS NOT NULL AND group_uri != '' AND ("group" IS NULL OR "group" = '') """).fetchall() uris = [r["group_uri"] for r in rows] print(f"Resolving {len(uris)} unique group URIs...") client = httpx.Client(timeout=30, follow_redirects=True) resolved = {} for uri in uris: try: resp = client.get(f"https://datatracker.ietf.org{uri}", params={"format": "json"}) resp.raise_for_status() data = resp.json() acronym = data.get("acronym", "") name = data.get("name", "") resolved[uri] = acronym or name or "" print(f" {uri} -> {resolved[uri]} ({name})") time.sleep(0.3) except Exception as e: print(f" {uri} -> ERROR: {e}") resolved[uri] = "" client.close() # Update the database for uri, group_name in resolved.items(): if group_name: conn.execute( 'UPDATE drafts SET "group" = ? WHERE group_uri = ?', (group_name, uri), ) conn.commit() # Show summary rows = conn.execute(""" SELECT "group", COUNT(*) as cnt FROM drafts WHERE "group" IS NOT NULL AND "group" != '' GROUP BY "group" ORDER BY cnt DESC """).fetchall() print(f"\nWorking groups resolved ({len(rows)} groups):") for r in rows: print(f" {r[0]:30s} {r[1]} drafts") total = conn.execute('SELECT COUNT(*) FROM drafts WHERE "group" IS NOT NULL AND "group" != ""').fetchone()[0] none_count = conn.execute('SELECT COUNT(*) FROM drafts WHERE "group" IS NULL OR "group" = ""').fetchone()[0] print(f"\nTotal with WG: {total}, individual/unresolved: {none_count}") conn.close()