v0.2.0: visualizations, interactive browser, arXiv paper, gap analysis
New features:
- 12 interactive visualizations (ietf viz): t-SNE landscape, similarity heatmap, score distributions, timeline, bubble explorer, radar charts, author network graph, category treemap, quality vs overlap, org bar chart, ideas chart, and interactive draft browser
- Interactive draft browser (browser.html): filterable by category, keyword, and score sliders, with a sortable table and expandable detail rows
- arXiv paper (paper/main.tex): 13-page manuscript with all findings
- Gap analysis: 12 identified under-addressed areas
- Author network: collaboration graph, org contributions, cross-org analysis
- Draft generation from gaps (ietf draft-gen)
- Auto-load .env for API keys (python-dotenv)

New modules: visualize.py, authors.py, draftgen.py
New reports: timeline, overlap-matrix, authors, gaps
New deps: plotly, matplotlib, seaborn, scipy, scikit-learn, networkx, python-dotenv

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
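The .env auto-load called out above normally amounts to one call at startup via python-dotenv's standard API. A minimal sketch, assuming a call site like the package entry point (the exact location and key name are not shown in this diff):

# Minimal sketch of the .env auto-load (assumed call site; standard python-dotenv API).
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory if present; does not override existing env vars
api_key = os.environ.get("LLM_API_KEY")  # placeholder variable name; the real key name isn't shown here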
src/ietf_analyzer/authors.py (new file, 137 lines)
@@ -0,0 +1,137 @@
"""Author network — fetch authors from Datatracker, build collaboration graph."""

from __future__ import annotations

import time as time_mod
from datetime import datetime, timezone

import httpx
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn

from .config import Config
from .db import Database
from .models import Author

API_BASE = "https://datatracker.ietf.org/api/v1"

console = Console()


class AuthorNetwork:
    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self.client = httpx.Client(timeout=30, follow_redirects=True)
        self._person_cache: dict[int, Author] = {}

    def close(self) -> None:
        self.client.close()

    def _extract_person_id(self, person_uri: str) -> int | None:
        """Extract person_id from a URI like /api/v1/person/person/12345/."""
        if not person_uri:
            return None
        parts = person_uri.strip("/").split("/")
        try:
            return int(parts[-1])
        except (ValueError, IndexError):
            return None

    def fetch_person(self, person_id: int) -> Author | None:
        """Fetch a person's details from Datatracker."""
        if person_id in self._person_cache:
            return self._person_cache[person_id]

        try:
            resp = self.client.get(
                f"{API_BASE}/person/person/{person_id}/",
                params={"format": "json"},
            )
            resp.raise_for_status()
            data = resp.json()
            author = Author(
                person_id=person_id,
                name=data.get("name", ""),
                ascii_name=data.get("ascii", ""),
                affiliation="",  # Will be set from documentauthor
                resource_uri=data.get("resource_uri", ""),
                fetched_at=datetime.now(timezone.utc).isoformat(),
            )
            self._person_cache[person_id] = author
            time_mod.sleep(self.config.fetch_delay)
            return author
        except httpx.HTTPError as e:
            console.print(f"[dim]Could not fetch person {person_id}: {e}[/]")
            return None

    def fetch_authors_for_draft(self, draft_name: str) -> list[tuple[Author, int, str]]:
        """Fetch authors for a single draft. Returns [(Author, order, affiliation)]."""
        try:
            resp = self.client.get(
                f"{API_BASE}/doc/documentauthor/",
                params={"document__name": draft_name, "format": "json", "limit": 50},
            )
            resp.raise_for_status()
            data = resp.json()
        except httpx.HTTPError as e:
            console.print(f"[dim]Could not fetch authors for {draft_name}: {e}[/]")
            return []

        results: list[tuple[Author, int, str]] = []
        for obj in data.get("objects", []):
            person_uri = obj.get("person", "")
            person_id = self._extract_person_id(person_uri)
            if person_id is None:
                continue

            affiliation = obj.get("affiliation", "")
            order = obj.get("order", 1)

            author = self.fetch_person(person_id)
            if author is None:
                continue

            # Use the affiliation from the document author record
            author_with_aff = Author(
                person_id=author.person_id,
                name=author.name,
                ascii_name=author.ascii_name,
                affiliation=affiliation or author.affiliation,
                resource_uri=author.resource_uri,
                fetched_at=author.fetched_at,
            )
            results.append((author_with_aff, order, affiliation))

        time_mod.sleep(self.config.fetch_delay)
        return results

    def fetch_all_authors(self, limit: int = 500) -> int:
        """Fetch authors for all drafts missing author data."""
        missing = self.db.drafts_without_authors(limit=limit)
        if not missing:
            console.print("All drafts already have author data.")
            return 0

        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Fetching authors...", total=len(missing))
            for draft_name in missing:
                progress.update(task, description=f"Authors: {draft_name.split('-')[-1][:15]}")
                authors = self.fetch_authors_for_draft(draft_name)
                for author, order, affiliation in authors:
                    self.db.upsert_author(author)
                    self.db.upsert_draft_author(draft_name, author.person_id, order, affiliation)
                if authors:
                    count += 1
                progress.advance(task)

        console.print(f"Fetched authors for [bold green]{count}[/] drafts "
                      f"({self.db.author_count()} unique authors)")
        return count
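A minimal usage sketch for the new module, assuming the `authors` report wires it up roughly like this (the CLI glue lives outside this file; only the calls shown in the diff above are used):

# Minimal usage sketch (assumed wiring; not the actual CLI entry point).
from ietf_analyzer.authors import AuthorNetwork

net = AuthorNetwork()  # loads Config and opens the project database
try:
    updated = net.fetch_all_authors(limit=100)  # fill in author data for up to 100 drafts
    print(f"Updated {updated} drafts")
finally:
    net.close()  # always release the shared httpx client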