Files
ietf-draft-analyzer/src/ietf_analyzer/authors.py
Chris Nennemann be9cf9c5d9 v0.2.0: visualizations, interactive browser, arXiv paper, gap analysis
New features:
- 12 interactive visualizations (ietf viz): t-SNE landscape, similarity
  heatmap, score distributions, timeline, bubble explorer, radar charts,
  author network graph, category treemap, quality vs overlap, org bar chart,
  ideas chart, and interactive draft browser
- Interactive draft browser (browser.html): filterable by category, keyword,
  score sliders with sortable table and expandable detail rows
- arXiv paper (paper/main.tex): 13-page manuscript with all findings
- Gap analysis: 12 identified under-addressed areas
- Author network: collaboration graph, org contributions, cross-org analysis
- Draft generation from gaps (ietf draft-gen)
- Auto-load .env for API keys (python-dotenv)

New modules: visualize.py, authors.py, draftgen.py
New reports: timeline, overlap-matrix, authors, gaps
New deps: plotly, matplotlib, seaborn, scipy, scikit-learn, networkx, python-dotenv

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 13:37:55 +01:00

138 lines
5.1 KiB
Python

"""Author network — fetch authors from Datatracker, build collaboration graph."""
from __future__ import annotations
import time as time_mod
from datetime import datetime, timezone
import httpx
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
from .config import Config
from .db import Database
from .models import Author
# Base URL of the IETF Datatracker REST API (v1), used for all requests below.
API_BASE = "https://datatracker.ietf.org/api/v1"
# Shared Rich console for all status/progress output in this module.
console = Console()
class AuthorNetwork:
    """Fetch draft authors from the IETF Datatracker and store them in the DB.

    Wraps a persistent ``httpx.Client`` and keeps an in-memory cache of
    fetched persons so an author shared by many drafts is requested at most
    once per session. Each real HTTP request is followed by a single
    ``config.fetch_delay`` sleep to stay polite to the Datatracker API.
    """

    def __init__(self, config: Config | None = None, db: Database | None = None):
        """Initialize the helper.

        Args:
            config: Analyzer configuration; loaded via ``Config.load()`` when omitted.
            db: Database handle; constructed from *config* when omitted.
        """
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self.client = httpx.Client(timeout=30, follow_redirects=True)
        # person_id -> Author; avoids re-fetching the same person for every draft.
        self._person_cache: dict[int, Author] = {}

    def close(self) -> None:
        """Release the underlying HTTP connection pool."""
        self.client.close()

    # Context-manager support so callers can write ``with AuthorNetwork() as net:``
    # and the HTTP client is closed even on error.
    def __enter__(self) -> "AuthorNetwork":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _extract_person_id(self, person_uri: str) -> int | None:
        """Extract person_id from a URI like /api/v1/person/person/12345/.

        Returns None for an empty URI or one whose last path segment is not
        an integer.
        """
        if not person_uri:
            return None
        parts = person_uri.strip("/").split("/")
        try:
            # NOTE: IndexError is unreachable (split always yields >= 1 part)
            # but is kept defensively.
            return int(parts[-1])
        except (ValueError, IndexError):
            return None

    def fetch_person(self, person_id: int) -> Author | None:
        """Fetch a person's details from Datatracker.

        Results are cached per session; a cache hit performs no HTTP request
        and no rate-limit sleep. Returns None (after logging) on HTTP errors.
        """
        if person_id in self._person_cache:
            return self._person_cache[person_id]
        try:
            resp = self.client.get(
                f"{API_BASE}/person/person/{person_id}/",
                params={"format": "json"},
            )
            resp.raise_for_status()
            data = resp.json()
            author = Author(
                person_id=person_id,
                name=data.get("name", ""),
                ascii_name=data.get("ascii", ""),
                affiliation="",  # Will be set from documentauthor
                resource_uri=data.get("resource_uri", ""),
                fetched_at=datetime.now(timezone.utc).isoformat(),
            )
            self._person_cache[person_id] = author
            # One polite delay per actual HTTP request.
            time_mod.sleep(self.config.fetch_delay)
            return author
        except httpx.HTTPError as e:
            console.print(f"[dim]Could not fetch person {person_id}: {e}[/]")
            return None

    def fetch_authors_for_draft(self, draft_name: str) -> list[tuple[Author, int, str]]:
        """Fetch authors for a single draft. Returns [(Author, order, affiliation)].

        The affiliation comes from the per-document author record (falling
        back to the person record's affiliation, which the current fetch
        leaves empty). Returns [] (after logging) when the documentauthor
        request fails.
        """
        try:
            resp = self.client.get(
                f"{API_BASE}/doc/documentauthor/",
                params={"document__name": draft_name, "format": "json", "limit": 50},
            )
            resp.raise_for_status()
            data = resp.json()
        except httpx.HTTPError as e:
            console.print(f"[dim]Could not fetch authors for {draft_name}: {e}[/]")
            return []
        # Rate-limit the documentauthor request itself; per-person requests
        # are already delayed inside fetch_person(), so no extra per-author
        # sleep is needed (the old per-loop sleep doubled the total wait).
        time_mod.sleep(self.config.fetch_delay)
        results: list[tuple[Author, int, str]] = []
        for obj in data.get("objects", []):
            person_uri = obj.get("person", "")
            person_id = self._extract_person_id(person_uri)
            if person_id is None:
                continue
            affiliation = obj.get("affiliation", "")
            order = obj.get("order", 1)
            author = self.fetch_person(person_id)
            if author is None:
                continue
            # Use the affiliation from the document author record; the cached
            # Author is not mutated so other drafts see their own affiliation.
            author_with_aff = Author(
                person_id=author.person_id,
                name=author.name,
                ascii_name=author.ascii_name,
                affiliation=affiliation or author.affiliation,
                resource_uri=author.resource_uri,
                fetched_at=author.fetched_at,
            )
            results.append((author_with_aff, order, affiliation))
        return results

    def fetch_all_authors(self, limit: int = 500) -> int:
        """Fetch authors for all drafts missing author data.

        Args:
            limit: Maximum number of drafts to process in this run.

        Returns:
            Number of drafts for which at least one author was stored.
        """
        missing = self.db.drafts_without_authors(limit=limit)
        if not missing:
            console.print("All drafts already have author data.")
            return 0
        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Fetching authors...", total=len(missing))
            for draft_name in missing:
                # Show only the trailing name segment, truncated, to keep the bar tidy.
                progress.update(task, description=f"Authors: {draft_name.split('-')[-1][:15]}")
                authors = self.fetch_authors_for_draft(draft_name)
                for author, order, affiliation in authors:
                    self.db.upsert_author(author)
                    self.db.upsert_draft_author(draft_name, author.person_id, order, affiliation)
                if authors:
                    count += 1
                progress.advance(task)
        console.print(f"Fetched authors for [bold green]{count}[/] drafts "
                      f"({self.db.author_count()} unique authors)")
        return count