"""Author network — fetch authors from Datatracker, build collaboration graph.""" from __future__ import annotations import time as time_mod from datetime import datetime, timezone import httpx from rich.console import Console from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn from .config import Config from .db import Database from .models import Author API_BASE = "https://datatracker.ietf.org/api/v1" console = Console() class AuthorNetwork: def __init__(self, config: Config | None = None, db: Database | None = None): self.config = config or Config.load() self.db = db or Database(self.config) self.client = httpx.Client(timeout=30, follow_redirects=True) self._person_cache: dict[int, Author] = {} def close(self) -> None: self.client.close() def _extract_person_id(self, person_uri: str) -> int | None: """Extract person_id from a URI like /api/v1/person/person/12345/.""" if not person_uri: return None parts = person_uri.strip("/").split("/") try: return int(parts[-1]) except (ValueError, IndexError): return None def fetch_person(self, person_id: int) -> Author | None: """Fetch a person's details from Datatracker.""" if person_id in self._person_cache: return self._person_cache[person_id] try: resp = self.client.get( f"{API_BASE}/person/person/{person_id}/", params={"format": "json"}, ) resp.raise_for_status() data = resp.json() author = Author( person_id=person_id, name=data.get("name", ""), ascii_name=data.get("ascii", ""), affiliation="", # Will be set from documentauthor resource_uri=data.get("resource_uri", ""), fetched_at=datetime.now(timezone.utc).isoformat(), ) self._person_cache[person_id] = author time_mod.sleep(self.config.fetch_delay) return author except httpx.HTTPError as e: console.print(f"[dim]Could not fetch person {person_id}: {e}[/]") return None def fetch_authors_for_draft(self, draft_name: str) -> list[tuple[Author, int, str]]: """Fetch authors for a single draft. Returns [(Author, order, affiliation)].""" try: resp = self.client.get( f"{API_BASE}/doc/documentauthor/", params={"document__name": draft_name, "format": "json", "limit": 50}, ) resp.raise_for_status() data = resp.json() except httpx.HTTPError as e: console.print(f"[dim]Could not fetch authors for {draft_name}: {e}[/]") return [] results: list[tuple[Author, int, str]] = [] for obj in data.get("objects", []): person_uri = obj.get("person", "") person_id = self._extract_person_id(person_uri) if person_id is None: continue affiliation = obj.get("affiliation", "") order = obj.get("order", 1) author = self.fetch_person(person_id) if author is None: continue # Use the affiliation from the document author record author_with_aff = Author( person_id=author.person_id, name=author.name, ascii_name=author.ascii_name, affiliation=affiliation or author.affiliation, resource_uri=author.resource_uri, fetched_at=author.fetched_at, ) results.append((author_with_aff, order, affiliation)) time_mod.sleep(self.config.fetch_delay) return results def fetch_all_authors(self, limit: int = 500) -> int: """Fetch authors for all drafts missing author data.""" missing = self.db.drafts_without_authors(limit=limit) if not missing: console.print("All drafts already have author data.") return 0 count = 0 with Progress( SpinnerColumn(), TextColumn("[progress.description]{task.description}"), BarColumn(), MofNCompleteColumn(), console=console, ) as progress: task = progress.add_task("Fetching authors...", total=len(missing)) for draft_name in missing: progress.update(task, description=f"Authors: {draft_name.split('-')[-1][:15]}") authors = self.fetch_authors_for_draft(draft_name) for author, order, affiliation in authors: self.db.upsert_author(author) self.db.upsert_draft_author(draft_name, author.person_id, order, affiliation) if authors: count += 1 progress.advance(task) console.print(f"Fetched authors for [bold green]{count}[/] drafts " f"({self.db.author_count()} unique authors)") return count