v0.2.0: visualizations, interactive browser, arXiv paper, gap analysis

New features:
- 12 interactive visualizations (ietf viz): t-SNE landscape, similarity
  heatmap, score distributions, timeline, bubble explorer, radar charts,
  author network graph, category treemap, quality vs overlap, org bar chart,
  ideas chart, and interactive draft browser
- Interactive draft browser (browser.html): filterable by category, keyword,
  score sliders with sortable table and expandable detail rows
- arXiv paper (paper/main.tex): 13-page manuscript with all findings
- Gap analysis: 12 identified under-addressed areas
- Author network: collaboration graph, org contributions, cross-org analysis
- Draft generation from gaps (ietf draft-gen)
- Auto-load .env for API keys (python-dotenv)

New modules: visualize.py, authors.py, draftgen.py
New reports: timeline, overlap-matrix, authors, gaps
New deps: plotly, matplotlib, seaborn, scipy, scikit-learn, networkx, python-dotenv

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-28 13:37:55 +01:00
parent f44f9265bd
commit be9cf9c5d9
32 changed files with 4447 additions and 4 deletions

View File

@@ -10,7 +10,7 @@ from pathlib import Path
import numpy as np
from .config import Config
from .models import Draft, Rating
from .models import Author, Draft, Rating
SCHEMA = """
CREATE TABLE IF NOT EXISTS drafts (
@@ -76,6 +76,47 @@ CREATE VIRTUAL TABLE IF NOT EXISTS drafts_fts USING fts5(
content_rowid='rowid'
);
-- Authors (fetched from Datatracker)
CREATE TABLE IF NOT EXISTS authors (
person_id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
ascii_name TEXT,
affiliation TEXT DEFAULT '',
resource_uri TEXT,
fetched_at TEXT
);
CREATE TABLE IF NOT EXISTS draft_authors (
draft_name TEXT NOT NULL REFERENCES drafts(name),
person_id INTEGER NOT NULL REFERENCES authors(person_id),
author_order INTEGER DEFAULT 1,
affiliation TEXT DEFAULT '',
PRIMARY KEY (draft_name, person_id)
);
-- Extracted ideas
CREATE TABLE IF NOT EXISTS ideas (
id INTEGER PRIMARY KEY AUTOINCREMENT,
draft_name TEXT NOT NULL REFERENCES drafts(name),
title TEXT NOT NULL,
description TEXT NOT NULL,
idea_type TEXT DEFAULT '',
extracted_at TEXT
);
CREATE INDEX IF NOT EXISTS idx_ideas_draft ON ideas(draft_name);
-- Gap analysis results
CREATE TABLE IF NOT EXISTS gaps (
id INTEGER PRIMARY KEY AUTOINCREMENT,
topic TEXT NOT NULL,
description TEXT NOT NULL,
category TEXT DEFAULT '',
evidence TEXT DEFAULT '',
severity TEXT DEFAULT 'medium',
analyzed_at TEXT
);
-- Triggers to keep FTS index in sync
CREATE TRIGGER IF NOT EXISTS drafts_ai AFTER INSERT ON drafts BEGIN
INSERT INTO drafts_fts(rowid, name, title, abstract, full_text)
@@ -341,6 +382,189 @@ class Database:
).fetchone()
return (row[0], row[1])
# --- Authors ---
def upsert_author(self, author: Author) -> None:
    """Insert *author* into the authors table, or refresh the existing row.

    Uses SQLite UPSERT keyed on person_id, so re-fetching the same
    Datatracker person simply overwrites the stored metadata.
    """
    sql = """INSERT INTO authors (person_id, name, ascii_name, affiliation, resource_uri, fetched_at)
        VALUES (?, ?, ?, ?, ?, ?)
        ON CONFLICT(person_id) DO UPDATE SET
        name=excluded.name, ascii_name=excluded.ascii_name,
        affiliation=excluded.affiliation, resource_uri=excluded.resource_uri,
        fetched_at=excluded.fetched_at
        """
    values = (
        author.person_id,
        author.name,
        author.ascii_name,
        author.affiliation,
        author.resource_uri,
        author.fetched_at,
    )
    self.conn.execute(sql, values)
    self.conn.commit()
def upsert_draft_author(
    self, draft_name: str, person_id: int, order: int = 1, affiliation: str = ""
) -> None:
    """Link one author (person_id) to one draft.

    Re-linking an existing (draft, person) pair updates the stored
    author_order and affiliation instead of failing on the primary key.
    """
    sql = """INSERT INTO draft_authors (draft_name, person_id, author_order, affiliation)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(draft_name, person_id) DO UPDATE SET
        author_order=excluded.author_order, affiliation=excluded.affiliation
        """
    self.conn.execute(sql, (draft_name, person_id, order, affiliation))
    self.conn.commit()
def get_authors_for_draft(self, draft_name: str) -> list[Author]:
    """Return the authors of *draft_name* ordered by author_order.

    Builds Author objects from the joined authors/draft_authors rows.
    Nullable text columns are coalesced to "" so the fields stay strings.
    """
    rows = self.conn.execute(
        """SELECT a.* FROM authors a
        JOIN draft_authors da ON a.person_id = da.person_id
        WHERE da.draft_name = ?
        ORDER BY da.author_order""",
        (draft_name,),
    ).fetchall()
    # FIX: the previous code called r.get(...), which exists on dict rows but
    # not on sqlite3.Row; plain indexing works with either row factory (the
    # SELECT a.* guarantees every key is present).
    return [Author(
        person_id=r["person_id"], name=r["name"],
        ascii_name=r["ascii_name"] or "",
        affiliation=r["affiliation"] or "",
        resource_uri=r["resource_uri"] or "",
        fetched_at=r["fetched_at"],
    ) for r in rows]
def drafts_without_authors(self, limit: int = 500) -> list[str]:
    """Names of up to *limit* drafts with no rows in draft_authors yet.

    Used to drive incremental author fetching from the Datatracker.
    """
    cursor = self.conn.execute(
        """SELECT d.name FROM drafts d
        LEFT JOIN draft_authors da ON d.name = da.draft_name
        WHERE da.draft_name IS NULL
        LIMIT ?""",
        (limit,),
    )
    return [record["name"] for record in cursor.fetchall()]
def author_count(self) -> int:
    """Total number of rows in the authors table."""
    (total,) = self.conn.execute("SELECT COUNT(*) FROM authors").fetchone()
    return total
def top_authors(self, limit: int = 20) -> list[tuple[str, str, int, list[str]]]:
    """Return (name, affiliation, draft_count, [draft_names]).

    Authors are ranked by how many drafts they appear on; the draft names
    are transported out of SQL as a '||'-joined GROUP_CONCAT string.
    """
    rows = self.conn.execute(
        """SELECT a.name, a.affiliation, COUNT(da.draft_name) as cnt,
        GROUP_CONCAT(da.draft_name, '||') as drafts
        FROM authors a
        JOIN draft_authors da ON a.person_id = da.person_id
        GROUP BY a.person_id
        ORDER BY cnt DESC
        LIMIT ?""",
        (limit,),
    ).fetchall()
    ranked = []
    for rec in rows:
        joined = rec["drafts"]
        draft_names = joined.split("||") if joined else []
        ranked.append((rec["name"], rec["affiliation"], rec["cnt"], draft_names))
    return ranked
def top_orgs(self, limit: int = 20) -> list[tuple[str, int, int]]:
    """Return (org, author_count, draft_count).

    Organizations are ranked by distinct drafts; rows with an empty
    affiliation are excluded.
    """
    cursor = self.conn.execute(
        """SELECT da.affiliation as org,
        COUNT(DISTINCT da.person_id) as authors,
        COUNT(DISTINCT da.draft_name) as drafts
        FROM draft_authors da
        WHERE da.affiliation != ''
        GROUP BY da.affiliation
        ORDER BY drafts DESC
        LIMIT ?""",
        (limit,),
    )
    return [(rec["org"], rec["authors"], rec["drafts"]) for rec in cursor.fetchall()]
def coauthor_pairs(self) -> list[tuple[str, str, int]]:
    """Return (author_a, author_b, shared_drafts) for all co-author pairs.

    The self-join on draft_authors with person_id < person_id emits each
    unordered pair exactly once; pairs are sorted by shared drafts.
    """
    cursor = self.conn.execute(
        """SELECT a1.name as a, a2.name as b, COUNT(*) as shared
        FROM draft_authors da1
        JOIN draft_authors da2 ON da1.draft_name = da2.draft_name AND da1.person_id < da2.person_id
        JOIN authors a1 ON da1.person_id = a1.person_id
        JOIN authors a2 ON da2.person_id = a2.person_id
        GROUP BY da1.person_id, da2.person_id
        ORDER BY shared DESC"""
    )
    return [(rec["a"], rec["b"], rec["shared"]) for rec in cursor.fetchall()]
def cross_org_collaborations(self, limit: int = 20) -> list[tuple[str, str, int]]:
    """Return (org_a, org_b, shared_drafts) for cross-org collaboration.

    A draft counts as a collaboration between two distinct, non-empty
    affiliations when it has at least one author from each.  The org pair
    is normalized alphabetically (org_a < org_b) so the same pair is never
    split across an (A, B) row and a (B, A) row -- previously the grouping
    followed the arbitrary person_id ordering of the two authors, which
    fragmented a pair's draft count across two result rows.
    """
    rows = self.conn.execute(
        """SELECT CASE WHEN da1.affiliation < da2.affiliation
                       THEN da1.affiliation ELSE da2.affiliation END as org_a,
                  CASE WHEN da1.affiliation < da2.affiliation
                       THEN da2.affiliation ELSE da1.affiliation END as org_b,
                  COUNT(DISTINCT da1.draft_name) as shared
        FROM draft_authors da1
        JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
        AND da1.person_id < da2.person_id
        WHERE da1.affiliation != '' AND da2.affiliation != ''
        AND da1.affiliation != da2.affiliation
        GROUP BY org_a, org_b
        ORDER BY shared DESC
        LIMIT ?""",
        (limit,),
    ).fetchall()
    return [(r["org_a"], r["org_b"], r["shared"]) for r in rows]
# --- Ideas ---
def insert_ideas(self, draft_name: str, ideas: list[dict]) -> None:
    """Replace the stored ideas for *draft_name* with *ideas*.

    Each idea dict must carry "title" and "description"; an optional
    "type" key is stored as idea_type.  All rows share one extraction
    timestamp, and the delete + insert is committed together.
    """
    # Clear existing ideas for this draft first
    self.conn.execute("DELETE FROM ideas WHERE draft_name = ?", (draft_name,))
    now = datetime.now(timezone.utc).isoformat()
    # executemany: one prepared statement for the whole batch instead of
    # a Python-level execute per idea.
    self.conn.executemany(
        """INSERT INTO ideas (draft_name, title, description, idea_type, extracted_at)
        VALUES (?, ?, ?, ?, ?)""",
        [
            (draft_name, idea["title"], idea["description"], idea.get("type", ""), now)
            for idea in ideas
        ],
    )
    self.conn.commit()
def get_ideas_for_draft(self, draft_name: str) -> list[dict]:
    """Return all stored ideas for one draft as plain dicts."""
    found = []
    for rec in self.conn.execute(
        "SELECT * FROM ideas WHERE draft_name = ?", (draft_name,)
    ).fetchall():
        found.append({
            "title": rec["title"],
            "description": rec["description"],
            "type": rec["idea_type"],
            "draft_name": rec["draft_name"],
        })
    return found
def drafts_without_ideas(self, limit: int = 500) -> list[str]:
    """Names of up to *limit* drafts that have no extracted ideas yet.

    Used to drive incremental idea extraction.
    """
    cursor = self.conn.execute(
        """SELECT d.name FROM drafts d
        LEFT JOIN ideas i ON d.name = i.draft_name
        WHERE i.draft_name IS NULL
        LIMIT ?""",
        (limit,),
    )
    return [record["name"] for record in cursor.fetchall()]
def all_ideas(self) -> list[dict]:
    """Return every extracted idea in the database, ordered by draft name."""
    cursor = self.conn.execute(
        "SELECT * FROM ideas ORDER BY draft_name"
    )
    return [
        {
            "title": rec["title"],
            "description": rec["description"],
            "type": rec["idea_type"],
            "draft_name": rec["draft_name"],
        }
        for rec in cursor.fetchall()
    ]
def idea_count(self) -> int:
    """Total number of rows in the ideas table."""
    (total,) = self.conn.execute("SELECT COUNT(*) FROM ideas").fetchone()
    return total
# --- Gaps ---
def insert_gaps(self, gaps: list[dict]) -> None:
    """Replace the stored gap analysis with *gaps*.

    Each gap dict must carry "topic" and "description"; "category",
    "evidence", and "severity" are optional (severity defaults to
    "medium").  All rows share one analysis timestamp.
    """
    self.conn.execute("DELETE FROM gaps")  # Replace old analysis
    now = datetime.now(timezone.utc).isoformat()
    # executemany: one prepared statement for the whole batch instead of
    # a Python-level execute per gap.
    self.conn.executemany(
        """INSERT INTO gaps (topic, description, category, evidence, severity, analyzed_at)
        VALUES (?, ?, ?, ?, ?, ?)""",
        [
            (g["topic"], g["description"], g.get("category", ""),
             g.get("evidence", ""), g.get("severity", "medium"), now)
            for g in gaps
        ],
    )
    self.conn.commit()
def all_gaps(self) -> list[dict]:
    """Return every stored gap-analysis row as a dict, ordered by id."""
    result = []
    for rec in self.conn.execute("SELECT * FROM gaps ORDER BY id").fetchall():
        result.append({
            "id": rec["id"],
            "topic": rec["topic"],
            "description": rec["description"],
            "category": rec["category"],
            "evidence": rec["evidence"],
            "severity": rec["severity"],
        })
    return result
# --- Helpers ---
@staticmethod