v0.2.0: visualizations, interactive browser, arXiv paper, gap analysis
New features:
- 12 interactive visualizations (ietf viz): t-SNE landscape, similarity heatmap, score distributions, timeline, bubble explorer, radar charts, author network graph, category treemap, quality vs overlap, org bar chart, ideas chart, and interactive draft browser
- Interactive draft browser (browser.html): filterable by category, keyword, and score sliders, with a sortable table and expandable detail rows
- arXiv paper (paper/main.tex): 13-page manuscript with all findings
- Gap analysis: 12 identified under-addressed areas
- Author network: collaboration graph, org contributions, cross-org analysis
- Draft generation from gaps (ietf draft-gen)
- Auto-load .env for API keys (python-dotenv)

New modules: visualize.py, authors.py, draftgen.py
New reports: timeline, overlap-matrix, authors, gaps
New deps: plotly, matplotlib, seaborn, scipy, scikit-learn, networkx, python-dotenv

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -10,7 +10,7 @@ from pathlib import Path
|
||||
import numpy as np
|
||||
|
||||
from .config import Config
|
||||
from .models import Draft, Rating
|
||||
from .models import Author, Draft, Rating
|
||||
|
||||
SCHEMA = """
|
||||
CREATE TABLE IF NOT EXISTS drafts (
|
||||
@@ -76,6 +76,47 @@ CREATE VIRTUAL TABLE IF NOT EXISTS drafts_fts USING fts5(
|
||||
content_rowid='rowid'
|
||||
);
|
||||
|
||||
-- Authors (fetched from Datatracker)
|
||||
CREATE TABLE IF NOT EXISTS authors (
|
||||
person_id INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
ascii_name TEXT,
|
||||
affiliation TEXT DEFAULT '',
|
||||
resource_uri TEXT,
|
||||
fetched_at TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS draft_authors (
|
||||
draft_name TEXT NOT NULL REFERENCES drafts(name),
|
||||
person_id INTEGER NOT NULL REFERENCES authors(person_id),
|
||||
author_order INTEGER DEFAULT 1,
|
||||
affiliation TEXT DEFAULT '',
|
||||
PRIMARY KEY (draft_name, person_id)
|
||||
);
|
||||
|
||||
-- Extracted ideas
|
||||
CREATE TABLE IF NOT EXISTS ideas (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
draft_name TEXT NOT NULL REFERENCES drafts(name),
|
||||
title TEXT NOT NULL,
|
||||
description TEXT NOT NULL,
|
||||
idea_type TEXT DEFAULT '',
|
||||
extracted_at TEXT
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_ideas_draft ON ideas(draft_name);
|
||||
|
||||
-- Gap analysis results
|
||||
CREATE TABLE IF NOT EXISTS gaps (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
topic TEXT NOT NULL,
|
||||
description TEXT NOT NULL,
|
||||
category TEXT DEFAULT '',
|
||||
evidence TEXT DEFAULT '',
|
||||
severity TEXT DEFAULT 'medium',
|
||||
analyzed_at TEXT
|
||||
);
|
||||
|
||||
-- Triggers to keep FTS index in sync
|
||||
CREATE TRIGGER IF NOT EXISTS drafts_ai AFTER INSERT ON drafts BEGIN
|
||||
INSERT INTO drafts_fts(rowid, name, title, abstract, full_text)
|
||||
@@ -341,6 +382,189 @@ class Database:
|
||||
).fetchone()
|
||||
return (row[0], row[1])
|
||||
|
||||
# --- Authors ---
|
||||
|
||||
def upsert_author(self, author: Author) -> None:
    """Insert *author*, or overwrite the stored row with the same person_id.

    On a person_id conflict every non-key column is replaced by the incoming
    value. The change is committed before returning.
    """
    statement = """INSERT INTO authors (person_id, name, ascii_name, affiliation, resource_uri, fetched_at)
           VALUES (?, ?, ?, ?, ?, ?)
           ON CONFLICT(person_id) DO UPDATE SET
               name=excluded.name, ascii_name=excluded.ascii_name,
               affiliation=excluded.affiliation, resource_uri=excluded.resource_uri,
               fetched_at=excluded.fetched_at
        """
    # Parameter order must match the column list in the INSERT above.
    values = (
        author.person_id,
        author.name,
        author.ascii_name,
        author.affiliation,
        author.resource_uri,
        author.fetched_at,
    )
    self.conn.execute(statement, values)
    self.conn.commit()
|
||||
|
||||
def upsert_draft_author(
    self, draft_name: str, person_id: int, order: int = 1, affiliation: str = ""
) -> None:
    """Link an author to a draft, updating the link if it already exists.

    The (draft_name, person_id) pair identifies the link; on conflict the
    stored author_order and affiliation are replaced. The change is committed
    before returning.
    """
    statement = """INSERT INTO draft_authors (draft_name, person_id, author_order, affiliation)
           VALUES (?, ?, ?, ?)
           ON CONFLICT(draft_name, person_id) DO UPDATE SET
               author_order=excluded.author_order, affiliation=excluded.affiliation
        """
    values = (draft_name, person_id, order, affiliation)
    self.conn.execute(statement, values)
    self.conn.commit()
|
||||
|
||||
def get_authors_for_draft(self, draft_name: str) -> list[Author]:
    """Return the authors linked to *draft_name*, ordered by author_order.

    Columns are read with ``row[...]`` rather than ``row.get(...)``: other
    queries in this class index result rows both by name and by position,
    which matches ``sqlite3.Row``, and that type has no ``.get()`` method.
    Because the query selects ``a.*`` every column is always present, so the
    only "missing" case is a NULL value; NULL text columns are returned as
    ``""`` to keep the defaults the previous ``.get(..., "")`` calls stated.
    """
    rows = self.conn.execute(
        """SELECT a.* FROM authors a
           JOIN draft_authors da ON a.person_id = da.person_id
           WHERE da.draft_name = ?
           ORDER BY da.author_order""",
        (draft_name,),
    ).fetchall()
    return [
        Author(
            person_id=r["person_id"],
            name=r["name"],
            ascii_name=r["ascii_name"] or "",
            affiliation=r["affiliation"] or "",
            resource_uri=r["resource_uri"] or "",
            # fetched_at had no default in the original call; NULL stays None.
            fetched_at=r["fetched_at"],
        )
        for r in rows
    ]
|
||||
|
||||
def drafts_without_authors(self, limit: int = 500) -> list[str]:
    """Return up to *limit* draft names that have no draft_authors row.

    The query has no ORDER BY, so which names are returned when more than
    *limit* qualify is up to SQLite.
    """
    query = """SELECT d.name FROM drafts d
           LEFT JOIN draft_authors da ON d.name = da.draft_name
           WHERE da.draft_name IS NULL
           LIMIT ?"""
    records = self.conn.execute(query, (limit,)).fetchall()
    names = []
    for record in records:
        names.append(record["name"])
    return names
|
||||
|
||||
def author_count(self) -> int:
    """Return the number of rows in the authors table."""
    cursor = self.conn.execute("SELECT COUNT(*) FROM authors")
    first_row = cursor.fetchone()
    return first_row[0]
|
||||
|
||||
def top_authors(self, limit: int = 20) -> list[tuple[str, str, int, list[str]]]:
    """Return (name, affiliation, draft_count, [draft_names]).

    Authors are ranked by how many draft_authors rows they have, highest
    first, and at most *limit* authors are returned. Draft names are
    aggregated in SQL with a '||' separator and split back into a list here.
    """
    query = """SELECT a.name, a.affiliation, COUNT(da.draft_name) as cnt,
                  GROUP_CONCAT(da.draft_name, '||') as drafts
           FROM authors a
           JOIN draft_authors da ON a.person_id = da.person_id
           GROUP BY a.person_id
           ORDER BY cnt DESC
           LIMIT ?"""
    ranking = []
    for record in self.conn.execute(query, (limit,)).fetchall():
        joined = record["drafts"]
        # An empty/NULL aggregate means no names to split.
        draft_names = joined.split("||") if joined else []
        ranking.append(
            (record["name"], record["affiliation"], record["cnt"], draft_names)
        )
    return ranking
|
||||
|
||||
def top_orgs(self, limit: int = 20) -> list[tuple[str, int, int]]:
    """Return (org, author_count, draft_count).

    Organisations are the non-empty affiliation values recorded on
    draft_authors rows. Each is reported with its distinct author and
    distinct draft counts, ordered by draft count descending and capped at
    *limit* rows.
    """
    query = """SELECT da.affiliation as org,
                  COUNT(DISTINCT da.person_id) as authors,
                  COUNT(DISTINCT da.draft_name) as drafts
           FROM draft_authors da
           WHERE da.affiliation != ''
           GROUP BY da.affiliation
           ORDER BY drafts DESC
           LIMIT ?"""
    records = self.conn.execute(query, (limit,)).fetchall()
    summary = []
    for record in records:
        summary.append((record["org"], record["authors"], record["drafts"]))
    return summary
|
||||
|
||||
def coauthor_pairs(self) -> list[tuple[str, str, int]]:
    """Return (author_a, author_b, shared_drafts) for all co-author pairs.

    A pair is two different people listed on the same draft. The
    ``da1.person_id < da2.person_id`` join condition emits each pair once,
    with the lower person_id first. Results are ordered by the shared-draft
    count, highest first.
    """
    query = """SELECT a1.name as a, a2.name as b, COUNT(*) as shared
           FROM draft_authors da1
           JOIN draft_authors da2 ON da1.draft_name = da2.draft_name AND da1.person_id < da2.person_id
           JOIN authors a1 ON da1.person_id = a1.person_id
           JOIN authors a2 ON da2.person_id = a2.person_id
           GROUP BY da1.person_id, da2.person_id
           ORDER BY shared DESC"""
    pairs = []
    for record in self.conn.execute(query).fetchall():
        pairs.append((record["a"], record["b"], record["shared"]))
    return pairs
|
||||
|
||||
def cross_org_collaborations(self, limit: int = 20) -> list[tuple[str, str, int]]:
    """Return (org_a, org_b, shared_drafts) for cross-org collaboration.

    Two organisations collaborate on a draft when the draft has authors with
    two different non-empty affiliations. Each unordered organisation pair is
    reported exactly once, with ``org_a < org_b``; *shared_drafts* is the
    number of distinct drafts the pair appears on together.

    Pairs are ordered on affiliation rather than on person_id. Ordering by
    person_id made the (org_a, org_b) direction depend on which author had
    the lower id, so one pair could come back as both (A, B) and (B, A) with
    its draft count split between the two rows.

    Args:
        limit: Maximum number of organisation pairs to return.

    Returns:
        Rows sorted by shared draft count descending, with ties broken by
        organisation names so the output is deterministic.
    """
    rows = self.conn.execute(
        """SELECT da1.affiliation as org_a, da2.affiliation as org_b,
                  COUNT(DISTINCT da1.draft_name) as shared
           FROM draft_authors da1
           JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
               AND da1.affiliation < da2.affiliation
           WHERE da1.affiliation != '' AND da2.affiliation != ''
           GROUP BY da1.affiliation, da2.affiliation
           ORDER BY shared DESC, org_a, org_b
           LIMIT ?""",
        (limit,),
    ).fetchall()
    return [(r["org_a"], r["org_b"], r["shared"]) for r in rows]
|
||||
|
||||
# --- Ideas ---
|
||||
|
||||
def insert_ideas(self, draft_name: str, ideas: list[dict]) -> None:
    """Replace every stored idea for *draft_name* with *ideas*.

    Each idea dict must provide "title" and "description"; "type" is
    optional and defaults to "". All inserted rows share one UTC
    ``extracted_at`` timestamp.

    The replacement is all-or-nothing. Rows are built before the database is
    touched, so a malformed idea raises ``KeyError`` without deleting
    anything. The delete and inserts then run inside one connection
    transaction, which rolls back if any statement fails.

    Raises:
        KeyError: If an idea lacks "title" or "description".
    """
    now = datetime.now(timezone.utc).isoformat()
    # Validate and shape all rows up front; a bad idea fails here, before
    # the existing rows are deleted.
    rows = [
        (draft_name, idea["title"], idea["description"], idea.get("type", ""), now)
        for idea in ideas
    ]
    # The connection context manager commits on success and rolls back on error.
    with self.conn:
        self.conn.execute("DELETE FROM ideas WHERE draft_name = ?", (draft_name,))
        self.conn.executemany(
            """INSERT INTO ideas (draft_name, title, description, idea_type, extracted_at)
               VALUES (?, ?, ?, ?, ?)""",
            rows,
        )
|
||||
|
||||
def get_ideas_for_draft(self, draft_name: str) -> list[dict]:
    """Return the stored ideas for *draft_name* as plain dicts.

    Each dict has the keys "title", "description", "type" (taken from the
    idea_type column) and "draft_name".
    """
    cursor = self.conn.execute(
        "SELECT * FROM ideas WHERE draft_name = ?", (draft_name,)
    )
    ideas = []
    for record in cursor.fetchall():
        ideas.append(
            {
                "title": record["title"],
                "description": record["description"],
                "type": record["idea_type"],
                "draft_name": record["draft_name"],
            }
        )
    return ideas
|
||||
|
||||
def drafts_without_ideas(self, limit: int = 500) -> list[str]:
    """Return up to *limit* draft names that have no row in the ideas table.

    The query has no ORDER BY, so which names are returned when more than
    *limit* qualify is up to SQLite.
    """
    query = """SELECT d.name FROM drafts d
           LEFT JOIN ideas i ON d.name = i.draft_name
           WHERE i.draft_name IS NULL
           LIMIT ?"""
    records = self.conn.execute(query, (limit,)).fetchall()
    names = []
    for record in records:
        names.append(record["name"])
    return names
|
||||
|
||||
def all_ideas(self) -> list[dict]:
    """Return every stored idea as a plain dict, ordered by draft name.

    Each dict has the keys "title", "description", "type" (taken from the
    idea_type column) and "draft_name".
    """
    cursor = self.conn.execute("SELECT * FROM ideas ORDER BY draft_name")
    ideas = []
    for record in cursor.fetchall():
        ideas.append(
            {
                "title": record["title"],
                "description": record["description"],
                "type": record["idea_type"],
                "draft_name": record["draft_name"],
            }
        )
    return ideas
|
||||
|
||||
def idea_count(self) -> int:
    """Return the number of rows in the ideas table."""
    cursor = self.conn.execute("SELECT COUNT(*) FROM ideas")
    first_row = cursor.fetchone()
    return first_row[0]
|
||||
|
||||
# --- Gaps ---
|
||||
|
||||
def insert_gaps(self, gaps: list[dict]) -> None:
    """Replace the stored gap analysis with *gaps*.

    Each gap dict must provide "topic" and "description". "category" and
    "evidence" default to "" and "severity" defaults to "medium". All
    inserted rows share one UTC ``analyzed_at`` timestamp.

    The replacement is all-or-nothing. Rows are built before the database is
    touched, so a malformed gap raises ``KeyError`` without wiping the
    previous analysis. The delete and inserts then run inside one connection
    transaction, which rolls back if any statement fails.

    Raises:
        KeyError: If a gap lacks "topic" or "description".
    """
    now = datetime.now(timezone.utc).isoformat()
    # Validate and shape all rows up front; a bad gap fails here, before
    # the old analysis is deleted.
    rows = [
        (
            g["topic"],
            g["description"],
            g.get("category", ""),
            g.get("evidence", ""),
            g.get("severity", "medium"),
            now,
        )
        for g in gaps
    ]
    # The connection context manager commits on success and rolls back on error.
    with self.conn:
        self.conn.execute("DELETE FROM gaps")  # Replace old analysis
        self.conn.executemany(
            """INSERT INTO gaps (topic, description, category, evidence, severity, analyzed_at)
               VALUES (?, ?, ?, ?, ?, ?)""",
            rows,
        )
|
||||
|
||||
def all_gaps(self) -> list[dict]:
    """Return every stored gap as a plain dict, ordered by id.

    Each dict has the keys "id", "topic", "description", "category",
    "evidence" and "severity"; the analyzed_at column is not included.
    """
    exported_columns = ("id", "topic", "description", "category", "evidence", "severity")
    records = self.conn.execute("SELECT * FROM gaps ORDER BY id").fetchall()
    gaps = []
    for record in records:
        gaps.append({column: record[column] for column in exported_columns})
    return gaps
|
||||
|
||||
# --- Helpers ---
|
||||
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user