v0.2.0: visualizations, interactive browser, arXiv paper, gap analysis

New features:
- 12 interactive visualizations (ietf viz): t-SNE landscape, similarity
  heatmap, score distributions, timeline, bubble explorer, radar charts,
  author network graph, category treemap, quality vs overlap, org bar chart,
  ideas chart, and interactive draft browser
- Interactive draft browser (browser.html): filterable by category, keyword,
  and score sliders, with a sortable table and expandable detail rows
- arXiv paper (paper/main.tex): 13-page manuscript with all findings
- Gap analysis: 12 identified under-addressed areas
- Author network: collaboration graph, org contributions, cross-org analysis
- Draft generation from gaps (ietf draft-gen)
- Auto-load .env for API keys (python-dotenv)

New modules: visualize.py, authors.py, draftgen.py
New reports: timeline, overlap-matrix, authors, gaps
New deps: plotly, matplotlib, seaborn, scipy, scikit-learn, networkx, python-dotenv

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-28 13:37:55 +01:00
parent f44f9265bd
commit be9cf9c5d9
32 changed files with 4447 additions and 4 deletions

View File

@@ -2,6 +2,8 @@
from __future__ import annotations
from pathlib import Path
import click
from rich.console import Console
from rich.table import Table
@@ -372,6 +374,435 @@ def digest(days: int):
db.close()
@report.command()
def timeline():
    """Timeline of draft submissions by month and category."""
    from .reports import Reporter

    config = _get_config()
    database = Database(config)
    report_writer = Reporter(config, database)
    # try/finally closes the database whether or not the report step succeeds.
    try:
        saved_path = report_writer.timeline()
        console.print(f"Report saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@report.command("overlap-matrix")
def overlap_matrix():
    """Full pairwise overlap matrix report."""
    from .embeddings import Embedder
    from .reports import Reporter

    cfg = _get_config()
    db = Database(cfg)
    embedder = Embedder(cfg, db)
    reporter = Reporter(cfg, db)
    # try/finally closes the database whether or not the report step succeeds.
    try:
        # The status line no longer hard-codes a matrix size: the number of
        # drafts depends on the database contents, so a fixed "260x260" was
        # wrong for any other corpus size.
        console.print("Computing pairwise similarity matrix...")
        path = reporter.overlap_matrix(embedder)
        console.print(f"Report saved: [bold]{path}[/]")
    finally:
        db.close()
@report.command("authors")
def authors_report():
    """Author and organization network report."""
    from .reports import Reporter

    config = _get_config()
    database = Database(config)
    report_writer = Reporter(config, database)
    # try/finally closes the database whether or not the report step succeeds.
    try:
        saved_path = report_writer.authors_report()
        console.print(f"Report saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@report.command("ideas")
def ideas_report():
    """Report on extracted technical ideas."""
    from .reports import Reporter

    config = _get_config()
    database = Database(config)
    report_writer = Reporter(config, database)
    # try/finally closes the database whether or not the report step succeeds.
    try:
        saved_path = report_writer.ideas_report()
        console.print(f"Report saved: [bold]{saved_path}[/]")
    finally:
        database.close()
# ── visualize ────────────────────────────────────────────────────────────
@main.group()
def viz():
    """Generate interactive visualizations (HTML/PNG)."""
    # Pure command group: the subcommands registered on it do the work.
@viz.command("all")
def viz_all():
    """Generate all available visualizations."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not generation succeeds.
    try:
        generated = visualizer.generate_all()
        total = len(generated)
        console.print(f"\n[bold green]{total} visualizations[/] saved to {visualizer.output_dir}/")
    finally:
        database.close()
@viz.command("landscape")
@click.option(
    "--method",
    "-m",
    default="tsne",
    type=click.Choice(["umap", "tsne"]),
    help="Dimensionality reduction method",
)
def viz_landscape(method: str):
    """2D scatter of draft embeddings colored by category."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.landscape_scatter(method=method)
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("heatmap")
def viz_heatmap():
    """Clustered similarity heatmap (PNG)."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.similarity_heatmap()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("distributions")
def viz_distributions():
    """Rating dimension distributions by category (PNG)."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.score_distributions()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("timeline")
def viz_timeline():
    """Stacked area chart of monthly submissions."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.timeline_chart()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("bubble")
def viz_bubble():
    """Interactive bubble chart: novelty vs maturity."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.bubble_explorer()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("radar")
def viz_radar():
    """Radar chart of average category rating profiles."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.category_radar()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("network")
@click.option(
    "--min-shared",
    "-n",
    default=2,
    help="Minimum shared drafts for an edge",
)
def viz_network(min_shared: int):
    """Interactive author collaboration network graph."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.author_network(min_shared=min_shared)
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("treemap")
def viz_treemap():
    """Category treemap colored by average score."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.category_treemap()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("quality")
def viz_quality():
    """Score vs uniqueness scatter (quality vs redundancy)."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.score_vs_overlap()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("orgs")
def viz_orgs():
    """Organization contribution bar chart."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.org_contributions()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("ideas")
def viz_ideas():
    """Ideas frequency chart by type."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.ideas_chart()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
@viz.command("browser")
def viz_browser():
    """Interactive filterable draft browser (standalone HTML)."""
    from .visualize import Visualizer

    config = _get_config()
    database = Database(config)
    visualizer = Visualizer(config, database)
    # try/finally closes the database whether or not rendering succeeds.
    try:
        saved_path = visualizer.draft_browser()
        console.print(f"Saved: [bold]{saved_path}[/]")
    finally:
        database.close()
# ── authors ─────────────────────────────────────────────────────────────
@main.command()
@click.argument("name", required=False)
@click.option(
    "--fetch/--no-fetch",
    default=False,
    help="Fetch author data from Datatracker first",
)
@click.option("--limit", "-n", default=20, help="Number of top authors to show")
def authors(name: str | None, fetch: bool, limit: int):
    """Show authors for a draft, or top authors overall."""
    from .authors import AuthorNetwork

    config = _get_config()
    database = Database(config)
    author_net = AuthorNetwork(config, database)
    # try/finally closes the database on every exit path, including the
    # early returns below.
    try:
        # Optional refresh step, run before either listing mode.
        if fetch:
            fetched = author_net.fetch_all_authors()
            console.print(f"Fetched authors for [bold green]{fetched}[/] drafts")

        # Mode 1: a draft name was given -> list that draft's authors.
        if name:
            people = database.get_authors_for_draft(name)
            if not people:
                console.print(f"[yellow]No author data for {name}. Run `ietf authors --fetch` first.[/]")
                return
            console.print(f"\n[bold]Authors of {name}:[/]")
            for person in people:
                console.print(f" - {person.name} ({person.affiliation or 'no affiliation'})")
            return

        # Mode 2: no name -> ranked table of the top authors.
        ranking = database.top_authors(limit=limit)
        if not ranking:
            console.print("[yellow]No author data. Run `ietf authors --fetch` first.[/]")
            return

        table = Table(title=f"Top {limit} Authors")
        table.add_column("#", justify="right", width=4)
        table.add_column("Author", style="cyan")
        table.add_column("Organization")
        table.add_column("Drafts", justify="right", width=6)
        # Each row is a 4-tuple; the last element is not displayed.
        for position, (author_name, affiliation, draft_count, _) in enumerate(ranking, 1):
            table.add_row(str(position), author_name, affiliation, str(draft_count))
        console.print(table)
    finally:
        database.close()
@main.command()
@click.option("--top", "-n", default=20, help="Top N to show")
def network(top: int):
"""Show author collaboration network."""
# Prints two Rich tables: organizations from db.top_orgs() and organization
# pairs from db.cross_org_collaborations(). `top` is passed as the row limit
# to both queries.
cfg = _get_config()
db = Database(cfg)
try:
console.print("\n[bold]Top Organizations[/]")
orgs = db.top_orgs(limit=top)
if orgs:
table = Table()
table.add_column("#", justify="right", width=4)
table.add_column("Organization", style="cyan")
table.add_column("Authors", justify="right", width=8)
table.add_column("Drafts", justify="right", width=6)
# Each org row unpacks as (organization, author count, draft count).
for rank, (org, auth_cnt, draft_cnt) in enumerate(orgs, 1):
table.add_row(str(rank), org, str(auth_cnt), str(draft_cnt))
console.print(table)
console.print("\n[bold]Cross-Org Collaboration[/]")
cross = db.cross_org_collaborations(limit=top)
if cross:
table = Table()
table.add_column("Org A", style="cyan")
table.add_column("Org B", style="cyan")
table.add_column("Shared Drafts", justify="right", width=8)
# Each collaboration row unpacks as (org A, org B, shared draft count).
for org_a, org_b, shared in cross:
table.add_row(org_a, org_b, str(shared))
console.print(table)
# NOTE(review): block nesting is not recoverable from this flattened text.
# Given its message, this else presumably pairs with `if orgs:` rather than
# `if cross:` — confirm against the original file.
else:
console.print("[yellow]No author data. Run `ietf authors --fetch` first.[/]")
finally:
# Close the database handle on every exit path.
db.close()
# ── ideas ───────────────────────────────────────────────────────────────
@main.command()
@click.argument("name", required=False)
@click.option("--all", "extract_all", is_flag=True, help="Extract ideas from all drafts")
@click.option("--limit", "-n", default=50, help="Max drafts to extract (with --all)")
@click.option("--batch", "-b", default=5, help="Drafts per API call (default 5, set 1 for individual)")
@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)")
def ideas(name: str | None, extract_all: bool, limit: int, batch: int, cheap: bool):
    """Extract technical ideas from drafts using Claude."""
    from rich.markup import escape

    from .analyzer import Analyzer

    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)
    # try/finally closes the database on every exit path.
    try:
        if extract_all:
            # Bulk mode: --all takes precedence over a positional draft name.
            count = analyzer.extract_all_ideas(limit=limit, batch_size=batch, cheap=cheap)
            console.print(f"Extracted ideas from [bold green]{count}[/] drafts")
        elif name:
            idea_list = analyzer.extract_ideas(name)
            if idea_list:
                console.print(f"\n[bold]Ideas from {name}:[/]\n")
                for idea in idea_list:
                    # Rich treats "[word]" starting with a lowercase letter
                    # as a markup tag and removes it from the output, so an
                    # unescaped "[mechanism]" label would vanish. Escaping
                    # the extracted text makes it render literally.
                    label = escape(f"[{idea.get('type', '?')}]")
                    title = escape(str(idea['title']))
                    description = escape(str(idea['description']))
                    console.print(f" {label} [bold]{title}[/]")
                    console.print(f" {description}\n")
            else:
                console.print("[red]Extraction failed or no ideas found[/]")
        else:
            console.print("Provide a draft name or use --all")
    finally:
        db.close()
# ── gaps ────────────────────────────────────────────────────────────────
@main.command()
@click.option("--refresh", is_flag=True, help="Re-run gap analysis even if cached")
def gaps(refresh: bool):
    """Identify gaps in the current draft landscape using Claude."""
    from .analyzer import Analyzer
    from .reports import Reporter

    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)
    reporter = Reporter(cfg, db)
    # try/finally closes the database on every exit path.
    try:
        existing = db.all_gaps()
        if existing and not refresh:
            # Stored gaps are reused unless --refresh was given.
            console.print(f"[bold]{len(existing)} gaps[/] already identified (use --refresh to re-run)\n")
        else:
            gap_list = analyzer.gap_analysis()
            console.print(f"\nIdentified [bold green]{len(gap_list)}[/] gaps\n")
            existing = gap_list

        # Only dict-shaped gaps are listed on the console. The emptiness check
        # comes first so that `existing[0]` is never evaluated on an empty
        # result, which previously raised IndexError when the analysis found
        # no gaps.
        printable = existing if existing and isinstance(existing[0], dict) else []
        for i, gap in enumerate(printable, 1):
            sev = gap.get("severity", "medium").upper()
            console.print(f" [bold]{i}. {gap['topic']}[/] [{sev}]")
            # Descriptions are truncated to 100 characters for the summary.
            console.print(f" {gap['description'][:100]}\n")

        # The report is written regardless of whether anything was listed.
        path = reporter.gaps_report()
        console.print(f"Report saved: [bold]{path}[/]")
    finally:
        db.close()
# ── draft-gen ───────────────────────────────────────────────────────────
@main.command("draft-gen")
@click.argument("gap_topic")
@click.option("--output", "-o", help="Output file path")
def draft_gen(gap_topic: str, output: str | None):
    """Generate an Internet-Draft addressing a landscape gap."""
    from .draftgen import DraftGenerator
    from .analyzer import Analyzer

    config = _get_config()
    database = Database(config)
    draft_analyzer = Analyzer(config, database)
    draft_writer = DraftGenerator(config, database, draft_analyzer)
    # try/finally closes the database whether or not generation succeeds.
    try:
        # Fall back to <data_dir>/reports/generated-draft.txt when no
        # (non-empty) --output value was supplied.
        if output:
            destination = output
        else:
            destination = str(Path(config.data_dir) / "reports" / "generated-draft.txt")
        console.print(f"Generating Internet-Draft on: [bold]{gap_topic}[/]")
        saved_path = draft_writer.generate(gap_topic, output_path=destination)
        console.print(f"\nDraft saved: [bold green]{saved_path}[/]")
    finally:
        database.close()
# ── config ───────────────────────────────────────────────────────────────────