v0.3.0: Gap-to-Draft pipeline, Living Standards Observatory, blog series
Gap-to-Draft Pipeline (ietf pipeline):
- Context builder assembles ideas, RFC foundations, similar drafts, ecosystem vision
- Generator produces outlines + sections using rich context with Claude
- Quality gates: novelty (embedding similarity), references, format, self-rating
- Family coordinator generates 5-draft ecosystem (AEM/ATD/HITL/AEPB/APAE)
- I-D formatter with proper headers, references, 72-char wrapping

Living Standards Observatory (ietf observatory):
- Source abstraction with IETF + W3C fetchers
- 7-step update pipeline: snapshot, fetch, analyze, embed, ideas, gaps, record
- Static GitHub Pages dashboard (explorer, gap tracker, timeline)
- Weekly CI/CD automation via GitHub Actions

Also includes:
- 361 drafts (expanded from 260 with 6 new keywords), 403 authors, 1,262 ideas, 12 gaps
- Blog series (8 posts planned), reports, arXiv paper figures
- Agent team infrastructure (CLAUDE.md, scripts, dev journal)
- 5 new DB tables, schema migration, ~15 new query methods

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
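As a usage sketch, the weekly observatory update wires these pieces together roughly as follows (assuming the package is importable as ietf_analyzer; only Config, Database, and DashboardGenerator appear in the diffs below, and the intermediate fetch/analyze/embed/ideas/gaps steps live in the observatory module, which is not shown here):

from ietf_analyzer.config import Config
from ietf_analyzer.db import Database
from ietf_analyzer.dashboard import DashboardGenerator

config = Config.load()
db = Database(config)

# Step 1: snapshot. Record current totals before this run's fetch.
snapshot_id = db.create_snapshot()

# Steps 2-7: fetch, analyze, embed, ideas, gaps, record
# (driven by the observatory module, not part of this excerpt).

# Publish the static GitHub Pages site from the current DB contents.
site_dir = DashboardGenerator(config, db).generate()
print(f"snapshot {snapshot_id} recorded; dashboard written to {site_dir}")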
@@ -77,7 +77,7 @@ Abstract: {abstract}

{text_excerpt}

-Return 3-8 ideas. Focus on CONCRETE technical contributions, not general statements.
+Return 0-8 ideas. Only include CONCRETE, NOVEL technical contributions — not restatements of the abstract or general goals. If the draft has no substantive technical ideas (e.g. it is a problem statement, administrative document, or off-topic), return an empty array [].
JSON array only, no fences."""

BATCH_IDEAS_PROMPT = """\
@@ -86,7 +86,7 @@ Per idea: {{"title":"short name","description":"1 sentence","type":"mechanism|pr

{drafts_block}

-3-8 ideas per draft. CONCRETE technical contributions only.
+0-8 ideas per draft. Only include CONCRETE, NOVEL technical contributions. If a draft has no substantive ideas, map it to an empty array. Do not pad with restatements of the abstract.
Return ONLY a JSON object like {{"draft-name":[...], ...}}, no fences."""

GAP_ANALYSIS_PROMPT = """\
@@ -397,16 +397,16 @@ class Analyzer:
            count = 0
            for d in drafts:
                ideas = results.get(d.name, [])
-                if not isinstance(ideas, list):
-                    ideas = [ideas] if ideas else []
-                self.db.cache_response(
-                    d.name, _prompt_hash(f"batch-ideas-{phash}-{d.name}"),
-                    self.config.claude_model_cheap if cheap else self.config.claude_model,
-                    f"batch-ideas[{d.name}]", json.dumps(ideas),
-                    in_tok // len(drafts), out_tok // len(drafts),
-                )
-                self.db.insert_ideas(d.name, ideas)
+                if ideas:
+                    if not isinstance(ideas, list):
+                        ideas = [ideas]
+                    self.db.cache_response(
+                        d.name, _prompt_hash(f"batch-ideas-{phash}-{d.name}"),
+                        self.config.claude_model_cheap if cheap else self.config.claude_model,
+                        f"batch-ideas[{d.name}]", json.dumps(ideas),
+                        in_tok // len(drafts), out_tok // len(drafts),
+                    )
+                    self.db.insert_ideas(d.name, ideas)
                count += 1
            return count
        except (json.JSONDecodeError, anthropic.APIError) as e:
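For reference, a conforming response to the updated batch prompt is a bare JSON object keyed by draft name, with an empty array for drafts that yield nothing. The draft names and idea below are made up for illustration; only the shape follows the prompt's per-idea schema:

{
  "draft-example-agent-capability": [
    {"title": "Capability token handshake",
     "description": "Agents exchange signed capability tokens before delegating a task.",
     "type": "mechanism"}
  ],
  "draft-example-problem-statement": []
}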
File diff suppressed because it is too large
@@ -16,6 +16,12 @@ DEFAULT_KEYWORDS = [
    "autonomous",
    "machine-learning",
    "artificial-intelligence",
+   "mcp",
+   "agentic",
+   "inference",
+   "generative",
+   "intelligent",
+   "aipref",
]


@@ -32,6 +38,15 @@ class Config:
    fetch_since: str = "2024-01-01"
    # Polite delay between API requests (seconds)
    fetch_delay: float = 0.5
+   # Pipeline
+   generation_max_tokens: int = 4096
+   generation_model: str = ""  # defaults to claude_model
+   # Observatory
+   observatory_sources: list[str] = field(default_factory=lambda: ["ietf"])
+   dashboard_dir: str = str(DEFAULT_DATA_DIR.parent / "docs")
+   w3c_groups: list[str] = field(default_factory=lambda: [
+       "webmachinelearning", "wot", "credentials", "did", "vc"
+   ])

    def save(self) -> None:
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
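As a small usage sketch of the new fields (values here are illustrative), enabling the W3C fetcher alongside IETF comes down to overriding observatory_sources:

from ietf_analyzer.config import Config

config = Config.load()
config.observatory_sources = ["ietf", "w3c"]  # w3c_groups keeps its default list above
config.save()  # persists the config under config.data_dir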
src/ietf_analyzer/dashboard.py (new file, 981 lines)
@@ -0,0 +1,981 @@
"""Static dashboard generator for GitHub Pages — Living Standards Observatory."""

from __future__ import annotations

import json
from collections import Counter, defaultdict
from datetime import datetime, timezone
from pathlib import Path

from .config import Config
from .db import Database
from .models import Rating

console = None


def _get_console():
    global console
    if console is None:
        from rich.console import Console

        console = Console()
    return console

class DashboardGenerator:
    """Generate a static GitHub Pages site under docs/."""

    def __init__(self, config: Config | None = None, db: Database | None = None):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self.output_dir = Path(self.config.dashboard_dir)

    def generate(self) -> str:
        """Generate full static site. Returns path to docs/."""
        con = _get_console()
        self.output_dir.mkdir(parents=True, exist_ok=True)
        (self.output_dir / "observatory").mkdir(exist_ok=True)
        (self.output_dir / "data").mkdir(exist_ok=True)
        (self.output_dir / "assets").mkdir(exist_ok=True)

        con.print("[bold]Generating dashboard...[/]")

        self._generate_data_files()
        con.print(" [green]OK[/] Data files")

        self._generate_style()
        con.print(" [green]OK[/] Styles")

        self._generate_index()
        con.print(" [green]OK[/] Index page")

        self._generate_explorer()
        con.print(" [green]OK[/] Explorer page")

        self._generate_gaps_page()
        con.print(" [green]OK[/] Gaps page")

        self._generate_timeline_page()
        con.print(" [green]OK[/] Timeline page")

        con.print(f"\n[bold green]Dashboard generated at {self.output_dir}/[/]")
        return str(self.output_dir)

    # ── Data files ──────────────────────────────────────────────────────────

    def _generate_data_files(self) -> None:
        """Write JSON data files to docs/data/."""
        data_dir = self.output_dir / "data"

        # observatory.json — key metrics
        total = self.db.count_drafts()
        sources = self.db.all_sources()
        gaps = self.db.all_gaps()
        snapshots = self.db.get_snapshots(limit=1)
        unrated = len(self.db.unrated_drafts(limit=10000))
        idea_count = self.db.idea_count()
        author_count = self.db.author_count()

        observatory_data = {
            "total_docs": total,
            "sources": {s["name"]: s["doc_count"] for s in sources},
            "gaps_count": len(gaps),
            "unrated": unrated,
            "ideas": idea_count,
            "authors": author_count,
            "last_update": snapshots[0]["snapshot_at"] if snapshots else None,
        }
        (data_dir / "observatory.json").write_text(json.dumps(observatory_data, indent=2))

        # drafts.json — all docs with ratings
        pairs = self.db.drafts_with_ratings(limit=1000)
        drafts_data = []
        for d, r in pairs:
            drafts_data.append({
                "name": d.name,
                "title": d.title,
                "date": d.date,
                "source": d.source or "ietf",
                "url": d.source_url or d.datatracker_url,
                "pages": d.pages or 0,
                "group": d.group or "individual",
                "score": round(r.composite_score, 2),
                "novelty": r.novelty,
                "maturity": r.maturity,
                "overlap": r.overlap,
                "momentum": r.momentum,
                "relevance": r.relevance,
                "categories": r.categories,
                "summary": r.summary,
                "novelty_note": r.novelty_note,
                "maturity_note": r.maturity_note,
                "overlap_note": r.overlap_note,
                "momentum_note": r.momentum_note,
                "relevance_note": r.relevance_note,
                "doc_status": d.doc_status or "",
            })
        (data_dir / "drafts.json").write_text(json.dumps(drafts_data, indent=2))

        # gaps.json — current gaps + history
        gap_history = self.db.gap_history_timeline()
        gaps_data = {
            "current": gaps,
            "history": gap_history,
        }
        (data_dir / "gaps.json").write_text(json.dumps(gaps_data, indent=2))

        # timeline.json — monthly counts by source and category
        all_drafts = self.db.list_drafts(limit=2000, order_by="time ASC")
        rating_map = {d.name: r for d, r in pairs}
        monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
        monthly_source: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))

        for d in all_drafts:
            month = d.time[:7] if d.time else "unknown"
            src = d.source or "ietf"
            monthly_source[month][src] += 1
            r = rating_map.get(d.name)
            if r:
                for c in r.categories:
                    monthly[month][c] += 1

        months = sorted(set(list(monthly.keys()) + list(monthly_source.keys())))
        all_cats: set[str] = set()
        for mc in monthly.values():
            all_cats.update(mc.keys())
        all_sources_set: set[str] = set()
        for ms in monthly_source.values():
            all_sources_set.update(ms.keys())

        timeline_data = {
            "months": months,
            "by_category": {m: dict(monthly.get(m, {})) for m in months},
            "by_source": {m: dict(monthly_source.get(m, {})) for m in months},
            "categories": sorted(all_cats),
            "sources": sorted(all_sources_set),
        }
        (data_dir / "timeline.json").write_text(json.dumps(timeline_data, indent=2))

        # meta.json
        meta = {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "version": "0.3.0",
            "project": "IETF Living Standards Observatory",
        }
        (data_dir / "meta.json").write_text(json.dumps(meta, indent=2))

    # ── Style ───────────────────────────────────────────────────────────────

    def _generate_style(self) -> None:
        """Shared CSS."""
        css = """\
:root {
  --bg: #f5f7fa;
  --card-bg: #ffffff;
  --text: #1a1a2e;
  --text-dim: #666;
  --accent: #4a6cf7;
  --accent-light: rgba(74,108,247,0.1);
  --green: #10b981;
  --orange: #f59e0b;
  --red: #ef4444;
  --border: #e5e7eb;
  --shadow: 0 1px 4px rgba(0,0,0,0.08);
  --radius: 10px;
}
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
  background: var(--bg); color: var(--text);
  line-height: 1.5;
}
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }

/* Layout */
.container { max-width: 1200px; margin: 0 auto; padding: 20px; }
.header {
  background: var(--card-bg); border-bottom: 1px solid var(--border);
  padding: 16px 0; margin-bottom: 24px;
}
.header .container { display: flex; align-items: center; justify-content: space-between; }
.header h1 { font-size: 1.3rem; }
.header nav { display: flex; gap: 20px; font-size: 0.9rem; }
.header nav a { color: var(--text-dim); font-weight: 500; }
.header nav a:hover, .header nav a.active { color: var(--accent); text-decoration: none; }

/* Cards */
.cards { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 16px; margin-bottom: 24px; }
.card {
  background: var(--card-bg); border-radius: var(--radius);
  padding: 20px; box-shadow: var(--shadow);
}
.card .label { font-size: 0.8rem; color: var(--text-dim); text-transform: uppercase; letter-spacing: 0.5px; }
.card .value { font-size: 2rem; font-weight: 700; margin-top: 4px; }
.card .sub { font-size: 0.8rem; color: var(--text-dim); margin-top: 4px; }

/* Tables */
.panel {
  background: var(--card-bg); border-radius: var(--radius);
  box-shadow: var(--shadow); overflow: hidden; margin-bottom: 24px;
}
.panel-header { padding: 16px 20px; border-bottom: 1px solid var(--border); font-weight: 600; }
table { width: 100%; border-collapse: collapse; }
th {
  background: #f8f9fb; padding: 10px 12px; text-align: left;
  font-size: 0.78rem; color: var(--text-dim); cursor: pointer; user-select: none;
  white-space: nowrap; border-bottom: 2px solid var(--border);
}
th:hover { color: var(--accent); }
td { padding: 10px 12px; border-bottom: 1px solid #f0f0f0; font-size: 0.83rem; vertical-align: top; }
tr:hover { background: #fafbff; }

/* Controls */
.controls {
  background: var(--card-bg); border-radius: var(--radius);
  padding: 16px 20px; margin-bottom: 16px; box-shadow: var(--shadow);
}
.controls-row { display: flex; gap: 16px; align-items: center; flex-wrap: wrap; margin-bottom: 10px; }
.controls-row:last-child { margin-bottom: 0; }
.search-box {
  flex: 1; min-width: 250px; padding: 8px 14px;
  border: 1px solid var(--border); border-radius: 6px;
  font-size: 0.9rem; outline: none;
}
.search-box:focus { border-color: var(--accent); box-shadow: 0 0 0 2px var(--accent-light); }
.slider-group { display: flex; align-items: center; gap: 6px; font-size: 0.8rem; color: var(--text-dim); }
.slider-group input[type=range] { width: 100px; cursor: pointer; }
.slider-val { font-weight: 600; min-width: 24px; text-align: center; }

/* Chips */
.chip-row { display: flex; flex-wrap: wrap; gap: 6px; }
.chip {
  display: inline-block; padding: 3px 10px; border-radius: 12px;
  font-size: 0.75rem; cursor: pointer; border: 1px solid var(--border);
  background: var(--card-bg); transition: all 0.15s; user-select: none;
}
.chip.active { background: var(--accent); color: #fff; border-color: var(--accent); }
.chip:hover { border-color: var(--accent); }

/* Badges */
.score-badge {
  display: inline-block; padding: 2px 8px; border-radius: 10px;
  font-weight: 600; font-size: 0.8rem;
}
.score-high { background: #d4edda; color: #155724; }
.score-mid { background: #fff3cd; color: #856404; }
.score-low { background: #f8d7da; color: #721c24; }
.cat-badge {
  display: inline-block; padding: 1px 7px; border-radius: 8px;
  font-size: 0.68rem; margin: 1px 2px; background: #e8eaf6; color: #3949ab;
}
.source-badge {
  display: inline-block; padding: 1px 7px; border-radius: 8px;
  font-size: 0.68rem; margin: 1px 2px;
}
.source-ietf { background: #e3f2fd; color: #1565c0; }
.source-w3c { background: #fce4ec; color: #c62828; }

/* Severity */
.sev-critical { color: var(--red); font-weight: 600; }
.sev-high { color: var(--orange); font-weight: 600; }
.sev-medium { color: var(--text); }
.sev-low { color: var(--text-dim); }

/* Bar */
.bar { display: inline-block; height: 10px; border-radius: 3px; background: var(--accent); vertical-align: middle; }

/* Detail */
.detail-row td { padding: 12px 20px; background: #f8faff; }
.detail-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 10px; max-width: 800px; }
.detail-item { font-size: 0.82rem; }
.detail-item strong { color: #333; }
.detail-item .note { color: var(--text-dim); font-size: 0.78rem; }
.summary-text { font-size: 0.82rem; color: #444; margin-top: 6px; line-height: 1.4; }

/* Chart container */
.chart-container {
  background: var(--card-bg); border-radius: var(--radius);
  box-shadow: var(--shadow); padding: 20px; margin-bottom: 24px;
}

/* Gap cards */
.gap-card {
  background: var(--card-bg); border-radius: var(--radius);
  border-left: 4px solid var(--accent); padding: 16px 20px;
  box-shadow: var(--shadow); margin-bottom: 12px;
}
.gap-card h3 { font-size: 0.95rem; margin-bottom: 4px; }
.gap-card p { font-size: 0.83rem; color: var(--text-dim); margin-bottom: 4px; }
.gap-card .meta { font-size: 0.75rem; color: var(--text-dim); }
.gap-card.critical { border-left-color: var(--red); }
.gap-card.high { border-left-color: var(--orange); }

.dim { font-size: 0.75rem; color: var(--text-dim); }
.clickable { cursor: pointer; }
.reset-btn {
  padding: 4px 12px; border: 1px solid var(--border); border-radius: 6px;
  background: var(--card-bg); cursor: pointer; font-size: 0.78rem; color: var(--text-dim);
}
.reset-btn:hover { border-color: var(--accent); color: var(--accent); }
.result-count { font-size: 0.85rem; color: var(--text-dim); margin: 10px 0 8px; }

/* Timeline bars */
.tl-bar {
  display: inline-block; height: 16px; border-radius: 3px;
  vertical-align: middle; min-width: 2px;
}

@media (max-width: 768px) {
  .cards { grid-template-columns: 1fr 1fr; }
  .controls-row { flex-direction: column; align-items: stretch; }
  .detail-grid { grid-template-columns: 1fr; }
}
"""
        (self.output_dir / "assets" / "style.css").write_text(css)

    # ── Shared HTML pieces ──────────────────────────────────────────────────

    def _header_html(self, active: str = "") -> str:
        def active_cls(page: str) -> str:
            return ' class="active"' if page == active else ""

        return f"""\
<div class="header">
  <div class="container">
    <h1>Living Standards Observatory</h1>
    <nav>
      <a href="../index.html"{active_cls("index")}>Dashboard</a>
      <a href="explorer.html"{active_cls("explorer")}>Explorer</a>
      <a href="gaps.html"{active_cls("gaps")}>Gaps</a>
      <a href="timeline.html"{active_cls("timeline")}>Timeline</a>
    </nav>
  </div>
</div>"""

    def _index_header_html(self) -> str:
        return """\
<div class="header">
  <div class="container">
    <h1>Living Standards Observatory</h1>
    <nav>
      <a href="index.html" class="active">Dashboard</a>
      <a href="observatory/explorer.html">Explorer</a>
      <a href="observatory/gaps.html">Gaps</a>
      <a href="observatory/timeline.html">Timeline</a>
    </nav>
  </div>
</div>"""

    # ── Index page ──────────────────────────────────────────────────────────

    def _generate_index(self) -> None:
        """Landing page with key metrics dashboard."""
        html = f"""\
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Living Standards Observatory</title>
  <link rel="stylesheet" href="assets/style.css">
</head>
<body>
{self._index_header_html()}
<div class="container">

  <div class="cards" id="metricsCards">
    <div class="card"><div class="label">Total Documents</div><div class="value" id="totalDocs">--</div><div class="sub" id="sourceSub"></div></div>
    <div class="card"><div class="label">Standards Bodies</div><div class="value" id="sourceCount">--</div><div class="sub">Active sources</div></div>
    <div class="card"><div class="label">Open Gaps</div><div class="value" id="gapCount">--</div><div class="sub">Identified coverage gaps</div></div>
    <div class="card"><div class="label">Ideas Extracted</div><div class="value" id="ideaCount">--</div><div class="sub">Technical contributions</div></div>
    <div class="card"><div class="label">Authors Tracked</div><div class="value" id="authorCount">--</div><div class="sub">Individual contributors</div></div>
    <div class="card"><div class="label">Last Update</div><div class="value" id="lastUpdate" style="font-size:1rem">--</div><div class="sub" id="updateSub"></div></div>
  </div>

  <div class="panel">
    <div class="panel-header">Top Rated Documents</div>
    <table>
      <thead>
        <tr><th>Score</th><th>Document</th><th>Source</th><th>Date</th><th>Categories</th></tr>
      </thead>
      <tbody id="topDrafts"></tbody>
    </table>
  </div>

  <div class="panel">
    <div class="panel-header">Critical & High Severity Gaps</div>
    <div id="gapsList" style="padding: 16px;"></div>
  </div>

</div>

<script>
function escHtml(s) {{ const d = document.createElement('div'); d.textContent = s || ''; return d.innerHTML; }}
function scoreBadge(s) {{
  const cls = s >= 4.0 ? 'score-high' : s >= 3.0 ? 'score-mid' : 'score-low';
  return '<span class="score-badge ' + cls + '">' + s.toFixed(1) + '</span>';
}}

async function init() {{
  const [obs, drafts, gaps] = await Promise.all([
    fetch('data/observatory.json').then(r => r.json()),
    fetch('data/drafts.json').then(r => r.json()),
    fetch('data/gaps.json').then(r => r.json()),
  ]);

  // Metrics
  document.getElementById('totalDocs').textContent = obs.total_docs;
  const srcNames = Object.keys(obs.sources || {{}});
  document.getElementById('sourceCount').textContent = srcNames.length || 1;
  document.getElementById('sourceSub').textContent = srcNames.map(s => s.toUpperCase() + ': ' + (obs.sources[s] || 0)).join(' | ') || '';
  document.getElementById('gapCount').textContent = obs.gaps_count;
  document.getElementById('ideaCount').textContent = obs.ideas;
  document.getElementById('authorCount').textContent = obs.authors;
  if (obs.last_update) {{
    document.getElementById('lastUpdate').textContent = obs.last_update.substring(0, 10);
  }}

  // Top drafts
  const top = drafts.sort((a, b) => b.score - a.score).slice(0, 15);
  const tbody = document.getElementById('topDrafts');
  top.forEach(d => {{
    const tr = document.createElement('tr');
    const srcClass = 'source-' + (d.source || 'ietf');
    tr.innerHTML =
      '<td>' + scoreBadge(d.score) + '</td>' +
      '<td><a href="' + escHtml(d.url) + '" target="_blank">' + escHtml(d.name) + '</a><br><span class="dim">' + escHtml(d.title.substring(0,80)) + '</span></td>' +
      '<td><span class="source-badge ' + srcClass + '">' + (d.source || 'ietf').toUpperCase() + '</span></td>' +
      '<td class="dim">' + d.date + '</td>' +
      '<td>' + d.categories.map(c => '<span class="cat-badge">' + escHtml(c) + '</span>').join('') + '</td>';
    tbody.appendChild(tr);
  }});

  // Gaps
  const gapsList = document.getElementById('gapsList');
  const critical = (gaps.current || []).filter(g => g.severity === 'critical' || g.severity === 'high');
  if (critical.length === 0) {{
    gapsList.innerHTML = '<p class="dim">No critical or high severity gaps found.</p>';
  }} else {{
    critical.forEach(g => {{
      const cls = g.severity === 'critical' ? 'critical' : 'high';
      gapsList.innerHTML +=
        '<div class="gap-card ' + cls + '">' +
        '<h3>' + escHtml(g.topic) + '</h3>' +
        '<p>' + escHtml(g.description) + '</p>' +
        '<div class="meta"><span class="sev-' + g.severity + '">' + g.severity.toUpperCase() + '</span> · ' + escHtml(g.category || '') + '</div>' +
        '</div>';
    }});
  }}
}}
init();
</script>
</body>
</html>"""
        (self.output_dir / "index.html").write_text(html)

    # ── Explorer page ───────────────────────────────────────────────────────

    def _generate_explorer(self) -> None:
        """Multi-source draft browser with search, filters, score sliders."""
        html = f"""\
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Explorer - Living Standards Observatory</title>
  <link rel="stylesheet" href="../assets/style.css">
</head>
<body>
{self._header_html("explorer")}
<div class="container">

  <div class="controls">
    <div class="controls-row">
      <input type="text" class="search-box" id="searchBox" placeholder="Search by name, title, summary, or keyword...">
      <select id="sourceFilter" style="padding:8px;border:1px solid var(--border);border-radius:6px;font-size:0.85rem">
        <option value="">All sources</option>
      </select>
      <div class="slider-group">Min score: <input type="range" id="minScore" min="1" max="5" step="0.1" value="1"><span class="slider-val" id="minScoreVal">1.0</span></div>
      <div class="slider-group">Min novelty: <input type="range" id="minNovelty" min="1" max="5" step="1" value="1"><span class="slider-val" id="minNoveltyVal">1</span></div>
      <div class="slider-group">Max overlap: <input type="range" id="maxOverlap" min="1" max="5" step="1" value="5"><span class="slider-val" id="maxOverlapVal">5</span></div>
      <button class="reset-btn" onclick="resetFilters()">Reset</button>
    </div>
    <div class="controls-row">
      <div class="chip-row" id="catChips"></div>
    </div>
  </div>

  <div class="result-count" id="resultCount"></div>

  <table>
    <thead>
      <tr>
        <th onclick="sortBy('score')" width="60">Score <span class="sort-arrow" id="sort-score"></span></th>
        <th onclick="sortBy('name')">Draft <span class="sort-arrow" id="sort-name"></span></th>
        <th onclick="sortBy('source')" width="60">Src <span class="sort-arrow" id="sort-source"></span></th>
        <th onclick="sortBy('date')" width="90">Date <span class="sort-arrow" id="sort-date"></span></th>
        <th onclick="sortBy('novelty')" width="30">N</th>
        <th onclick="sortBy('maturity')" width="30">M</th>
        <th onclick="sortBy('overlap')" width="30">O</th>
        <th onclick="sortBy('momentum')" width="30">Mom</th>
        <th onclick="sortBy('relevance')" width="30">R</th>
        <th>Categories</th>
      </tr>
    </thead>
    <tbody id="tableBody"></tbody>
  </table>

</div>

<script>
let DRAFTS = [];
let ALL_CATS = [];
let activeCats = new Set();
let sortField = 'score';
let sortAsc = false;
let expandedRow = null;

function escHtml(s) {{ const d = document.createElement('div'); d.textContent = s || ''; return d.innerHTML; }}
function scoreBadge(s) {{
  const cls = s >= 4.0 ? 'score-high' : s >= 3.0 ? 'score-mid' : 'score-low';
  return '<span class="score-badge ' + cls + '">' + s.toFixed(1) + '</span>';
}}
function dimBar(v) {{ return '<span class="bar" style="width:' + (v * 12) + 'px"></span> ' + v; }}

const searchBox = document.getElementById('searchBox');
const sourceFilter = document.getElementById('sourceFilter');
const minScore = document.getElementById('minScore');
const minNovelty = document.getElementById('minNovelty');
const maxOverlap = document.getElementById('maxOverlap');

searchBox.oninput = render;
sourceFilter.onchange = render;
minScore.oninput = () => {{ document.getElementById('minScoreVal').textContent = parseFloat(minScore.value).toFixed(1); render(); }};
minNovelty.oninput = () => {{ document.getElementById('minNoveltyVal').textContent = minNovelty.value; render(); }};
maxOverlap.oninput = () => {{ document.getElementById('maxOverlapVal').textContent = maxOverlap.value; render(); }};

function resetFilters() {{
  searchBox.value = '';
  sourceFilter.value = '';
  minScore.value = 1; document.getElementById('minScoreVal').textContent = '1.0';
  minNovelty.value = 1; document.getElementById('minNoveltyVal').textContent = '1';
  maxOverlap.value = 5; document.getElementById('maxOverlapVal').textContent = '5';
  activeCats.clear();
  document.querySelectorAll('.chip').forEach(c => c.classList.remove('active'));
  sortField = 'score'; sortAsc = false;
  render();
}}

function sortBy(field) {{
  if (sortField === field) sortAsc = !sortAsc;
  else {{ sortField = field; sortAsc = field === 'name' || field === 'date'; }}
  render();
}}

function cmp(a, b) {{
  let va = a[sortField], vb = b[sortField];
  if (typeof va === 'string') return sortAsc ? va.localeCompare(vb) : vb.localeCompare(va);
  return sortAsc ? va - vb : vb - va;
}}

function render() {{
  const q = searchBox.value.toLowerCase().trim();
  const src = sourceFilter.value;
  const ms = parseFloat(minScore.value);
  const mn = parseInt(minNovelty.value);
  const mo = parseInt(maxOverlap.value);

  let filtered = DRAFTS.filter(d => {{
    if (d.score < ms) return false;
    if (d.novelty < mn) return false;
    if (d.overlap > mo) return false;
    if (src && (d.source || 'ietf') !== src) return false;
    if (activeCats.size > 0 && !d.categories.some(c => activeCats.has(c))) return false;
    if (q) {{
      const hay = (d.name + ' ' + d.title + ' ' + d.summary + ' ' + d.categories.join(' ')).toLowerCase();
      const words = q.split(/\\s+/);
      if (!words.every(w => hay.includes(w))) return false;
    }}
    return true;
  }});

  filtered.sort(cmp);

  document.querySelectorAll('.sort-arrow').forEach(el => el.textContent = '');
  const arrow = document.getElementById('sort-' + sortField);
  if (arrow) arrow.textContent = sortAsc ? '\\u25B2' : '\\u25BC';

  const tbody = document.getElementById('tableBody');
  tbody.innerHTML = '';
  expandedRow = null;

  filtered.forEach(d => {{
    const tr = document.createElement('tr');
    tr.className = 'clickable';
    const srcClass = 'source-' + (d.source || 'ietf');
    tr.innerHTML =
      '<td>' + scoreBadge(d.score) + '</td>' +
      '<td style="max-width:300px"><a href="' + escHtml(d.url) + '" target="_blank" onclick="event.stopPropagation()" style="color:var(--accent);font-weight:500">' + escHtml(d.name) + '</a>' +
      '<br><span class="dim">' + escHtml(d.title.substring(0, 80)) + '</span></td>' +
      '<td><span class="source-badge ' + srcClass + '">' + (d.source || 'ietf').toUpperCase() + '</span></td>' +
      '<td class="dim">' + d.date + '</td>' +
      '<td>' + dimBar(d.novelty) + '</td>' +
      '<td>' + dimBar(d.maturity) + '</td>' +
      '<td>' + dimBar(d.overlap) + '</td>' +
      '<td>' + dimBar(d.momentum) + '</td>' +
      '<td>' + dimBar(d.relevance) + '</td>' +
      '<td>' + d.categories.map(c => '<span class="cat-badge">' + escHtml(c) + '</span>').join('') + '</td>';

    tr.onclick = () => toggleDetail(tr, d);
    tbody.appendChild(tr);
  }});

  document.getElementById('resultCount').textContent =
    'Showing ' + filtered.length + ' of ' + DRAFTS.length + ' drafts';
}}

function toggleDetail(tr, d) {{
  if (expandedRow) {{
    expandedRow.previousElementSibling?.classList.remove('expanded');
    expandedRow.remove();
    if (expandedRow._draftName === d.name) {{ expandedRow = null; return; }}
  }}
  tr.classList.add('expanded');
  const detail = document.createElement('tr');
  detail.className = 'detail-row';
  detail._draftName = d.name;
  function detailItem(label, score, note) {{
    return '<div class="detail-item"><strong>' + label + ':</strong> ' + score + '/5 ' +
      '<span class="bar" style="width:' + (score * 16) + 'px"></span>' +
      (note ? '<div class="note">' + escHtml(note) + '</div>' : '') + '</div>';
  }}
  detail.innerHTML = '<td colspan="10">' +
    '<div class="summary-text"><strong>Summary:</strong> ' + escHtml(d.summary) + '</div>' +
    '<div class="detail-grid" style="margin-top:10px">' +
    detailItem('Novelty', d.novelty, d.novelty_note) +
    detailItem('Maturity', d.maturity, d.maturity_note) +
    detailItem('Overlap', d.overlap, d.overlap_note) +
    detailItem('Momentum', d.momentum, d.momentum_note) +
    detailItem('Relevance', d.relevance, d.relevance_note) +
    '<div class="detail-item"><strong>Source:</strong> ' + (d.source || 'ietf').toUpperCase() + ' · <strong>Pages:</strong> ' + d.pages + '</div>' +
    '</div>' +
    '<div style="margin-top:8px"><a href="' + escHtml(d.url) + '" target="_blank" style="color:var(--accent)">Open document \\u2192</a></div>' +
    '</td>';
  tr.after(detail);
  expandedRow = detail;
}}

async function init() {{
  DRAFTS = await fetch('../data/drafts.json').then(r => r.json());

  // Build categories
  const catSet = new Set();
  const sources = new Set();
  DRAFTS.forEach(d => {{
    d.categories.forEach(c => catSet.add(c));
    sources.add(d.source || 'ietf');
  }});
  ALL_CATS = [...catSet].sort();

  // Source filter options
  sources.forEach(s => {{
    const opt = document.createElement('option');
    opt.value = s;
    opt.textContent = s.toUpperCase();
    sourceFilter.appendChild(opt);
  }});

  // Category chips
  const chipBox = document.getElementById('catChips');
  ALL_CATS.forEach(cat => {{
    const el = document.createElement('span');
    el.className = 'chip';
    const count = DRAFTS.filter(d => d.categories.includes(cat)).length;
    el.innerHTML = escHtml(cat) + '<span style="font-size:0.65rem;opacity:0.7;margin-left:2px">(' + count + ')</span>';
    el.onclick = () => {{
      if (activeCats.has(cat)) {{ activeCats.delete(cat); el.classList.remove('active'); }}
      else {{ activeCats.add(cat); el.classList.add('active'); }}
      render();
    }};
    chipBox.appendChild(el);
  }});

  render();
}}
init();
</script>
</body>
</html>"""
        (self.output_dir / "observatory" / "explorer.html").write_text(html)

    # ── Gaps page ───────────────────────────────────────────────────────────

    def _generate_gaps_page(self) -> None:
        """Gap tracker with fill-status over time."""
        html = f"""\
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Gaps - Living Standards Observatory</title>
  <link rel="stylesheet" href="../assets/style.css">
</head>
<body>
{self._header_html("gaps")}
<div class="container">

  <h2 style="margin-bottom:16px">Coverage Gaps</h2>
  <p class="dim" style="margin-bottom:20px">Areas, problems, or technical challenges not adequately addressed by existing standards documents.</p>

  <div class="controls">
    <div class="controls-row">
      <select id="sevFilter" style="padding:8px;border:1px solid var(--border);border-radius:6px;font-size:0.85rem">
        <option value="">All severities</option>
        <option value="critical">Critical</option>
        <option value="high">High</option>
        <option value="medium">Medium</option>
        <option value="low">Low</option>
      </select>
      <input type="text" class="search-box" id="gapSearch" placeholder="Filter gaps..." style="max-width:400px">
    </div>
  </div>

  <div id="gapsList"></div>

  <h2 style="margin:32px 0 16px">Gap History</h2>
  <p class="dim" style="margin-bottom:20px">How gaps have evolved across observatory snapshots.</p>
  <div class="panel">
    <table>
      <thead>
        <tr><th>Snapshot</th><th>Topic</th><th>Severity</th><th>Status</th></tr>
      </thead>
      <tbody id="historyBody"></tbody>
    </table>
  </div>

</div>

<script>
function escHtml(s) {{ const d = document.createElement('div'); d.textContent = s || ''; return d.innerHTML; }}

let GAPS_DATA = null;

function renderGaps() {{
  const sev = document.getElementById('sevFilter').value;
  const q = document.getElementById('gapSearch').value.toLowerCase().trim();
  const list = document.getElementById('gapsList');
  list.innerHTML = '';

  let current = GAPS_DATA.current || [];
  if (sev) current = current.filter(g => g.severity === sev);
  if (q) current = current.filter(g => (g.topic + ' ' + g.description + ' ' + (g.category || '')).toLowerCase().includes(q));

  if (current.length === 0) {{
    list.innerHTML = '<p class="dim" style="padding:16px">No gaps match the current filters.</p>';
    return;
  }}

  const order = {{'critical': 0, 'high': 1, 'medium': 2, 'low': 3}};
  current.sort((a, b) => (order[a.severity] || 2) - (order[b.severity] || 2));

  current.forEach(g => {{
    const cls = (g.severity === 'critical' || g.severity === 'high') ? g.severity : '';
    list.innerHTML +=
      '<div class="gap-card ' + cls + '">' +
      '<h3>' + escHtml(g.topic) + '</h3>' +
      '<p>' + escHtml(g.description) + '</p>' +
      '<div class="meta">' +
      '<span class="sev-' + g.severity + '">' + (g.severity || 'medium').toUpperCase() + '</span>' +
      (g.category ? ' · ' + escHtml(g.category) : '') +
      (g.evidence ? '<br><em>' + escHtml(g.evidence) + '</em>' : '') +
      '</div></div>';
  }});
}}

async function init() {{
  GAPS_DATA = await fetch('../data/gaps.json').then(r => r.json());

  document.getElementById('sevFilter').onchange = renderGaps;
  document.getElementById('gapSearch').oninput = renderGaps;
  renderGaps();

  // History table
  const history = GAPS_DATA.history || [];
  const tbody = document.getElementById('historyBody');
  if (history.length === 0) {{
    tbody.innerHTML = '<tr><td colspan="4" class="dim">No history recorded yet.</td></tr>';
  }} else {{
    history.slice(-50).reverse().forEach(h => {{
      const tr = document.createElement('tr');
      tr.innerHTML =
        '<td class="dim">' + (h.snapshot_at || h.recorded_at || '').substring(0, 10) + '</td>' +
        '<td>' + escHtml(h.gap_topic) + '</td>' +
        '<td><span class="sev-' + (h.severity || 'medium') + '">' + (h.severity || 'medium').toUpperCase() + '</span></td>' +
        '<td>' + escHtml(h.status || 'open') + '</td>';
      tbody.appendChild(tr);
    }});
  }}
}}
init();
</script>
</body>
</html>"""
        (self.output_dir / "observatory" / "gaps.html").write_text(html)

    # ── Timeline page ───────────────────────────────────────────────────────

    def _generate_timeline_page(self) -> None:
        """Submission timeline across sources."""
        html = f"""\
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Timeline - Living Standards Observatory</title>
  <link rel="stylesheet" href="../assets/style.css">
  <style>
    .tl-row {{ display: flex; align-items: center; gap: 8px; padding: 6px 0; border-bottom: 1px solid #f0f0f0; }}
    .tl-month {{ min-width: 80px; font-size: 0.82rem; color: var(--text-dim); font-family: monospace; }}
    .tl-bars {{ flex: 1; display: flex; gap: 1px; align-items: center; }}
    .tl-count {{ min-width: 30px; text-align: right; font-size: 0.78rem; color: var(--text-dim); }}
    .legend {{ display: flex; gap: 16px; flex-wrap: wrap; margin-bottom: 16px; }}
    .legend-item {{ display: flex; align-items: center; gap: 4px; font-size: 0.8rem; }}
    .legend-swatch {{ width: 14px; height: 14px; border-radius: 3px; }}
    .view-toggle {{ display: flex; gap: 8px; margin-bottom: 16px; }}
    .view-btn {{ padding: 6px 16px; border: 1px solid var(--border); border-radius: 6px; background: var(--card-bg); cursor: pointer; font-size: 0.82rem; }}
    .view-btn.active {{ background: var(--accent); color: #fff; border-color: var(--accent); }}
  </style>
</head>
<body>
{self._header_html("timeline")}
<div class="container">

  <h2 style="margin-bottom:8px">Submission Timeline</h2>
  <p class="dim" style="margin-bottom:20px">Monthly document submissions across standards bodies and categories.</p>

  <div class="view-toggle">
    <button class="view-btn active" id="btnSource" onclick="setView('source')">By Source</button>
    <button class="view-btn" id="btnCategory" onclick="setView('category')">By Category</button>
  </div>

  <div class="legend" id="legend"></div>

  <div class="chart-container" id="timeline"></div>

  <div class="panel">
    <div class="panel-header">Monthly Totals</div>
    <table>
      <thead><tr><th>Month</th><th>Total</th><th id="breakdownHeader">By Source</th></tr></thead>
      <tbody id="monthTable"></tbody>
    </table>
  </div>

</div>

<script>
function escHtml(s) {{ const d = document.createElement('div'); d.textContent = s || ''; return d.innerHTML; }}

const COLORS_SOURCE = {{'ietf': '#4a6cf7', 'w3c': '#ef4444', 'ieee': '#10b981', 'other': '#9ca3af'}};
const COLORS_CAT = [
  '#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A',
  '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52',
  '#7C8CF5', '#FF8C69', '#66CDAA', '#BA55D3', '#FFD700',
];

let TL_DATA = null;
let currentView = 'source';

function setView(view) {{
  currentView = view;
  document.getElementById('btnSource').className = 'view-btn' + (view === 'source' ? ' active' : '');
  document.getElementById('btnCategory').className = 'view-btn' + (view === 'category' ? ' active' : '');
  document.getElementById('breakdownHeader').textContent = view === 'source' ? 'By Source' : 'By Category';
  renderTimeline();
}}

function renderTimeline() {{
  if (!TL_DATA) return;
  const months = TL_DATA.months;
  const isSource = currentView === 'source';
  const dataMap = isSource ? TL_DATA.by_source : TL_DATA.by_category;
  const keys = isSource ? TL_DATA.sources : TL_DATA.categories;

  // Assign colors
  const colorMap = {{}};
  if (isSource) {{
    keys.forEach(k => {{ colorMap[k] = COLORS_SOURCE[k] || '#9ca3af'; }});
  }} else {{
    keys.forEach((k, i) => {{ colorMap[k] = COLORS_CAT[i % COLORS_CAT.length]; }});
  }}

  // Max for scaling
  let maxTotal = 0;
  months.forEach(m => {{
    const d = dataMap[m] || {{}};
    let t = 0;
    keys.forEach(k => {{ t += d[k] || 0; }});
    if (t > maxTotal) maxTotal = t;
  }});
  const scale = maxTotal > 0 ? 500 / maxTotal : 1;

  // Legend
  const legendEl = document.getElementById('legend');
  legendEl.innerHTML = '';
  keys.forEach(k => {{
    legendEl.innerHTML += '<div class="legend-item"><div class="legend-swatch" style="background:' + colorMap[k] + '"></div>' + escHtml(k) + '</div>';
  }});

  // Chart
  const container = document.getElementById('timeline');
  container.innerHTML = '';
  months.forEach(m => {{
    const d = dataMap[m] || {{}};
    let total = 0;
    keys.forEach(k => {{ total += d[k] || 0; }});

    let barsHtml = '';
    keys.forEach(k => {{
      const v = d[k] || 0;
      if (v > 0) {{
        const w = Math.max(v * scale, 2);
        barsHtml += '<div class="tl-bar" style="width:' + w + 'px;background:' + colorMap[k] + '" title="' + escHtml(k) + ': ' + v + '"></div>';
      }}
    }});

    container.innerHTML += '<div class="tl-row"><span class="tl-month">' + m + '</span><div class="tl-bars">' + barsHtml + '</div><span class="tl-count">' + total + '</span></div>';
  }});

  // Table
  const tbody = document.getElementById('monthTable');
  tbody.innerHTML = '';
  [...months].reverse().forEach(m => {{
    const d = dataMap[m] || {{}};
    let total = 0;
    const parts = [];
    keys.forEach(k => {{
      const v = d[k] || 0;
      total += v;
      if (v > 0) parts.push(k + ': ' + v);
    }});
    if (total > 0) {{
      const tr = document.createElement('tr');
      tr.innerHTML = '<td class="dim">' + m + '</td><td>' + total + '</td><td class="dim">' + parts.join(', ') + '</td>';
      tbody.appendChild(tr);
    }}
  }});
}}

async function init() {{
  TL_DATA = await fetch('../data/timeline.json').then(r => r.json());
  renderTimeline();
}}
init();
</script>
</body>
</html>"""
        (self.output_dir / "observatory" / "timeline.html").write_text(html)
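Regenerating the site locally then comes down to the constructor and generate() shown above (assuming the package is importable as ietf_analyzer):

from ietf_analyzer.dashboard import DashboardGenerator

# Config and Database are loaded with defaults when omitted (see __init__ above).
site = DashboardGenerator().generate()
print(site)  # docs/ with index, explorer, gaps, and timeline pages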
@@ -10,7 +10,7 @@ from pathlib import Path
import numpy as np

from .config import Config
-from .models import Author, Draft, Rating
+from .models import Author, Draft, Rating, normalize_category

SCHEMA = """
CREATE TABLE IF NOT EXISTS drafts (
@@ -117,6 +117,73 @@ CREATE TABLE IF NOT EXISTS gaps (
    analyzed_at TEXT
);

-- Cross-references (RFC, draft, BCP references found in draft text)
CREATE TABLE IF NOT EXISTS draft_refs (
    draft_name TEXT NOT NULL REFERENCES drafts(name),
    ref_type TEXT NOT NULL,  -- 'rfc', 'draft', 'bcp'
    ref_id TEXT NOT NULL,    -- e.g. '8259', 'draft-ietf-httpbis-semantics', 'BCP14'
    UNIQUE(draft_name, ref_type, ref_id)
);

CREATE INDEX IF NOT EXISTS idx_draft_refs_ref ON draft_refs(ref_type, ref_id);

-- Generated drafts from gap-to-draft pipeline
CREATE TABLE IF NOT EXISTS generated_drafts (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    gap_topic TEXT NOT NULL,
    draft_name TEXT NOT NULL,
    title TEXT NOT NULL,
    abstract TEXT NOT NULL DEFAULT '',
    outline_json TEXT DEFAULT '{}',
    sections_json TEXT DEFAULT '[]',
    full_text TEXT,
    family_name TEXT DEFAULT '',
    family_role TEXT DEFAULT '',
    version INTEGER DEFAULT 0,
    rating_json TEXT DEFAULT '{}',
    novelty_score REAL DEFAULT 0.0,
    quality_score REAL DEFAULT 0.0,
    status TEXT DEFAULT 'draft',
    created_at TEXT
);

CREATE TABLE IF NOT EXISTS generation_runs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    family_name TEXT DEFAULT '',
    gap_ids TEXT DEFAULT '[]',
    total_input_tokens INTEGER DEFAULT 0,
    total_output_tokens INTEGER DEFAULT 0,
    model_used TEXT DEFAULT '',
    status TEXT DEFAULT 'running',
    started_at TEXT,
    completed_at TEXT
);

-- Observatory tables
CREATE TABLE IF NOT EXISTS sources (
    name TEXT PRIMARY KEY,
    last_fetch TEXT,
    doc_count INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS observatory_snapshots (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    snapshot_at TEXT NOT NULL,
    total_docs INTEGER DEFAULT 0,
    new_since_last INTEGER DEFAULT 0,
    changed_gaps INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS gap_history (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    snapshot_id INTEGER REFERENCES observatory_snapshots(id),
    gap_topic TEXT NOT NULL,
    gap_description TEXT NOT NULL,
    severity TEXT DEFAULT 'medium',
    status TEXT DEFAULT 'open',
    recorded_at TEXT
);

-- Triggers to keep FTS index in sync
CREATE TRIGGER IF NOT EXISTS drafts_ai AFTER INSERT ON drafts BEGIN
    INSERT INTO drafts_fts(rowid, name, title, abstract, full_text)
@@ -152,8 +219,23 @@ class Database:
        self._conn.execute("PRAGMA journal_mode=WAL")
        self._conn.execute("PRAGMA foreign_keys=ON")
        self._conn.executescript(SCHEMA)
+       self._migrate_schema()
        return self._conn

+   def _migrate_schema(self) -> None:
+       """Additive migration — add columns if missing."""
+       cols = {r[1] for r in self._conn.execute("PRAGMA table_info(drafts)").fetchall()}
+       migrations = [
+           ("source", "TEXT DEFAULT 'ietf'"),
+           ("source_id", "TEXT DEFAULT ''"),
+           ("source_url", "TEXT DEFAULT ''"),
+           ("doc_status", "TEXT DEFAULT ''"),
+       ]
+       for col, typedef in migrations:
+           if col not in cols:
+               self._conn.execute(f"ALTER TABLE drafts ADD COLUMN {col} {typedef}")
+       self._conn.commit()
+
    def close(self) -> None:
        if self._conn:
            self._conn.close()
@@ -303,7 +385,7 @@ class Database:
                novelty_note=r["novelty_note"], maturity_note=r["maturity_note"],
                overlap_note=r["overlap_note"], momentum_note=r["momentum_note"],
                relevance_note=r["relevance_note"],
-               categories=json.loads(r["r_categories"]) if r["r_categories"] else [],
+               categories=[normalize_category(c) for c in json.loads(r["r_categories"])] if r["r_categories"] else [],
                rated_at=r["rated_at"],
            )
            results.append((draft, rating))
@@ -503,6 +585,30 @@ class Database:
        ).fetchall()
        return [(r["org_a"], r["org_b"], r["shared"]) for r in rows]

+   def org_data_raw(self) -> list[tuple[str, int, str]]:
+       """Return (affiliation, person_id, draft_name) for all draft_authors with affiliation."""
+       rows = self.conn.execute(
+           "SELECT affiliation, person_id, draft_name FROM draft_authors WHERE affiliation != ''"
+       ).fetchall()
+       return [(r[0], r[1], r[2]) for r in rows]
+
+   def author_draft_counts(self) -> dict[int, int]:
+       """Return {person_id: draft_count} for all authors."""
+       rows = self.conn.execute(
+           "SELECT person_id, COUNT(*) FROM draft_authors GROUP BY person_id"
+       ).fetchall()
+       return {r[0]: r[1] for r in rows}
+
+   def author_draft_sets(self) -> dict[int, set[str]]:
+       """Return {person_id: set(draft_names)} for all authors."""
+       rows = self.conn.execute(
+           "SELECT person_id, draft_name FROM draft_authors"
+       ).fetchall()
+       result: dict[int, set[str]] = {}
+       for r in rows:
+           result.setdefault(r[0], set()).add(r[1])
+       return result
+
    # --- Ideas ---

    def insert_ideas(self, draft_name: str, ideas: list[dict]) -> None:
@@ -529,7 +635,9 @@ class Database:
        rows = self.conn.execute(
            """SELECT d.name FROM drafts d
               LEFT JOIN ideas i ON d.name = i.draft_name
-              WHERE i.draft_name IS NULL
+              LEFT JOIN llm_cache lc ON d.name = lc.draft_name
+                  AND lc.request_json LIKE 'batch-ideas[%'
+              WHERE i.draft_name IS NULL AND lc.draft_name IS NULL
               LIMIT ?""",
            (limit,),
        ).fetchall()
@@ -565,6 +673,314 @@ class Database:
|
||||
"category": r["category"], "evidence": r["evidence"],
|
||||
"severity": r["severity"]} for r in rows]
|
||||
|
||||
# --- Refs ---
|
||||
|
||||
def insert_refs(self, draft_name: str, refs: list[tuple[str, str]]) -> None:
|
||||
"""Insert cross-references for a draft. refs = [(ref_type, ref_id), ...]."""
|
||||
for ref_type, ref_id in refs:
|
||||
self.conn.execute(
|
||||
"""INSERT OR IGNORE INTO draft_refs (draft_name, ref_type, ref_id)
|
||||
VALUES (?, ?, ?)""",
|
||||
(draft_name, ref_type, ref_id),
|
||||
)
|
||||
self.conn.commit()
|
||||
|
||||
def get_refs_for_draft(self, draft_name: str) -> list[tuple[str, str]]:
|
||||
"""Return [(ref_type, ref_id)] for a draft."""
|
||||
rows = self.conn.execute(
|
||||
"SELECT ref_type, ref_id FROM draft_refs WHERE draft_name = ?",
|
||||
(draft_name,),
|
||||
).fetchall()
|
||||
return [(r["ref_type"], r["ref_id"]) for r in rows]
|
||||
|
||||
def top_referenced(self, ref_type: str = "rfc", limit: int = 30) -> list[tuple[str, int, list[str]]]:
|
||||
"""Return (ref_id, count, [draft_names]) for most-referenced items."""
|
||||
rows = self.conn.execute(
|
||||
"""SELECT ref_id, COUNT(*) as cnt,
|
||||
GROUP_CONCAT(draft_name, '||') as drafts
|
||||
FROM draft_refs
|
||||
WHERE ref_type = ?
|
||||
GROUP BY ref_id
|
||||
ORDER BY cnt DESC
|
||||
LIMIT ?""",
|
||||
(ref_type, limit),
|
||||
).fetchall()
|
||||
return [
|
||||
(r["ref_id"], r["cnt"], r["drafts"].split("||") if r["drafts"] else [])
|
||||
for r in rows
|
||||
]
|
||||
|
||||
def drafts_referencing(self, ref_type: str, ref_id: str) -> list[str]:
|
||||
"""Return draft names that reference a specific RFC/draft/BCP."""
|
||||
rows = self.conn.execute(
|
||||
"SELECT draft_name FROM draft_refs WHERE ref_type = ? AND ref_id = ?",
|
||||
(ref_type, ref_id),
|
||||
).fetchall()
|
||||
return [r["draft_name"] for r in rows]
|
||||
|
||||
def ref_counts_by_draft(self) -> list[tuple[str, int, int, int]]:
|
||||
"""Return (draft_name, rfc_count, draft_count, bcp_count) for all drafts with refs."""
|
||||
rows = self.conn.execute(
|
||||
"""SELECT draft_name,
|
||||
SUM(CASE WHEN ref_type = 'rfc' THEN 1 ELSE 0 END) as rfcs,
|
||||
SUM(CASE WHEN ref_type = 'draft' THEN 1 ELSE 0 END) as drafts,
|
||||
SUM(CASE WHEN ref_type = 'bcp' THEN 1 ELSE 0 END) as bcps
|
||||
FROM draft_refs
|
||||
GROUP BY draft_name
|
||||
ORDER BY rfcs DESC"""
|
||||
).fetchall()
|
||||
return [(r["draft_name"], r["rfcs"], r["drafts"], r["bcps"]) for r in rows]
|
||||
|
||||
def drafts_without_refs(self, limit: int = 500) -> list[str]:
|
||||
"""Return draft names that have full_text but no refs extracted yet."""
|
||||
rows = self.conn.execute(
|
||||
"""SELECT d.name FROM drafts d
|
||||
LEFT JOIN draft_refs dr ON d.name = dr.draft_name
|
||||
WHERE d.full_text IS NOT NULL AND dr.draft_name IS NULL
|
||||
LIMIT ?""",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [r["name"] for r in rows]
|
||||
|
||||
def ref_stats(self) -> dict:
|
||||
"""Return summary stats for refs table."""
|
||||
row = self.conn.execute(
|
||||
"""SELECT COUNT(DISTINCT draft_name) as drafts_with_refs,
|
||||
COUNT(*) as total_refs,
|
||||
SUM(CASE WHEN ref_type = 'rfc' THEN 1 ELSE 0 END) as rfc_refs,
|
||||
SUM(CASE WHEN ref_type = 'draft' THEN 1 ELSE 0 END) as draft_refs,
|
||||
SUM(CASE WHEN ref_type = 'bcp' THEN 1 ELSE 0 END) as bcp_refs,
|
||||
COUNT(DISTINCT ref_id) as unique_refs
|
||||
FROM draft_refs"""
|
||||
).fetchone()
|
||||
return dict(row)
|
||||
|
||||
    # --- Generated Drafts ---

    def upsert_generated_draft(self, data: dict) -> int:
        """Insert or update a generated draft. Returns row id."""
        now = datetime.now(timezone.utc).isoformat()
        existing = self.conn.execute(
            "SELECT id FROM generated_drafts WHERE draft_name = ? AND version = ?",
            (data["draft_name"], data.get("version", 0)),
        ).fetchone()
        if existing:
            self.conn.execute(
                """UPDATE generated_drafts SET
                       gap_topic=?, title=?, abstract=?, outline_json=?,
                       sections_json=?, full_text=?, family_name=?, family_role=?,
                       rating_json=?, novelty_score=?, quality_score=?, status=?
                   WHERE id=?""",
                (data["gap_topic"], data["title"], data.get("abstract", ""),
                 json.dumps(data.get("outline", {})), json.dumps(data.get("sections", [])),
                 data.get("full_text"), data.get("family_name", ""),
                 data.get("family_role", ""), json.dumps(data.get("rating", {})),
                 data.get("novelty_score", 0.0), data.get("quality_score", 0.0),
                 data.get("status", "draft"), existing["id"]),
            )
            self.conn.commit()
            return existing["id"]
        else:
            cur = self.conn.execute(
                """INSERT INTO generated_drafts
                       (gap_topic, draft_name, title, abstract, outline_json, sections_json,
                        full_text, family_name, family_role, version, rating_json,
                        novelty_score, quality_score, status, created_at)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (data["gap_topic"], data["draft_name"], data["title"],
                 data.get("abstract", ""), json.dumps(data.get("outline", {})),
                 json.dumps(data.get("sections", [])), data.get("full_text"),
                 data.get("family_name", ""), data.get("family_role", ""),
                 data.get("version", 0), json.dumps(data.get("rating", {})),
                 data.get("novelty_score", 0.0), data.get("quality_score", 0.0),
                 data.get("status", "draft"), now),
            )
            self.conn.commit()
            return cur.lastrowid

    def get_generated_drafts(self, status: str | None = None) -> list[dict]:
        query = "SELECT * FROM generated_drafts"
        params: list = []
        if status:
            query += " WHERE status = ?"
            params.append(status)
        query += " ORDER BY created_at DESC"
        rows = self.conn.execute(query, params).fetchall()
        return [dict(r) for r in rows]

    def get_generated_draft(self, draft_id: int) -> dict | None:
        row = self.conn.execute(
            "SELECT * FROM generated_drafts WHERE id = ?", (draft_id,)
        ).fetchone()
        return dict(row) if row else None

    def get_family_drafts(self, family_name: str) -> list[dict]:
        rows = self.conn.execute(
            "SELECT * FROM generated_drafts WHERE family_name = ? ORDER BY family_role",
            (family_name,),
        ).fetchall()
        return [dict(r) for r in rows]

    def log_generation_run(self, data: dict) -> int:
        now = datetime.now(timezone.utc).isoformat()
        cur = self.conn.execute(
            """INSERT INTO generation_runs
                   (family_name, gap_ids, total_input_tokens, total_output_tokens,
                    model_used, status, started_at)
               VALUES (?, ?, ?, ?, ?, ?, ?)""",
            (data.get("family_name", ""), json.dumps(data.get("gap_ids", [])),
             data.get("total_input_tokens", 0), data.get("total_output_tokens", 0),
             data.get("model_used", ""), data.get("status", "running"), now),
        )
        self.conn.commit()
        return cur.lastrowid

    def update_generation_run(self, run_id: int, **kwargs) -> None:
        # Column names are interpolated into the SQL, so kwargs must come from
        # trusted internal callers only; values are bound as parameters.
        sets = []
        params = []
        for k, v in kwargs.items():
            sets.append(f"{k} = ?")
            params.append(v)
        if not sets:
            return
        params.append(run_id)
        self.conn.execute(
            f"UPDATE generation_runs SET {', '.join(sets)} WHERE id = ?", params
        )
        self.conn.commit()

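The intended run-logging flow, as a hedged sketch (field names follow the schema above; the model name and the elided generation step are placeholders):

    run_id = db.log_generation_run({
        "family_name": "agent-ecosystem",
        "gap_ids": ["AEM", "ATD"],
        "model_used": "claude-model-placeholder",  # hypothetical value
        "status": "running",
    })
    # ... generate drafts ...
    db.update_generation_run(run_id, status="completed")
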
    # --- Observatory ---

    def upsert_source(self, name: str, doc_count: int = 0) -> None:
        now = datetime.now(timezone.utc).isoformat()
        self.conn.execute(
            """INSERT INTO sources (name, last_fetch, doc_count)
               VALUES (?, ?, ?)
               ON CONFLICT(name) DO UPDATE SET last_fetch=excluded.last_fetch, doc_count=excluded.doc_count""",
            (name, now, doc_count),
        )
        self.conn.commit()

    def get_source(self, name: str) -> dict | None:
        row = self.conn.execute("SELECT * FROM sources WHERE name = ?", (name,)).fetchone()
        return dict(row) if row else None

    def all_sources(self) -> list[dict]:
        rows = self.conn.execute("SELECT * FROM sources ORDER BY name").fetchall()
        return [dict(r) for r in rows]

    def create_snapshot(self) -> int:
        now = datetime.now(timezone.utc).isoformat()
        total = self.count_drafts()
        # Count new since last snapshot
        last = self.conn.execute(
            "SELECT snapshot_at FROM observatory_snapshots ORDER BY id DESC LIMIT 1"
        ).fetchone()
        if last:
            new_count = self.conn.execute(
                "SELECT COUNT(*) FROM drafts WHERE fetched_at > ?", (last["snapshot_at"],)
            ).fetchone()[0]
        else:
            new_count = total
        cur = self.conn.execute(
            """INSERT INTO observatory_snapshots (snapshot_at, total_docs, new_since_last, changed_gaps)
               VALUES (?, ?, ?, 0)""",
            (now, total, new_count),
        )
        self.conn.commit()
        return cur.lastrowid

    def record_gap_history(self, snapshot_id: int, gaps: list[dict]) -> None:
        now = datetime.now(timezone.utc).isoformat()
        for g in gaps:
            self.conn.execute(
                """INSERT INTO gap_history (snapshot_id, gap_topic, gap_description, severity, status, recorded_at)
                   VALUES (?, ?, ?, ?, ?, ?)""",
                (snapshot_id, g["topic"], g["description"],
                 g.get("severity", "medium"), g.get("status", "open"), now),
            )
        self.conn.commit()

    def gap_history_timeline(self) -> list[dict]:
        rows = self.conn.execute(
            """SELECT gh.*, os.snapshot_at FROM gap_history gh
               JOIN observatory_snapshots os ON gh.snapshot_id = os.id
               ORDER BY os.snapshot_at, gh.gap_topic"""
        ).fetchall()
        return [dict(r) for r in rows]

    def get_snapshots(self, limit: int = 20) -> list[dict]:
        rows = self.conn.execute(
            "SELECT * FROM observatory_snapshots ORDER BY id DESC LIMIT ?", (limit,)
        ).fetchall()
        return [dict(r) for r in rows]

    def drafts_by_source(self, source: str, limit: int = 500) -> list[Draft]:
        rows = self.conn.execute(
            "SELECT * FROM drafts WHERE source = ? ORDER BY time DESC LIMIT ?",
            (source, limit),
        ).fetchall()
        return [self._row_to_draft(r) for r in rows]

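A sketch of the snapshot-then-record pattern these helpers support; the gap dict keys match record_gap_history above, but the example gap itself is invented:

    snap_id = db.create_snapshot()
    db.record_gap_history(snap_id, [
        {"topic": "example gap", "description": "illustrative only",
         "severity": "low", "status": "open"},
    ])
    timeline = db.gap_history_timeline()
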
    # --- WG/Status ---

    def draft_adoption_status(self) -> list[dict]:
        """Return adoption status for all drafts based on naming convention.

        Returns list of dicts: {name, title, time, wg_adopted, wg_name, stream}
        """
        import re

        rows = self.conn.execute(
            "SELECT name, title, time FROM drafts"
        ).fetchall()
        results = []
        for r in rows:
            name = r["name"]
            wg_adopted = False
            wg_name = ""
            stream = "individual"

            # Primary signal: draft-ietf-{wg}-* naming convention
            m = re.match(r'^draft-ietf-(\w+)-', name)
            if m:
                wg_adopted = True
                wg_name = m.group(1)
                stream = "ietf"
            elif name.startswith("draft-irtf-"):
                m2 = re.match(r'^draft-irtf-(\w+)-', name)
                wg_name = m2.group(1) if m2 else ""
                stream = "irtf"

            results.append({
                "name": name,
                "title": r["title"],
                "time": r["time"],
                "wg_adopted": wg_adopted,
                "wg_name": wg_name,
                "stream": stream,
            })
        return results

    def revision_velocity(self) -> list[dict]:
        """Return revision data for all drafts.

        Returns list of dicts: {name, title, time, rev, rev_int}
        """
        rows = self.conn.execute(
            "SELECT name, title, time, rev FROM drafts"
        ).fetchall()
        return [
            {
                "name": r["name"],
                "title": r["title"],
                "time": r["time"],
                "rev": r["rev"],
                # Guard against NULL revs before isdigit()
                "rev_int": int(r["rev"]) if r["rev"] and r["rev"].isdigit() else 0,
            }
            for r in rows
        ]

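For orientation, a two-line sketch of the adoption-rate computation this query enables:

    status = db.draft_adoption_status()
    adopted = sum(1 for d in status if d["wg_adopted"])
    print(f"{adopted}/{len(status)} drafts are WG-adopted")
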
    # --- Helpers ---

    @staticmethod
@@ -580,11 +996,16 @@ class Database:
            categories=json.loads(d.get("categories") or "[]"),
            tags=json.loads(d.get("tags") or "[]"),
            fetched_at=d.get("fetched_at"),
            source=d.get("source", "ietf"),
            source_id=d.get("source_id", ""),
            source_url=d.get("source_url", ""),
            doc_status=d.get("doc_status", ""),
        )

    @staticmethod
    def _row_to_rating(row: sqlite3.Row) -> Rating:
        d = dict(row)
        raw_cats = json.loads(d.get("categories") or "[]")
        return Rating(
            draft_name=d["draft_name"], novelty=d["novelty"], maturity=d["maturity"],
            overlap=d["overlap"], momentum=d["momentum"], relevance=d["relevance"],
@@ -594,6 +1015,6 @@ class Database:
            overlap_note=d.get("overlap_note", ""),
            momentum_note=d.get("momentum_note", ""),
            relevance_note=d.get("relevance_note", ""),
            categories=json.loads(d.get("categories") or "[]"),
            categories=[normalize_category(c) for c in raw_cats],
            rated_at=d.get("rated_at"),
        )

@@ -5,6 +5,24 @@ from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime

# Map old verbose category names to canonical short names
CATEGORY_NORMALIZE: dict[str, str] = {
    "Agent-to-agent communication protocols": "A2A protocols",
    "AI safety / guardrails / alignment": "AI safety/alignment",
    "ML-based traffic management / optimization": "ML traffic mgmt",
    "Autonomous network operations": "Autonomous netops",
    "Identity / authentication for AI agents": "Agent identity/auth",
    "Data formats / semantics for AI interop": "Data formats/interop",
    "Policy / governance / ethical frameworks": "Policy/governance",
    "AI model serving / inference protocols": "Model serving/inference",
    "Agent discovery / registration": "Agent discovery/reg",
}


def normalize_category(cat: str) -> str:
    """Normalize a category name to its canonical short form."""
    return CATEGORY_NORMALIZE.get(cat, cat)


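The mapping is a plain dict lookup with pass-through for unknown names:

    normalize_category("Agent discovery / registration")  # -> "Agent discovery/reg"
    normalize_category("Something new")  # -> "Something new" (unchanged)
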
@dataclass
class Author:
@@ -36,6 +54,10 @@ class Draft:
    categories: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    fetched_at: str | None = None
    source: str = "ietf"
    source_id: str = ""
    source_url: str = ""
    doc_status: str = ""

    @property
    def text_url(self) -> str:

src/ietf_analyzer/observatory.py (new file)
@@ -0,0 +1,286 @@
"""Observatory — orchestrates periodic update cycles across sources."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from rich.console import Console
|
||||
from rich.progress import (
|
||||
BarColumn,
|
||||
MofNCompleteColumn,
|
||||
Progress,
|
||||
SpinnerColumn,
|
||||
TextColumn,
|
||||
)
|
||||
|
||||
from .config import Config
|
||||
from .db import Database
|
||||
from .models import Draft
|
||||
from .sources import get_fetcher
|
||||
from .sources.base import SourceDocument
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
def _doc_to_draft(doc: SourceDocument) -> Draft:
|
||||
"""Convert a SourceDocument to a Draft for DB storage."""
|
||||
extra = doc.extra or {}
|
||||
return Draft(
|
||||
name=doc.name,
|
||||
rev=extra.get("rev", "00"),
|
||||
title=doc.title,
|
||||
abstract=doc.abstract,
|
||||
time=doc.time,
|
||||
dt_id=int(doc.source_id) if doc.source_id and doc.source_id.isdigit() else None,
|
||||
pages=extra.get("pages"),
|
||||
words=extra.get("words"),
|
||||
group=extra.get("group"),
|
||||
group_uri=extra.get("group_uri"),
|
||||
expires=extra.get("expires"),
|
||||
ad=extra.get("ad"),
|
||||
shepherd=extra.get("shepherd"),
|
||||
states=extra.get("states", []),
|
||||
full_text=doc.full_text,
|
||||
fetched_at=datetime.now(timezone.utc).isoformat(),
|
||||
source=doc.source,
|
||||
source_id=doc.source_id,
|
||||
source_url=doc.source_url,
|
||||
doc_status=doc.doc_status,
|
||||
)
|
||||
|
||||
|
||||
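For intuition, a sketch of the conversion, assuming SourceDocument is a dataclass constructible from the fields read above (the values are invented):

    # Hypothetical example only; field list inferred from _doc_to_draft.
    doc = SourceDocument(
        name="draft-example-agent-00", title="Example", abstract="...",
        time="2025-01-01", source="w3c", source_id="", doc_status="draft",
        source_url="https://example.org", full_text=None, extra={"rev": "00"},
    )
    draft = _doc_to_draft(doc)  # Draft row ready for db.upsert_draft()
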
class Observatory:
    """Orchestrates the full observatory update cycle."""

    def __init__(
        self,
        config: Config | None = None,
        db: Database | None = None,
        analyzer=None,
    ):
        self.config = config or Config.load()
        self.db = db or Database(self.config)
        self._analyzer = analyzer

    @property
    def analyzer(self):
        """Lazy-load analyzer to avoid Anthropic key requirement for status/diff."""
        if self._analyzer is None:
            from .analyzer import Analyzer

            self._analyzer = Analyzer(self.config, self.db)
        return self._analyzer

    def update(
        self,
        sources: list[str] | None = None,
        full: bool = False,
    ) -> dict:
        """Full update cycle.

        1. Snapshot current state
        2. Fetch from enabled sources (delta by default)
        3. Analyze unrated docs (Claude, with caching)
        4. Embed missing docs (Ollama)
        5. Extract ideas from new docs
        6. Re-run gap analysis if >= 5 new docs, recording gap history
        7. Update source records

        Returns summary stats.
        """
        sources = sources or self.config.observatory_sources
        stats: dict = {
            "sources": {}, "new_docs": 0, "analyzed": 0,
            "embedded": 0, "ideas": 0, "gaps_changed": False,
        }

        # 1. Snapshot current state
        console.print("[bold]1/7[/] Creating snapshot...")
        snapshot_id = self.db.create_snapshot()

        # 2. Fetch from enabled sources
        console.print("[bold]2/7[/] Fetching from sources...")
        total_new = 0
        for src_name in sources:
            new_count = self._fetch_source(src_name, full=full)
            stats["sources"][src_name] = new_count
            total_new += new_count
        stats["new_docs"] = total_new
        console.print(f" Fetched [bold green]{total_new}[/] new documents total")

        # 3. Analyze unrated docs
        console.print("[bold]3/7[/] Analyzing unrated documents...")
        analyzed = self.analyzer.rate_all_unrated(limit=200, batch_size=5)
        stats["analyzed"] = analyzed

        # 4. Embed missing docs
        console.print("[bold]4/7[/] Embedding missing documents...")
        embedded = self._embed_missing()
        stats["embedded"] = embedded

        # 5. Extract ideas from new docs
        console.print("[bold]5/7[/] Extracting ideas...")
        ideas = self.analyzer.extract_all_ideas(limit=200, batch_size=5, cheap=True)
        stats["ideas"] = ideas

        # 6. Re-run gap analysis if enough new docs
        if total_new >= 5:
            console.print("[bold]6/7[/] Re-running gap analysis...")
            gaps = self.analyzer.gap_analysis()
            if gaps:
                self.db.record_gap_history(snapshot_id, gaps)
                stats["gaps_changed"] = True
                console.print(f" Found [bold]{len(gaps)}[/] gaps")
        else:
            console.print(f"[bold]6/7[/] Skipping gap analysis ({total_new} < 5 new docs)")
            # Record current gaps unchanged
            current_gaps = self.db.all_gaps()
            if current_gaps:
                self.db.record_gap_history(snapshot_id, current_gaps)

        # 7. Update source records
        console.print("[bold]7/7[/] Updating source records...")
        for src_name in sources:
            count = len(self.db.drafts_by_source(src_name, limit=10000))
            self.db.upsert_source(src_name, doc_count=count)

        console.print("\n[bold green]Observatory update complete![/]")
        console.print(f" New docs: {total_new} | Analyzed: {analyzed} | Embedded: {embedded} | Ideas: {ideas}")
        return stats

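A minimal driving sketch; the CLI entry point is not part of this hunk, so this is illustrative only:

    obs = Observatory()
    stats = obs.update(sources=["ietf", "w3c"], full=False)
    print(stats["new_docs"], "new documents")
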
    def _fetch_source(self, source_name: str, full: bool = False) -> int:
        """Fetch documents from a single source. Returns count of new docs."""
        fetcher = get_fetcher(source_name, self.config)
        try:
            # Delta fetch: only since last fetch unless full=True
            since = None
            if not full:
                src = self.db.get_source(source_name)
                if src and src.get("last_fetch"):
                    since = src["last_fetch"][:10]  # Date portion only

            docs = fetcher.search(self.config.search_keywords, since=since)

            new_count = 0
            for doc in docs:
                existing = self.db.get_draft(doc.name)
                if existing is None:
                    new_count += 1
                    draft = _doc_to_draft(doc)
                    self.db.upsert_draft(draft)

            # Download text for docs missing it (one DB lookup per doc)
            missing_text = [
                d for d in docs
                if (stored := self.db.get_draft(d.name)) and stored.full_text is None
            ]
            if missing_text:
                console.print(f" Downloading text for {len(missing_text)} {source_name} docs...")
                with Progress(
                    SpinnerColumn(),
                    TextColumn("[progress.description]{task.description}"),
                    BarColumn(),
                    MofNCompleteColumn(),
                    console=console,
                ) as progress:
                    task = progress.add_task(f"Downloading {source_name} texts...", total=len(missing_text))
                    for doc in missing_text:
                        text = fetcher.download_text(doc)
                        if text:
                            draft = self.db.get_draft(doc.name)
                            if draft:
                                draft.full_text = text
                                self.db.upsert_draft(draft)
                        progress.advance(task)

            return new_count
        finally:
            fetcher.close()

    def _embed_missing(self) -> int:
        """Embed documents that don't have embeddings yet."""
        missing = self.db.drafts_without_embeddings(limit=500)
        if not missing:
            console.print(" All documents already embedded.")
            return 0

        try:
            from .embeddings import Embedder

            embedder = Embedder(self.config, self.db)
        except Exception as e:
            console.print(f" [yellow]Skipping embeddings (Ollama unavailable): {e}[/]")
            return 0

        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Embedding...", total=len(missing))
            for name in missing:
                try:
                    vec = embedder.embed_draft(name)
                    if vec is not None:
                        count += 1
                except Exception:
                    pass
                progress.advance(task)

        console.print(f" Embedded [bold green]{count}[/] documents")
        return count

    def status(self) -> dict:
        """Current observatory state -- doc counts, sources, last update."""
        total = self.db.count_drafts()
        sources = self.db.all_sources()
        snapshots = self.db.get_snapshots(limit=1)
        gaps = self.db.all_gaps()

        # Count by source
        source_counts = {}
        for src in sources:
            source_counts[src["name"]] = src["doc_count"]

        # Unrated / unembedded
        unrated = len(self.db.unrated_drafts(limit=10000))
        unembedded = len(self.db.drafts_without_embeddings(limit=10000))

        last_update = snapshots[0]["snapshot_at"] if snapshots else None

        return {
            "total_docs": total,
            "sources": source_counts,
            "unrated": unrated,
            "unembedded": unembedded,
            "gaps": len(gaps),
            "last_update": last_update,
            "snapshots": len(self.db.get_snapshots(limit=100)),
        }

    def diff(self, since: str | None = None) -> dict:
        """What changed since a date -- new docs, gap changes."""
        if since is None:
            # Default to last snapshot
            snapshots = self.db.get_snapshots(limit=2)
            if len(snapshots) >= 2:
                since = snapshots[1]["snapshot_at"]
            else:
                since = "2000-01-01"

        # New docs since date
        new_docs = self.db.conn.execute(
            "SELECT name, title, source, time FROM drafts WHERE fetched_at > ? ORDER BY time DESC",
            (since,),
        ).fetchall()

        # Gap changes
        gap_timeline = self.db.gap_history_timeline()
        recent_gaps = [g for g in gap_timeline if g.get("recorded_at", "") > since]

        return {
            "since": since,
            "new_docs": [dict(r) for r in new_docs],
            "new_doc_count": len(new_docs),
            "gap_changes": recent_gaps,
        }
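Both helpers run without an Anthropic key thanks to the lazy analyzer; a quick sketch (the date is an illustrative ISO string, compared lexically against stored timestamps):

    obs = Observatory()
    print(obs.status()["total_docs"])
    changes = obs.diff(since="2025-01-01")
    print(changes["new_doc_count"], "docs since", changes["since"])
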
src/ietf_analyzer/orgs.py (new file)
@@ -0,0 +1,291 @@
"""Organization normalization and team bloc detection."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from .db import Database
|
||||
|
||||
# Maps raw affiliation strings to canonical org names.
|
||||
# Built from SELECT DISTINCT affiliation FROM draft_authors.
|
||||
ORG_ALIASES: dict[str, str] = {
|
||||
# Huawei
|
||||
"Huawei Technologies": "Huawei",
|
||||
"Huawei Technologies Co., Ltd.": "Huawei",
|
||||
"Huawei Technologies, Co., Ltd": "Huawei",
|
||||
"Huawei Tech": "Huawei",
|
||||
"Huawei Canada": "Huawei",
|
||||
"Huawei R&D": "Huawei",
|
||||
"Huawei Singapore": "Huawei",
|
||||
# Cisco
|
||||
"Cisco Systems": "Cisco",
|
||||
"Cisco Systems, Inc.": "Cisco",
|
||||
# Ericsson
|
||||
"Ericsson AB": "Ericsson",
|
||||
# RISE
|
||||
"RISE AB": "RISE",
|
||||
"RISE": "RISE",
|
||||
# Independent
|
||||
"Independent Researcher": "Independent",
|
||||
"Unaffiliated": "Independent",
|
||||
"Individual Contributor": "Independent",
|
||||
# Inria
|
||||
"INRIA": "Inria",
|
||||
# Google
|
||||
"Google LLC": "Google",
|
||||
"Google": "Google",
|
||||
# Apple
|
||||
"Apple Inc": "Apple",
|
||||
"Apple, Inc": "Apple",
|
||||
"Apple": "Apple",
|
||||
# Amazon
|
||||
"Amazon Web Services": "Amazon",
|
||||
"AWS": "Amazon",
|
||||
"Amazon": "Amazon",
|
||||
# Siemens
|
||||
"Siemens AG": "Siemens",
|
||||
# ZTE
|
||||
"ZTE": "ZTE Corporation",
|
||||
# Telefonica
|
||||
"Telefonica I+D": "Telefonica",
|
||||
# Deutsche Telekom
|
||||
"Deutsche Telecom": "Deutsche Telekom",
|
||||
# InterDigital
|
||||
"InterDigital Europe Ltd.": "InterDigital Europe",
|
||||
# Boeing
|
||||
"Boeing Technology Innovation": "Boeing",
|
||||
"Boeing Research & Technology": "Boeing",
|
||||
# Futurewei
|
||||
"Futurewei Technologies USA": "Futurewei",
|
||||
"Futurewei": "Futurewei",
|
||||
# IBM
|
||||
"IBM Research": "IBM",
|
||||
"IBM": "IBM",
|
||||
# China Telecom
|
||||
"China Telecom Research Institute": "China Telecom",
|
||||
# Beijing University (multiline variant from Datatracker)
|
||||
"Beijing University of Posts and\n Telecommunications": "BUPT",
|
||||
"Beijing University of Posts and Telecommunications": "BUPT",
|
||||
# AsiaInfo
|
||||
"AsiaInfo Technologies (China) Inc.": "AsiaInfo",
|
||||
"AsiaInfo Technologies (China) Inc": "AsiaInfo",
|
||||
# Dept of CS
|
||||
"Department of Computer Science and Engineering": "Department of Computer Science & Engineering",
|
||||
}
|
||||
|
||||
# Common suffixes to strip for fuzzy matching
|
||||
_SUFFIXES = [
|
||||
", Inc.", ", Inc", " Inc.", " Inc",
|
||||
" LLC", " Ltd.", " Ltd",
|
||||
" AB", " GmbH", " Corp",
|
||||
" Co., Ltd.", " Co., Ltd",
|
||||
" Technologies",
|
||||
]
|
||||
|
||||
|
||||
def normalize_org(raw: str) -> str:
|
||||
"""Normalize an affiliation string to a canonical org name."""
|
||||
raw = raw.strip()
|
||||
if not raw:
|
||||
return ""
|
||||
# Exact match
|
||||
if raw in ORG_ALIASES:
|
||||
return ORG_ALIASES[raw]
|
||||
# Fuzzy: strip suffixes and check again
|
||||
stripped = raw
|
||||
for suffix in _SUFFIXES:
|
||||
if stripped.endswith(suffix):
|
||||
stripped = stripped[: -len(suffix)].strip()
|
||||
break
|
||||
if stripped in ORG_ALIASES:
|
||||
return ORG_ALIASES[stripped]
|
||||
if stripped != raw and stripped:
|
||||
# Check if the stripped form matches a canonical name directly
|
||||
for canonical in set(ORG_ALIASES.values()):
|
||||
if stripped.lower() == canonical.lower():
|
||||
return canonical
|
||||
return raw
|
||||
|
||||
|
||||
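The lookup order is exact alias, then suffix-stripped alias, then case-insensitive match against a canonical name:

    normalize_org("Huawei Technologies Co., Ltd.")  # -> "Huawei" (exact alias)
    normalize_org("Cisco Inc")       # -> "Cisco" (suffix strip, canonical match)
    normalize_org("Unknown Startup") # -> "Unknown Startup" (pass-through)
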
@dataclass
class Bloc:
    """A team of authors who consistently co-author together."""

    members: list[tuple[int, str, str]]  # (person_id, name, normalized_org)
    shared_drafts: int  # drafts where >= 2 members co-author
    primary_org: str
    cohesion: float  # avg pairwise cohesion

    @property
    def member_pids(self) -> set[int]:
        return {pid for pid, _, _ in self.members}

    @property
    def label(self) -> str:
        return f"{self.primary_org} team ({len(self.members)})"


def detect_blocs(
    db: Database,
    cohesion_threshold: float = 0.70,
    min_size: int = 2,
    min_shared_drafts: int = 2,
) -> list[Bloc]:
    """Detect team blocs where all member pairs share >= threshold of drafts.

    Uses connected components on a cohesion-filtered co-author graph,
    then merges overlapping groups into single blocs.
    """
    draft_counts = db.author_draft_counts()
    draft_sets = db.author_draft_sets()

    # Get enriched pair data with person_ids
    rows = db.conn.execute(
        """SELECT a1.name, da1.person_id, a2.name, da2.person_id, COUNT(*) as shared
           FROM draft_authors da1
           JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
               AND da1.person_id < da2.person_id
           JOIN authors a1 ON da1.person_id = a1.person_id
           JOIN authors a2 ON da2.person_id = a2.person_id
           GROUP BY da1.person_id, da2.person_id
           HAVING shared >= ?
           ORDER BY shared DESC""",
        (min_shared_drafts,),
    ).fetchall()

    # Get affiliations per person (affiliation may be NULL)
    aff_rows = db.conn.execute(
        "SELECT person_id, affiliation FROM authors"
    ).fetchall()
    person_aff = {r[0]: normalize_org(r[1] or "") for r in aff_rows}
    person_name: dict[int, str] = {}

    # Build cohesion-filtered adjacency: only keep edges with high overlap
    adj: dict[int, set[int]] = defaultdict(set)
    pair_shared: dict[tuple[int, int], int] = {}
    pair_cohesion: dict[tuple[int, int], float] = {}

    for r in rows:
        name_a, pid_a, name_b, pid_b, shared = r[0], r[1], r[2], r[3], r[4]
        person_name[pid_a] = name_a
        person_name[pid_b] = name_b
        min_d = min(draft_counts.get(pid_a, 1), draft_counts.get(pid_b, 1))
        cohesion = shared / min_d
        if cohesion >= cohesion_threshold:
            adj[pid_a].add(pid_b)
            adj[pid_b].add(pid_a)
            key = (min(pid_a, pid_b), max(pid_a, pid_b))
            pair_shared[key] = shared
            pair_cohesion[key] = cohesion

    # Find connected components (each component = one merged bloc)
    visited: set[int] = set()
    components: list[set[int]] = []
    for pid in adj:
        if pid in visited:
            continue
        component: set[int] = set()
        stack = [pid]
        while stack:
            node = stack.pop()
            if node in visited:
                continue
            visited.add(node)
            component.add(node)
            stack.extend(adj[node] - visited)
        if len(component) >= min_size:
            components.append(component)

    # Build Bloc objects from components
    blocs = []
    for comp in components:
        members = [
            (pid, person_name.get(pid, "?"), person_aff.get(pid, ""))
            for pid in comp
        ]

        # Shared drafts = drafts where >= 2 bloc members appear
        all_drafts: dict[str, int] = defaultdict(int)
        for pid in comp:
            for d in draft_sets.get(pid, set()):
                all_drafts[d] += 1
        shared_count = sum(1 for cnt in all_drafts.values() if cnt >= 2)

        # Primary org = most common among members
        org_counts: dict[str, int] = defaultdict(int)
        for _, _, org in members:
            if org:
                org_counts[org] += 1
        primary = max(org_counts, key=org_counts.get) if org_counts else ""

        # Average pairwise cohesion (only for connected pairs)
        edges = [
            pair_cohesion[(a, b)]
            for a in comp for b in comp
            if a < b and (a, b) in pair_cohesion
        ]
        avg_coh = sum(edges) / len(edges) if edges else 0

        blocs.append(Bloc(
            members=sorted(members, key=lambda m: -len(draft_sets.get(m[0], set()))),
            shared_drafts=shared_count,
            primary_org=primary,
            cohesion=avg_coh,
        ))

    # Sort: most shared drafts first (the interesting ones)
    blocs.sort(key=lambda b: (-b.shared_drafts, -len(b.members)))
    return blocs


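Usage sketch, with the default thresholds defined above:

    for bloc in detect_blocs(db, cohesion_threshold=0.70)[:5]:
        print(bloc.label,
              f"cohesion={bloc.cohesion:.2f}",
              f"shared={bloc.shared_drafts}")
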
def top_orgs_normalized(
    db: Database, limit: int = 20
) -> list[tuple[str, int, int]]:
    """Return (canonical_org, unique_authors, unique_drafts) with merged orgs."""
    raw = db.org_data_raw()

    org_people: dict[str, set[int]] = defaultdict(set)
    org_drafts: dict[str, set[str]] = defaultdict(set)

    for aff, pid, draft_name in raw:
        canonical = normalize_org(aff)
        if canonical:
            org_people[canonical].add(pid)
            org_drafts[canonical].add(draft_name)

    results = [
        (org, len(org_people[org]), len(org_drafts[org]))
        for org in org_people
    ]
    results.sort(key=lambda x: -x[2])
    return results[:limit]


def cross_org_normalized(
    db: Database, limit: int = 20
) -> list[tuple[str, str, int]]:
    """Return (org_a, org_b, shared_drafts) with normalized org names."""
    # Get all (aff_a, aff_b, draft_name) cross-org triples
    rows = db.conn.execute(
        """SELECT da1.affiliation, da2.affiliation, da1.draft_name
           FROM draft_authors da1
           JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
               AND da1.person_id < da2.person_id
           WHERE da1.affiliation != '' AND da2.affiliation != ''"""
    ).fetchall()

    pair_drafts: dict[tuple[str, str], set[str]] = defaultdict(set)
    for aff_a, aff_b, draft_name in rows:
        norm_a = normalize_org(aff_a)
        norm_b = normalize_org(aff_b)
        if norm_a and norm_b and norm_a != norm_b:
            key = tuple(sorted([norm_a, norm_b]))
            pair_drafts[key].add(draft_name)

    results = [
        (org_a, org_b, len(drafts))
        for (org_a, org_b), drafts in pair_drafts.items()
    ]
    results.sort(key=lambda x: -x[2])
    return results[:limit]
src/ietf_analyzer/pipeline/__init__.py (new file)
@@ -0,0 +1,6 @@
"""Gap-to-Draft generation pipeline."""
|
||||
from .context import ContextBuilder
|
||||
from .generator import PipelineGenerator
|
||||
from .quality import QualityGates
|
||||
from .family import FamilyCoordinator
|
||||
from .formatter import DraftFormatter
|
||||
src/ietf_analyzer/pipeline/context.py (new file)
@@ -0,0 +1,259 @@
"""Context builder — assembles rich context for draft generation from DB queries."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from rich.console import Console
|
||||
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
|
||||
dot = np.dot(a, b)
|
||||
norm = np.linalg.norm(a) * np.linalg.norm(b)
|
||||
if norm == 0:
|
||||
return 0.0
|
||||
return float(dot / norm)
|
||||
|
||||
|
||||
class ContextBuilder:
    def __init__(self, config: Config, db: Database):
        self.config = config
        self.db = db

    def build_context(self, gap_topic: str) -> dict:
        """Assemble full context for a gap topic. All DB queries, zero Claude calls."""
        gap = self._find_gap(gap_topic)
        if not gap:
            console.print(f"[yellow]No gap found matching '{gap_topic}', using topic as-is[/]")
            gap = {
                "id": 0,
                "topic": gap_topic,
                "description": gap_topic,
                "category": "",
                "evidence": "",
                "severity": "medium",
            }

        ideas = self._convergent_ideas(gap)
        rfcs = self._rfc_foundations(gap.get("category", ""))
        similar = self._similar_drafts(gap["description"])
        top_rated = self._top_rated_in_category(gap.get("category", ""))
        wg_context = self._wg_context()
        ecosystem = self._ecosystem_vision()
        siblings = self._sibling_context(gap_topic)

        return {
            "gap": gap,
            "convergent_ideas": ideas,
            "rfc_foundations": rfcs,
            "similar_drafts": similar,
            "top_rated": top_rated,
            "wg_context": wg_context,
            "ecosystem_vision": ecosystem,
            "sibling_context": siblings,
        }

    def _find_gap(self, topic: str) -> dict | None:
        """Find a gap by topic string (fuzzy match)."""
        gaps = self.db.all_gaps()
        topic_lower = topic.lower()
        # Exact match first
        for g in gaps:
            if g["topic"].lower() == topic_lower:
                return g
        # Substring match
        for g in gaps:
            if topic_lower in g["topic"].lower() or topic_lower in g["description"].lower():
                return g
        # Word overlap match
        topic_words = set(topic_lower.split())
        best = None
        best_score = 0
        for g in gaps:
            gap_words = set(g["topic"].lower().split()) | set(g["description"].lower().split())
            overlap = len(topic_words & gap_words)
            if overlap > best_score:
                best_score = overlap
                best = g
        return best if best_score >= 2 else None

    def _convergent_ideas(self, gap: dict, limit: int = 20) -> list[dict]:
        """Find ideas that converge on this gap topic via keyword matching."""
        all_ideas = self.db.all_ideas()
        if not all_ideas:
            return []

        # Build search terms from gap topic + description
        search_text = (gap["topic"] + " " + gap["description"]).lower()
        search_words = set(search_text.split())
        # Remove common words
        stop_words = {"the", "a", "an", "and", "or", "in", "of", "for", "to", "is",
                      "are", "that", "this", "with", "not", "by", "on", "at", "from",
                      "as", "be", "it", "no", "but", "has", "have", "do", "does"}
        search_words -= stop_words

        scored = []
        for idea in all_ideas:
            idea_text = (idea["title"] + " " + idea["description"]).lower()
            idea_words = set(idea_text.split())
            overlap = len(search_words & idea_words)
            if overlap >= 1:
                scored.append((overlap, idea))

        scored.sort(key=lambda x: x[0], reverse=True)
        return [item for _, item in scored[:limit]]

    def _rfc_foundations(self, category: str, limit: int = 10) -> list[tuple[str, int]]:
        """Get most-referenced RFCs, optionally filtered by category."""
        top_refs = self.db.top_referenced(ref_type="rfc", limit=limit * 2)
        if not category:
            return [(ref_id, count) for ref_id, count, _ in top_refs[:limit]]

        # Filter to RFCs referenced by drafts in this category
        category_lower = category.lower()
        pairs = self.db.drafts_with_ratings(limit=500)
        category_drafts = set()
        for draft, rating in pairs:
            for cat in rating.categories:
                if category_lower in cat.lower():
                    category_drafts.add(draft.name)

        if not category_drafts:
            return [(ref_id, count) for ref_id, count, _ in top_refs[:limit]]

        filtered = []
        for ref_id, count, draft_names in top_refs:
            cat_count = sum(1 for d in draft_names if d in category_drafts)
            if cat_count > 0:
                filtered.append((ref_id, cat_count))

        filtered.sort(key=lambda x: x[1], reverse=True)
        return filtered[:limit]

    def _similar_drafts(self, gap_desc: str, limit: int = 8) -> list[tuple[str, float]]:
        """Find semantically similar existing drafts via embeddings."""
        all_embeddings = self.db.all_embeddings()
        if not all_embeddings:
            return []

        # Try to embed the gap description via Ollama
        try:
            import ollama as ollama_lib

            client = ollama_lib.Client(host=self.config.ollama_url)
            resp = client.embed(
                model=self.config.ollama_embed_model,
                input=gap_desc[:8000],
            )
            gap_vec = np.array(resp["embeddings"][0], dtype=np.float32)
        except Exception as e:
            console.print(f"[yellow]Ollama embedding failed, skipping similarity: {e}[/]")
            return []

        similarities = []
        for name, vec in all_embeddings.items():
            sim = _cosine_similarity(gap_vec, vec)
            similarities.append((name, sim))

        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:limit]

    def _top_rated_in_category(self, category: str, limit: int = 5) -> list[tuple]:
        """Get top-rated drafts in a category."""
        pairs = self.db.drafts_with_ratings(limit=500)
        if not category:
            return [
                (draft.name, draft.title, rating.composite_score)
                for draft, rating in pairs[:limit]
            ]

        category_lower = category.lower()
        matching = []
        for draft, rating in pairs:
            for cat in rating.categories:
                if category_lower in cat.lower():
                    matching.append((draft.name, draft.title, rating.composite_score))
                    break

        return matching[:limit]

    def _wg_context(self) -> str:
        """Summarize WG adoption status."""
        adoption = self.db.draft_adoption_status()
        wg_counts: dict[str, int] = {}
        adopted_count = 0
        for d in adoption:
            if d["wg_adopted"]:
                adopted_count += 1
                wg = d["wg_name"]
                wg_counts[wg] = wg_counts.get(wg, 0) + 1

        total = len(adoption)
        if not wg_counts:
            return f"{total} drafts, none WG-adopted yet."

        top_wgs = sorted(wg_counts.items(), key=lambda x: x[1], reverse=True)[:5]
        wg_lines = ", ".join(f"{wg} ({n})" for wg, n in top_wgs)
        return f"{total} drafts, {adopted_count} WG-adopted. Top WGs: {wg_lines}"

    def _ecosystem_vision(self) -> str:
        """Load ecosystem vision document if it exists."""
        vision_path = Path(self.config.data_dir) / "reports" / "holistic-agent-ecosystem-draft-outlines.md"
        if not vision_path.exists():
            return "(No ecosystem vision document found)"

        text = vision_path.read_text()
        # Return the pitch section (compact) rather than the full document
        if "## 8. One-Page Pitch" in text:
            pitch = text.split("## 8. One-Page Pitch")[1].strip()
            return pitch[:2000]
        # Fallback: return the vision summary
        if "## 1. Vision Summary" in text:
            parts = text.split("## 1. Vision Summary")[1]
            if "## 2." in parts:
                parts = parts.split("## 2.")[0]
            return parts.strip()[:2000]
        return text[:2000]

    def _sibling_context(self, gap_topic: str) -> list[dict]:
        """Get outlines of sibling drafts from the same family."""
        # Check all family drafts
        families = self.db.get_generated_drafts()
        if not families:
            return []

        # Find which family this gap_topic belongs to
        topic_lower = gap_topic.lower()
        family_name = ""
        for gd in families:
            if topic_lower in gd.get("gap_topic", "").lower():
                family_name = gd.get("family_name", "")
                break

        if not family_name:
            return []

        siblings = self.db.get_family_drafts(family_name)
        result = []
        for s in siblings:
            if s.get("gap_topic", "").lower() == topic_lower:
                continue  # Skip self
            outline = {}
            if s.get("outline_json"):
                try:
                    outline = json.loads(s["outline_json"]) if isinstance(s["outline_json"], str) else s["outline_json"]
                except (json.JSONDecodeError, TypeError):
                    pass
            result.append({
                "role": s.get("family_role", ""),
                "title": s.get("title", ""),
                "abstract": s.get("abstract", ""),
                "outline": outline,
            })
        return result
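Sketch of the intended entry point; the gap topic is invented:

    ctx = ContextBuilder(config, db).build_context("agent discovery")
    print(len(ctx["convergent_ideas"]), "ideas,",
          len(ctx["rfc_foundations"]), "RFC foundations")
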
src/ietf_analyzer/pipeline/family.py (new file)
@@ -0,0 +1,219 @@
"""Family coordinator — orchestrates generation of the 5-draft ecosystem."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from rich.console import Console
|
||||
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
from .generator import PipelineGenerator
|
||||
from .quality import QualityGates
|
||||
|
||||
console = Console()
|
||||
|
||||
FAMILY_DRAFTS = [
|
||||
{
|
||||
"role": "AEM",
|
||||
"topic": "Agent Ecosystem Model",
|
||||
"description": (
|
||||
"Core architecture and terminology for the agent ecosystem. "
|
||||
"Defines shared concepts: DAG execution model, HITL points, "
|
||||
"assurance levels, protocol agnosticism. Foundation for all "
|
||||
"companion drafts."
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "ATD",
|
||||
"topic": "Agent Task DAG",
|
||||
"description": (
|
||||
"Execution model using DAG structure with checkpoints and rollback. "
|
||||
"Defines node semantics (pending/running/done/failed/rolled-back), "
|
||||
"resource hints, circuit breakers, and rollback protocol. "
|
||||
"Uses ECT as token and DAG format."
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "HITL",
|
||||
"topic": "Human-in-the-Loop",
|
||||
"description": (
|
||||
"Human oversight as first-class primitive. Approval gates, "
|
||||
"escalation paths, emergency override (PAUSE/CONSTRAIN/STOP/TAKEOVER), "
|
||||
"and explainability hooks. Integrates with DAG as HITL nodes."
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "AEPB",
|
||||
"topic": "Agent Ecosystem Protocol Bindings",
|
||||
"description": (
|
||||
"Cross-protocol interoperability layer. Capability advertisement, "
|
||||
"protocol binding requirements, translation gateways, negotiation. "
|
||||
"Makes ecosystem semantics available over any A2A protocol."
|
||||
),
|
||||
},
|
||||
{
|
||||
"role": "APAE",
|
||||
"topic": "Agent Provenance Assurance Ecosystem",
|
||||
"description": (
|
||||
"Trust, verification, and provenance for dual-regime operation. "
|
||||
"Assurance profiles (relaxed/standard/regulated), behavior verification, "
|
||||
"dynamic trust scoring (AIMD model), provenance chains. "
|
||||
"Same stack from K8s to fully proven."
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
class FamilyCoordinator:
    def __init__(self, config: Config, db: Database, analyzer):
        self.config = config
        self.db = db
        self.analyzer = analyzer
        self.generator = PipelineGenerator(config, db, analyzer)
        self.quality = QualityGates(config, db, analyzer)

    def generate_family(self, family_name: str = "agent-ecosystem", cheap: bool = False) -> list[dict]:
        """Generate all 5 drafts in order: AEM first, then the remaining four with sibling context."""
        console.print(f"\n[bold cyan]Generating draft family: {family_name}[/]")
        console.print(f"Drafts: {len(FAMILY_DRAFTS)}, cheap={cheap}")

        # Log the generation run
        run_id = self.db.log_generation_run({
            "family_name": family_name,
            "gap_ids": [d["role"] for d in FAMILY_DRAFTS],
            "model_used": self.config.claude_model_cheap if cheap else self.config.claude_model,
            "status": "running",
        })

        results = []
        total_in = 0
        total_out = 0

        for i, draft_spec in enumerate(FAMILY_DRAFTS):
            console.print(
                f"\n[bold]{'='*60}[/]"
                f"\n[bold]Draft {i+1}/{len(FAMILY_DRAFTS)}: "
                f"[cyan]{draft_spec['role']}[/] — {draft_spec['topic']}[/]"
                f"\n[bold]{'='*60}[/]"
            )

            try:
                result = self.generator.generate_full(
                    gap_topic=draft_spec["topic"],
                    cheap=cheap,
                    family_name=family_name,
                    family_role=draft_spec["role"],
                )
                results.append(result)

                # Run quality gates
                draft_id = result.get("id")
                if draft_id:
                    console.print(f"\n[dim]Running quality gates for {draft_spec['role']}...[/]")
                    qr = self.quality.run_all(draft_id)
                    result["quality_results"] = qr

            except Exception as e:
                console.print(f"[red]Failed to generate {draft_spec['role']}: {e}[/]")
                results.append({
                    "role": draft_spec["role"],
                    "topic": draft_spec["topic"],
                    "error": str(e),
                })

        # Update run
        self.db.update_generation_run(
            run_id,
            status="completed",
            completed_at=_now_iso(),
        )

        # Summary
        console.print(f"\n[bold cyan]{'='*60}[/]")
        console.print(f"[bold]Family generation complete: {family_name}[/]")
        successful = [r for r in results if "error" not in r]
        console.print(f" Generated: {len(successful)}/{len(FAMILY_DRAFTS)} drafts")
        for r in results:
            if "error" in r:
                console.print(f" [red]FAIL[/] {r['role']}: {r['error']}")
            else:
                console.print(f" [green]OK[/] {r.get('family_role', '?')}: {r.get('title', '?')}")

        return results

    def check_consistency(self, family_name: str) -> dict:
        """Check terminology consistency across family drafts."""
        drafts = self.db.get_family_drafts(family_name)
        if not drafts:
            return {"consistent": False, "details": "No drafts found for family"}

        # Collect terminology from all outlines
        all_terms: dict[str, dict[str, str]] = {}  # term -> {role: definition}
        for gd in drafts:
            role = gd.get("family_role", "?")
            outline_raw = gd.get("outline_json", "{}")
            try:
                outline = json.loads(outline_raw) if isinstance(outline_raw, str) else outline_raw
            except (json.JSONDecodeError, TypeError):
                continue

            terms = outline.get("terminology", {})
            if not isinstance(terms, dict):
                continue
            for term, defn in terms.items():
                term_lower = term.lower()
                if term_lower not in all_terms:
                    all_terms[term_lower] = {}
                all_terms[term_lower][role] = defn

        # Find terms used in multiple drafts
        shared_terms = {t: roles for t, roles in all_terms.items() if len(roles) > 1}
        if not shared_terms:
            return {
                "consistent": True,
                "shared_terms": 0,
                "details": "No shared terminology found across drafts",
            }

        # Check for inconsistencies (simple: different definitions for same term)
        inconsistencies = []
        for term, roles in shared_terms.items():
            definitions = list(roles.values())
            # Rough check: if definitions differ significantly
            unique_defs = set(d.lower().strip().rstrip(".") for d in definitions)
            if len(unique_defs) > 1:
                inconsistencies.append({
                    "term": term,
                    "definitions": roles,
                })

        consistent = len(inconsistencies) == 0
        details_parts = [f"{len(shared_terms)} shared terms across drafts"]
        if inconsistencies:
            details_parts.append(f"{len(inconsistencies)} inconsistencies found:")
            for inc in inconsistencies:
                details_parts.append(f" '{inc['term']}': {inc['definitions']}")

        console.print(f"\n[bold]Consistency check: {family_name}[/]")
        console.print(f" Shared terms: {len(shared_terms)}")
        console.print(f" Inconsistencies: {len(inconsistencies)}")
        if consistent:
            console.print(" [green]All terminology consistent[/]")
        else:
            for inc in inconsistencies:
                console.print(f" [yellow]Inconsistent: '{inc['term']}'[/]")
                for role, defn in inc["definitions"].items():
                    console.print(f"   {role}: {defn[:80]}")

        return {
            "consistent": consistent,
            "shared_terms": len(shared_terms),
            "inconsistencies": inconsistencies,
            "details": "; ".join(details_parts),
        }


def _now_iso() -> str:
    from datetime import datetime, timezone

    return datetime.now(timezone.utc).isoformat()
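End-to-end sketch; the analyzer is constructed as elsewhere in this commit:

    coordinator = FamilyCoordinator(config, db, analyzer)
    results = coordinator.generate_family("agent-ecosystem", cheap=True)
    report = coordinator.check_consistency("agent-ecosystem")
    print(report["details"])
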
src/ietf_analyzer/pipeline/formatter.py (new file)
@@ -0,0 +1,203 @@
"""Draft formatter — assembles outline + sections into I-D text format."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import textwrap
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
|
||||
class DraftFormatter:
|
||||
@staticmethod
|
||||
def format_draft(outline: dict, sections: list[str], family_name: str = "") -> str:
|
||||
"""Assemble outline + sections into I-D text format."""
|
||||
title = outline["title"]
|
||||
draft_name = DraftFormatter._make_draft_name(title, family_name)
|
||||
|
||||
parts = []
|
||||
parts.append(DraftFormatter._header_block(outline, draft_name))
|
||||
parts.append("")
|
||||
parts.append("Abstract")
|
||||
parts.append("")
|
||||
parts.append(DraftFormatter._wrap_text(outline.get("abstract", "")))
|
||||
parts.append("")
|
||||
parts.append(DraftFormatter._status_memo(outline))
|
||||
parts.append("")
|
||||
|
||||
# Terminology section (if outline has terminology)
|
||||
terms = outline.get("terminology", {})
|
||||
if terms:
|
||||
parts.append(DraftFormatter._terminology_section(outline))
|
||||
parts.append("")
|
||||
|
||||
# Table of Contents
|
||||
parts.append("Table of Contents")
|
||||
parts.append("")
|
||||
section_list = outline.get("sections", [])
|
||||
for i, section in enumerate(section_list, 1):
|
||||
stitle = section.get("title", f"Section {i}")
|
||||
dots = "." * max(1, 60 - len(stitle))
|
||||
parts.append(f" {i}. {stitle} {dots} {i + 2}")
|
||||
ref_num = len(section_list) + 1
|
||||
parts.append(f" {ref_num}. References {'.' * (60 - len('References'))} {ref_num + 2}")
|
||||
parts.append("")
|
||||
|
||||
# Sections
|
||||
for i, (section_info, section_text) in enumerate(
|
||||
zip(section_list, sections), 1
|
||||
):
|
||||
stitle = section_info.get("title", f"Section {i}")
|
||||
parts.append(f"{i}. {stitle}")
|
||||
parts.append("")
|
||||
parts.append(DraftFormatter._wrap_text(section_text))
|
||||
parts.append("")
|
||||
|
||||
# References section
|
||||
parts.append(DraftFormatter._references_section(outline))
|
||||
parts.append("")
|
||||
|
||||
# Author's Address
|
||||
parts.append("Author's Address")
|
||||
parts.append("")
|
||||
parts.append(" Generated by IETF Draft Analyzer")
|
||||
if family_name:
|
||||
parts.append(f" Family: {family_name}")
|
||||
parts.append(f" {datetime.now(timezone.utc).strftime('%Y-%m-%d')}")
|
||||
parts.append("")
|
||||
|
||||
return "\n".join(parts)
|
||||
|
||||
@staticmethod
|
||||
def _make_draft_name(title: str, family_name: str = "") -> str:
|
||||
"""Generate a draft name from title."""
|
||||
words = title.lower().split()
|
||||
slug = "-".join(w for w in words[:4] if w.isalnum())
|
||||
if family_name:
|
||||
return f"draft-{family_name}-{slug}-00"
|
||||
return f"draft-ai-{slug}-00"
|
||||
|
||||
@staticmethod
|
||||
def _header_block(outline: dict, draft_name: str) -> str:
|
||||
"""Proper I-D header."""
|
||||
now = datetime.now(timezone.utc)
|
||||
expires = now + timedelta(days=185)
|
||||
date_str = now.strftime("%B %Y")
|
||||
exp_str = expires.strftime("%B %d, %Y")
|
||||
status = outline.get("intended_status", "Informational")
|
||||
wg = outline.get("target_wg", "individual")
|
||||
title = outline["title"]
|
||||
|
||||
lines = []
|
||||
lines.append(f"Internet-Draft{' ' * 45}{wg}")
|
||||
lines.append(f"Intended status: {status:<44s}{date_str}")
|
||||
lines.append(f"Expires: {exp_str}")
|
||||
lines.append("")
|
||||
lines.append("")
|
||||
lines.append(f" {title}")
|
||||
lines.append(f" {draft_name}")
|
||||
return "\n".join(lines)
|
||||
|
||||
@staticmethod
|
||||
def _status_memo(outline: dict) -> str:
|
||||
"""Status of This Memo boilerplate."""
|
||||
status = outline.get("intended_status", "Informational")
|
||||
lines = []
|
||||
lines.append("Status of This Memo")
|
||||
lines.append("")
|
||||
lines.append(DraftFormatter._wrap_text(
|
||||
"This Internet-Draft is submitted in full conformance with the "
|
||||
"provisions of BCP 78 and BCP 79."
|
||||
))
|
||||
lines.append("")
|
||||
lines.append(DraftFormatter._wrap_text(
|
||||
f"This document is intended to have {status} status. "
|
||||
"Distribution of this memo is unlimited."
|
||||
))
|
||||
return "\n".join(lines)
|
||||
|
||||
@staticmethod
|
||||
def _references_section(outline: dict) -> str:
|
||||
"""Normative + Informative References from outline data."""
|
||||
lines = []
|
||||
norm_refs = outline.get("normative_refs", [])
|
||||
info_refs = outline.get("informative_refs", [])
|
||||
|
||||
ref_num = len(outline.get("sections", [])) + 1
|
||||
lines.append(f"{ref_num}. References")
|
||||
lines.append("")
|
||||
|
||||
if norm_refs:
|
||||
lines.append(f"{ref_num}.1. Normative References")
|
||||
lines.append("")
|
||||
for ref in norm_refs:
|
||||
lines.append(f" [{ref}]")
|
||||
lines.append(f" {ref}")
|
||||
lines.append("")
|
||||
|
||||
if info_refs:
|
||||
sub = "2" if norm_refs else "1"
|
||||
lines.append(f"{ref_num}.{sub}. Informative References")
|
||||
lines.append("")
|
||||
for ref in info_refs:
|
||||
lines.append(f" [{ref}]")
|
||||
lines.append(f" {ref}")
|
||||
lines.append("")
|
||||
|
||||
if not norm_refs and not info_refs:
|
||||
lines.append(" (No references specified)")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
@staticmethod
|
||||
def _terminology_section(outline: dict) -> str:
|
||||
"""Terminology section from outline terminology dict."""
|
||||
terms = outline.get("terminology", {})
|
||||
if not terms:
|
||||
return ""
|
||||
|
||||
lines = []
|
||||
lines.append("Terminology")
|
||||
lines.append("")
|
||||
lines.append(DraftFormatter._wrap_text(
|
||||
'The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL '
|
||||
'NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", '
|
||||
'"MAY", and "OPTIONAL" in this document are to be interpreted as '
|
||||
'described in BCP 14 [RFC2119] [RFC8174] when, and only when, they '
|
||||
'appear in all capitals, as shown here.'
|
||||
))
|
||||
lines.append("")
|
||||
|
||||
for term, definition in terms.items():
|
||||
lines.append(f" {term}")
|
||||
lines.append(DraftFormatter._wrap_text(definition, indent=6))
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
@staticmethod
|
||||
def _wrap_text(text: str, indent: int = 3, width: int = 69) -> str:
|
||||
"""72-char line wrapping for I-D format."""
|
||||
prefix = " " * indent
|
||||
paragraphs = text.strip().split("\n\n")
|
||||
wrapped = []
|
||||
for para in paragraphs:
|
||||
# Preserve list items
|
||||
if para.strip().startswith("-") or para.strip().startswith("*"):
|
||||
inner_lines = para.strip().split("\n")
|
||||
for line in inner_lines:
|
||||
line = line.strip()
|
||||
sub_lines = textwrap.wrap(
|
||||
line, width=width,
|
||||
initial_indent=prefix,
|
||||
subsequent_indent=prefix + " ",
|
||||
)
|
||||
wrapped.append("\n".join(sub_lines))
|
||||
else:
|
||||
para = " ".join(para.split()) # Normalize whitespace
|
||||
lines = textwrap.wrap(
|
||||
para, width=width,
|
||||
initial_indent=prefix,
|
||||
subsequent_indent=prefix,
|
||||
)
|
||||
wrapped.append("\n".join(lines))
|
||||
return "\n\n".join(wrapped)
|
||||
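A tiny outline is enough to exercise the formatter; the field names mirror those read above, and the content is invented:

    outline = {
        "title": "Example Agent Draft",
        "abstract": "Illustrative abstract only.",
        "intended_status": "Informational",
        "target_wg": "individual",
        "sections": [{"title": "Introduction", "summary": ""}],
        "normative_refs": ["RFC2119"],
        "informative_refs": [],
    }
    text = DraftFormatter.format_draft(outline, ["Body of the introduction."])
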
src/ietf_analyzer/pipeline/generator.py (new file)
@@ -0,0 +1,269 @@
"""Pipeline generator — enhanced outline + section generation with rich context."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
|
||||
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
from .context import ContextBuilder
|
||||
from .prompts import OUTLINE_PROMPT_V2, SECTION_PROMPT_V2
|
||||
from .formatter import DraftFormatter
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
def _prompt_hash(text: str) -> str:
|
||||
return hashlib.sha256(text.encode()).hexdigest()[:16]
|
||||
|
||||
|
||||
class PipelineGenerator:
    def __init__(self, config: Config, db: Database, analyzer):
        self.config = config
        self.db = db
        self.analyzer = analyzer
        self.context_builder = ContextBuilder(config, db)

    def _format_ideas_for_prompt(self, ideas: list[dict]) -> str:
        if not ideas:
            return "(none found)"
        lines = []
        for idea in ideas:
            lines.append(
                f"- [{idea.get('type', '?')}] {idea['title']}: "
                f"{idea['description']} (from {idea.get('draft_name', '?')})"
            )
        return "\n".join(lines)

    def _format_rfcs_for_prompt(self, rfcs: list[tuple[str, int]]) -> str:
        if not rfcs:
            return "(none found)"
        return "\n".join(f"- RFC {ref_id} (cited by {count} drafts)" for ref_id, count in rfcs)

    def _format_similar_for_prompt(self, similar: list[tuple[str, float]]) -> str:
        if not similar:
            return "(none found)"
        lines = []
        for name, sim in similar:
            draft = self.db.get_draft(name)
            title = draft.title if draft else name
            lines.append(f"- {name}: {title} (similarity: {sim:.2f})")
        return "\n".join(lines)

    def _format_top_rated_for_prompt(self, top_rated: list[tuple]) -> str:
        if not top_rated:
            return "(none found)"
        return "\n".join(
            f"- {name}: {title} (score: {score:.1f})"
            for name, title, score in top_rated
        )

    def _format_siblings_for_prompt(self, siblings: list[dict]) -> str:
        if not siblings:
            return "(none — this is the first draft in the family)"
        lines = []
        for s in siblings:
            role = s.get("role", "?")
            title = s.get("title", "?")
            abstract = s.get("abstract", "")[:200]
            outline = s.get("outline", {})
            sections = outline.get("sections", [])
            section_titles = [sec.get("title", "") for sec in sections]
            lines.append(
                f"- [{role}] {title}\n"
                f"  Abstract: {abstract}\n"
                f"  Sections: {', '.join(section_titles)}"
            )
        return "\n".join(lines)

    def _format_terminology_for_prompt(self, outline: dict) -> str:
        terms = outline.get("terminology", {})
        if not terms:
            return "(none defined yet)"
        return "\n".join(f"- **{term}**: {defn}" for term, defn in terms.items())

    def generate_outline(self, context: dict, cheap: bool = False) -> dict:
        """Generate outline from assembled context. Returns outline dict."""
        gap = context["gap"]

        prompt = OUTLINE_PROMPT_V2.format(
            gap_topic=gap["topic"],
            gap_description=gap["description"],
            gap_category=gap.get("category", ""),
            gap_evidence=gap.get("evidence", ""),
            gap_severity=gap.get("severity", "medium"),
            convergent_ideas=self._format_ideas_for_prompt(context["convergent_ideas"]),
            rfc_foundations=self._format_rfcs_for_prompt(context["rfc_foundations"]),
            similar_drafts=self._format_similar_for_prompt(context["similar_drafts"]),
            top_rated=self._format_top_rated_for_prompt(context["top_rated"]),
            wg_context=context["wg_context"],
            ecosystem_vision=context["ecosystem_vision"],
            sibling_context=self._format_siblings_for_prompt(context["sibling_context"]),
        )

        phash = _prompt_hash("pipeline-outline-" + prompt)
        cache_key = f"_pipeline_{gap['topic']}_"

        # Check cache
        cached = self.db.get_cached_response(cache_key, phash)
        if cached:
            try:
                return json.loads(cached)
            except (json.JSONDecodeError, KeyError):
                pass

        text, in_tok, out_tok = self.analyzer._call_claude(
            prompt, max_tokens=4096, cheap=cheap
        )
        text = self.analyzer._extract_json(text)
        outline = json.loads(text)

        self.db.cache_response(
            cache_key, phash,
            self.config.claude_model_cheap if cheap else self.config.claude_model,
            prompt, text, in_tok, out_tok,
        )

        return outline

    def generate_section(self, outline: dict, section_idx: int, context: dict, cheap: bool = False) -> str:
        """Generate a single section with relevant ideas and refs."""
        sections = outline["sections"]
        section = sections[section_idx]

        outline_text = "\n".join(
            f"{i+1}. {s['title']}: {s.get('summary', '')}"
            for i, s in enumerate(sections)
|
||||
)
|
||||
|
||||
# Find ideas relevant to this section
|
||||
key_ideas = section.get("key_ideas", [])
|
||||
relevant_ideas = []
|
||||
if key_ideas and context["convergent_ideas"]:
|
||||
for idea in context["convergent_ideas"]:
|
||||
for key in key_ideas:
|
||||
if key.lower() in idea["title"].lower() or key.lower() in idea["description"].lower():
|
||||
relevant_ideas.append(idea)
|
||||
break
|
||||
if not relevant_ideas:
|
||||
# Use top 3 convergent ideas as fallback
|
||||
relevant_ideas = context["convergent_ideas"][:3]
|
||||
|
||||
# Format RFC refs
|
||||
rfc_refs = ""
|
||||
norm_refs = outline.get("normative_refs", [])
|
||||
info_refs = outline.get("informative_refs", [])
|
||||
all_refs = norm_refs + info_refs
|
||||
if all_refs:
|
||||
rfc_refs = "\n".join(f"- {ref}" for ref in all_refs[:10])
|
||||
else:
|
||||
rfc_refs = self._format_rfcs_for_prompt(context["rfc_foundations"][:5])
|
||||
|
||||
# Format cross-references to siblings
|
||||
cross_refs = self._format_siblings_for_prompt(context["sibling_context"])
|
||||
|
||||
prompt = SECTION_PROMPT_V2.format(
|
||||
draft_title=outline["title"],
|
||||
abstract=outline["abstract"],
|
||||
outline_text=outline_text,
|
||||
section_num=section_idx + 1,
|
||||
section_title=section["title"],
|
||||
section_summary=section.get("summary", ""),
|
||||
relevant_ideas=self._format_ideas_for_prompt(relevant_ideas),
|
||||
rfc_refs=rfc_refs,
|
||||
cross_refs=cross_refs,
|
||||
terminology=self._format_terminology_for_prompt(outline),
|
||||
)
|
||||
|
||||
phash = _prompt_hash("pipeline-section-" + prompt)
|
||||
cache_key = f"_pipeline_{outline['title']}_s{section_idx}_"
|
||||
|
||||
# Check cache
|
||||
cached = self.db.get_cached_response(cache_key, phash)
|
||||
if cached:
|
||||
return cached
|
||||
|
||||
text, in_tok, out_tok = self.analyzer._call_claude(
|
||||
prompt, max_tokens=2048, cheap=cheap
|
||||
)
|
||||
|
||||
self.db.cache_response(
|
||||
cache_key, phash,
|
||||
self.config.claude_model_cheap if cheap else self.config.claude_model,
|
||||
prompt, text, in_tok, out_tok,
|
||||
)
|
||||
|
||||
return text
|
||||
|
||||
def generate_full(self, gap_topic: str, cheap: bool = False,
|
||||
family_name: str = "", family_role: str = "") -> dict:
|
||||
"""Full pipeline: context -> outline -> sections -> assemble -> store in DB."""
|
||||
console.print(f"\n[bold]Pipeline: {gap_topic}[/]")
|
||||
|
||||
# Step 1: Build context
|
||||
console.print("[dim]Step 1/4:[/] Building context...")
|
||||
context = self.context_builder.build_context(gap_topic)
|
||||
console.print(
|
||||
f" Ideas: {len(context['convergent_ideas'])}, "
|
||||
f"RFCs: {len(context['rfc_foundations'])}, "
|
||||
f"Similar: {len(context['similar_drafts'])}, "
|
||||
f"Siblings: {len(context['sibling_context'])}"
|
||||
)
|
||||
|
||||
# Step 2: Generate outline
|
||||
console.print("[dim]Step 2/4:[/] Generating outline...")
|
||||
outline = self.generate_outline(context, cheap=cheap)
|
||||
console.print(f" Title: [cyan]{outline['title']}[/]")
|
||||
console.print(f" Sections: {len(outline['sections'])}")
|
||||
console.print(f" Status: {outline.get('intended_status', '?')}")
|
||||
|
||||
# Step 3: Generate sections
|
||||
console.print("[dim]Step 3/4:[/] Generating sections...")
|
||||
sections = []
|
||||
with Progress(
|
||||
SpinnerColumn(),
|
||||
TextColumn("[progress.description]{task.description}"),
|
||||
BarColumn(),
|
||||
MofNCompleteColumn(),
|
||||
console=console,
|
||||
) as progress:
|
||||
task = progress.add_task("Writing...", total=len(outline["sections"]))
|
||||
for i, s in enumerate(outline["sections"]):
|
||||
progress.update(task, description=f"Section: {s['title'][:30]}")
|
||||
text = self.generate_section(outline, i, context, cheap=cheap)
|
||||
sections.append(text)
|
||||
progress.advance(task)
|
||||
|
||||
# Step 4: Assemble and store
|
||||
console.print("[dim]Step 4/4:[/] Assembling draft...")
|
||||
full_text = DraftFormatter.format_draft(outline, sections, family_name=family_name)
|
||||
|
||||
# Generate draft name from title
|
||||
words = outline["title"].lower().split()
|
||||
slug = "-".join(w for w in words[:4] if w.isalnum())
|
||||
draft_name = f"draft-ai-{slug}-00"
|
||||
|
||||
data = {
|
||||
"gap_topic": gap_topic,
|
||||
"draft_name": draft_name,
|
||||
"title": outline["title"],
|
||||
"abstract": outline.get("abstract", ""),
|
||||
"outline": outline,
|
||||
"sections": sections,
|
||||
"full_text": full_text,
|
||||
"family_name": family_name,
|
||||
"family_role": family_role,
|
||||
"version": 0,
|
||||
"status": "draft",
|
||||
}
|
||||
|
||||
draft_id = self.db.upsert_generated_draft(data)
|
||||
console.print(f" Stored as generated_draft id={draft_id}, name={draft_name}")
|
||||
|
||||
data["id"] = draft_id
|
||||
return data
|
||||
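A minimal driver sketch for PipelineGenerator. Config, Database, Analyzer, and PipelineGenerator all exist in this repo, but the Database and Analyzer constructor signatures below are assumptions (the real wiring lives in the CLI layer), and the gap topic is made up:

# Sketch only; constructor signatures for Database/Analyzer are assumed.
from ietf_analyzer.config import Config
from ietf_analyzer.db import Database
from ietf_analyzer.analyzer import Analyzer
from ietf_analyzer.pipeline.generator import PipelineGenerator

config = Config.load()
db = Database(config)            # assumed signature
analyzer = Analyzer(config, db)  # assumed signature
gen = PipelineGenerator(config, db, analyzer)

# cheap=True routes LLM calls to claude_model_cheap from the config
result = gen.generate_full("agent-capability-discovery", cheap=True)  # hypothetical gap topic
print(result["draft_name"], "-", len(result["sections"]), "sections")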
src/ietf_analyzer/pipeline/prompts.py (new file, 92 lines)
@@ -0,0 +1,92 @@
"""Prompt templates for the gap-to-draft generation pipeline."""

from __future__ import annotations

OUTLINE_PROMPT_V2 = """\
You are writing an IETF Internet-Draft to address a gap in the AI/agent standardization landscape.

## Gap to Address
Topic: {gap_topic}
Description: {gap_description}
Category: {gap_category}
Evidence: {gap_evidence}
Severity: {gap_severity}

## Convergent Ideas from Existing Drafts
These ideas from the current landscape converge on this topic — build on them, don't duplicate:
{convergent_ideas}

## RFC Foundations
Most-referenced RFCs in this space — cite where relevant:
{rfc_foundations}

## Similar Existing Drafts
These drafts are closest to this gap — differentiate from them:
{similar_drafts}

## Top-Rated Drafts in Category
Drafts the community considers strong in this area:
{top_rated}

## Working Group Context
{wg_context}

## Ecosystem Vision
{ecosystem_vision}

## Sibling Drafts (same family)
{sibling_context}

Generate a detailed outline for an Internet-Draft that fills this gap.
Return JSON:
{{
  "title": "full draft title",
  "abstract": "150-250 word abstract",
  "sections": [
    {{"title": "section title", "summary": "2-3 sentence summary of content", "key_ideas": ["idea titles to incorporate"]}}
  ],
  "normative_refs": ["RFC NNNN", "draft-name"],
  "informative_refs": ["RFC NNNN", "draft-name"],
  "terminology": {{"term": "definition"}},
  "target_wg": "suggested IETF working group",
  "intended_status": "informational|standards-track|experimental"
}}

Requirements:
- Include standard sections: Introduction, Terminology, Problem Statement, then 2-4 technical sections, Security Considerations, IANA Considerations
- Reference specific RFCs and drafts from the context above
- Use terminology consistent with sibling drafts if any
- Abstract should clearly state the problem, approach, and contribution
JSON only, no fences."""

SECTION_PROMPT_V2 = """\
Write the following section of an Internet-Draft titled "{draft_title}".

Abstract: {abstract}

Full outline:
{outline_text}

Write section {section_num}: {section_title}
Summary: {section_summary}

## Relevant Ideas to Incorporate
{relevant_ideas}

## RFC References to Cite
{rfc_refs}

## Cross-References to Sister Drafts
{cross_refs}

## Terminology
{terminology}

Follow IETF Internet-Draft conventions:
- Formal, precise technical language
- Use RFC 2119 keywords (MUST, SHOULD, MAY) where appropriate
- Reference existing RFCs and drafts where relevant (use [RFCNNNN] format)
- 3-6 paragraphs per section
- Use the terminology definitions provided above consistently

Write the section content only (no section number or title). Plain text."""
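One subtlety worth calling out: the doubled braces in these templates are what lets str.format emit literal JSON braces while still substituting the single-brace placeholders. A tiny self-contained check (the substituted value is a stand-in):

# Demonstrates the {{ }} escaping used in OUTLINE_PROMPT_V2.
template = 'Return JSON:\n{{"title": "full draft title", "target_wg": "{target_wg}"}}'
print(template.format(target_wg="example-wg"))
# Return JSON:
# {"title": "full draft title", "target_wg": "example-wg"}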
src/ietf_analyzer/pipeline/quality.py (new file, 277 lines)
@@ -0,0 +1,277 @@
"""Quality gates for generated drafts — novelty, references, format, self-rating."""

from __future__ import annotations

import json
import re
from datetime import datetime, timezone

import numpy as np
from rich.console import Console

from ..config import Config
from ..db import Database

console = Console()


def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    dot = np.dot(a, b)
    norm = np.linalg.norm(a) * np.linalg.norm(b)
    if norm == 0:
        return 0.0
    return float(dot / norm)


REQUIRED_SECTIONS = ["introduction", "security considerations", "iana considerations"]


class QualityGates:
    def __init__(self, config: Config, db: Database, analyzer):
        self.config = config
        self.db = db
        self.analyzer = analyzer

    def run_all(self, draft_id: int) -> dict:
        """Run all quality gates. Returns {gate_name: {passed: bool, score: float, details: str}}"""
        results = {}
        results["novelty"] = self.check_novelty(draft_id)
        results["references"] = self.check_references(draft_id)
        results["format"] = self.check_format(draft_id)
        results["self_rating"] = self.check_self_rating(draft_id)

        passed = sum(1 for r in results.values() if r["passed"])
        total = len(results)
        console.print(
            f"Quality gates: [{'green' if passed == total else 'yellow'}]"
            f"{passed}/{total} passed[/]"
        )
        for name, result in results.items():
            status = "[green]PASS[/]" if result["passed"] else "[red]FAIL[/]"
            console.print(f"  {status} {name}: {result['details']}")

        return results

    def check_novelty(self, draft_id: int) -> dict:
        """Embed generated abstract, compare against all existing drafts.
        Flag if max_similarity > 0.90."""
        gd = self.db.get_generated_draft(draft_id)
        if not gd:
            return {"passed": False, "score": 0.0, "details": "Draft not found"}

        abstract = gd.get("abstract", "")
        title = gd.get("title", "")
        text_to_embed = f"{title}\n\n{abstract}"

        if not text_to_embed.strip():
            return {"passed": False, "score": 0.0, "details": "No abstract to check"}

        # Embed via Ollama
        try:
            import ollama as ollama_lib
            client = ollama_lib.Client(host=self.config.ollama_url)
            resp = client.embed(
                model=self.config.ollama_embed_model,
                input=text_to_embed[:8000],
            )
            gen_vec = np.array(resp["embeddings"][0], dtype=np.float32)
        except Exception as e:
            return {"passed": True, "score": 0.0,
                    "details": f"Ollama unavailable, skipping novelty check: {e}"}

        all_embeddings = self.db.all_embeddings()
        if not all_embeddings:
            return {"passed": True, "score": 1.0, "details": "No existing embeddings to compare"}

        max_sim = 0.0
        most_similar = ""
        for name, vec in all_embeddings.items():
            sim = _cosine_similarity(gen_vec, vec)
            if sim > max_sim:
                max_sim = sim
                most_similar = name

        passed = max_sim < 0.90
        return {
            "passed": passed,
            "score": 1.0 - max_sim,
            "details": (
                f"Max similarity: {max_sim:.3f} with {most_similar}"
                + ("" if passed else " — too similar, needs differentiation")
            ),
        }

    def check_references(self, draft_id: int) -> dict:
        """Extract RFC/draft refs via regex, cross-check against draft_refs table."""
        gd = self.db.get_generated_draft(draft_id)
        if not gd:
            return {"passed": False, "score": 0.0, "details": "Draft not found"}

        full_text = gd.get("full_text", "")
        if not full_text:
            return {"passed": False, "score": 0.0, "details": "No full text"}

        # Extract references from generated text
        rfc_pattern = re.compile(r'\[?RFC\s*(\d{3,5})\]?', re.IGNORECASE)
        draft_pattern = re.compile(r'(draft-[a-z0-9-]+)', re.IGNORECASE)

        found_rfcs = set(rfc_pattern.findall(full_text))
        found_drafts = set(draft_pattern.findall(full_text))

        total_refs = len(found_rfcs) + len(found_drafts)

        # Cross-check: how many of these RFCs are actually in our DB?
        known_rfcs = set()
        for ref_id in found_rfcs:
            drafts = self.db.drafts_referencing("rfc", ref_id)
            if drafts:
                known_rfcs.add(ref_id)

        # Cross-check: how many referenced drafts exist in our DB?
        known_drafts = set()
        for dname in found_drafts:
            if self.db.get_draft(dname):
                known_drafts.add(dname)

        verified = len(known_rfcs) + len(known_drafts)
        score = verified / total_refs if total_refs > 0 else 0.0

        passed = total_refs >= 3 and score >= 0.3
        return {
            "passed": passed,
            "score": score,
            "details": (
                f"{total_refs} refs found ({len(found_rfcs)} RFCs, {len(found_drafts)} drafts), "
                f"{verified} verified in DB ({score:.0%})"
            ),
        }

    def check_format(self, draft_id: int) -> dict:
        """Check line length <= 72, required sections present, no markdown leaked."""
        gd = self.db.get_generated_draft(draft_id)
        if not gd:
            return {"passed": False, "score": 0.0, "details": "Draft not found"}

        full_text = gd.get("full_text", "")
        if not full_text:
            return {"passed": False, "score": 0.0, "details": "No full text"}

        issues = []

        # Check line length
        lines = full_text.split("\n")
        long_lines = [i + 1 for i, line in enumerate(lines) if len(line) > 72]
        if long_lines:
            issues.append(f"{len(long_lines)} lines exceed 72 chars")

        # Check required sections
        text_lower = full_text.lower()
        for section in REQUIRED_SECTIONS:
            if section not in text_lower:
                issues.append(f"Missing required section: {section}")

        # Check for leaked markdown
        markdown_patterns = [
            (r'^#{1,3}\s', "markdown headers (# )"),
            (r'\*\*[^*]+\*\*', "bold markdown (**text**)"),
            (r'```', "code fences (```)"),
            (r'\[([^\]]+)\]\(http', "markdown links"),
        ]
        for pattern, desc in markdown_patterns:
            if re.search(pattern, full_text, re.MULTILINE):
                issues.append(f"Leaked markdown: {desc}")

        if not issues:
            return {"passed": True, "score": 1.0, "details": "All format checks pass"}

        score = max(0.0, 1.0 - len(issues) * 0.25)
        return {
            "passed": len(issues) <= 1,  # Allow one minor issue
            "score": score,
            "details": "; ".join(issues),
        }

    def check_self_rating(self, draft_id: int) -> dict:
        """Feed through existing rate_draft() pipeline. Score on same 1-5 scale."""
        gd = self.db.get_generated_draft(draft_id)
        if not gd:
            return {"passed": False, "score": 0.0, "details": "Draft not found"}

        # Create a temporary prompt matching the analyzer's rating format
        title = gd.get("title", "")
        abstract = gd.get("abstract", "")
        draft_name = gd.get("draft_name", "")

        from ..analyzer import RATE_PROMPT_COMPACT, CATEGORIES_SHORT, _prompt_hash

        prompt = RATE_PROMPT_COMPACT.format(
            name=draft_name,
            title=title,
            time=datetime.now(timezone.utc).strftime("%Y-%m-%d"),
            pages="?",
            abstract=abstract[:2000],
            categories=", ".join(CATEGORIES_SHORT),
        )

        phash = _prompt_hash("self-rate-" + prompt)
        cache_key = f"_selfrate_{draft_id}_"

        # Check cache
        cached = self.db.get_cached_response(cache_key, phash)
        if cached:
            try:
                data = json.loads(cached)
                return self._parse_self_rating(data, draft_id)
            except (json.JSONDecodeError, KeyError):
                pass

        try:
            text, in_tok, out_tok = self.analyzer._call_claude(prompt, max_tokens=512, cheap=True)
            text = self.analyzer._extract_json(text)
            data = json.loads(text)

            self.db.cache_response(
                cache_key, phash,
                self.config.claude_model_cheap,
                prompt, text, in_tok, out_tok,
            )

            return self._parse_self_rating(data, draft_id)
        except Exception as e:
            return {"passed": False, "score": 0.0,
                    "details": f"Self-rating failed: {e}"}

    def _parse_self_rating(self, data: dict, draft_id: int) -> dict:
        """Parse self-rating result and update the generated draft."""
        novelty = int(data.get("n", data.get("novelty", 3)))
        maturity = int(data.get("m", data.get("maturity", 3)))
        relevance = int(data.get("r", data.get("relevance", 3)))
        overlap = int(data.get("o", data.get("overlap", 3)))
        momentum = int(data.get("mo", data.get("momentum", 3)))

        composite = (
            novelty * 0.30
            + relevance * 0.25
            + maturity * 0.20
            + momentum * 0.15
            + (6 - overlap) * 0.10
        )

        # Store rating on the generated draft
        gd = self.db.get_generated_draft(draft_id)
        if gd:
            self.db.conn.execute(
                "UPDATE generated_drafts SET rating_json = ?, quality_score = ? WHERE id = ?",
                (json.dumps(data), composite, draft_id),
            )
            self.db.conn.commit()

        passed = composite >= 2.5
        return {
            "passed": passed,
            "score": composite / 5.0,
            "details": (
                f"Composite: {composite:.1f}/5 "
                f"(N:{novelty} M:{maturity} O:{overlap} Mo:{momentum} R:{relevance})"
            ),
        }
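A worked example of the composite in _parse_self_rating, using the weights above: example ratings N=4, M=3, R=4, O=2, Mo=3 give 4*0.30 + 4*0.25 + 3*0.20 + 3*0.15 + (6-2)*0.10 = 1.2 + 1.0 + 0.6 + 0.45 + 0.4 = 3.65, which clears the 2.5 pass threshold. As a quick check:

# Numeric check of the composite formula in _parse_self_rating (example ratings).
novelty, maturity, relevance, overlap, momentum = 4, 3, 4, 2, 3
composite = (novelty * 0.30 + relevance * 0.25 + maturity * 0.20
             + momentum * 0.15 + (6 - overlap) * 0.10)
print(round(composite, 2), composite >= 2.5)  # 3.65 True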
(file diff suppressed because it is too large)
src/ietf_analyzer/sources/__init__.py (new file, 14 lines)
@@ -0,0 +1,14 @@
"""Multi-source document fetcher registry."""

from .base import SourceDocument, SourceFetcher
from .ietf import IETFFetcher
from .w3c import W3CFetcher

FETCHERS = {"ietf": IETFFetcher, "w3c": W3CFetcher}


def get_fetcher(source_name: str, config=None):
    cls = FETCHERS.get(source_name)
    if cls is None:
        raise ValueError(f"Unknown source: {source_name}")
    return cls(config)
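Usage sketch for the registry. This assumes a loadable config whose w3c_groups list is populated; the keywords and date are examples:

# Sketch: resolving and using a fetcher via the registry.
from ietf_analyzer.sources import get_fetcher

fetcher = get_fetcher("w3c")  # raises ValueError for unknown source names
try:
    docs = fetcher.search(["agent", "machine-learning"], since="2024-01-01")
    print(f"{len(docs)} matching W3C specs")
finally:
    fetcher.close()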
src/ietf_analyzer/sources/base.py (new file, 32 lines)
@@ -0,0 +1,32 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Protocol


@dataclass
class SourceDocument:
    """Generic document from any standards body."""

    name: str  # Unique identifier (e.g. "draft-foo-bar", "webnn-api")
    title: str
    abstract: str
    source: str  # "ietf", "w3c", etc.
    source_id: str = ""  # Body-specific ID
    source_url: str = ""  # Canonical URL
    full_text: str | None = None
    time: str = ""  # ISO date
    doc_status: str = ""  # "active", "published", "expired", etc.
    extra: dict = field(default_factory=dict)  # Body-specific metadata


class SourceFetcher(Protocol):
    """Protocol for standards body fetchers."""

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]: ...

    def download_text(self, doc: SourceDocument) -> str | None: ...

    def close(self) -> None: ...
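Because SourceFetcher is a typing.Protocol, fetchers satisfy it structurally: no inheritance or registration is needed. A minimal in-memory stub, purely as an illustration (useful for tests):

# Illustration only: a stub that satisfies SourceFetcher structurally.
from ietf_analyzer.sources.base import SourceDocument

class StaticFetcher:
    def __init__(self, docs: list[SourceDocument]):
        self._docs = docs

    def search(self, keywords: list[str], since: str | None = None) -> list[SourceDocument]:
        kw = [k.lower() for k in keywords]
        return [d for d in self._docs
                if any(k in (d.title + " " + d.abstract).lower() for k in kw)]

    def download_text(self, doc: SourceDocument) -> str | None:
        return doc.full_text

    def close(self) -> None:
        pass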
src/ietf_analyzer/sources/ietf.py (new file, 82 lines)
@@ -0,0 +1,82 @@
"""IETF Datatracker adapter — delegates to existing Fetcher."""

from __future__ import annotations

from ..config import Config
from ..fetcher import Fetcher
from ..models import Draft
from .base import SourceDocument


class IETFFetcher:
    """IETF Datatracker adapter wrapping the existing Fetcher class."""

    def __init__(self, config: Config | None = None):
        self.config = config or Config.load()
        self._fetcher = Fetcher(self.config)

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]:
        """Search Datatracker, convert Draft -> SourceDocument."""
        drafts = self._fetcher.search_drafts(keywords=keywords, since=since)
        return [self._draft_to_doc(d) for d in drafts]

    def download_text(self, doc: SourceDocument) -> str | None:
        """Download full text for a SourceDocument."""
        draft = self._doc_to_draft(doc)
        return self._fetcher.download_full_text(draft)

    def close(self) -> None:
        self._fetcher.close()

    @staticmethod
    def _draft_to_doc(draft: Draft) -> SourceDocument:
        return SourceDocument(
            name=draft.name,
            title=draft.title,
            abstract=draft.abstract,
            source="ietf",
            source_id=str(draft.dt_id) if draft.dt_id else "",
            source_url=draft.datatracker_url,
            full_text=draft.full_text,
            time=draft.time or "",
            doc_status="active",
            extra={
                "rev": draft.rev,
                "pages": draft.pages,
                "words": draft.words,
                "group": draft.group,
                "group_uri": draft.group_uri,
                "expires": draft.expires,
                "ad": draft.ad,
                "shepherd": draft.shepherd,
                "states": draft.states,
                "fetched_at": draft.fetched_at,
            },
        )

    @staticmethod
    def _doc_to_draft(doc: SourceDocument) -> Draft:
        extra = doc.extra or {}
        return Draft(
            name=doc.name,
            rev=extra.get("rev", "00"),
            title=doc.title,
            abstract=doc.abstract,
            time=doc.time,
            dt_id=int(doc.source_id) if doc.source_id else None,
            pages=extra.get("pages"),
            words=extra.get("words"),
            group=extra.get("group"),
            group_uri=extra.get("group_uri"),
            expires=extra.get("expires"),
            ad=extra.get("ad"),
            shepherd=extra.get("shepherd"),
            states=extra.get("states", []),
            full_text=doc.full_text,
            fetched_at=extra.get("fetched_at"),
            source="ietf",
            source_id=doc.source_id,
            source_url=doc.source_url,
        )
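The two static converters are meant to round-trip, so download_text can rebuild the Draft the underlying Fetcher expects. A sketch of that invariant; it assumes the remaining Draft fields have defaults, and the instance values are made up:

# Sketch of the _draft_to_doc / _doc_to_draft round-trip (hypothetical Draft values).
d = Draft(name="draft-example-agent-00", rev="03", title="Example",
          abstract="An example.", time="2025-06-01", dt_id=12345)
doc = IETFFetcher._draft_to_doc(d)
restored = IETFFetcher._doc_to_draft(doc)
assert (restored.name, restored.rev, restored.dt_id) == (d.name, d.rev, d.dt_id)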
src/ietf_analyzer/sources/w3c.py (new file, 187 lines)
@@ -0,0 +1,187 @@
"""Fetch specs from W3C public API."""

from __future__ import annotations

import re
import time as time_mod

import httpx
from rich.console import Console
from rich.progress import (
    BarColumn,
    MofNCompleteColumn,
    Progress,
    SpinnerColumn,
    TextColumn,
)

from ..config import Config
from .base import SourceDocument

W3C_API = "https://api.w3.org"

console = Console()


def _strip_html(html: str) -> str:
    """Minimal HTML tag stripper — no heavy dependencies."""
    text = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL)
    text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=re.DOTALL)
    text = re.sub(r"<[^>]+>", " ", text)
    # Unescape common HTML entities left behind once tags are gone
    text = re.sub(r"&nbsp;", " ", text)
    text = re.sub(r"&amp;", "&", text)
    text = re.sub(r"&lt;", "<", text)
    text = re.sub(r"&gt;", ">", text)
    text = re.sub(r"&#\d+;", "", text)
    text = re.sub(r"\s+", " ", text)
    return text.strip()


class W3CFetcher:
    """Fetch specs from the W3C public API (no auth needed)."""

    def __init__(self, config: Config | None = None):
        self.config = config or Config.load()
        self.client = httpx.Client(timeout=30, follow_redirects=True)
        self.groups = self.config.w3c_groups

    def search(
        self, keywords: list[str], since: str | None = None
    ) -> list[SourceDocument]:
        """Fetch specs from AI-relevant W3C groups, filtered by keywords."""
        seen: dict[str, SourceDocument] = {}
        kw_lower = [k.lower() for k in keywords]

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Fetching W3C specs...", total=len(self.groups))

            for group in self.groups:
                progress.update(task, description=f"W3C group: {group}")
                specs = self._fetch_group_specs(group)
                for spec in specs:
                    # Client-side keyword filter on title + description
                    haystack = (spec.title + " " + spec.abstract).lower()
                    if any(kw in haystack for kw in kw_lower):
                        if since and spec.time and spec.time < since:
                            continue
                        if spec.name not in seen:
                            seen[spec.name] = spec
                progress.advance(task)

        console.print(f"Found [bold green]{len(seen)}[/] W3C specs matching keywords")
        return list(seen.values())

    def _fetch_group_specs(self, group_shortname: str) -> list[SourceDocument]:
        """Fetch all specifications for a W3C group."""
        url = f"{W3C_API}/groups/{group_shortname}/specifications"
        specs: list[SourceDocument] = []

        try:
            page = 1
            while True:
                resp = self.client.get(
                    url,
                    params={"format": "json", "page": page},
                    headers={"Accept": "application/json"},
                )
                resp.raise_for_status()
                data = resp.json()

                spec_list = data if isinstance(data, list) else data.get("_links", {}).get("specifications", [])
                if not spec_list and isinstance(data, dict):
                    # Try alternate response shape
                    spec_list = data.get("specifications", [])
                if not spec_list:
                    break

                for item in spec_list:
                    href = item.get("href", "")
                    shortname = item.get("shortname", "")
                    title = item.get("title", shortname)

                    if not shortname and href:
                        # Extract shortname from href like /specifications/webnn
                        parts = href.rstrip("/").split("/")
                        shortname = parts[-1] if parts else ""

                    if not shortname:
                        continue

                    # Fetch spec detail for abstract/description
                    detail = self._fetch_spec_detail(shortname)
                    abstract = detail.get("description", title)
                    # Prefer editor's draft, then latest-version URL, then TR fallback
                    spec_url = (
                        detail.get("editor-draft")
                        or detail.get("url")
                        or f"https://www.w3.org/TR/{shortname}/"
                    )
                    status = detail.get("status", "")
                    date = detail.get("date", "")

                    specs.append(
                        SourceDocument(
                            name=f"w3c-{shortname}",
                            title=title,
                            abstract=abstract,
                            source="w3c",
                            source_id=shortname,
                            source_url=spec_url,
                            time=date,
                            doc_status=status,
                            extra={"group": group_shortname},
                        )
                    )
                    time_mod.sleep(0.3)

                # Check pagination
                pages = data.get("pages", 1) if isinstance(data, dict) else 1
                if page >= pages:
                    break
                page += 1
                time_mod.sleep(0.3)

        except httpx.HTTPError as e:
            console.print(f"[yellow]W3C API error for {group_shortname}: {e}[/]")

        return specs

    def _fetch_spec_detail(self, shortname: str) -> dict:
        """Fetch detail for a single spec."""
        try:
            resp = self.client.get(
                f"{W3C_API}/specifications/{shortname}",
                headers={"Accept": "application/json"},
            )
            resp.raise_for_status()
            data = resp.json()
            return {
                "description": data.get("description", ""),
                "title": data.get("title", shortname),
                "editor-draft": data.get("editor-draft", ""),
                "url": data.get("_links", {}).get("latest-version", {}).get("href", ""),
                "status": data.get("_links", {}).get("latest-version", {}).get("status", ""),
                "date": data.get("_links", {}).get("latest-version", {}).get("date", ""),
            }
        except httpx.HTTPError:
            return {}

    def download_text(self, doc: SourceDocument) -> str | None:
        """Fetch spec URL content and strip HTML to plain text."""
        url = doc.source_url
        if not url:
            return None
        try:
            resp = self.client.get(url)
            resp.raise_for_status()
            content_type = resp.headers.get("content-type", "")
            if "html" in content_type:
                return _strip_html(resp.text)[:50000]
            return resp.text[:50000]
        except httpx.HTTPError as e:
            console.print(f"[dim]Could not download text for {doc.name}: {e}[/]")
            return None

    def close(self) -> None:
        self.client.close()
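A quick sketch of _strip_html on a made-up fragment: scripts are dropped entirely, tags become spaces, common entities are unescaped, and whitespace is collapsed:

# Illustration of _strip_html behavior (input fragment is made up).
from ietf_analyzer.sources.w3c import _strip_html

html = "<p>WebNN&nbsp;&amp; friends: <b>on-device</b> inference</p><script>track()</script>"
print(_strip_html(html))
# WebNN & friends: on-device inference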
@@ -449,9 +449,10 @@ class Visualizer:
         if len(G.nodes) == 0:
             raise RuntimeError(f"No edges with min_shared={min_shared}.")
 
-        # Get affiliations for coloring
+        # Get affiliations for coloring (normalized)
+        from .orgs import normalize_org
         top_authors = self.db.top_authors(limit=200)
-        author_aff = {name: aff for name, aff, _, _ in top_authors}
+        author_aff = {name: normalize_org(aff) for name, aff, _, _ in top_authors}
 
         # Node sizing by degree
         degrees = dict(G.degree())
@@ -650,7 +651,8 @@ class Visualizer:
         """
         import plotly.express as px
 
-        orgs = self.db.top_orgs(limit=20)
+        from .orgs import top_orgs_normalized
+        orgs = top_orgs_normalized(self.db, limit=20)
         if not orgs:
             raise RuntimeError("No author data. Run `ietf authors --fetch` first.")
 