v0.3.0: Gap-to-Draft pipeline, Living Standards Observatory, blog series

Gap-to-Draft Pipeline (ietf pipeline):
- Context builder assembles ideas, RFC foundations, similar drafts, ecosystem vision
- Generator produces outlines + sections using rich context with Claude
- Quality gates: novelty (embedding similarity), references, format, self-rating
- Family coordinator generates 5-draft ecosystem (AEM/ATD/HITL/AEPB/APAE)
- I-D formatter with proper headers, references, 72-char wrapping

Living Standards Observatory (ietf observatory):
- Source abstraction with IETF + W3C fetchers
- 7-step update pipeline: snapshot, fetch, analyze, embed, ideas, gaps, record
- Static GitHub Pages dashboard (explorer, gap tracker, timeline)
- Weekly CI/CD automation via GitHub Actions

Also includes:
- 361 drafts (expanded from 260 with 6 new keywords), 403 authors, 1,262 ideas, 12 gaps
- Blog series (8 posts planned), reports, arXiv paper figures
- Agent team infrastructure (CLAUDE.md, scripts, dev journal)
- 5 new DB tables, schema migration, ~15 new query methods

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-04 00:48:57 +01:00
parent be9cf9c5d9
commit d6beb9c0a0
87 changed files with 24471 additions and 401 deletions

View File

@@ -0,0 +1,259 @@
"""Context builder — assembles rich context for draft generation from DB queries."""
from __future__ import annotations
import json
from pathlib import Path
import numpy as np
from rich.console import Console
from ..config import Config
from ..db import Database
console = Console()
def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
dot = np.dot(a, b)
norm = np.linalg.norm(a) * np.linalg.norm(b)
if norm == 0:
return 0.0
return float(dot / norm)
class ContextBuilder:
    """Assembles the generation context for a gap topic using only database queries.

    Holds a Config (paths, Ollama host/model settings read by the helper
    methods) and a Database (all query methods). No LLM calls happen here.
    """

    def __init__(self, config: Config, db: Database):
        # config: project settings consumed by _similar_drafts/_ecosystem_vision.
        # db: query layer consumed by every _* helper.
        self.config = config
        self.db = db
def build_context(self, gap_topic: str) -> dict:
"""Assemble full context for a gap topic. All DB queries, zero Claude calls."""
gap = self._find_gap(gap_topic)
if not gap:
console.print(f"[yellow]No gap found matching '{gap_topic}', using topic as-is[/]")
gap = {
"id": 0,
"topic": gap_topic,
"description": gap_topic,
"category": "",
"evidence": "",
"severity": "medium",
}
ideas = self._convergent_ideas(gap)
rfcs = self._rfc_foundations(gap.get("category", ""))
similar = self._similar_drafts(gap["description"])
top_rated = self._top_rated_in_category(gap.get("category", ""))
wg_context = self._wg_context()
ecosystem = self._ecosystem_vision()
siblings = self._sibling_context(gap_topic)
return {
"gap": gap,
"convergent_ideas": ideas,
"rfc_foundations": rfcs,
"similar_drafts": similar,
"top_rated": top_rated,
"wg_context": wg_context,
"ecosystem_vision": ecosystem,
"sibling_context": siblings,
}
def _find_gap(self, topic: str) -> dict | None:
"""Find a gap by topic string (fuzzy match)."""
gaps = self.db.all_gaps()
topic_lower = topic.lower()
# Exact match first
for g in gaps:
if g["topic"].lower() == topic_lower:
return g
# Substring match
for g in gaps:
if topic_lower in g["topic"].lower() or topic_lower in g["description"].lower():
return g
# Word overlap match
topic_words = set(topic_lower.split())
best = None
best_score = 0
for g in gaps:
gap_words = set(g["topic"].lower().split()) | set(g["description"].lower().split())
overlap = len(topic_words & gap_words)
if overlap > best_score:
best_score = overlap
best = g
return best if best_score >= 2 else None
def _convergent_ideas(self, gap: dict, limit: int = 20) -> list[dict]:
"""Find ideas that converge on this gap topic via keyword matching."""
all_ideas = self.db.all_ideas()
if not all_ideas:
return []
# Build search terms from gap topic + description
search_text = (gap["topic"] + " " + gap["description"]).lower()
search_words = set(search_text.split())
# Remove common words
stop_words = {"the", "a", "an", "and", "or", "in", "of", "for", "to", "is",
"are", "that", "this", "with", "not", "by", "on", "at", "from",
"as", "be", "it", "no", "but", "has", "have", "do", "does"}
search_words -= stop_words
scored = []
for idea in all_ideas:
idea_text = (idea["title"] + " " + idea["description"]).lower()
idea_words = set(idea_text.split())
overlap = len(search_words & idea_words)
if overlap >= 1:
scored.append((overlap, idea))
scored.sort(key=lambda x: x[0], reverse=True)
return [item for _, item in scored[:limit]]
def _rfc_foundations(self, category: str, limit: int = 10) -> list[tuple[str, int]]:
"""Get most-referenced RFCs, optionally filtered by category."""
top_refs = self.db.top_referenced(ref_type="rfc", limit=limit * 2)
if not category:
return [(ref_id, count) for ref_id, count, _ in top_refs[:limit]]
# Filter to RFCs referenced by drafts in this category
category_lower = category.lower()
pairs = self.db.drafts_with_ratings(limit=500)
category_drafts = set()
for draft, rating in pairs:
for cat in rating.categories:
if category_lower in cat.lower():
category_drafts.add(draft.name)
if not category_drafts:
return [(ref_id, count) for ref_id, count, _ in top_refs[:limit]]
filtered = []
for ref_id, count, draft_names in top_refs:
cat_count = sum(1 for d in draft_names if d in category_drafts)
if cat_count > 0:
filtered.append((ref_id, cat_count))
filtered.sort(key=lambda x: x[1], reverse=True)
return filtered[:limit]
def _similar_drafts(self, gap_desc: str, limit: int = 8) -> list[tuple[str, float]]:
"""Find semantically similar existing drafts via embeddings."""
all_embeddings = self.db.all_embeddings()
if not all_embeddings:
return []
# Try to embed the gap description via Ollama
try:
import ollama as ollama_lib
client = ollama_lib.Client(host=self.config.ollama_url)
resp = client.embed(
model=self.config.ollama_embed_model,
input=gap_desc[:8000],
)
gap_vec = np.array(resp["embeddings"][0], dtype=np.float32)
except Exception as e:
console.print(f"[yellow]Ollama embedding failed, skipping similarity: {e}[/]")
return []
similarities = []
for name, vec in all_embeddings.items():
sim = _cosine_similarity(gap_vec, vec)
similarities.append((name, sim))
similarities.sort(key=lambda x: x[1], reverse=True)
return similarities[:limit]
def _top_rated_in_category(self, category: str, limit: int = 5) -> list[tuple]:
"""Get top-rated drafts in a category."""
pairs = self.db.drafts_with_ratings(limit=500)
if not category:
return [
(draft.name, draft.title, rating.composite_score)
for draft, rating in pairs[:limit]
]
category_lower = category.lower()
matching = []
for draft, rating in pairs:
for cat in rating.categories:
if category_lower in cat.lower():
matching.append((draft.name, draft.title, rating.composite_score))
break
return matching[:limit]
def _wg_context(self) -> str:
"""Summarize WG adoption status."""
adoption = self.db.draft_adoption_status()
wg_counts: dict[str, int] = {}
adopted_count = 0
for d in adoption:
if d["wg_adopted"]:
adopted_count += 1
wg = d["wg_name"]
wg_counts[wg] = wg_counts.get(wg, 0) + 1
total = len(adoption)
if not wg_counts:
return f"{total} drafts, none WG-adopted yet."
top_wgs = sorted(wg_counts.items(), key=lambda x: x[1], reverse=True)[:5]
wg_lines = ", ".join(f"{wg} ({n})" for wg, n in top_wgs)
return f"{total} drafts, {adopted_count} WG-adopted. Top WGs: {wg_lines}"
def _ecosystem_vision(self) -> str:
"""Load ecosystem vision document if it exists."""
vision_path = Path(self.config.data_dir) / "reports" / "holistic-agent-ecosystem-draft-outlines.md"
if not vision_path.exists():
return "(No ecosystem vision document found)"
text = vision_path.read_text()
# Return the pitch section (compact) rather than the full document
if "## 8. One-Page Pitch" in text:
pitch = text.split("## 8. One-Page Pitch")[1].strip()
return pitch[:2000]
# Fallback: return the vision summary
if "## 1. Vision Summary" in text:
parts = text.split("## 1. Vision Summary")[1]
if "## 2." in parts:
parts = parts.split("## 2.")[0]
return parts.strip()[:2000]
return text[:2000]
def _sibling_context(self, gap_topic: str) -> list[dict]:
"""Get outlines of sibling drafts from the same family."""
# Check all family drafts
families = self.db.get_generated_drafts()
if not families:
return []
# Find which family this gap_topic belongs to
topic_lower = gap_topic.lower()
family_name = ""
for gd in families:
if topic_lower in gd.get("gap_topic", "").lower():
family_name = gd.get("family_name", "")
break
if not family_name:
return []
siblings = self.db.get_family_drafts(family_name)
result = []
for s in siblings:
if s.get("gap_topic", "").lower() == topic_lower:
continue # Skip self
outline = {}
if s.get("outline_json"):
try:
outline = json.loads(s["outline_json"]) if isinstance(s["outline_json"], str) else s["outline_json"]
except (json.JSONDecodeError, TypeError):
pass
result.append({
"role": s.get("family_role", ""),
"title": s.get("title", ""),
"abstract": s.get("abstract", ""),
"outline": outline,
})
return result