Platform upgrade: semantic search, citations, readiness, tests, Docker

Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)
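
For the "Ask" retrieval step, a minimal sketch of the hybrid FTS5 + embedding
lookup. The table and column names (drafts_fts, embeddings, vector) and the
embed() callable are illustrative assumptions, not the actual schema:

    import json, math, sqlite3

    def cosine(a: list[float], b: list[float]) -> float:
        dot = sum(x * y for x, y in zip(a, b))
        na = math.sqrt(sum(x * x for x in a))
        nb = math.sqrt(sum(x * x for x in b))
        return dot / (na * nb) if na and nb else 0.0

    def ask_candidates(db: sqlite3.Connection, query: str, embed, k: int = 10) -> list[str]:
        # Lexical pass: FTS5 MATCH, ranked by BM25 (lower bm25() = better match).
        rows = db.execute(
            "SELECT name FROM drafts_fts WHERE drafts_fts MATCH ? "
            "ORDER BY bm25(drafts_fts) LIMIT ?", (query, k)).fetchall()
        names = {r[0] for r in rows}
        # Semantic pass: cosine similarity against stored embedding vectors.
        qvec = embed(query)
        scored = sorted(
            ((cosine(qvec, json.loads(vec)), name)
             for name, vec in db.execute("SELECT name, vector FROM embeddings")),
            reverse=True)
        names.update(name for _, name in scored[:k])
        return sorted(names)  # merged candidates go to Claude for synthesis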

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation
(sketched below), resilient batch error handling with --retry-failed,
and observatory --dry-run.
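
A hedged sketch of the env-var override + validation pattern; the
OBSERVATORY_* variable names and the Config fields shown are hypothetical:

    import os
    from dataclasses import dataclass

    @dataclass
    class Config:
        db_path: str = "observatory.db"
        batch_size: int = 10

        @classmethod
        def load(cls) -> "Config":
            cfg = cls()
            # Env vars override file/default config values.
            cfg.db_path = os.environ.get("OBSERVATORY_DB_PATH", cfg.db_path)
            raw = os.environ.get("OBSERVATORY_BATCH_SIZE")
            if raw is not None:
                try:
                    cfg.batch_size = int(raw)
                except ValueError:
                    raise SystemExit(f"OBSERVATORY_BATCH_SIZE must be an int, got {raw!r}")
            if cfg.batch_size < 1:
                raise SystemExit("OBSERVATORY_BATCH_SIZE must be >= 1")
            return cfg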

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 20:52:56 +01:00
parent da2a989744
commit 757b781c67
33 changed files with 4253 additions and 170 deletions

@@ -38,7 +38,7 @@ CATEGORIES_SHORT = [
 # Compact prompt — abstract only, saves ~10x tokens vs full-text
 RATE_PROMPT_COMPACT = """\
-Rate this IETF draft. JSON only.
+Rate this {doc_type}. JSON only.
 {name} | {title} | {time} | {pages}pg
 Abstract: {abstract}
@@ -51,7 +51,7 @@ JSON only, no fences."""
 # Batch prompt — rate multiple drafts in one call
 BATCH_PROMPT = """\
-Rate each IETF draft below. Return a JSON array with one object per draft, in order.
+Rate each document below. Return a JSON array with one object per draft, in order.
 {drafts_block}
@@ -62,14 +62,14 @@ Categories: {categories}
 Return ONLY a JSON array, no fences."""
 COMPARE_PROMPT = """\
-Compare these IETF drafts — overlaps, unique ideas, complementary vs competing vs redundant.
+Compare these documents — overlaps, unique ideas, complementary vs competing vs redundant.
 {drafts_section}
 Be specific about concrete mechanisms and design choices."""
 EXTRACT_IDEAS_PROMPT = """\
-Extract discrete technical ideas and mechanisms from this IETF draft.
+Extract discrete technical ideas and mechanisms from this {doc_type}.
 Return a JSON array. Each element: {{"title":"short name","description":"1-2 sentences","type":"mechanism|protocol|pattern|requirement|architecture|extension"}}
 {name} | {title} | {pages}pg
@@ -81,7 +81,7 @@ Return 1-4 ideas. Extract only TOP-LEVEL novel contributions. Do NOT list sub-fe
 JSON array only, no fences."""
 BATCH_IDEAS_PROMPT = """\
-Extract ideas from each IETF draft below. Return a JSON object mapping draft name -> array of ideas.
+Extract ideas from each document below. Return a JSON object mapping document name -> array of ideas.
 Per idea: {{"title":"short name","description":"1 sentence","type":"mechanism|protocol|pattern|requirement|architecture|extension"}}
 {drafts_block}
@@ -135,6 +135,15 @@ def _prompt_hash(text: str) -> str:
     return hashlib.sha256(text.encode()).hexdigest()[:16]
+
+def _doc_type_label(source: str) -> str:
+    """Return a human-readable document type based on source."""
+    labels = {
+        "ietf": "IETF draft",
+        "w3c": "W3C specification",
+    }
+    return labels.get(source, f"{source} document")
+
 class Analyzer:
     def __init__(self, config: Config | None = None, db: Database | None = None):
         self.config = config or Config.load()
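
The new helper's fallback for sources beyond the two mapped labels, as a
quick illustration:

    _doc_type_label("ietf")    # -> "IETF draft"
    _doc_type_label("whatwg")  # -> "whatwg document" (generic fallback)
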
@@ -199,6 +208,7 @@ class Analyzer:
             return None
         prompt = RATE_PROMPT_COMPACT.format(
+            doc_type=_doc_type_label(draft.source),
             name=draft.name, title=draft.title, time=draft.date,
             pages=draft.pages or "?",
             abstract=draft.abstract[:2000],
@@ -302,6 +312,7 @@ class Analyzer:
console.print(f"Rating [bold]{len(unrated)}[/] drafts in batches of {batch_size}...")
count = 0
failures: list[tuple[str, str]] = []
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
@@ -314,15 +325,29 @@ class Analyzer:
                 batch = unrated[i:i + batch_size]
                 names = ", ".join(d.name.split("-")[-1][:12] for d in batch)
                 progress.update(task, description=f"Batch: {names}")
-                n = self.rate_batch(batch, batch_size=batch_size)
-                count += n
+                try:
+                    n = self.rate_batch(batch, batch_size=batch_size)
+                    count += n
+                except Exception as e:
+                    batch_names = [d.name for d in batch]
+                    for bn in batch_names:
+                        failures.append((bn, str(e)))
+                    console.print(f"[red]Batch failed: {e}[/]")
                 progress.advance(task, advance=len(batch))
         in_tok, out_tok = self.db.total_tokens_used()
+        total_attempted = len(unrated)
         console.print(
             f"Rated [bold green]{count}[/] drafts "
             f"| Total tokens used: {in_tok:,} in + {out_tok:,} out"
         )
+        if failures:
+            console.print(
+                f"[yellow]Processed {count}/{total_attempted} drafts, "
+                f"{len(failures)} failure(s):[/]"
+            )
+            for name, err in failures[:20]:
+                console.print(f"  [red]{name}[/]: {err}")
         return count

     def extract_ideas(self, draft_name: str, use_cache: bool = True) -> list[dict] | None:
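
A sketch of how the recorded failures could feed the --retry-failed pass;
persisting them via a hypothetical db.load_failures() helper is an
assumption — the diff only shows them collected and printed:

    def retry_failed(self) -> int:
        # Re-attempt only the drafts whose batch previously raised.
        names = [name for name, _err in self.db.load_failures(stage="rate")]
        drafts = [d for d in (self.db.get_draft(n) for n in names) if d is not None]
        if not drafts:
            return 0
        return self.rate_batch(drafts, batch_size=len(drafts))
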
@@ -337,6 +362,7 @@ class Analyzer:
         text_excerpt = draft.full_text[:3000]
         prompt = EXTRACT_IDEAS_PROMPT.format(
+            doc_type=_doc_type_label(draft.source),
             name=draft.name, title=draft.title,
             pages=draft.pages or "?",
             abstract=draft.abstract[:2000],
@@ -451,6 +477,7 @@ class Analyzer:
console.print(f"Extracting ideas from [bold]{len(missing)}[/] drafts ({model_label})...")
count = 0
failures: list[tuple[str, str]] = []
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
@@ -465,23 +492,40 @@ class Analyzer:
                     batch = missing[i:i + batch_size]
                     names = ", ".join(n.split("-")[-1][:10] for n in batch)
                     progress.update(task, description=f"Batch: {names}")
-                    n = self.extract_ideas_batch(batch, cheap=cheap)
-                    count += n
+                    try:
+                        n = self.extract_ideas_batch(batch, cheap=cheap)
+                        count += n
+                    except Exception as e:
+                        for bn in batch:
+                            failures.append((bn, str(e)))
+                        console.print(f"[red]Batch failed: {e}[/]")
                     progress.advance(task, advance=len(batch))
             else:
                 for name in missing:
                     progress.update(task, description=f"Ideas: {name.split('-')[-1][:15]}")
-                    result = self.extract_ideas(name)
-                    if result:
-                        count += 1
+                    try:
+                        result = self.extract_ideas(name)
+                        if result:
+                            count += 1
+                    except Exception as e:
+                        failures.append((name, str(e)))
+                        console.print(f"[red]Failed {name}: {e}[/]")
                     progress.advance(task)
+        total_attempted = len(missing)
         in_tok, out_tok = self.db.total_tokens_used()
         console.print(
             f"Extracted ideas from [bold green]{count}[/] drafts "
             f"({self.db.idea_count()} total ideas) "
             f"| Tokens: {in_tok:,} in + {out_tok:,} out"
         )
+        if failures:
+            console.print(
+                f"[yellow]Processed {count}/{total_attempted} drafts, "
+                f"{len(failures)} failure(s):[/]"
+            )
+            for name, err in failures[:20]:
+                console.print(f"  [red]{name}[/]: {err}")
         return count

     def gap_analysis(self) -> list[dict]:
@@ -551,28 +595,49 @@ class Analyzer:
console.print(f"[red]Gap analysis failed: {e}[/]")
return []
def compare_drafts(self, draft_names: list[str]) -> str:
"""Compare multiple drafts and return analysis text."""
def compare_drafts(self, draft_names: list[str], use_cache: bool = True) -> dict:
"""Compare multiple drafts and return structured comparison.
Returns dict with keys: text, drafts (list of names that were compared),
or a dict with key 'error' on failure.
"""
valid_names = []
parts = []
for name in draft_names:
draft = self.db.get_draft(name)
if draft is None:
console.print(f"[yellow]Skipping unknown draft: {name}[/]")
continue
valid_names.append(name)
parts.append(f"### {draft.title}\n**{name}**\n{draft.abstract}")
if len(parts) < 2:
return "Need at least 2 valid drafts to compare."
return {"error": "Need at least 2 valid drafts to compare.", "drafts": valid_names}
prompt = COMPARE_PROMPT.format(
drafts_section="\n\n---\n\n".join(parts)
)
phash = _prompt_hash(prompt)
cache_key = "_compare_" + "_".join(sorted(valid_names))
# Check cache
if use_cache:
cached = self.db.get_cached_response(cache_key, phash)
if cached:
return {"text": cached, "drafts": valid_names}
try:
text, _, _ = self._call_claude(prompt, max_tokens=2048)
return text
text, in_tok, out_tok = self._call_claude(prompt, max_tokens=2048)
# Cache the result
self.db.cache_response(
cache_key, phash, self.config.claude_model,
prompt, text, in_tok, out_tok,
)
return {"text": text, "drafts": valid_names}
except anthropic.APIError as e:
return f"Error: {e}"
return {"error": f"API error: {e}", "drafts": valid_names}
def dedup_ideas(self, threshold: float = 0.85, dry_run: bool = True,
draft_name: str | None = None) -> dict:
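
An example of consuming compare_drafts()'s new structured return (the draft
names here are placeholders); a second identical call is served from the
response cache unless use_cache=False:

    result = analyzer.compare_drafts(["draft-foo-bar-00", "draft-baz-qux-01"])
    if "error" in result:
        print(result["error"])
    else:
        print(f"Compared {len(result['drafts'])} drafts:")
        print(result["text"])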