Platform upgrade: semantic search, citations, readiness, tests, Docker

Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)
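
For the "Ask" retrieval step, a minimal sketch of the hybrid FTS5 + embedding
lookup. The table and column names (drafts_fts, embeddings, vector) and the
embed() callable are illustrative assumptions, not the actual schema:

    import json, math, sqlite3

    def cosine(a: list[float], b: list[float]) -> float:
        dot = sum(x * y for x, y in zip(a, b))
        na = math.sqrt(sum(x * x for x in a))
        nb = math.sqrt(sum(x * x for x in b))
        return dot / (na * nb) if na and nb else 0.0

    def ask_candidates(db: sqlite3.Connection, query: str, embed, k: int = 10) -> list[str]:
        # Lexical pass: FTS5 MATCH, ranked by BM25 (lower bm25() = better match).
        rows = db.execute(
            "SELECT name FROM drafts_fts WHERE drafts_fts MATCH ? "
            "ORDER BY bm25(drafts_fts) LIMIT ?", (query, k)).fetchall()
        names = {r[0] for r in rows}
        # Semantic pass: cosine similarity against stored embedding vectors.
        qvec = embed(query)
        scored = sorted(
            ((cosine(qvec, json.loads(vec)), name)
             for name, vec in db.execute("SELECT name, vector FROM embeddings")),
            reverse=True)
        names.update(name for _, name in scored[:k])
        return sorted(names)  # merged candidates go to Claude for synthesis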

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation
(sketched below), resilient batch error handling with --retry-failed,
and observatory --dry-run.
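
A hedged sketch of the env-var override + validation pattern; the
OBSERVATORY_* variable names and the Config fields shown are hypothetical:

    import os
    from dataclasses import dataclass

    @dataclass
    class Config:
        db_path: str = "observatory.db"
        batch_size: int = 10

        @classmethod
        def load(cls) -> "Config":
            cfg = cls()
            # Env vars override file/default config values.
            cfg.db_path = os.environ.get("OBSERVATORY_DB_PATH", cfg.db_path)
            raw = os.environ.get("OBSERVATORY_BATCH_SIZE")
            if raw is not None:
                try:
                    cfg.batch_size = int(raw)
                except ValueError:
                    raise SystemExit(f"OBSERVATORY_BATCH_SIZE must be an int, got {raw!r}")
            if cfg.batch_size < 1:
                raise SystemExit("OBSERVATORY_BATCH_SIZE must be >= 1")
            return cfg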

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 20:52:56 +01:00
parent da2a989744
commit 757b781c67
33 changed files with 4253 additions and 170 deletions

@@ -38,7 +38,7 @@ CATEGORIES_SHORT = [
 # Compact prompt — abstract only, saves ~10x tokens vs full-text
 RATE_PROMPT_COMPACT = """\
-Rate this IETF draft. JSON only.
+Rate this {doc_type}. JSON only.
 {name} | {title} | {time} | {pages}pg
 Abstract: {abstract}
@@ -51,7 +51,7 @@ JSON only, no fences."""
 # Batch prompt — rate multiple drafts in one call
 BATCH_PROMPT = """\
-Rate each IETF draft below. Return a JSON array with one object per draft, in order.
+Rate each document below. Return a JSON array with one object per draft, in order.
 {drafts_block}
@@ -62,14 +62,14 @@ Categories: {categories}
 Return ONLY a JSON array, no fences."""
 COMPARE_PROMPT = """\
-Compare these IETF drafts — overlaps, unique ideas, complementary vs competing vs redundant.
+Compare these documents — overlaps, unique ideas, complementary vs competing vs redundant.
 {drafts_section}
 Be specific about concrete mechanisms and design choices."""
 EXTRACT_IDEAS_PROMPT = """\
-Extract discrete technical ideas and mechanisms from this IETF draft.
+Extract discrete technical ideas and mechanisms from this {doc_type}.
 Return a JSON array. Each element: {{"title":"short name","description":"1-2 sentences","type":"mechanism|protocol|pattern|requirement|architecture|extension"}}
 {name} | {title} | {pages}pg
@@ -81,7 +81,7 @@ Return 1-4 ideas. Extract only TOP-LEVEL novel contributions. Do NOT list sub-fe
 JSON array only, no fences."""
 BATCH_IDEAS_PROMPT = """\
-Extract ideas from each IETF draft below. Return a JSON object mapping draft name -> array of ideas.
+Extract ideas from each document below. Return a JSON object mapping document name -> array of ideas.
 Per idea: {{"title":"short name","description":"1 sentence","type":"mechanism|protocol|pattern|requirement|architecture|extension"}}
 {drafts_block}
@@ -135,6 +135,15 @@ def _prompt_hash(text: str) -> str:
     return hashlib.sha256(text.encode()).hexdigest()[:16]
+
+def _doc_type_label(source: str) -> str:
+    """Return a human-readable document type based on source."""
+    labels = {
+        "ietf": "IETF draft",
+        "w3c": "W3C specification",
+    }
+    return labels.get(source, f"{source} document")
+
 class Analyzer:
     def __init__(self, config: Config | None = None, db: Database | None = None):
         self.config = config or Config.load()
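
The new helper's fallback for sources beyond the two mapped labels, as a
quick illustration:

    _doc_type_label("ietf")    # -> "IETF draft"
    _doc_type_label("whatwg")  # -> "whatwg document" (generic fallback)
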
@@ -199,6 +208,7 @@ class Analyzer:
             return None
         prompt = RATE_PROMPT_COMPACT.format(
+            doc_type=_doc_type_label(draft.source),
             name=draft.name, title=draft.title, time=draft.date,
             pages=draft.pages or "?",
             abstract=draft.abstract[:2000],
@@ -302,6 +312,7 @@ class Analyzer:
console.print(f"Rating [bold]{len(unrated)}[/] drafts in batches of {batch_size}...")
count = 0
failures: list[tuple[str, str]] = []
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
@@ -314,15 +325,29 @@ class Analyzer:
                 batch = unrated[i:i + batch_size]
                 names = ", ".join(d.name.split("-")[-1][:12] for d in batch)
                 progress.update(task, description=f"Batch: {names}")
-                n = self.rate_batch(batch, batch_size=batch_size)
-                count += n
+                try:
+                    n = self.rate_batch(batch, batch_size=batch_size)
+                    count += n
+                except Exception as e:
+                    batch_names = [d.name for d in batch]
+                    for bn in batch_names:
+                        failures.append((bn, str(e)))
+                    console.print(f"[red]Batch failed: {e}[/]")
                 progress.advance(task, advance=len(batch))
         in_tok, out_tok = self.db.total_tokens_used()
+        total_attempted = len(unrated)
         console.print(
             f"Rated [bold green]{count}[/] drafts "
             f"| Total tokens used: {in_tok:,} in + {out_tok:,} out"
         )
+        if failures:
+            console.print(
+                f"[yellow]Processed {count}/{total_attempted} drafts, "
+                f"{len(failures)} failure(s):[/]"
+            )
+            for name, err in failures[:20]:
+                console.print(f"  [red]{name}[/]: {err}")
         return count

     def extract_ideas(self, draft_name: str, use_cache: bool = True) -> list[dict] | None:
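
A sketch of how the recorded failures could feed the --retry-failed pass;
persisting them via a hypothetical db.load_failures() helper is an
assumption — the diff only shows them collected and printed:

    def retry_failed(self) -> int:
        # Re-attempt only the drafts whose batch previously raised.
        names = [name for name, _err in self.db.load_failures(stage="rate")]
        drafts = [d for d in (self.db.get_draft(n) for n in names) if d is not None]
        if not drafts:
            return 0
        return self.rate_batch(drafts, batch_size=len(drafts))
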
@@ -337,6 +362,7 @@ class Analyzer:
         text_excerpt = draft.full_text[:3000]
         prompt = EXTRACT_IDEAS_PROMPT.format(
+            doc_type=_doc_type_label(draft.source),
             name=draft.name, title=draft.title,
             pages=draft.pages or "?",
             abstract=draft.abstract[:2000],
@@ -451,6 +477,7 @@ class Analyzer:
console.print(f"Extracting ideas from [bold]{len(missing)}[/] drafts ({model_label})...")
count = 0
failures: list[tuple[str, str]] = []
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
@@ -465,23 +492,40 @@ class Analyzer:
                     batch = missing[i:i + batch_size]
                     names = ", ".join(n.split("-")[-1][:10] for n in batch)
                     progress.update(task, description=f"Batch: {names}")
-                    n = self.extract_ideas_batch(batch, cheap=cheap)
-                    count += n
+                    try:
+                        n = self.extract_ideas_batch(batch, cheap=cheap)
+                        count += n
+                    except Exception as e:
+                        for bn in batch:
+                            failures.append((bn, str(e)))
+                        console.print(f"[red]Batch failed: {e}[/]")
                     progress.advance(task, advance=len(batch))
             else:
                 for name in missing:
                     progress.update(task, description=f"Ideas: {name.split('-')[-1][:15]}")
-                    result = self.extract_ideas(name)
-                    if result:
-                        count += 1
+                    try:
+                        result = self.extract_ideas(name)
+                        if result:
+                            count += 1
+                    except Exception as e:
+                        failures.append((name, str(e)))
+                        console.print(f"[red]Failed {name}: {e}[/]")
                     progress.advance(task)
+        total_attempted = len(missing)
         in_tok, out_tok = self.db.total_tokens_used()
         console.print(
             f"Extracted ideas from [bold green]{count}[/] drafts "
             f"({self.db.idea_count()} total ideas) "
             f"| Tokens: {in_tok:,} in + {out_tok:,} out"
         )
+        if failures:
+            console.print(
+                f"[yellow]Processed {count}/{total_attempted} drafts, "
+                f"{len(failures)} failure(s):[/]"
+            )
+            for name, err in failures[:20]:
+                console.print(f"  [red]{name}[/]: {err}")
         return count

     def gap_analysis(self) -> list[dict]:
@@ -551,28 +595,49 @@ class Analyzer:
console.print(f"[red]Gap analysis failed: {e}[/]")
return []
def compare_drafts(self, draft_names: list[str]) -> str:
"""Compare multiple drafts and return analysis text."""
def compare_drafts(self, draft_names: list[str], use_cache: bool = True) -> dict:
"""Compare multiple drafts and return structured comparison.
Returns dict with keys: text, drafts (list of names that were compared),
or a dict with key 'error' on failure.
"""
valid_names = []
parts = []
for name in draft_names:
draft = self.db.get_draft(name)
if draft is None:
console.print(f"[yellow]Skipping unknown draft: {name}[/]")
continue
valid_names.append(name)
parts.append(f"### {draft.title}\n**{name}**\n{draft.abstract}")
if len(parts) < 2:
return "Need at least 2 valid drafts to compare."
return {"error": "Need at least 2 valid drafts to compare.", "drafts": valid_names}
prompt = COMPARE_PROMPT.format(
drafts_section="\n\n---\n\n".join(parts)
)
phash = _prompt_hash(prompt)
cache_key = "_compare_" + "_".join(sorted(valid_names))
# Check cache
if use_cache:
cached = self.db.get_cached_response(cache_key, phash)
if cached:
return {"text": cached, "drafts": valid_names}
try:
text, _, _ = self._call_claude(prompt, max_tokens=2048)
return text
text, in_tok, out_tok = self._call_claude(prompt, max_tokens=2048)
# Cache the result
self.db.cache_response(
cache_key, phash, self.config.claude_model,
prompt, text, in_tok, out_tok,
)
return {"text": text, "drafts": valid_names}
except anthropic.APIError as e:
return f"Error: {e}"
return {"error": f"API error: {e}", "drafts": valid_names}
def dedup_ideas(self, threshold: float = 0.85, dry_run: bool = True,
draft_name: str | None = None) -> dict:
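
An example of consuming compare_drafts()'s new structured return (the draft
names here are placeholders); a second identical call is served from the
response cache unless use_cache=False:

    result = analyzer.compare_drafts(["draft-foo-bar-00", "draft-baz-qux-01"])
    if "error" in result:
        print(result["error"])
    else:
        print(f"Compared {len(result['drafts'])} drafts:")
        print(result["text"])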