Platform upgrade: semantic search, citations, readiness, tests, Docker

Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation,
resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 20:52:56 +01:00
parent da2a989744
commit 757b781c67
33 changed files with 4253 additions and 170 deletions

View File

@@ -192,6 +192,17 @@ CREATE TABLE IF NOT EXISTS gap_history (
recorded_at TEXT
);
-- Annotations (user notes + tags per draft)
CREATE TABLE IF NOT EXISTS annotations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
draft_name TEXT NOT NULL REFERENCES drafts(name),
note TEXT DEFAULT '',
tags TEXT DEFAULT '[]',
created_at TEXT,
updated_at TEXT,
UNIQUE(draft_name)
);
-- Monitor runs
CREATE TABLE IF NOT EXISTS monitor_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -529,14 +540,17 @@ class Database:
ORDER BY da.author_order""",
(draft_name,),
).fetchall()
cols = rows[0].keys() if rows else []
return [Author(
person_id=r["person_id"], name=r["name"],
ascii_name=r["ascii_name"] if "ascii_name" in cols else "",
affiliation=r["affiliation"] if "affiliation" in cols else "",
resource_uri=r["resource_uri"] if "resource_uri" in cols else "",
fetched_at=r["fetched_at"] if "fetched_at" in cols else None,
) for r in rows]
results = []
for r in rows:
d = dict(r)
results.append(Author(
person_id=d["person_id"], name=d["name"],
ascii_name=d.get("ascii_name", ""),
affiliation=d.get("affiliation", ""),
resource_uri=d.get("resource_uri", ""),
fetched_at=d.get("fetched_at"),
))
return results
def drafts_without_authors(self, limit: int = 500) -> list[str]:
rows = self.conn.execute(
@@ -681,7 +695,8 @@ class Database:
"SELECT * FROM ideas WHERE draft_name = ?", (draft_name,)
).fetchall()
return [{"id": r["id"], "title": r["title"], "description": r["description"],
"type": r["idea_type"], "draft_name": r["draft_name"]} for r in rows]
"type": r["idea_type"], "draft_name": r["draft_name"],
"novelty_score": r["novelty_score"]} for r in rows]
def delete_idea(self, idea_id: int) -> None:
"""Delete a single idea and its embedding by ID."""
@@ -706,7 +721,8 @@ class Database:
"SELECT * FROM ideas ORDER BY draft_name"
).fetchall()
return [{"title": r["title"], "description": r["description"],
"type": r["idea_type"], "draft_name": r["draft_name"]} for r in rows]
"type": r["idea_type"], "draft_name": r["draft_name"],
"novelty_score": r["novelty_score"]} for r in rows]
def idea_count(self) -> int:
    """Return the total number of ideas stored in the database."""
    (count,) = self.conn.execute("SELECT COUNT(*) FROM ideas").fetchone()
    return count
@@ -1380,6 +1396,75 @@ class Database:
).fetchone()
return dict(row) if row else None
# --- Annotations ---
def upsert_annotation(self, draft_name: str, note: str | None = None, tags: list[str] | None = None) -> None:
"""Insert or update an annotation for a draft."""
now = datetime.now(timezone.utc).isoformat()
existing = self.conn.execute(
"SELECT id, note, tags FROM annotations WHERE draft_name = ?",
(draft_name,),
).fetchone()
if existing:
current_note = note if note is not None else existing["note"]
current_tags = tags if tags is not None else json.loads(existing["tags"] or "[]")
self.conn.execute(
"UPDATE annotations SET note = ?, tags = ?, updated_at = ? WHERE draft_name = ?",
(current_note, json.dumps(current_tags), now, draft_name),
)
else:
self.conn.execute(
"""INSERT INTO annotations (draft_name, note, tags, created_at, updated_at)
VALUES (?, ?, ?, ?, ?)""",
(draft_name, note or "", json.dumps(tags or []), now, now),
)
self.conn.commit()
def get_annotation(self, draft_name: str) -> dict | None:
"""Return annotation for a draft, or None."""
row = self.conn.execute(
"SELECT * FROM annotations WHERE draft_name = ?", (draft_name,)
).fetchone()
if not row:
return None
return {
"id": row["id"],
"draft_name": row["draft_name"],
"note": row["note"],
"tags": json.loads(row["tags"] or "[]"),
"created_at": row["created_at"],
"updated_at": row["updated_at"],
}
def get_all_annotations(self) -> list[dict]:
    """Return every annotation, most recently updated first."""
    cursor = self.conn.execute(
        "SELECT * FROM annotations ORDER BY updated_at DESC"
    )
    annotations = []
    for row in cursor.fetchall():
        annotations.append({
            "id": row["id"],
            "draft_name": row["draft_name"],
            "note": row["note"],
            # Stored as a JSON array string; decode for callers.
            "tags": json.loads(row["tags"] or "[]"),
            "created_at": row["created_at"],
            "updated_at": row["updated_at"],
        })
    return annotations
def search_by_tag(self, tag: str) -> list[str]:
    """Return names of drafts whose annotation carries *tag*."""
    # Tags live in a JSON text column, so the filter runs in Python
    # rather than in SQL.
    rows = self.conn.execute(
        "SELECT draft_name, tags FROM annotations"
    ).fetchall()
    return [
        row["draft_name"]
        for row in rows
        if tag in json.loads(row["tags"] or "[]")
    ]
# --- Helpers ---
@staticmethod