Platform upgrade: semantic search, citations, readiness, tests, Docker

Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation,
resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 20:52:56 +01:00
parent da2a989744
commit 757b781c67
33 changed files with 4253 additions and 170 deletions

View File

@@ -192,6 +192,17 @@ CREATE TABLE IF NOT EXISTS gap_history (
recorded_at TEXT
);
-- Annotations (user notes + tags per draft)
CREATE TABLE IF NOT EXISTS annotations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
draft_name TEXT NOT NULL REFERENCES drafts(name),
note TEXT DEFAULT '',
tags TEXT DEFAULT '[]',
created_at TEXT,
updated_at TEXT,
UNIQUE(draft_name)
);
-- Monitor runs
CREATE TABLE IF NOT EXISTS monitor_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -529,14 +540,17 @@ class Database:
ORDER BY da.author_order""",
(draft_name,),
).fetchall()
cols = rows[0].keys() if rows else []
return [Author(
person_id=r["person_id"], name=r["name"],
ascii_name=r["ascii_name"] if "ascii_name" in cols else "",
affiliation=r["affiliation"] if "affiliation" in cols else "",
resource_uri=r["resource_uri"] if "resource_uri" in cols else "",
fetched_at=r["fetched_at"] if "fetched_at" in cols else None,
) for r in rows]
results = []
for r in rows:
d = dict(r)
results.append(Author(
person_id=d["person_id"], name=d["name"],
ascii_name=d.get("ascii_name", ""),
affiliation=d.get("affiliation", ""),
resource_uri=d.get("resource_uri", ""),
fetched_at=d.get("fetched_at"),
))
return results
def drafts_without_authors(self, limit: int = 500) -> list[str]:
rows = self.conn.execute(
@@ -681,7 +695,8 @@ class Database:
"SELECT * FROM ideas WHERE draft_name = ?", (draft_name,)
).fetchall()
return [{"id": r["id"], "title": r["title"], "description": r["description"],
"type": r["idea_type"], "draft_name": r["draft_name"]} for r in rows]
"type": r["idea_type"], "draft_name": r["draft_name"],
"novelty_score": r["novelty_score"]} for r in rows]
def delete_idea(self, idea_id: int) -> None:
"""Delete a single idea and its embedding by ID."""
@@ -706,7 +721,8 @@ class Database:
"SELECT * FROM ideas ORDER BY draft_name"
).fetchall()
return [{"title": r["title"], "description": r["description"],
"type": r["idea_type"], "draft_name": r["draft_name"]} for r in rows]
"type": r["idea_type"], "draft_name": r["draft_name"],
"novelty_score": r["novelty_score"]} for r in rows]
def idea_count(self) -> int:
    """Return the total number of ideas stored in the database."""
    (count,) = self.conn.execute("SELECT COUNT(*) FROM ideas").fetchone()
    return count
@@ -1380,6 +1396,75 @@ class Database:
).fetchone()
return dict(row) if row else None
# --- Annotations ---
def upsert_annotation(self, draft_name: str, note: str | None = None, tags: list[str] | None = None) -> None:
"""Insert or update an annotation for a draft."""
now = datetime.now(timezone.utc).isoformat()
existing = self.conn.execute(
"SELECT id, note, tags FROM annotations WHERE draft_name = ?",
(draft_name,),
).fetchone()
if existing:
current_note = note if note is not None else existing["note"]
current_tags = tags if tags is not None else json.loads(existing["tags"] or "[]")
self.conn.execute(
"UPDATE annotations SET note = ?, tags = ?, updated_at = ? WHERE draft_name = ?",
(current_note, json.dumps(current_tags), now, draft_name),
)
else:
self.conn.execute(
"""INSERT INTO annotations (draft_name, note, tags, created_at, updated_at)
VALUES (?, ?, ?, ?, ?)""",
(draft_name, note or "", json.dumps(tags or []), now, now),
)
self.conn.commit()
def get_annotation(self, draft_name: str) -> dict | None:
"""Return annotation for a draft, or None."""
row = self.conn.execute(
"SELECT * FROM annotations WHERE draft_name = ?", (draft_name,)
).fetchone()
if not row:
return None
return {
"id": row["id"],
"draft_name": row["draft_name"],
"note": row["note"],
"tags": json.loads(row["tags"] or "[]"),
"created_at": row["created_at"],
"updated_at": row["updated_at"],
}
def get_all_annotations(self) -> list[dict]:
    """Return every annotation, most recently updated first."""
    cursor = self.conn.execute(
        "SELECT * FROM annotations ORDER BY updated_at DESC"
    )
    annotations = []
    for row in cursor.fetchall():
        annotations.append({
            "id": row["id"],
            "draft_name": row["draft_name"],
            "note": row["note"],
            # Stored as a JSON array string; decode for callers.
            "tags": json.loads(row["tags"] or "[]"),
            "created_at": row["created_at"],
            "updated_at": row["updated_at"],
        })
    return annotations
def search_by_tag(self, tag: str) -> list[str]:
    """Return names of drafts whose annotation carries *tag*."""
    # Tags live in a JSON text column, so the filter runs in Python
    # rather than in SQL.
    rows = self.conn.execute(
        "SELECT draft_name, tags FROM annotations"
    ).fetchall()
    return [
        row["draft_name"]
        for row in rows
        if tag in json.loads(row["tags"] or "[]")
    ]
# --- Helpers ---
@staticmethod