Platform upgrade: semantic search, citations, readiness, tests, Docker

Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation,
resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 20:52:56 +01:00
parent da2a989744
commit 757b781c67
33 changed files with 4253 additions and 170 deletions

View File

@@ -12,7 +12,11 @@ from pathlib import Path
_project_root = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(_project_root / "src"))
from flask import Flask, render_template, request, jsonify, abort, g
import csv
import io
import json
from flask import Flask, render_template, request, jsonify, abort, g, Response
from webui.data import (
get_db,
@@ -39,6 +43,10 @@ from webui.data import (
get_idea_clusters,
get_monitor_status,
get_author_network_full,
get_citation_graph,
get_comparison_data,
get_ask_data,
global_search,
)
app = Flask(
@@ -91,6 +99,7 @@ def drafts():
page = request.args.get("page", 1, type=int)
search = request.args.get("q", "")
category = request.args.get("cat", "")
source = request.args.get("source", "")
min_score = request.args.get("min_score", 0.0, type=float)
sort = request.args.get("sort", "score")
sort_dir = request.args.get("dir", "desc")
@@ -103,6 +112,7 @@ def drafts():
min_score=min_score,
sort=sort,
sort_dir=sort_dir,
source=source,
)
categories = get_category_counts(db())
return render_template(
@@ -111,6 +121,7 @@ def drafts():
categories=categories,
search=search,
current_cat=category,
current_source=source,
min_score=min_score,
sort=sort,
sort_dir=sort_dir,
@@ -272,6 +283,12 @@ def authors():
)
@app.route("/citations")
def citations():
graph = get_citation_graph(db())
return render_template("citations.html", graph=graph)
@app.route("/monitor")
def monitor_page():
status = get_monitor_status(db())
@@ -294,21 +311,121 @@ def datenschutz():
return render_template("datenschutz.html")
@app.route("/search")
def search():
q = request.args.get("q", "").strip()
results = global_search(db(), q) if q else {"drafts": [], "ideas": [], "authors": [], "gaps": []}
total = sum(len(v) for v in results.values())
return render_template("search_results.html", query=q, results=results, total=total)
@app.route("/ask")
def ask_page():
question = request.args.get("q", "")
result = None
if question:
top_k = request.args.get("top", 5, type=int)
result = get_ask_data(db(), question, top_k=top_k)
return render_template("ask.html", question=question, result=result)
@app.route("/api/ask", methods=["POST"])
def api_ask():
"""Answer a question via hybrid search + Claude. Returns JSON."""
data = request.get_json(force=True, silent=True)
if not data or "question" not in data:
return jsonify({"error": "Missing 'question' in request body"}), 400
question = data["question"]
top_k = data.get("top_k", 5)
cheap = data.get("cheap", True)
result = get_ask_data(db(), question, top_k=top_k, cheap=cheap)
return jsonify(result)
@app.route("/compare")
def compare_page():
draft_names = request.args.get("drafts", "")
names = [n.strip() for n in draft_names.split(",") if n.strip()] if draft_names else []
data = None
if len(names) >= 2:
data = get_comparison_data(db(), names)
return render_template("comparison.html", names=names, data=data)
@app.route("/api/compare", methods=["POST"])
def api_compare():
"""Run Claude comparison for drafts. Returns JSON with comparison text."""
req_data = request.get_json(force=True, silent=True)
if not req_data or "drafts" not in req_data:
return jsonify({"error": "Missing 'drafts' in request body"}), 400
names = req_data["drafts"]
if len(names) < 2:
return jsonify({"error": "Need at least 2 drafts to compare"}), 400
try:
from ietf_analyzer.config import Config
from ietf_analyzer.analyzer import Analyzer
cfg = Config.load()
database = db()
analyzer = Analyzer(cfg, database)
result = analyzer.compare_drafts(names)
return jsonify(result)
except Exception as e:
return jsonify({"error": str(e)}), 500
# --- API endpoints for AJAX (used by client-side charts) ---
def _to_csv_response(rows: list[dict], filename: str = "export.csv") -> Response:
"""Convert a list of dicts to a CSV download response."""
if not rows:
return Response("", mimetype="text/csv",
headers={"Content-Disposition": f"attachment; filename={filename}"})
si = io.StringIO()
writer = csv.DictWriter(si, fieldnames=rows[0].keys())
writer.writeheader()
for row in rows:
# Flatten any list/dict values to JSON strings
flat = {}
for k, v in row.items():
if isinstance(v, (list, dict)):
flat[k] = json.dumps(v)
else:
flat[k] = v
writer.writerow(flat)
return Response(si.getvalue(), mimetype="text/csv",
headers={"Content-Disposition": f"attachment; filename={filename}"})
def _results_to_csv(results: dict) -> Response:
"""Convert global search results (multi-category) to a single CSV."""
rows = []
for category, items in results.items():
for item in items:
row = {"_category": category}
row.update(item)
rows.append(row)
return _to_csv_response(rows, "search_results.csv")
@app.route("/api/drafts")
def api_drafts():
page = request.args.get("page", 1, type=int)
search = request.args.get("q", "")
category = request.args.get("cat", "")
source = request.args.get("source", "")
min_score = request.args.get("min_score", 0.0, type=float)
sort = request.args.get("sort", "score")
sort_dir = request.args.get("dir", "desc")
return jsonify(
get_drafts_page(db(), page=page, search=search, category=category,
min_score=min_score, sort=sort, sort_dir=sort_dir)
)
data = get_drafts_page(db(), page=page, search=search, category=category,
min_score=min_score, sort=sort, sort_dir=sort_dir,
source=source)
if request.args.get("format") == "csv":
return _to_csv_response(data.get("drafts", []), "drafts.csv")
return jsonify(data)
@app.route("/api/stats")
@@ -321,6 +438,148 @@ def api_author_network():
return jsonify(get_author_network_full(db()))
@app.route("/api/citations")
def api_citations():
min_refs = request.args.get("min_refs", 2, type=int)
return jsonify(get_citation_graph(db(), min_refs=min_refs))
@app.route("/api/search")
def api_search():
q = request.args.get("q", "").strip()
results = global_search(db(), q) if q else {"drafts": [], "ideas": [], "authors": [], "gaps": []}
if request.args.get("format") == "csv":
return _results_to_csv(results)
return jsonify(results)
@app.route("/api/ideas")
def api_ideas():
data = get_ideas_by_type(db())
if request.args.get("format") == "csv":
return _to_csv_response(data.get("ideas", []), "ideas.csv")
return jsonify(data)
@app.route("/api/gaps")
def api_gaps():
data = get_all_gaps(db())
if request.args.get("format") == "csv":
return _to_csv_response(data, "gaps.csv")
return jsonify(data)
@app.route("/api/gaps/<int:gap_id>")
def api_gap_detail(gap_id: int):
gap = get_gap_detail(db(), gap_id)
if not gap:
return jsonify({"error": "Gap not found"}), 404
return jsonify(gap)
@app.route("/api/ratings")
def api_ratings():
data = get_rating_distributions(db())
if request.args.get("format") == "csv":
# Transpose columnar data to rows
rows = []
for i in range(len(data.get("names", []))):
rows.append({
"name": data["names"][i],
"score": data["scores"][i],
"novelty": data["novelty"][i],
"maturity": data["maturity"][i],
"overlap": data["overlap"][i],
"momentum": data["momentum"][i],
"relevance": data["relevance"][i],
"category": data["categories"][i],
})
return _to_csv_response(rows, "ratings.csv")
return jsonify(data)
@app.route("/api/timeline")
def api_timeline():
data = get_timeline_data(db())
return jsonify(data)
@app.route("/api/landscape")
def api_landscape():
data = get_landscape_tsne(db())
if request.args.get("format") == "csv":
return _to_csv_response(data, "landscape.csv")
return jsonify(data)
@app.route("/api/similarity")
def api_similarity():
data = get_similarity_graph(db())
return jsonify(data)
@app.route("/api/idea-clusters")
def api_idea_clusters():
data = get_idea_clusters(db())
return jsonify(data)
@app.route("/api/monitor")
def api_monitor():
data = get_monitor_status(db())
return jsonify(data)
@app.route("/api/drafts/<path:name>")
def api_draft_detail(name: str):
detail = get_draft_detail(db(), name)
if not detail:
return jsonify({"error": "Draft not found"}), 404
return jsonify(detail)
@app.route("/api/categories")
def api_categories():
data = get_category_counts(db())
if request.args.get("format") == "csv":
rows = [{"category": k, "count": v} for k, v in data.items()]
return _to_csv_response(rows, "categories.csv")
return jsonify(data)
@app.route("/api/drafts/<path:name>/annotate", methods=["POST"])
def api_annotate(name: str):
"""Add or update annotation for a draft."""
import json as _json
database = db()
draft = database.get_draft(name)
if not draft:
return jsonify({"error": "Draft not found"}), 404
data = request.get_json(force=True, silent=True)
if not data:
return jsonify({"error": "Invalid JSON body"}), 400
note = data.get("note")
tags = data.get("tags")
add_tag = data.get("add_tag")
remove_tag = data.get("remove_tag")
# Handle add/remove tag operations
if add_tag or remove_tag:
existing = database.get_annotation(name)
current_tags = existing["tags"] if existing else []
if add_tag and add_tag not in current_tags:
current_tags.append(add_tag)
if remove_tag and remove_tag in current_tags:
current_tags.remove(remove_tag)
tags = current_tags
database.upsert_annotation(name, note=note, tags=tags)
annotation = database.get_annotation(name)
return jsonify({"success": True, "annotation": annotation})
if __name__ == "__main__":
print("Starting IETF Draft Analyzer Dashboard on http://127.0.0.1:5000")
app.run(debug=True, host="127.0.0.1", port=5000)