Complete remaining medium/low issues: performance, CLI, types, CI, tests

Performance:
- Batch readiness computation (~200 queries → ~6 per page)
- Batch draft lookup in author network (N+1 → single query)
- File-based similarity matrix cache (.npy + metadata sidecar)
- 5-minute TTL embedding cache for search queries

CLI quality:
- Add pass_cfg_db decorator, convert ~30 commands to shared config/db lifecycle
- Add --dry-run to analyze, embed, embed-ideas, ideas, gaps commands
- Move 15+ in-function imports to top of data.py

Types & documentation:
- Add 16 TypedDicts to data.py, annotate 12 function return types
- Add ethics section to Post 06 (premature standardization, power asymmetry)
- Add EU AI Act Article 43 conformity mapping to Post 06
- Add NIS2 and CRA references to Post 04

CI & testing:
- Add GitHub Actions CI workflow (Python 3.11+3.12, ruff, pytest)
- Add API documentation for all 20 endpoints (data/reports/api-docs.md)
- Add 41 new tests (test_analyzer.py, test_search.py) — 64 total pass

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-08 14:06:54 +01:00
parent e7527ad68e
commit 20c45a7eba
14 changed files with 2305 additions and 1238 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -13,16 +13,15 @@ CONFIG_FILE = DEFAULT_DATA_DIR / "config.json"
# Default search terms used when no user-configured keywords exist.
# Each entry is matched as a substring, so short stems intentionally
# match their derived forms.
DEFAULT_KEYWORDS = [
    "agent",
    "ai-agent",
    "llm",
    "autonomous",
    "machine-learning",
    "artificial-intelligence",
    "mcp",
    "agentic",
    "inference",
    "generative",
    "intelligent",
    "aipref",
    "large language model",
    "multi-agent",
    # Deliberate stem: matches "trustworthy" and "trustworthiness".
    "trustworth",
]
# Environment variable overrides (env var name -> config field name)
@@ -39,6 +38,7 @@ class Config:
db_path: str = str(DEFAULT_DATA_DIR / "drafts.db")
ollama_url: str = "http://localhost:11434"
ollama_embed_model: str = "nomic-embed-text"
ollama_classify_model: str = "llama3.2"
claude_model: str = "claude-sonnet-4-20250514"
claude_model_cheap: str = "claude-haiku-4-5-20251001"
search_keywords: list[str] = field(default_factory=lambda: list(DEFAULT_KEYWORDS))

View File

@@ -326,6 +326,23 @@ class Database:
return None
return self._row_to_draft(row)
def get_drafts_by_names(self, names: list[str]) -> dict[str, "Draft"]:
    """Batch-fetch drafts by name. Returns {name: Draft} dict."""
    out: dict[str, "Draft"] = {}
    if not names:
        return out
    # Stay under SQLite's default host-parameter limit (~999) by
    # issuing one IN-query per chunk of at most 900 names.
    chunk_size = 900
    for start in range(0, len(names), chunk_size):
        batch = names[start : start + chunk_size]
        placeholders = ",".join("?" for _ in batch)
        for row in self.conn.execute(
            f"SELECT * FROM drafts WHERE name IN ({placeholders})", batch
        ).fetchall():
            draft = self._row_to_draft(row)
            out[draft.name] = draft
    return out
def list_drafts(
self,
limit: int = 100,

View File

@@ -2,6 +2,10 @@
from __future__ import annotations
import hashlib
import json
from pathlib import Path
import numpy as np
import ollama as ollama_lib
from rich.console import Console
@@ -111,16 +115,49 @@ class Embedder:
return similarities[:top_n]
def similarity_matrix(self) -> tuple[list[str], np.ndarray]:
    """Compute pairwise similarity matrix for all embedded drafts.

    Uses a file-based cache keyed by the hash of embedding draft names.
    If the set of embedded drafts hasn't changed, the cached matrix is
    reloaded from disk instead of recomputing O(n^2) cosine similarities.

    NOTE(review): the cache key covers only the *set of names*, not the
    embedding vectors themselves — re-embedding an unchanged set of
    drafts will not invalidate the cache. Confirm embeddings are
    write-once per draft name.

    Returns:
        (names, matrix): sorted draft names and the symmetric float32
        similarity matrix aligned to that ordering.
    """
    all_embeddings = self.db.all_embeddings()
    names = sorted(all_embeddings.keys())
    n = len(names)
    # Build cache key from sorted draft names.
    names_hash = hashlib.sha256("\n".join(names).encode()).hexdigest()[:16]
    cache_dir = Path(self.config.db_path).parent / ".cache"
    cache_meta = cache_dir / f"sim_matrix_{names_hash}.json"
    cache_npy = cache_dir / f"sim_matrix_{names_hash}.npy"
    # Try loading from cache; the sidecar name list and the matrix shape
    # are both re-verified to guard against hash collisions/truncation.
    if cache_meta.exists() and cache_npy.exists():
        try:
            cached_names = json.loads(cache_meta.read_text())
            if cached_names == names:
                matrix = np.load(cache_npy)
                if matrix.shape == (n, n):
                    return names, matrix
        except Exception:
            pass  # Cache corrupted, recompute
    # Compute fresh — the matrix is symmetric, so only the upper
    # triangle is computed and mirrored.
    matrix = np.zeros((n, n), dtype=np.float32)
    for i in range(n):
        for j in range(i, n):
            sim = _cosine_similarity(all_embeddings[names[i]], all_embeddings[names[j]])
            matrix[i, j] = sim
            matrix[j, i] = sim
    # Save to cache (best-effort). parents=True so a missing data dir
    # doesn't silently disable caching.
    try:
        cache_dir.mkdir(parents=True, exist_ok=True)
        np.save(cache_npy, matrix)
        cache_meta.write_text(json.dumps(names))
    except Exception:
        pass  # Non-fatal if caching fails
    return names, matrix
def find_clusters(self, threshold: float = 0.85) -> list[list[str]]:

View File

@@ -100,3 +100,136 @@ def compute_readiness(db, draft_name: str) -> dict:
f["contribution"] = round(f["value"] * f["weight"] * 100, 1)
return {"score": score, "factors": factors}
def compute_readiness_batch(db, draft_names: list[str]) -> dict[str, dict]:
    """Batch-compute readiness for multiple drafts using bulk queries.

    Pre-loads every per-draft signal (reference counts, incoming
    citations, author statistics, momentum ratings) with one query each,
    then scores all drafts in memory. Reduces ~6 queries per draft to
    ~6 queries total.

    Args:
        db: Database wrapper exposing ``conn`` (sqlite3 connection whose
            rows support mapping-style access) and ``get_drafts_by_names``.
        draft_names: Draft names to score.

    Returns:
        {draft_name: {"score": float, "factors": dict}} — the same shape
        as ``compute_readiness``. Names not found in the database map to
        ``{"score": 0, "factors": {}}``.
    """
    if not draft_names:
        return {}
    # Batch-load the Draft rows themselves.
    drafts_map = db.get_drafts_by_names(draft_names)
    # Outgoing reference counts per draft (whole corpus, one query).
    ref_counts: dict[str, int] = {
        r["draft_name"]: r["cnt"]
        for r in db.conn.execute(
            "SELECT draft_name, COUNT(*) as cnt FROM draft_refs GROUP BY draft_name"
        )
    }
    # Corpus-wide maximum ref count — derived from ref_counts instead of
    # a second (redundant) MAX-over-subquery round trip to SQLite.
    max_refs = max(ref_counts.values(), default=1)
    # Incoming citations: distinct drafts referencing each draft.
    cited_by_counts: dict[str, int] = {
        r["ref_id"]: r["cnt"]
        for r in db.conn.execute(
            "SELECT ref_id, COUNT(DISTINCT draft_name) as cnt FROM draft_refs "
            "WHERE ref_type = 'draft' GROUP BY ref_id"
        )
    }
    # Author experience: total drafts authored per person.
    author_draft_counts: dict[int, int] = {
        r["person_id"]: r["cnt"]
        for r in db.conn.execute(
            "SELECT person_id, COUNT(*) as cnt FROM draft_authors GROUP BY person_id"
        )
    }
    # Draft -> author-id list.
    draft_authors: dict[str, list[int]] = {}
    for r in db.conn.execute("SELECT draft_name, person_id FROM draft_authors"):
        draft_authors.setdefault(r["draft_name"], []).append(r["person_id"])
    # Momentum ratings.
    ratings_map: dict[str, float] = {
        r["draft_name"]: r["momentum"]
        for r in db.conn.execute("SELECT draft_name, momentum FROM ratings")
    }
    # Score each requested draft from the pre-loaded data.
    results = {}
    for name in draft_names:
        draft = drafts_map.get(name)
        if not draft:
            results[name] = {"score": 0, "factors": {}}
            continue
        factors = {}
        # 1. WG adoption: draft-ietf-* names are working-group documents.
        wg_val = 1.0 if name.startswith("draft-ietf-") else 0.0
        factors["wg_adopted"] = {"value": wg_val, "weight": 0.25,
                                 "label": "WG Adopted",
                                 "detail": "draft-ietf-*" if wg_val else "individual"}
        # 2. Revision maturity, saturating at revision 5.
        try:
            rev_num = int(draft.rev) if draft.rev else 0
        except (ValueError, TypeError):
            rev_num = 0
        rev_val = min(rev_num / 5.0, 1.0)
        factors["revision_maturity"] = {"value": round(rev_val, 3), "weight": 0.15,
                                        "label": "Revision Maturity",
                                        "detail": f"rev {rev_num}"}
        # 3. Reference density relative to the best-referenced draft.
        ref_count = ref_counts.get(name, 0)
        ref_val = min(ref_count / max_refs, 1.0)
        factors["reference_density"] = {"value": round(ref_val, 3), "weight": 0.15,
                                        "label": "Reference Density",
                                        "detail": f"{ref_count} refs (max {max_refs})"}
        # 4. Cited-by count, saturating at 5 citing drafts.
        cited_by = cited_by_counts.get(name, 0)
        cited_val = min(cited_by / 5.0, 1.0)
        factors["cited_by_count"] = {"value": round(cited_val, 3), "weight": 0.15,
                                     "label": "Cited By Others",
                                     "detail": f"{cited_by} draft(s)"}
        # 5. Author experience: average drafts per author, saturating at 5.
        person_ids = draft_authors.get(name, [])
        if person_ids:
            counts = [author_draft_counts.get(pid, 1) for pid in person_ids]
            avg_exp = sum(counts) / len(counts)
            exp_val = min(avg_exp / 5.0, 1.0)
        else:
            exp_val = 0.0
            avg_exp = 0
        factors["author_experience"] = {"value": round(exp_val, 3), "weight": 0.15,
                                        "label": "Author Experience",
                                        "detail": f"avg {avg_exp:.1f} drafts/author"}
        # 6. Momentum rating mapped from the 1-5 scale onto 0-1.
        momentum = ratings_map.get(name)
        if momentum is not None:
            mom_val = (momentum - 1) / 4.0
        else:
            mom_val = 0.0
        # NOTE(review): the detail string uses truthiness, so a stored
        # momentum of 0 would read "unrated" while still contributing a
        # negative value above — confirm ratings are always 1-5.
        factors["momentum_rating"] = {"value": round(mom_val, 3), "weight": 0.15,
                                      "label": "Momentum",
                                      "detail": f"{momentum}/5" if momentum else "unrated"}
        # Weighted sum of factor values -> 0-100 score, plus per-factor
        # contribution for display.
        total = sum(f["value"] * f["weight"] for f in factors.values())
        score = round(total * 100, 1)
        for f in factors.values():
            f["contribution"] = round(f["value"] * f["weight"] * 100, 1)
        results[name] = {"score": score, "factors": factors}
    return results

View File

@@ -4,6 +4,7 @@ from __future__ import annotations
import hashlib
import re
import time
from collections import defaultdict
import numpy as np
@@ -50,6 +51,9 @@ class HybridSearch:
self.db = db
self._embedder = embedder
self._ollama_available: bool | None = None
self._embeddings_cache: dict[str, np.ndarray] | None = None
self._embeddings_cache_time: float = 0
self._EMBEDDINGS_TTL: float = 300 # 5 minutes
@property
def embedder(self):
@@ -79,6 +83,16 @@ class HybridSearch:
self._ollama_available = False
return self._ollama_available
def _get_all_embeddings(self) -> dict[str, np.ndarray]:
    """Return all embeddings, cached with a TTL so repeated searches
    don't reload the full embedding table from the database each time."""
    now = time.monotonic()
    stale = (
        self._embeddings_cache is None
        or now - self._embeddings_cache_time >= self._EMBEDDINGS_TTL
    )
    if stale:
        # Refresh from the database and stamp the load time.
        self._embeddings_cache = self.db.all_embeddings()
        self._embeddings_cache_time = now
    return self._embeddings_cache
def search(self, query: str, top_k: int = 10) -> list[dict]:
"""Combine FTS5 keyword search + embedding similarity search.
@@ -144,7 +158,7 @@ class HybridSearch:
self._ollama_available = False
return []
all_embeddings = self.db.all_embeddings()
all_embeddings = self._get_all_embeddings()
if not all_embeddings:
return []

View File

@@ -7,11 +7,176 @@ ready for JSON serialization or Jinja2 template rendering.
from __future__ import annotations
import json
import re
import sys
import time
from collections import Counter, defaultdict
from functools import lru_cache
from pathlib import Path
from typing import TypedDict
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.manifold import TSNE
from sklearn.preprocessing import normalize as sk_normalize
# ---------------------------------------------------------------------------
# TypedDicts for common return shapes
# ---------------------------------------------------------------------------
class OverviewStats(TypedDict):
    """High-level dashboard statistics from :func:`get_overview_stats`."""

    total_drafts: int
    # Rated counts exclude drafts flagged as false positives.
    rated_count: int
    author_count: int
    idea_count: int
    gap_count: int
    input_tokens: int
    output_tokens: int
    false_positive_count: int


class DraftListItem(TypedDict):
    """Single draft in the paginated listing from :func:`get_drafts_page`."""

    name: str
    title: str
    date: str | None
    url: str
    pages: int
    group: str
    source: str
    # Per-dimension rating values follow.
    score: float
    novelty: float
    maturity: float
    overlap: float
    momentum: float
    relevance: float
    categories: list[str]
    summary: str
    readiness: float


class DraftsPage(TypedDict):
    """Paginated draft listing from :func:`get_drafts_page`."""

    drafts: list[DraftListItem]
    total: int
    page: int
    per_page: int
    pages: int


class AuthorInfo(TypedDict):
    """Author entry from :func:`get_top_authors`."""

    name: str
    affiliation: str
    draft_count: int
    drafts: list[str]


class AuthorNetworkNode(TypedDict):
    """Node in the author network graph."""

    id: str
    name: str
    org: str
    draft_count: int
    avg_score: float
    drafts: list[str]


class AuthorNetworkEdge(TypedDict):
    """Edge in the author network graph."""

    source: str
    target: str
    weight: int


class AuthorCluster(TypedDict):
    """Cluster in the author network."""

    id: int
    members: list[str]
    # Organization name -> member count within the cluster.
    org_mix: dict[str, int]
    size: int
    drafts: list[dict[str, str]]
    draft_count: int


class AuthorNetwork(TypedDict):
    """Full author network from :func:`get_author_network_full`."""

    nodes: list[AuthorNetworkNode]
    edges: list[AuthorNetworkEdge]
    clusters: list[AuthorCluster]


class SimilarityGraphStats(TypedDict):
    """Stats sub-dict in similarity graph."""

    node_count: int
    edge_count: int
    avg_similarity: float


class SimilarityGraph(TypedDict):
    """Draft similarity network from :func:`get_similarity_graph`."""

    # Node/edge payloads stay loosely typed (plain dicts) for the
    # front-end graph renderer.
    nodes: list[dict]
    edges: list[dict]
    stats: SimilarityGraphStats


class TimelineData(TypedDict):
    """Monthly category counts from :func:`get_timeline_data`."""

    months: list[str]
    # Category name -> per-month counts, aligned with ``months``.
    series: dict[str, list[int]]
    categories: list[str]


class MonitorCost(TypedDict):
    """Cost sub-dict in monitor status."""

    input_tokens: int
    output_tokens: int
    estimated_usd: float


class MonitorPipeline(TypedDict):
    """Pipeline sub-dict in monitor status."""

    total_drafts: int
    rated: int
    embedded: int
    with_ideas: int
    idea_total: int
    gap_count: int


class MonitorStatus(TypedDict):
    """Monitor status from :func:`get_monitor_status`."""

    last_run: dict | None
    runs: list[dict]
    unprocessed: dict[str, int]
    total_runs: int
    pipeline: MonitorPipeline
    cost: MonitorCost


class SearchResults(TypedDict):
    """Global search results from :func:`global_search`."""

    drafts: list[dict]
    ideas: list[dict]
    authors: list[dict]
    gaps: list[dict]


class CitationGraphStats(TypedDict):
    """Stats sub-dict in citation graph."""

    node_count: int
    edge_count: int
    rfc_count: int
    draft_count: int


class CitationGraph(TypedDict):
    """Citation network from :func:`get_citation_graph`."""

    nodes: list[dict]
    edges: list[dict]
    stats: CitationGraphStats
# Add project root to path so we can import ietf_analyzer
_project_root = Path(__file__).resolve().parent.parent.parent
@@ -20,6 +185,8 @@ if str(_project_root) not in sys.path:
from ietf_analyzer.config import Config
from ietf_analyzer.db import Database
from ietf_analyzer.readiness import compute_readiness, compute_readiness_batch
from ietf_analyzer.search import HybridSearch
def _extract_month(time_str: str | None) -> str:
"""Normalize a date string to YYYY-MM format."""
@@ -55,7 +222,7 @@ def get_db() -> Database:
return Database(config)
def get_overview_stats(db: Database) -> dict:
def get_overview_stats(db: Database) -> OverviewStats:
"""Return high-level stats for the dashboard home page.
Excludes drafts flagged as false positives from rated counts.
@@ -204,7 +371,7 @@ def get_drafts_page(
sort: str = "score",
sort_dir: str = "desc",
source: str = "",
) -> dict:
) -> DraftsPage:
"""Return a paginated, filtered list of drafts with ratings.
Returns dict with keys: drafts, total, page, per_page, pages.
@@ -262,11 +429,9 @@ def get_drafts_page(
start = (page - 1) * per_page
page_items = filtered[start : start + per_page]
# Pre-compute readiness for page items (lightweight version)
from ietf_analyzer.readiness import compute_readiness
readiness_cache = {}
for draft, rating in page_items:
readiness_cache[draft.name] = compute_readiness(db, draft.name)
# Pre-compute readiness in batch (~6 queries total instead of ~200)
readiness_cache = compute_readiness_batch(db, [d.name for d, _ in page_items])
drafts = []
for draft, rating in page_items:
@@ -350,7 +515,7 @@ def get_draft_detail(db: Database, name: str) -> dict | None:
}
# Readiness score
from ietf_analyzer.readiness import compute_readiness
result["readiness"] = compute_readiness(db, name)
# Annotation
@@ -387,7 +552,7 @@ def get_rating_distributions(db: Database) -> dict:
return dims
def get_timeline_data(db: Database) -> dict:
def get_timeline_data(db: Database) -> TimelineData:
"""Return monthly counts by category for timeline chart."""
pairs = db.drafts_with_ratings(limit=1000)
all_drafts = db.list_drafts(limit=1000, order_by="time ASC")
@@ -482,7 +647,7 @@ def read_generated_draft(filename: str) -> str | None:
return path.read_text(errors="replace")
def get_top_authors(db: Database, limit: int = 30) -> list[dict]:
def get_top_authors(db: Database, limit: int = 30) -> list[AuthorInfo]:
"""Return top authors by draft count."""
rows = db.top_authors(limit=limit)
return [
@@ -561,19 +726,19 @@ def get_coauthor_network(db: Database, min_shared: int = 1) -> dict:
return {"nodes": nodes, "edges": edges}
def get_similarity_graph(db: Database, threshold: float = 0.75) -> dict:
def get_similarity_graph(db: Database, threshold: float = 0.75) -> SimilarityGraph:
"""Return draft similarity network (cached)."""
return _cached(f"similarity_{threshold}", lambda: _compute_similarity_graph(db, threshold))
def _compute_similarity_graph(db: Database, threshold: float = 0.75) -> dict:
def _compute_similarity_graph(db: Database, threshold: float = 0.75) -> SimilarityGraph:
"""Return draft similarity network for force-directed graph.
Returns {nodes: [{name, title, category, score}],
edges: [{source, target, similarity}],
stats: {node_count, edge_count, avg_similarity}}
"""
import numpy as np
embeddings = db.all_embeddings()
if len(embeddings) < 2:
@@ -639,12 +804,12 @@ def get_cross_org_data(db: Database, limit: int = 20) -> list[dict]:
]
def get_author_network_full(db: Database) -> dict:
def get_author_network_full(db: Database) -> AuthorNetwork:
"""Return author network (cached for 5 min)."""
return _cached("author_network", lambda: _compute_author_network_full(db))
def _compute_author_network_full(db: Database) -> dict:
def _compute_author_network_full(db: Database) -> AuthorNetwork:
"""Return enriched co-authorship network with avg scores and cluster info.
Returns {
@@ -704,6 +869,12 @@ def _compute_author_network_full(db: Database) -> dict:
visited: set[str] = set()
clusters = []
# Batch-load all drafts referenced by authors (avoid N+1 in cluster loop)
_all_dn = set()
for _ai in author_info.values():
_all_dn.update(_ai.get("drafts", []))
_all_drafts_map = db.get_drafts_by_names(list(_all_dn))
for node in sorted(node_set):
if node in visited:
continue
@@ -728,7 +899,7 @@ def _compute_author_network_full(db: Database) -> dict:
org_mix[org] += 1
for dn in author_info.get(m, {}).get("drafts", []):
if dn not in cluster_drafts:
d = db.get_draft(dn)
d = _all_drafts_map.get(dn)
cluster_drafts[dn] = d.title[:80] if d else dn
clusters.append({
"id": len(clusters),
@@ -756,9 +927,7 @@ def _compute_idea_clusters(db: Database) -> dict:
a target of ~30 clusters for readable groupings. Enriches each cluster
with WG info and category breakdown.
"""
import json as _json
import numpy as np
from sklearn.preprocessing import normalize as sk_normalize
embeddings = db.all_idea_embeddings()
if not embeddings:
@@ -777,8 +946,8 @@ def _compute_idea_clusters(db: Database) -> dict:
draft_cats: dict[str, list[str]] = {}
for r in rating_rows:
try:
draft_cats[r["draft_name"]] = _json.loads(r["categories"]) if r["categories"] else []
except (_json.JSONDecodeError, TypeError):
draft_cats[r["draft_name"]] = json.loads(r["categories"]) if r["categories"] else []
except (json.JSONDecodeError, TypeError):
draft_cats[r["draft_name"]] = []
# Build matrix from embeddings that have matching ideas
@@ -792,7 +961,6 @@ def _compute_idea_clusters(db: Database) -> dict:
# Ward clustering on normalized vectors — target ~30 clusters scaled by dataset size
n_target = max(10, min(40, len(idea_ids) // 12))
try:
from sklearn.cluster import AgglomerativeClustering
clustering = AgglomerativeClustering(n_clusters=n_target, linkage='ward')
labels = clustering.fit_predict(matrix_norm)
except Exception:
@@ -877,7 +1045,6 @@ def _compute_idea_clusters(db: Database) -> dict:
# t-SNE for scatter
scatter = []
try:
from sklearn.manifold import TSNE
perp = min(30, len(idea_ids) - 1)
tsne = TSNE(n_components=2, perplexity=perp, random_state=42, max_iter=500)
coords = tsne.fit_transform(matrix_norm)
@@ -917,7 +1084,7 @@ def _compute_timeline_animation_data(db: Database) -> dict:
animation frames. Each point carries a ``month`` field (YYYY-MM) so the
front-end can build cumulative animation frames.
"""
import numpy as np
embeddings = db.all_embeddings()
if len(embeddings) < 5:
@@ -935,7 +1102,6 @@ def _compute_timeline_animation_data(db: Database) -> dict:
matrix = np.array([embeddings[n] for n in names])
try:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1),
random_state=42, max_iter=500)
coords = tsne.fit_transform(matrix)
@@ -975,7 +1141,7 @@ def _compute_timeline_animation_data(db: Database) -> dict:
}
def get_monitor_status(db: Database) -> dict:
def get_monitor_status(db: Database) -> MonitorStatus:
"""Return monitoring status data for dashboard."""
runs = db.get_monitor_runs(limit=20)
last = runs[0] if runs else None
@@ -1014,12 +1180,12 @@ def get_monitor_status(db: Database) -> dict:
}
def get_citation_graph(db: Database, min_refs: int = 2) -> dict:
def get_citation_graph(db: Database, min_refs: int = 2) -> CitationGraph:
"""Return citation graph (cached for 5 min)."""
return _cached(f"citation_graph_{min_refs}", lambda: _compute_citation_graph(db, min_refs))
def _compute_citation_graph(db: Database, min_refs: int = 2) -> dict:
def _compute_citation_graph(db: Database, min_refs: int = 2) -> CitationGraph:
"""Return citation network data for force-directed graph.
Returns {nodes: [{id, type, title, influence, ...}],
@@ -1131,7 +1297,7 @@ def _compute_citation_graph(db: Database, min_refs: int = 2) -> dict:
}
def global_search(db: Database, query: str) -> dict:
def global_search(db: Database, query: str) -> SearchResults:
"""Search across drafts (FTS5), ideas, authors, and gaps.
Returns {drafts: [...], ideas: [...], authors: [...], gaps: [...]}.
@@ -1144,7 +1310,6 @@ def global_search(db: Database, query: str) -> dict:
# 1. Drafts via FTS5
try:
import re
fts_query = re.sub(r'[^\w\s]', '', q)
fts_query = re.sub(r'\b(NEAR|OR|AND|NOT)\b', '', fts_query, flags=re.IGNORECASE)
fts_query = re.sub(r'\s+', ' ', fts_query).strip()
@@ -1242,7 +1407,7 @@ def get_landscape_tsne(db: Database) -> list[dict]:
def _compute_landscape_tsne(db: Database) -> list[dict]:
"""Compute t-SNE from embeddings, return [{name, title, x, y, category, score}]."""
import numpy as np
embeddings = db.all_embeddings()
if len(embeddings) < 5:
@@ -1260,7 +1425,6 @@ def _compute_landscape_tsne(db: Database) -> list[dict]:
matrix = np.array([embeddings[n] for n in names])
try:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, perplexity=min(30, len(names) - 1),
random_state=42, max_iter=500)
coords = tsne.fit_transform(matrix)
@@ -1295,7 +1459,7 @@ def get_comparison_data(db: Database, names: list[str]) -> dict | None:
comparison_text: str | None,
}
"""
import numpy as np
drafts_data = []
all_ideas: dict[str, list[dict]] = {}
@@ -1384,9 +1548,6 @@ def get_comparison_data(db: Database, names: list[str]) -> dict | None:
def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict:
    """Search-only (free) — returns sources + cached answer if available.

    Args:
        db: Open database handle.
        question: Free-text question to search for.
        top_k: Maximum number of source drafts to return.
    """
    # Config and HybridSearch are imported at module top; the stale
    # function-local imports were redundant and have been removed.
    config = Config.load()
    searcher = HybridSearch(config, db)
    return searcher.search_only(question, top_k=top_k)
@@ -1394,9 +1555,6 @@ def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict:
def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
    """Run Claude synthesis (costs tokens, result is cached permanently).

    Args:
        db: Open database handle.
        question: Question to answer.
        top_k: Number of source drafts fed to the model.
        cheap: Use the cheaper model when True.
    """
    # Config and HybridSearch are imported at module top; the stale
    # function-local imports were redundant and have been removed.
    config = Config.load()
    searcher = HybridSearch(config, db)
    return searcher.ask(question, top_k=top_k, cheap=cheap)