Platform upgrade: semantic search, citations, readiness, tests, Docker

Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation,
resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 20:52:56 +01:00
parent da2a989744
commit 757b781c67
33 changed files with 4253 additions and 170 deletions

0
tests/__init__.py Normal file
View File

168
tests/conftest.py Normal file
View File

@@ -0,0 +1,168 @@
"""Shared fixtures for IETF Draft Analyzer tests."""
from __future__ import annotations
import json
import sqlite3
from datetime import datetime, timezone
import numpy as np
import pytest
from ietf_analyzer.config import Config
from ietf_analyzer.db import Database, SCHEMA
from ietf_analyzer.models import Author, Draft, Rating
@pytest.fixture
def tmp_db(tmp_path):
    """Yield a Database stored in ``test.db`` under pytest's ``tmp_path``.

    The connection is opened before the test body runs and the database is
    closed once the test has finished.
    """
    database = Database(
        Config(
            data_dir=str(tmp_path),
            db_path=str(tmp_path / "test.db"),
        )
    )
    # Access the connection up front; per the original author this forces
    # the connection to open and the schema to be created.
    _ = database.conn
    yield database
    database.close()
@pytest.fixture
def sample_draft():
    """Provide one fully populated Draft for tests that need a single draft."""
    fields = {
        "name": "draft-test-ai-agent-protocol",
        "rev": "02",
        "title": "AI Agent Communication Protocol",
        "abstract": "This document defines a protocol for autonomous AI agents to communicate with each other in a standardized manner.",
        "time": "2025-06-15T12:00:00+00:00",
        "dt_id": 12345,
        "pages": 28,
        "words": 12000,
        "group": "dispatch",
        "group_uri": "/api/v1/group/group/1234/",
        "expires": "2025-12-15T12:00:00+00:00",
        "ad": None,
        "shepherd": None,
        "states": ["I-D Exists"],
        "full_text": "Internet-Draft: AI Agent Communication Protocol\n\nAbstract\n\nThis document defines...",
        "categories": ["A2A protocols", "Agent discovery/reg"],
        "tags": ["ai", "agent"],
        "fetched_at": "2025-06-20T10:00:00+00:00",
    }
    return Draft(**fields)
@pytest.fixture
def sample_rating():
    """Provide a populated Rating for ``draft-test-ai-agent-protocol``."""
    scores = {
        "novelty": 4,
        "maturity": 3,
        "overlap": 2,
        "momentum": 3,
        "relevance": 5,
    }
    notes = {
        "novelty_note": "Unique approach to agent handshake",
        "maturity_note": "Early stage but well-structured",
        "overlap_note": "Partially overlaps with MCP drafts",
        "momentum_note": "Active working group interest",
        "relevance_note": "Directly addresses core AI agent interop",
    }
    return Rating(
        draft_name="draft-test-ai-agent-protocol",
        summary="Defines a novel protocol for AI agent communication with discovery and auth mechanisms.",
        categories=["A2A protocols", "Agent discovery/reg"],
        rated_at="2025-06-20T10:00:00+00:00",
        **scores,
        **notes,
    )
def _make_draft(name, title, time, group=None, pages=10, categories=None):
    """Build a revision-"00" Draft used to seed a test database.

    The abstract is derived from ``title``, the word count is ``pages * 400``,
    a falsy ``categories`` becomes an empty list, and ``fetched_at`` is the
    current UTC time in ISO format.
    """
    category_list = categories if categories else []
    word_count = pages * 400
    fetched_stamp = datetime.now(timezone.utc).isoformat()
    return Draft(
        name=name,
        rev="00",
        title=title,
        abstract=f"Abstract for {title}.",
        time=time,
        dt_id=None,
        pages=pages,
        words=word_count,
        group=group,
        categories=category_list,
        fetched_at=fetched_stamp,
    )
def _make_rating(draft_name, novelty, maturity, overlap, momentum, relevance, categories=None):
    """Build a Rating used to seed a test database.

    The summary is derived from ``draft_name``, a falsy ``categories`` falls
    back to ``["A2A protocols"]``, and ``rated_at`` is the current UTC time
    in ISO format.
    """
    category_list = categories if categories else ["A2A protocols"]
    rated_stamp = datetime.now(timezone.utc).isoformat()
    return Rating(
        draft_name=draft_name,
        novelty=novelty,
        maturity=maturity,
        overlap=overlap,
        momentum=momentum,
        relevance=relevance,
        summary=f"Summary for {draft_name}.",
        categories=category_list,
        rated_at=rated_stamp,
    )
@pytest.fixture
def seeded_db(tmp_db):
    """Yield ``tmp_db`` after loading a small, fixed data set.

    Seeds five drafts with one rating each, four ideas spread over three
    drafts, three authors with six draft-author links, and references for
    three drafts.
    """
    db = tmp_db

    # Drafts: (name, title, time, group, pages, categories)
    draft_specs = [
        ("draft-alpha-agent-comm", "Alpha Agent Communication", "2025-01-10", "dispatch", 20, ["A2A protocols"]),
        ("draft-beta-ml-traffic", "Beta ML Traffic Optimization", "2025-02-15", "netmod", 15, ["ML traffic mgmt"]),
        ("draft-gamma-agent-id", "Gamma Agent Identity", "2025-03-20", "secdispatch", 12, ["Agent identity/auth"]),
        ("draft-delta-safety", "Delta AI Safety Framework", "2025-04-25", None, 30, ["AI safety/alignment"]),
        ("draft-epsilon-discovery", "Epsilon Agent Discovery", "2025-05-30", "dispatch", 8, ["Agent discovery/reg"]),
    ]
    seeded_drafts = [_make_draft(*spec) for spec in draft_specs]
    for draft in seeded_drafts:
        db.upsert_draft(draft)

    # Ratings: (draft_name, novelty, maturity, overlap, momentum, relevance, categories)
    rating_specs = [
        ("draft-alpha-agent-comm", 4, 3, 2, 3, 5, ["A2A protocols"]),
        ("draft-beta-ml-traffic", 3, 4, 3, 2, 3, ["ML traffic mgmt"]),
        ("draft-gamma-agent-id", 5, 2, 1, 4, 4, ["Agent identity/auth"]),
        ("draft-delta-safety", 3, 3, 4, 3, 4, ["AI safety/alignment"]),
        ("draft-epsilon-discovery", 4, 2, 2, 5, 5, ["Agent discovery/reg"]),
    ]
    seeded_ratings = [_make_rating(*spec) for spec in rating_specs]
    for rating in seeded_ratings:
        db.upsert_rating(rating)

    # Ideas, keyed by the draft they belong to
    ideas_by_draft = {
        "draft-alpha-agent-comm": [
            {"title": "Agent Handshake", "description": "Three-way handshake for agents", "type": "protocol"},
            {"title": "Capability Negotiation", "description": "Agents advertise capabilities", "type": "mechanism"},
        ],
        "draft-beta-ml-traffic": [
            {"title": "ML Traffic Classifier", "description": "Classify traffic using ML", "type": "mechanism"},
        ],
        "draft-gamma-agent-id": [
            {"title": "Agent Certificate", "description": "X.509 extension for agents", "type": "extension"},
        ],
    }
    for draft_name, ideas in ideas_by_draft.items():
        db.insert_ideas(draft_name, ideas)

    # Authors: (person_id, name, affiliation); ascii_name mirrors name
    author_specs = [
        (1001, "Alice Researcher", "ExampleCorp"),
        (1002, "Bob Engineer", "TestLabs"),
        (1003, "Carol Scientist", "ExampleCorp"),
    ]
    seeded_authors = [
        Author(
            person_id=person_id,
            name=full_name,
            ascii_name=full_name,
            affiliation=affiliation,
            fetched_at=datetime.now(timezone.utc).isoformat(),
        )
        for person_id, full_name, affiliation in author_specs
    ]
    for author in seeded_authors:
        db.upsert_author(author)

    # Draft-author links, passed positionally exactly as listed
    authorship_links = [
        ("draft-alpha-agent-comm", 1001, 1, "ExampleCorp"),
        ("draft-alpha-agent-comm", 1002, 2, "TestLabs"),
        ("draft-beta-ml-traffic", 1002, 1, "TestLabs"),
        ("draft-gamma-agent-id", 1001, 1, "ExampleCorp"),
        ("draft-gamma-agent-id", 1003, 2, "ExampleCorp"),
        ("draft-delta-safety", 1003, 1, "ExampleCorp"),
    ]
    for link in authorship_links:
        db.upsert_draft_author(*link)

    # Refs, keyed by the citing draft
    refs_by_draft = {
        "draft-alpha-agent-comm": [("rfc", "8259"), ("rfc", "9110"), ("draft", "draft-ietf-httpbis")],
        "draft-beta-ml-traffic": [("rfc", "8259"), ("bcp", "BCP14")],
        "draft-gamma-agent-id": [("rfc", "5280"), ("rfc", "8259")],
    }
    for draft_name, refs in refs_by_draft.items():
        db.insert_refs(draft_name, refs)

    yield db

287
tests/test_db.py Normal file
View File

@@ -0,0 +1,287 @@
"""Tests for ietf_analyzer.db.Database."""
from __future__ import annotations
import json
from datetime import datetime, timezone
import numpy as np
import pytest
from ietf_analyzer.db import Database
from ietf_analyzer.models import Author, Draft, Rating
# ---- Table creation ----
def test_ensure_tables_creates_all(tmp_db):
    """A freshly initialized Database must contain every expected table."""
    expected = {
        "drafts", "ratings", "embeddings", "llm_cache",
        "authors", "draft_authors", "ideas", "gaps",
        "draft_refs", "generated_drafts", "generation_runs",
        "sources", "observatory_snapshots", "gap_history",
        "annotations", "monitor_runs",
    }
    cursor = tmp_db.conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
    )
    table_names = {row["name"] for row in cursor.fetchall()}
    # Extra tables are allowed; only missing ones are a failure.
    assert expected.issubset(table_names), f"Missing tables: {expected - table_names}"
# ---- Drafts ----
def test_upsert_draft_insert(tmp_db, sample_draft):
    """A draft that was upserted can be fetched back with matching fields."""
    tmp_db.upsert_draft(sample_draft)
    stored = tmp_db.get_draft(sample_draft.name)
    assert stored is not None
    for field in ("name", "title", "rev", "pages", "categories"):
        assert getattr(stored, field) == getattr(sample_draft, field)
def test_upsert_draft_update(tmp_db, sample_draft):
    """Upserting the same draft name twice updates the row instead of adding one."""
    tmp_db.upsert_draft(sample_draft)

    sample_draft.title = "Updated Title"
    sample_draft.rev = "03"
    tmp_db.upsert_draft(sample_draft)

    stored = tmp_db.get_draft(sample_draft.name)
    assert stored.title == "Updated Title"
    assert stored.rev == "03"
    # The second upsert must not have created a duplicate row.
    assert tmp_db.count_drafts() == 1
def test_search_drafts_fts5(tmp_db, sample_draft):
    """Searching for words from the abstract returns the stored draft first."""
    tmp_db.upsert_draft(sample_draft)
    hits = tmp_db.search_drafts("autonomous agents communicate")
    assert len(hits) >= 1
    top_hit = hits[0]
    assert top_hit.name == sample_draft.name
def test_search_drafts_no_results(tmp_db, sample_draft):
    """A query that matches nothing yields an empty list."""
    tmp_db.upsert_draft(sample_draft)
    hits = tmp_db.search_drafts("quantum blockchain hyperledger")
    assert hits == []
def test_list_drafts_pagination(seeded_db):
    """``list_drafts`` honours both ``limit`` and ``order_by``."""
    everything = seeded_db.list_drafts(limit=100, order_by="name ASC")
    assert len(everything) == 5

    page = seeded_db.list_drafts(limit=2, order_by="name ASC")
    assert len(page) == 2
    first, second = page[0], page[1]
    assert first.name == "draft-alpha-agent-comm"
    assert second.name == "draft-beta-ml-traffic"
def test_count_drafts(seeded_db):
    """``count_drafts`` reports all five seeded drafts."""
    total = seeded_db.count_drafts()
    assert total == 5
# ---- Ratings ----
def test_upsert_rating(tmp_db, sample_draft, sample_rating):
    """A rating that was upserted can be fetched back by draft name."""
    tmp_db.upsert_draft(sample_draft)
    tmp_db.upsert_rating(sample_rating)

    stored = tmp_db.get_rating(sample_rating.draft_name)
    assert stored is not None
    assert (stored.novelty, stored.relevance) == (4, 5)
    assert "A2A protocols" in stored.categories
def test_drafts_with_ratings(seeded_db):
    """``drafts_with_ratings`` yields matching (Draft, Rating) pairs."""
    pairs = seeded_db.drafts_with_ratings(limit=100)
    assert len(pairs) == 5
    for paired_draft, paired_rating in pairs:
        assert isinstance(paired_draft, Draft)
        assert isinstance(paired_rating, Rating)
        # Each rating must belong to the draft it is paired with.
        assert paired_draft.name == paired_rating.draft_name
def test_drafts_without_text(tmp_db):
    """Only drafts whose ``full_text`` is None are reported as missing text."""
    with_text = Draft(name="draft-has-text", rev="00", title="Has Text", abstract="Abs",
                      time="2025-01-01", full_text="Some text here")
    without_text = Draft(name="draft-no-text", rev="00", title="No Text", abstract="Abs",
                         time="2025-01-01", full_text=None)
    for draft in (with_text, without_text):
        tmp_db.upsert_draft(draft)

    missing_names = [d.name for d in tmp_db.drafts_without_text()]
    assert "draft-no-text" in missing_names
    assert "draft-has-text" not in missing_names
# ---- Ideas ----
def test_insert_ideas(seeded_db):
    """Ideas inserted in bulk for a draft can be read back in order."""
    target = "draft-epsilon-discovery"
    new_ideas = [
        {"title": "New Idea A", "description": "Desc A", "type": "mechanism"},
        {"title": "New Idea B", "description": "Desc B", "type": "protocol"},
    ]
    seeded_db.insert_ideas(target, new_ideas)

    stored = seeded_db.get_ideas_for_draft(target)
    assert len(stored) == 2
    assert stored[0]["title"] == "New Idea A"
def test_get_ideas_for_draft(seeded_db):
    """The two ideas seeded for the alpha draft are returned for that draft."""
    ideas = seeded_db.get_ideas_for_draft("draft-alpha-agent-comm")
    assert len(ideas) == 2
    titles = {idea["title"] for idea in ideas}
    for wanted in ("Agent Handshake", "Capability Negotiation"):
        assert wanted in titles
def test_insert_ideas_replaces_existing(seeded_db):
    """Re-inserting ideas for a draft replaces the previously stored ones."""
    target = "draft-alpha-agent-comm"
    replacement = {"title": "Replacement Idea", "description": "Replaced", "type": "pattern"}
    seeded_db.insert_ideas(target, [replacement])

    stored = seeded_db.get_ideas_for_draft(target)
    # The two seeded ideas must be gone; only the replacement remains.
    assert len(stored) == 1
    assert stored[0]["title"] == "Replacement Idea"
# ---- Gaps ----
def test_insert_gaps(tmp_db):
    """Every gap passed to ``insert_gaps`` is stored."""
    auth_gap = {"topic": "Agent Auth Gap", "description": "No standard auth for agents",
                "category": "Agent identity/auth", "severity": "critical", "evidence": "Only 2 drafts address this"}
    monitoring_gap = {"topic": "Monitoring Gap", "description": "No agent monitoring standard",
                      "category": "Autonomous netops", "severity": "high", "evidence": "Zero drafts cover monitoring"}
    tmp_db.insert_gaps([auth_gap, monitoring_gap])

    stored = tmp_db.all_gaps()
    assert len(stored) == 2
def test_all_gaps(tmp_db):
    """``all_gaps`` returns stored gaps with their fields intact."""
    only_gap = {"topic": "Test Gap", "description": "Test description",
                "category": "Other", "severity": "medium", "evidence": "Test evidence"}
    tmp_db.insert_gaps([only_gap])

    stored = tmp_db.all_gaps()
    assert len(stored) == 1
    row = stored[0]
    assert row["topic"] == "Test Gap"
    assert row["severity"] == "medium"
    assert row["evidence"] == "Test evidence"
# ---- Embeddings ----
def test_store_embedding(tmp_db, sample_draft):
    """A stored embedding vector can be read back for the same draft."""
    tmp_db.upsert_draft(sample_draft)
    vector = np.array([0.1, 0.2, 0.3, 0.4, 0.5], dtype=np.float32)
    tmp_db.store_embedding(sample_draft.name, "test-model", vector)

    loaded = tmp_db.get_embedding(sample_draft.name)
    assert loaded is not None
    np.testing.assert_array_almost_equal(loaded, vector)
def test_all_embeddings(tmp_db, sample_draft):
    """``all_embeddings`` maps each draft name to its stored vector."""
    tmp_db.upsert_draft(sample_draft)
    vector = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    tmp_db.store_embedding(sample_draft.name, "test-model", vector)

    by_name = tmp_db.all_embeddings()
    assert sample_draft.name in by_name
    np.testing.assert_array_almost_equal(by_name[sample_draft.name], vector)
# ---- LLM Cache ----
def test_cache_response(tmp_db):
    """A cached LLM response is found again by draft name and hash."""
    draft_name, prompt_hash = "draft-test", "abc123hash"
    tmp_db.cache_response(
        draft_name, prompt_hash, "claude-test",
        "prompt text", '{"result": "ok"}', 100, 50,
    )

    cached = tmp_db.get_cached_response(draft_name, prompt_hash)
    assert cached is not None
    # The cached payload is the JSON text that was stored.
    assert json.loads(cached) == {"result": "ok"}
def test_cache_response_miss(tmp_db):
    """Looking up a response that was never cached returns None."""
    cached = tmp_db.get_cached_response("nonexistent", "badhash")
    assert cached is None
# ---- Refs ----
def test_insert_refs(seeded_db):
    """The three references seeded for the alpha draft can be queried."""
    refs = seeded_db.get_refs_for_draft("draft-alpha-agent-comm")
    assert len(refs) == 3
    # The first element of each ref is its type.
    kinds = {ref[0] for ref in refs}
    for wanted in ("rfc", "draft"):
        assert wanted in kinds
def test_top_refs(seeded_db):
    """``top_referenced`` ranks the most-cited RFC first."""
    ranking = seeded_db.top_referenced(ref_type="rfc", limit=5)
    assert len(ranking) > 0
    # RFC 8259 is cited by the alpha, beta and gamma drafts in the seed data.
    leader = ranking[0]
    assert leader[0] == "8259"
    assert leader[1] == 3
# ---- Authors ----
def test_get_authors_for_draft(seeded_db):
    """Both authors linked to the alpha draft are returned."""
    authors = seeded_db.get_authors_for_draft("draft-alpha-agent-comm")
    assert len(authors) == 2
    author_names = {author.name for author in authors}
    for wanted in ("Alice Researcher", "Bob Engineer"):
        assert wanted in author_names
def test_author_count(seeded_db):
    """``author_count`` reports the three seeded authors."""
    total = seeded_db.author_count()
    assert total == 3
def test_top_authors(seeded_db):
    """The first entry from ``top_authors`` has at least two drafts."""
    ranking = seeded_db.top_authors(limit=10)
    # In the seed data Alice, Bob and Carol are each linked to two drafts.
    assert len(ranking) > 0
    # Each entry unpacks as (name, affiliation, draft count, draft names).
    _name, _affiliation, draft_count, _draft_names = ranking[0]
    assert draft_count >= 2

190
tests/test_models.py Normal file
View File

@@ -0,0 +1,190 @@
"""Tests for ietf_analyzer.models and ietf_analyzer.config."""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from ietf_analyzer.models import Draft, Rating, Author, normalize_category, CATEGORY_NORMALIZE
from ietf_analyzer.config import Config, DEFAULT_KEYWORDS
# ---- Rating ----
def test_rating_composite_score():
    """``composite_score`` matches the weighted formula for a mixed rating."""
    rating = Rating(
        draft_name="test", novelty=4, maturity=3, overlap=2,
        momentum=3, relevance=5, summary="test",
    )
    # Terms in order: 4*0.30, 5*0.25, 3*0.20, 3*0.15, and overlap inverted
    # as (6 - overlap)*0.10.
    expected = 4 * 0.30 + 5 * 0.25 + 3 * 0.20 + 3 * 0.15 + (6 - 2) * 0.10
    difference = abs(rating.composite_score - expected)
    assert difference < 0.001
def test_rating_composite_score_all_ones():
    """``composite_score`` for scores of 1 with overlap 5 matches the formula."""
    rating = Rating(
        draft_name="test", novelty=1, maturity=1, overlap=5,
        momentum=1, relevance=1, summary="test",
    )
    # Overlap enters the formula inverted, as (6 - overlap).
    expected = 1 * 0.30 + 1 * 0.25 + 1 * 0.20 + 1 * 0.15 + (6 - 5) * 0.10
    difference = abs(rating.composite_score - expected)
    assert difference < 0.001
def test_rating_composite_score_all_fives():
    """Composite score with all 5s (and overlap=1) should be 5.0.

    Both checks use a tolerance rather than exact equality: the weights
    (0.30, 0.25, ...) are not exactly representable in binary floating
    point, so the last bit of the sum can depend on the order in which the
    implementation adds the terms.
    """
    r = Rating(
        draft_name="test", novelty=5, maturity=5, overlap=1,
        momentum=5, relevance=5, summary="test",
    )
    # Overlap enters the formula inverted, as (6 - overlap).
    expected = 5 * 0.30 + 5 * 0.25 + 5 * 0.20 + 5 * 0.15 + (6 - 1) * 0.10
    assert abs(r.composite_score - expected) < 0.001
    # Same tolerance as above instead of a brittle `== 5.0`.
    assert abs(r.composite_score - 5.0) < 0.001
# ---- Draft ----
def test_draft_datatracker_url():
    """``datatracker_url`` points at the draft's datatracker document page."""
    draft = Draft(name="draft-example-test", rev="00", title="Test", abstract="", time="2025-01-01")
    url = draft.datatracker_url
    assert url == "https://datatracker.ietf.org/doc/draft-example-test/"
def test_draft_text_url():
    """``text_url`` includes both the draft name and its revision."""
    draft = Draft(name="draft-example-test", rev="03", title="Test", abstract="", time="2025-01-01")
    url = draft.text_url
    assert url == "https://www.ietf.org/archive/id/draft-example-test-03.txt"
def test_draft_defaults():
    """Optional Draft fields default to None, empty lists, or ``"ietf"``."""
    draft = Draft(name="draft-minimal", rev="00", title="Min", abstract="", time="2025-01-01")

    for attr in ("dt_id", "pages", "words", "group", "full_text"):
        assert getattr(draft, attr) is None
    for attr in ("categories", "tags", "states"):
        assert getattr(draft, attr) == []
    assert draft.source == "ietf"
def test_draft_date_property():
    """``Draft.date`` is the date part of the ISO timestamp in ``time``."""
    draft = Draft(name="test", rev="00", title="T", abstract="", time="2025-06-15T12:00:00+00:00")
    date_part = draft.date
    assert date_part == "2025-06-15"
def test_draft_date_empty():
    """``Draft.date`` is an empty string when ``time`` is None."""
    draft = Draft(name="test", rev="00", title="T", abstract="", time=None)
    date_part = draft.date
    assert date_part == ""
# ---- normalize_category ----
def test_normalize_category():
    """Verbose category names are mapped to their short canonical forms."""
    verbose_to_short = {
        "Agent-to-agent communication protocols": "A2A protocols",
        "AI safety / guardrails / alignment": "AI safety/alignment",
    }
    for verbose, short in verbose_to_short.items():
        assert normalize_category(verbose) == short
def test_normalize_category_passthrough():
    """Names with no normalization entry are returned unchanged."""
    for unchanged in ("A2A protocols", "Some Unknown Category"):
        assert normalize_category(unchanged) == unchanged
# ---- Config ----
def test_config_load_defaults():
    """A Config built with no arguments carries the default settings."""
    defaults = Config()
    assert defaults.ollama_url == "http://localhost:11434"
    # Only require that some default model is set, not a specific one.
    assert defaults.claude_model != ""
    assert defaults.fetch_delay == 0.5
def test_config_save_and_load(tmp_path):
    """A Config rebuilt from a JSON file should match the original Config.

    This test writes the JSON by hand and rebuilds the Config through its
    dataclass fields; it does not call any save/load helper on Config.
    """
    cfg = Config(
        data_dir=str(tmp_path),
        db_path=str(tmp_path / "test.db"),
        claude_model="claude-test-model",
    )
    # Write a config file under tmp_path with the same values as `cfg`.
    config_file = tmp_path / "config.json"
    config_file.write_text(
        json.dumps({
            "data_dir": str(tmp_path),
            "db_path": str(tmp_path / "test.db"),
            "claude_model": "claude-test-model",
            "ollama_url": "http://localhost:11434",
            "search_keywords": ["agent", "ai-agent"],
        }),
        encoding="utf-8",
    )
    # Read it back, keeping only keys that are real Config fields.
    data = json.loads(config_file.read_text(encoding="utf-8"))
    loaded = Config(**{k: v for k, v in data.items() if k in Config.__dataclass_fields__})
    assert loaded.claude_model == "claude-test-model"
    assert loaded.db_path == str(tmp_path / "test.db")
    # Compare against the original object so `cfg` actually takes part in
    # the round trip instead of being an unused local.
    assert loaded.data_dir == cfg.data_dir
    assert loaded.db_path == cfg.db_path
    assert loaded.claude_model == cfg.claude_model
def test_config_search_keywords():
    """The default keyword list contains the core terms and matches DEFAULT_KEYWORDS in size."""
    keywords = Config().search_keywords
    for term in ("agent", "mcp", "agentic"):
        assert term in keywords
    assert len(keywords) == len(DEFAULT_KEYWORDS)
def _patch_config_file(monkeypatch, tmp_path):
    """Redirect ``ietf_analyzer.config.CONFIG_FILE`` to ``tmp_path / "config.json"``.

    The helper never creates that file, so the intent (per the original
    docstring) is that tests fall back to default settings.
    """
    from ietf_analyzer import config as cfg_module

    redirected = tmp_path / "config.json"
    monkeypatch.setattr(cfg_module, "CONFIG_FILE", redirected)
def test_config_env_var_override(tmp_path, monkeypatch):
    """``IETF_ANALYZER_*`` environment variables are reflected in the loaded Config."""
    _patch_config_file(monkeypatch, tmp_path)
    env_db_path = str(tmp_path / "env.db")
    overrides = {
        "IETF_ANALYZER_DB_PATH": env_db_path,
        "IETF_ANALYZER_CLAUDE_MODEL": "claude-from-env",
        "IETF_ANALYZER_OLLAMA_URL": "http://remote:11434",
    }
    for variable, value in overrides.items():
        monkeypatch.setenv(variable, value)

    loaded = Config.load()
    assert loaded.db_path == env_db_path
    assert loaded.claude_model == "claude-from-env"
    assert loaded.ollama_url == "http://remote:11434"
def test_config_validation_bad_model(tmp_path, monkeypatch):
    """Loading with an empty ``claude_model`` raises a ValueError naming the field."""
    _patch_config_file(monkeypatch, tmp_path)
    monkeypatch.setenv("IETF_ANALYZER_CLAUDE_MODEL", "")
    expectation = pytest.raises(ValueError, match="claude_model")
    with expectation:
        Config.load()
def test_config_validation_bad_url(tmp_path, monkeypatch):
    """Loading with a non-URL ``ollama_url`` raises a ValueError naming the field."""
    _patch_config_file(monkeypatch, tmp_path)
    monkeypatch.setenv("IETF_ANALYZER_OLLAMA_URL", "not-a-url")
    expectation = pytest.raises(ValueError, match="ollama_url")
    with expectation:
        Config.load()
def test_config_validation_bad_db_path(tmp_path, monkeypatch):
    """Loading with a ``db_path`` under a missing directory raises a ValueError naming the field."""
    _patch_config_file(monkeypatch, tmp_path)
    monkeypatch.setenv("IETF_ANALYZER_DB_PATH", "/nonexistent/dir/test.db")
    expectation = pytest.raises(ValueError, match="db_path")
    with expectation:
        Config.load()

158
tests/test_web_data.py Normal file
View File

@@ -0,0 +1,158 @@
"""Tests for src/webui/data.py data access functions."""
from __future__ import annotations
import sys
from functools import wraps
from pathlib import Path
import pytest
# Make the project's ``src`` directory importable so ``webui`` resolves.
# The project root is taken to be two levels above this test file.
_project_root = Path(__file__).resolve().parents[1]
_src_dir = str(_project_root / "src")
if _src_dir not in sys.path:
    sys.path.insert(0, _src_dir)
from webui.data import (
get_overview_stats,
get_category_counts,
get_drafts_page,
get_draft_detail,
get_ideas_by_type,
get_all_gaps,
get_timeline_data,
get_top_authors,
)
def _skip_on_missing_module(fn):
    """Decorate a test so that a missing module or attribute skips it.

    If the wrapped test raises ModuleNotFoundError or AttributeError, the
    test is reported as skipped with the error text; otherwise its return
    value is passed through.
    """
    @wraps(fn)
    def wrapper(*args, **kwargs):
        try:
            outcome = fn(*args, **kwargs)
        except (ModuleNotFoundError, AttributeError) as e:
            # pytest.skip raises, so nothing is returned on this path.
            pytest.skip(f"webui.data depends on module not in this worktree: {e}")
        else:
            return outcome
    return wrapper
def test_get_overview_stats(seeded_db):
    """Overview stats report the counts present in the seeded database."""
    stats = get_overview_stats(seeded_db)

    # idea_count: 2 + 1 + 1 = 4 ideas in seeded data; no gaps are seeded.
    expected_counts = {
        "total_drafts": 5,
        "rated_count": 5,
        "author_count": 3,
        "idea_count": 4,
        "gap_count": 0,
    }
    for key, expected in expected_counts.items():
        assert stats[key] == expected
    for token_key in ("input_tokens", "output_tokens"):
        assert token_key in stats
def test_get_category_counts(seeded_db):
    """Category counts are a dict that includes the seeded categories."""
    per_category = get_category_counts(seeded_db)
    assert isinstance(per_category, dict)
    assert "A2A protocols" in per_category
    # Exactly one seeded rating is in the "A2A protocols" category.
    assert per_category["A2A protocols"] == 1
    assert "ML traffic mgmt" in per_category
@_skip_on_missing_module
def test_get_drafts_page_basic(seeded_db):
    """The first page of three holds three of the five drafts plus paging metadata."""
    page = get_drafts_page(seeded_db, page=1, per_page=3)
    assert len(page["drafts"]) == 3
    # Five drafts at three per page gives two pages.
    expected_meta = {"total": 5, "page": 1, "per_page": 3, "pages": 2}
    for key, expected in expected_meta.items():
        assert page[key] == expected
@_skip_on_missing_module
def test_get_drafts_page_with_category_filter(seeded_db):
    """A category filter narrows the page to drafts in that category."""
    page = get_drafts_page(seeded_db, category="A2A protocols")
    assert page["total"] == 1
    only_match = page["drafts"][0]
    assert only_match["categories"] == ["A2A protocols"]
@_skip_on_missing_module
def test_get_drafts_page_with_search_filter(seeded_db):
    """Searching for "alpha" returns only the alpha draft."""
    page = get_drafts_page(seeded_db, search="alpha")
    assert page["total"] == 1
    only_match = page["drafts"][0]
    assert "alpha" in only_match["name"]
@_skip_on_missing_module
def test_get_drafts_page_empty_search(seeded_db):
    """A search term that matches nothing gives zero total and no drafts."""
    page = get_drafts_page(seeded_db, search="zzzznonexistent")
    assert page["total"] == 0
    assert page["drafts"] == []
@_skip_on_missing_module
def test_get_draft_detail(seeded_db):
    """Draft detail bundles the draft with its rating, authors, ideas and refs."""
    detail = get_draft_detail(seeded_db, "draft-alpha-agent-comm")
    assert detail is not None
    assert detail["name"] == "draft-alpha-agent-comm"
    assert detail["title"] == "Alpha Agent Communication"
    assert "rating" in detail
    assert detail["rating"]["novelty"] == 4
    # Related collections match what was seeded for the alpha draft.
    expected_sizes = {"authors": 2, "ideas": 2, "refs": 3}
    for key, size in expected_sizes.items():
        assert len(detail[key]) == size
@_skip_on_missing_module
def test_get_draft_detail_not_found(seeded_db):
    """Requesting detail for an unknown draft name returns None."""
    detail = get_draft_detail(seeded_db, "draft-nonexistent")
    assert detail is None
def test_get_ideas_by_type(seeded_db):
    """Ideas are totalled and grouped into a ``by_type`` dict."""
    summary = get_ideas_by_type(seeded_db)
    assert summary["total"] == 4
    assert "by_type" in summary
    grouped = summary["by_type"]
    assert isinstance(grouped, dict)
    # Seeded ideas use the protocol, mechanism and extension types; at least
    # one of the first two must be present.
    assert any(kind in grouped for kind in ("protocol", "mechanism"))
def test_get_all_gaps_empty(seeded_db):
    """The seeded database has no gaps, so the result is an empty list."""
    found = get_all_gaps(seeded_db)
    assert found == []
def test_get_all_gaps_with_data(seeded_db):
    """A gap inserted into the database is returned by ``get_all_gaps``."""
    new_gap = {"topic": "Gap A", "description": "Desc A", "severity": "high", "evidence": "Ev A"}
    seeded_db.insert_gaps([new_gap])

    found = get_all_gaps(seeded_db)
    assert len(found) == 1
    assert found[0]["topic"] == "Gap A"
def test_get_timeline_data(seeded_db):
    """Timeline data exposes months, series and categories."""
    timeline = get_timeline_data(seeded_db)
    for key in ("months", "series", "categories"):
        assert key in timeline
    # Seeded drafts are dated January to May 2025, so at least one month
    # must be present.
    assert len(timeline["months"]) >= 1
def test_get_top_authors(seeded_db):
    """The top-authors list is non-empty and led by an author with two or more drafts."""
    ranking = get_top_authors(seeded_db, limit=10)
    assert len(ranking) >= 1
    leader = ranking[0]
    for key in ("name", "draft_count"):
        assert key in leader
    assert leader["draft_count"] >= 2