Platform upgrade: semantic search, citations, readiness, tests, Docker

Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation,
resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 20:52:56 +01:00
parent da2a989744
commit 757b781c67
33 changed files with 4253 additions and 170 deletions

190
tests/test_models.py Normal file
View File

@@ -0,0 +1,190 @@
"""Tests for ietf_analyzer.models and ietf_analyzer.config."""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
from ietf_analyzer.models import Draft, Rating, Author, normalize_category, CATEGORY_NORMALIZE
from ietf_analyzer.config import Config, DEFAULT_KEYWORDS
# ---- Rating ----
def test_rating_composite_score():
    """The composite score should follow the documented weighted average."""
    rating = Rating(
        draft_name="test", novelty=4, maturity=3, overlap=2,
        momentum=3, relevance=5, summary="test",
    )
    # Weights: novelty 0.30, relevance 0.25, maturity 0.20, momentum 0.15,
    # and inverted overlap (6 - overlap) at 0.10.
    want = 4 * 0.30 + 5 * 0.25 + 3 * 0.20 + 3 * 0.15 + (6 - 2) * 0.10
    assert abs(rating.composite_score - want) < 0.001
def test_rating_composite_score_all_ones():
    """All-1 ratings (with worst-case overlap=5) should hit the floor value."""
    worst = Rating(
        draft_name="test", novelty=1, maturity=1, overlap=5,
        momentum=1, relevance=1, summary="test",
    )
    # overlap is inverted in the formula, so overlap=5 contributes (6-5)=1.
    floor = 1 * 0.30 + 1 * 0.25 + 1 * 0.20 + 1 * 0.15 + (6 - 5) * 0.10
    assert abs(worst.composite_score - floor) < 0.001
def test_rating_composite_score_all_fives():
    """Composite score with all 5s (except overlap=1 for best) maxes out at 5.0."""
    r = Rating(
        draft_name="test", novelty=5, maturity=5, overlap=5,
        momentum=5, relevance=5, summary="test",
    )
    r = Rating(
        draft_name="test", novelty=5, maturity=5, overlap=1,
        momentum=5, relevance=5, summary="test",
    )
    expected = 5 * 0.30 + 5 * 0.25 + 5 * 0.20 + 5 * 0.15 + (6 - 1) * 0.10
    assert abs(r.composite_score - expected) < 0.001
    # pytest.approx instead of exact ==: the score is a sum of float products,
    # so strict equality would be brittle if the implementation reorders terms.
    assert r.composite_score == pytest.approx(5.0)
# ---- Draft ----
def test_draft_datatracker_url():
    """The datatracker URL should embed the draft name (no revision suffix)."""
    draft = Draft(name="draft-example-test", rev="00", title="Test", abstract="", time="2025-01-01")
    expected = "https://datatracker.ietf.org/doc/draft-example-test/"
    assert draft.datatracker_url == expected
def test_draft_text_url():
    """The archive text URL should include both the name and the revision."""
    draft = Draft(name="draft-example-test", rev="03", title="Test", abstract="", time="2025-01-01")
    expected = "https://www.ietf.org/archive/id/draft-example-test-03.txt"
    assert draft.text_url == expected
def test_draft_defaults():
    """A minimally-constructed Draft should get sane defaults everywhere else."""
    draft = Draft(name="draft-minimal", rev="00", title="Min", abstract="", time="2025-01-01")
    # Scalar metadata fields default to None until enrichment fills them in.
    for attr in ("dt_id", "pages", "words", "group", "full_text"):
        assert getattr(draft, attr) is None
    # List fields start out empty (and must not share a mutable default).
    for attr in ("categories", "tags", "states"):
        assert getattr(draft, attr) == []
    assert draft.source == "ietf"
def test_draft_date_property():
    """The date property should strip the time-of-day from an ISO timestamp."""
    draft = Draft(name="test", rev="00", title="T", abstract="", time="2025-06-15T12:00:00+00:00")
    assert draft.date == "2025-06-15"
def test_draft_date_empty():
    """A Draft with no timestamp should expose an empty-string date."""
    draft = Draft(name="test", rev="00", title="T", abstract="", time=None)
    assert draft.date == ""
# ---- normalize_category ----
def test_normalize_category():
    """Verbose category names with known mappings should be shortened."""
    cases = {
        "Agent-to-agent communication protocols": "A2A protocols",
        "AI safety / guardrails / alignment": "AI safety/alignment",
    }
    for verbose, short in cases.items():
        assert normalize_category(verbose) == short
def test_normalize_category_passthrough():
    """Names with no mapping entry should come back verbatim."""
    for name in ("A2A protocols", "Some Unknown Category"):
        assert normalize_category(name) == name
# ---- Config ----
def test_config_load_defaults():
    """A bare Config() should carry usable dataclass defaults."""
    config = Config()
    assert config.ollama_url == "http://localhost:11434"
    # The model name is not pinned here, only that a default exists.
    assert config.claude_model != ""
    assert config.fetch_delay == 0.5
def test_config_save_and_load(tmp_path):
    """Config values serialized as JSON should rebuild into an equal Config.

    This exercises the on-disk JSON shape directly (write file -> read file ->
    construct Config from known dataclass fields) rather than calling
    Config.save(), so the user's real default config path is never touched.
    """
    config_file = tmp_path / "config.json"
    config_file.write_text(json.dumps({
        "data_dir": str(tmp_path),
        "db_path": str(tmp_path / "test.db"),
        "claude_model": "claude-test-model",
        "ollama_url": "http://localhost:11434",
        "search_keywords": ["agent", "ai-agent"],
    }))
    # Read back and rebuild, keeping only keys Config actually declares so
    # unknown/extra keys in the file cannot break construction.
    data = json.loads(config_file.read_text())
    loaded = Config(**{k: v for k, v in data.items() if k in Config.__dataclass_fields__})
    assert loaded.claude_model == "claude-test-model"
    assert loaded.db_path == str(tmp_path / "test.db")
def test_config_search_keywords():
    """The default keyword list should match DEFAULT_KEYWORDS and key terms."""
    keywords = Config().search_keywords
    for term in ("agent", "mcp", "agentic"):
        assert term in keywords
    # Same length as the canonical default list -> nothing dropped or added.
    assert len(keywords) == len(DEFAULT_KEYWORDS)
def _patch_config_file(monkeypatch, tmp_path):
    """Point CONFIG_FILE to a non-existent path so tests use defaults.

    Redirects the module-level CONFIG_FILE constant to a file under tmp_path
    that is never created, so Config.load() cannot pick up a developer's
    real on-disk configuration and tests start from pure defaults (plus any
    environment overrides the caller sets afterwards).
    """
    import ietf_analyzer.config as config_mod
    monkeypatch.setattr(config_mod, "CONFIG_FILE", tmp_path / "config.json")
def test_config_env_var_override(tmp_path, monkeypatch):
    """IETF_ANALYZER_* environment variables should win over file values."""
    _patch_config_file(monkeypatch, tmp_path)
    overrides = {
        "IETF_ANALYZER_DB_PATH": str(tmp_path / "env.db"),
        "IETF_ANALYZER_CLAUDE_MODEL": "claude-from-env",
        "IETF_ANALYZER_OLLAMA_URL": "http://remote:11434",
    }
    for var, value in overrides.items():
        monkeypatch.setenv(var, value)
    config = Config.load()
    assert config.db_path == str(tmp_path / "env.db")
    assert config.claude_model == "claude-from-env"
    assert config.ollama_url == "http://remote:11434"
def test_config_validation_bad_model(tmp_path, monkeypatch):
    """Loading with an empty claude_model should be rejected."""
    _patch_config_file(monkeypatch, tmp_path)
    monkeypatch.setenv("IETF_ANALYZER_CLAUDE_MODEL", "")
    # Validation should name the offending field in the error message.
    with pytest.raises(ValueError, match="claude_model"):
        Config.load()
def test_config_validation_bad_url(tmp_path, monkeypatch):
    """An ollama_url that is not a URL should be rejected on load."""
    _patch_config_file(monkeypatch, tmp_path)
    monkeypatch.setenv("IETF_ANALYZER_OLLAMA_URL", "not-a-url")
    # Validation should name the offending field in the error message.
    with pytest.raises(ValueError, match="ollama_url"):
        Config.load()
def test_config_validation_bad_db_path(tmp_path, monkeypatch):
    """db_path with non-existent parent directory should raise ValueError."""
    _patch_config_file(monkeypatch, tmp_path)
    # Build the bogus path under tmp_path so the test is portable: the old
    # hard-coded "/nonexistent/dir/test.db" assumed a POSIX filesystem root.
    missing_parent_db = tmp_path / "does-not-exist" / "test.db"
    monkeypatch.setenv("IETF_ANALYZER_DB_PATH", str(missing_parent_db))
    with pytest.raises(ValueError, match="db_path"):
        Config.load()