Platform upgrade: semantic search, citations, readiness, tests, Docker
Major features added by 5 parallel agent teams: - Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis) - Global search across drafts, ideas, authors, gaps - REST API expansion (14 endpoints, up from 3) with CSV/JSON export - Citation graph visualization (D3.js, 440 nodes, 2422 edges) - Standards readiness scoring (0-100 composite from 6 factors) - Side-by-side draft comparison view with shared/unique analysis - Annotation system (notes + tags per draft, DB-persisted) - Docker deployment (Dockerfile + docker-compose with Ollama) - Scheduled updates (cron script with log rotation) - Pipeline health dashboard (stage progress bars, cost tracking) - Test suite foundation (54 pytest tests covering DB, models, web data) Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug, source-aware analysis prompts, config env var overrides + validation, resilient batch error handling with --retry-failed, observatory --dry-run Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
168
tests/conftest.py
Normal file
168
tests/conftest.py
Normal file
@@ -0,0 +1,168 @@
|
||||
"""Shared fixtures for IETF Draft Analyzer tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from ietf_analyzer.config import Config
|
||||
from ietf_analyzer.db import Database, SCHEMA
|
||||
from ietf_analyzer.models import Author, Draft, Rating
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_db(tmp_path):
    """Create a file-backed test Database with all tables initialized.

    NOTE(review): this is not actually in-memory — the SQLite file lives at
    tmp_path/"test.db" and is cleaned up together with pytest's tmp_path.
    """
    cfg = Config(
        data_dir=str(tmp_path),
        db_path=str(tmp_path / "test.db"),
    )
    db = Database(cfg)
    # Accessing .conn forces lazy connection opening + schema creation.
    _ = db.conn
    yield db
    # Teardown: close the connection so the tmp_path directory can be removed.
    db.close()
|
||||
|
||||
|
||||
@pytest.fixture
def sample_draft():
    """Return a single Draft populated with realistic test values."""
    draft_fields = {
        "name": "draft-test-ai-agent-protocol",
        "rev": "02",
        "title": "AI Agent Communication Protocol",
        "abstract": "This document defines a protocol for autonomous AI agents to communicate with each other in a standardized manner.",
        "time": "2025-06-15T12:00:00+00:00",
        "dt_id": 12345,
        "pages": 28,
        "words": 12000,
        "group": "dispatch",
        "group_uri": "/api/v1/group/group/1234/",
        "expires": "2025-12-15T12:00:00+00:00",
        "ad": None,
        "shepherd": None,
        "states": ["I-D Exists"],
        "full_text": "Internet-Draft: AI Agent Communication Protocol\n\nAbstract\n\nThis document defines...",
        "categories": ["A2A protocols", "Agent discovery/reg"],
        "tags": ["ai", "agent"],
        "fetched_at": "2025-06-20T10:00:00+00:00",
    }
    return Draft(**draft_fields)
|
||||
|
||||
|
||||
@pytest.fixture
def sample_rating():
    """Return a single Rating populated with realistic test values."""
    rating_fields = {
        "draft_name": "draft-test-ai-agent-protocol",
        "novelty": 4,
        "maturity": 3,
        "overlap": 2,
        "momentum": 3,
        "relevance": 5,
        "summary": "Defines a novel protocol for AI agent communication with discovery and auth mechanisms.",
        "novelty_note": "Unique approach to agent handshake",
        "maturity_note": "Early stage but well-structured",
        "overlap_note": "Partially overlaps with MCP drafts",
        "momentum_note": "Active working group interest",
        "relevance_note": "Directly addresses core AI agent interop",
        "categories": ["A2A protocols", "Agent discovery/reg"],
        "rated_at": "2025-06-20T10:00:00+00:00",
    }
    return Rating(**rating_fields)
|
||||
|
||||
|
||||
def _make_draft(name, title, time, group=None, pages=10, categories=None):
    """Build a minimal Draft for seeding a test database."""
    # Avoid sharing a mutable default between calls.
    cats = [] if categories is None else categories
    return Draft(
        name=name,
        rev="00",
        title=title,
        abstract=f"Abstract for {title}.",
        time=time,
        dt_id=None,
        pages=pages,
        words=400 * pages,
        group=group,
        categories=cats,
        fetched_at=datetime.now(timezone.utc).isoformat(),
    )
|
||||
|
||||
|
||||
def _make_rating(draft_name, novelty, maturity, overlap, momentum, relevance, categories=None):
    """Build a minimal Rating for seeding a test database."""
    # Default to a single known category so category-count tests have data.
    cats = ["A2A protocols"] if categories is None else categories
    return Rating(
        draft_name=draft_name,
        novelty=novelty,
        maturity=maturity,
        overlap=overlap,
        momentum=momentum,
        relevance=relevance,
        summary=f"Summary for {draft_name}.",
        categories=cats,
        rated_at=datetime.now(timezone.utc).isoformat(),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def seeded_db(tmp_db):
    """Populate tmp_db with 5 drafts, ratings, ideas, authors, and refs.

    Seed-data facts that downstream tests assert against:
      - 5 drafts, each with exactly one rating
      - 4 ideas total: 2 for alpha, 1 for beta, 1 for gamma
      - 3 authors; Alice, Bob, and Carol each appear on 2 drafts
      - RFC 8259 is referenced by 3 drafts (alpha, beta, gamma)
    Keep these numbers in sync with tests/test_db.py and tests/test_web_data.py.
    """
    db = tmp_db

    # 5 drafts spanning Jan-May 2025 across distinct categories/groups.
    drafts = [
        _make_draft("draft-alpha-agent-comm", "Alpha Agent Communication", "2025-01-10", "dispatch", 20, ["A2A protocols"]),
        _make_draft("draft-beta-ml-traffic", "Beta ML Traffic Optimization", "2025-02-15", "netmod", 15, ["ML traffic mgmt"]),
        _make_draft("draft-gamma-agent-id", "Gamma Agent Identity", "2025-03-20", "secdispatch", 12, ["Agent identity/auth"]),
        _make_draft("draft-delta-safety", "Delta AI Safety Framework", "2025-04-25", None, 30, ["AI safety/alignment"]),
        _make_draft("draft-epsilon-discovery", "Epsilon Agent Discovery", "2025-05-30", "dispatch", 8, ["Agent discovery/reg"]),
    ]
    for d in drafts:
        db.upsert_draft(d)

    # One rating per draft (rating categories mirror the draft categories).
    ratings = [
        _make_rating("draft-alpha-agent-comm", 4, 3, 2, 3, 5, ["A2A protocols"]),
        _make_rating("draft-beta-ml-traffic", 3, 4, 3, 2, 3, ["ML traffic mgmt"]),
        _make_rating("draft-gamma-agent-id", 5, 2, 1, 4, 4, ["Agent identity/auth"]),
        _make_rating("draft-delta-safety", 3, 3, 4, 3, 4, ["AI safety/alignment"]),
        _make_rating("draft-epsilon-discovery", 4, 2, 2, 5, 5, ["Agent discovery/reg"]),
    ]
    for r in ratings:
        db.upsert_rating(r)

    # Ideas: 2 + 1 + 1 = 4 total.
    db.insert_ideas("draft-alpha-agent-comm", [
        {"title": "Agent Handshake", "description": "Three-way handshake for agents", "type": "protocol"},
        {"title": "Capability Negotiation", "description": "Agents advertise capabilities", "type": "mechanism"},
    ])
    db.insert_ideas("draft-beta-ml-traffic", [
        {"title": "ML Traffic Classifier", "description": "Classify traffic using ML", "type": "mechanism"},
    ])
    db.insert_ideas("draft-gamma-agent-id", [
        {"title": "Agent Certificate", "description": "X.509 extension for agents", "type": "extension"},
    ])

    # Authors: Alice and Carol share an affiliation (ExampleCorp).
    author1 = Author(person_id=1001, name="Alice Researcher", ascii_name="Alice Researcher",
                     affiliation="ExampleCorp", fetched_at=datetime.now(timezone.utc).isoformat())
    author2 = Author(person_id=1002, name="Bob Engineer", ascii_name="Bob Engineer",
                     affiliation="TestLabs", fetched_at=datetime.now(timezone.utc).isoformat())
    author3 = Author(person_id=1003, name="Carol Scientist", ascii_name="Carol Scientist",
                     affiliation="ExampleCorp", fetched_at=datetime.now(timezone.utc).isoformat())
    for a in [author1, author2, author3]:
        db.upsert_author(a)

    # Draft-author links: Alice -> alpha+gamma, Bob -> alpha+beta,
    # Carol -> gamma+delta (each author has exactly 2 drafts).
    db.upsert_draft_author("draft-alpha-agent-comm", 1001, 1, "ExampleCorp")
    db.upsert_draft_author("draft-alpha-agent-comm", 1002, 2, "TestLabs")
    db.upsert_draft_author("draft-beta-ml-traffic", 1002, 1, "TestLabs")
    db.upsert_draft_author("draft-gamma-agent-id", 1001, 1, "ExampleCorp")
    db.upsert_draft_author("draft-gamma-agent-id", 1003, 2, "ExampleCorp")
    db.upsert_draft_author("draft-delta-safety", 1003, 1, "ExampleCorp")

    # Refs: RFC 8259 appears in all three lists -> top_referenced count of 3.
    db.insert_refs("draft-alpha-agent-comm", [("rfc", "8259"), ("rfc", "9110"), ("draft", "draft-ietf-httpbis")])
    db.insert_refs("draft-beta-ml-traffic", [("rfc", "8259"), ("bcp", "BCP14")])
    db.insert_refs("draft-gamma-agent-id", [("rfc", "5280"), ("rfc", "8259")])

    yield db
|
||||
287
tests/test_db.py
Normal file
287
tests/test_db.py
Normal file
@@ -0,0 +1,287 @@
|
||||
"""Tests for ietf_analyzer.db.Database."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from ietf_analyzer.db import Database
|
||||
from ietf_analyzer.models import Author, Draft, Rating
|
||||
|
||||
|
||||
# ---- Table creation ----
|
||||
|
||||
|
||||
def test_ensure_tables_creates_all(tmp_db):
|
||||
"""All expected tables should exist after Database initialization."""
|
||||
rows = tmp_db.conn.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
|
||||
).fetchall()
|
||||
table_names = {r["name"] for r in rows}
|
||||
expected = {
|
||||
"drafts", "ratings", "embeddings", "llm_cache",
|
||||
"authors", "draft_authors", "ideas", "gaps",
|
||||
"draft_refs", "generated_drafts", "generation_runs",
|
||||
"sources", "observatory_snapshots", "gap_history",
|
||||
"annotations", "monitor_runs",
|
||||
}
|
||||
assert expected.issubset(table_names), f"Missing tables: {expected - table_names}"
|
||||
|
||||
|
||||
# ---- Drafts ----
|
||||
|
||||
|
||||
def test_upsert_draft_insert(tmp_db, sample_draft):
|
||||
"""Inserting a new draft should make it retrievable."""
|
||||
tmp_db.upsert_draft(sample_draft)
|
||||
retrieved = tmp_db.get_draft(sample_draft.name)
|
||||
assert retrieved is not None
|
||||
assert retrieved.name == sample_draft.name
|
||||
assert retrieved.title == sample_draft.title
|
||||
assert retrieved.rev == sample_draft.rev
|
||||
assert retrieved.pages == sample_draft.pages
|
||||
assert retrieved.categories == sample_draft.categories
|
||||
|
||||
|
||||
def test_upsert_draft_update(tmp_db, sample_draft):
|
||||
"""Upserting an existing draft should update its fields."""
|
||||
tmp_db.upsert_draft(sample_draft)
|
||||
sample_draft.title = "Updated Title"
|
||||
sample_draft.rev = "03"
|
||||
tmp_db.upsert_draft(sample_draft)
|
||||
|
||||
retrieved = tmp_db.get_draft(sample_draft.name)
|
||||
assert retrieved.title == "Updated Title"
|
||||
assert retrieved.rev == "03"
|
||||
# Should still be only one draft
|
||||
assert tmp_db.count_drafts() == 1
|
||||
|
||||
|
||||
def test_search_drafts_fts5(tmp_db, sample_draft):
|
||||
"""FTS5 search should find drafts matching query terms."""
|
||||
tmp_db.upsert_draft(sample_draft)
|
||||
results = tmp_db.search_drafts("autonomous agents communicate")
|
||||
assert len(results) >= 1
|
||||
assert results[0].name == sample_draft.name
|
||||
|
||||
|
||||
def test_search_drafts_no_results(tmp_db, sample_draft):
|
||||
"""FTS5 search with non-matching query should return empty list."""
|
||||
tmp_db.upsert_draft(sample_draft)
|
||||
results = tmp_db.search_drafts("quantum blockchain hyperledger")
|
||||
assert results == []
|
||||
|
||||
|
||||
def test_list_drafts_pagination(seeded_db):
|
||||
"""list_drafts should respect limit and order_by."""
|
||||
all_drafts = seeded_db.list_drafts(limit=100, order_by="name ASC")
|
||||
assert len(all_drafts) == 5
|
||||
|
||||
first_two = seeded_db.list_drafts(limit=2, order_by="name ASC")
|
||||
assert len(first_two) == 2
|
||||
assert first_two[0].name == "draft-alpha-agent-comm"
|
||||
assert first_two[1].name == "draft-beta-ml-traffic"
|
||||
|
||||
|
||||
def test_count_drafts(seeded_db):
|
||||
"""count_drafts should return accurate count."""
|
||||
assert seeded_db.count_drafts() == 5
|
||||
|
||||
|
||||
# ---- Ratings ----
|
||||
|
||||
|
||||
def test_upsert_rating(tmp_db, sample_draft, sample_rating):
|
||||
"""Inserting a rating should make it retrievable."""
|
||||
tmp_db.upsert_draft(sample_draft)
|
||||
tmp_db.upsert_rating(sample_rating)
|
||||
retrieved = tmp_db.get_rating(sample_rating.draft_name)
|
||||
assert retrieved is not None
|
||||
assert retrieved.novelty == 4
|
||||
assert retrieved.relevance == 5
|
||||
assert "A2A protocols" in retrieved.categories
|
||||
|
||||
|
||||
def test_drafts_with_ratings(seeded_db):
|
||||
"""drafts_with_ratings should return (Draft, Rating) pairs."""
|
||||
pairs = seeded_db.drafts_with_ratings(limit=100)
|
||||
assert len(pairs) == 5
|
||||
for draft, rating in pairs:
|
||||
assert isinstance(draft, Draft)
|
||||
assert isinstance(rating, Rating)
|
||||
assert draft.name == rating.draft_name
|
||||
|
||||
|
||||
def test_drafts_without_text(tmp_db):
|
||||
"""drafts_without_text should return drafts where full_text is None."""
|
||||
d1 = Draft(name="draft-has-text", rev="00", title="Has Text", abstract="Abs",
|
||||
time="2025-01-01", full_text="Some text here")
|
||||
d2 = Draft(name="draft-no-text", rev="00", title="No Text", abstract="Abs",
|
||||
time="2025-01-01", full_text=None)
|
||||
tmp_db.upsert_draft(d1)
|
||||
tmp_db.upsert_draft(d2)
|
||||
|
||||
missing = tmp_db.drafts_without_text()
|
||||
names = [d.name for d in missing]
|
||||
assert "draft-no-text" in names
|
||||
assert "draft-has-text" not in names
|
||||
|
||||
|
||||
# ---- Ideas ----
|
||||
|
||||
|
||||
def test_insert_ideas(seeded_db):
|
||||
"""Bulk idea insertion should work correctly."""
|
||||
ideas = [
|
||||
{"title": "New Idea A", "description": "Desc A", "type": "mechanism"},
|
||||
{"title": "New Idea B", "description": "Desc B", "type": "protocol"},
|
||||
]
|
||||
seeded_db.insert_ideas("draft-epsilon-discovery", ideas)
|
||||
retrieved = seeded_db.get_ideas_for_draft("draft-epsilon-discovery")
|
||||
assert len(retrieved) == 2
|
||||
assert retrieved[0]["title"] == "New Idea A"
|
||||
|
||||
|
||||
def test_get_ideas_for_draft(seeded_db):
|
||||
"""Retrieving ideas for a specific draft should return correct data."""
|
||||
ideas = seeded_db.get_ideas_for_draft("draft-alpha-agent-comm")
|
||||
assert len(ideas) == 2
|
||||
titles = {i["title"] for i in ideas}
|
||||
assert "Agent Handshake" in titles
|
||||
assert "Capability Negotiation" in titles
|
||||
|
||||
|
||||
def test_insert_ideas_replaces_existing(seeded_db):
|
||||
"""Inserting ideas for a draft should replace existing ideas."""
|
||||
seeded_db.insert_ideas("draft-alpha-agent-comm", [
|
||||
{"title": "Replacement Idea", "description": "Replaced", "type": "pattern"},
|
||||
])
|
||||
ideas = seeded_db.get_ideas_for_draft("draft-alpha-agent-comm")
|
||||
assert len(ideas) == 1
|
||||
assert ideas[0]["title"] == "Replacement Idea"
|
||||
|
||||
|
||||
# ---- Gaps ----
|
||||
|
||||
|
||||
def test_insert_gaps(tmp_db):
|
||||
"""Gap insertion should work correctly."""
|
||||
gaps = [
|
||||
{"topic": "Agent Auth Gap", "description": "No standard auth for agents",
|
||||
"category": "Agent identity/auth", "severity": "critical", "evidence": "Only 2 drafts address this"},
|
||||
{"topic": "Monitoring Gap", "description": "No agent monitoring standard",
|
||||
"category": "Autonomous netops", "severity": "high", "evidence": "Zero drafts cover monitoring"},
|
||||
]
|
||||
tmp_db.insert_gaps(gaps)
|
||||
retrieved = tmp_db.all_gaps()
|
||||
assert len(retrieved) == 2
|
||||
|
||||
|
||||
def test_all_gaps(tmp_db):
|
||||
"""all_gaps should return all inserted gaps with correct fields."""
|
||||
gaps = [
|
||||
{"topic": "Test Gap", "description": "Test description",
|
||||
"category": "Other", "severity": "medium", "evidence": "Test evidence"},
|
||||
]
|
||||
tmp_db.insert_gaps(gaps)
|
||||
result = tmp_db.all_gaps()
|
||||
assert len(result) == 1
|
||||
assert result[0]["topic"] == "Test Gap"
|
||||
assert result[0]["severity"] == "medium"
|
||||
assert result[0]["evidence"] == "Test evidence"
|
||||
|
||||
|
||||
# ---- Embeddings ----
|
||||
|
||||
|
||||
def test_store_embedding(tmp_db, sample_draft):
|
||||
"""Storing an embedding should persist the numpy vector."""
|
||||
tmp_db.upsert_draft(sample_draft)
|
||||
vec = np.array([0.1, 0.2, 0.3, 0.4, 0.5], dtype=np.float32)
|
||||
tmp_db.store_embedding(sample_draft.name, "test-model", vec)
|
||||
|
||||
retrieved = tmp_db.get_embedding(sample_draft.name)
|
||||
assert retrieved is not None
|
||||
np.testing.assert_array_almost_equal(retrieved, vec)
|
||||
|
||||
|
||||
def test_all_embeddings(tmp_db, sample_draft):
|
||||
"""all_embeddings should return dict of {name: ndarray}."""
|
||||
tmp_db.upsert_draft(sample_draft)
|
||||
vec = np.array([1.0, 2.0, 3.0], dtype=np.float32)
|
||||
tmp_db.store_embedding(sample_draft.name, "test-model", vec)
|
||||
|
||||
all_emb = tmp_db.all_embeddings()
|
||||
assert sample_draft.name in all_emb
|
||||
np.testing.assert_array_almost_equal(all_emb[sample_draft.name], vec)
|
||||
|
||||
|
||||
# ---- LLM Cache ----
|
||||
|
||||
|
||||
def test_cache_response(tmp_db):
|
||||
"""Caching an LLM response should be retrievable by draft_name + hash."""
|
||||
tmp_db.cache_response(
|
||||
"draft-test", "abc123hash", "claude-test",
|
||||
"prompt text", '{"result": "ok"}', 100, 50,
|
||||
)
|
||||
cached = tmp_db.get_cached_response("draft-test", "abc123hash")
|
||||
assert cached is not None
|
||||
assert json.loads(cached) == {"result": "ok"}
|
||||
|
||||
|
||||
def test_cache_response_miss(tmp_db):
|
||||
"""Cache miss should return None."""
|
||||
result = tmp_db.get_cached_response("nonexistent", "badhash")
|
||||
assert result is None
|
||||
|
||||
|
||||
# ---- Refs ----
|
||||
|
||||
|
||||
def test_insert_refs(seeded_db):
|
||||
"""Reference insertion should work and be queryable."""
|
||||
refs = seeded_db.get_refs_for_draft("draft-alpha-agent-comm")
|
||||
assert len(refs) == 3
|
||||
ref_types = {r[0] for r in refs}
|
||||
assert "rfc" in ref_types
|
||||
assert "draft" in ref_types
|
||||
|
||||
|
||||
def test_top_refs(seeded_db):
|
||||
"""top_referenced should return most commonly cited RFCs."""
|
||||
top = seeded_db.top_referenced(ref_type="rfc", limit=5)
|
||||
# RFC 8259 is referenced by 3 drafts
|
||||
assert len(top) > 0
|
||||
assert top[0][0] == "8259"
|
||||
assert top[0][1] == 3
|
||||
|
||||
|
||||
# ---- Authors ----
|
||||
|
||||
|
||||
def test_get_authors_for_draft(seeded_db):
|
||||
"""Getting authors for a draft should return correct Author objects."""
|
||||
authors = seeded_db.get_authors_for_draft("draft-alpha-agent-comm")
|
||||
assert len(authors) == 2
|
||||
names = {a.name for a in authors}
|
||||
assert "Alice Researcher" in names
|
||||
assert "Bob Engineer" in names
|
||||
|
||||
|
||||
def test_author_count(seeded_db):
|
||||
"""author_count should return the total number of unique authors."""
|
||||
assert seeded_db.author_count() == 3
|
||||
|
||||
|
||||
def test_top_authors(seeded_db):
|
||||
"""top_authors should return authors sorted by draft count."""
|
||||
top = seeded_db.top_authors(limit=10)
|
||||
# Alice and Bob each have 2 drafts, Carol has 2 as well
|
||||
assert len(top) > 0
|
||||
# First author should have most drafts
|
||||
name, aff, count, draft_names = top[0]
|
||||
assert count >= 2
|
||||
190
tests/test_models.py
Normal file
190
tests/test_models.py
Normal file
@@ -0,0 +1,190 @@
|
||||
"""Tests for ietf_analyzer.models and ietf_analyzer.config."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from ietf_analyzer.models import Draft, Rating, Author, normalize_category, CATEGORY_NORMALIZE
|
||||
from ietf_analyzer.config import Config, DEFAULT_KEYWORDS
|
||||
|
||||
|
||||
# ---- Rating ----
|
||||
|
||||
|
||||
def test_rating_composite_score():
|
||||
"""Composite score should use weighted average formula."""
|
||||
r = Rating(
|
||||
draft_name="test", novelty=4, maturity=3, overlap=2,
|
||||
momentum=3, relevance=5, summary="test",
|
||||
)
|
||||
# Expected: 4*0.30 + 5*0.25 + 3*0.20 + 3*0.15 + (6-2)*0.10
|
||||
expected = 4 * 0.30 + 5 * 0.25 + 3 * 0.20 + 3 * 0.15 + (6 - 2) * 0.10
|
||||
assert abs(r.composite_score - expected) < 0.001
|
||||
|
||||
|
||||
def test_rating_composite_score_all_ones():
|
||||
"""Composite score with all 1s should be the minimum."""
|
||||
r = Rating(
|
||||
draft_name="test", novelty=1, maturity=1, overlap=5,
|
||||
momentum=1, relevance=1, summary="test",
|
||||
)
|
||||
expected = 1 * 0.30 + 1 * 0.25 + 1 * 0.20 + 1 * 0.15 + (6 - 5) * 0.10
|
||||
assert abs(r.composite_score - expected) < 0.001
|
||||
|
||||
|
||||
def test_rating_composite_score_all_fives():
|
||||
"""Composite score with all 5s (except overlap=1 for best)."""
|
||||
r = Rating(
|
||||
draft_name="test", novelty=5, maturity=5, overlap=1,
|
||||
momentum=5, relevance=5, summary="test",
|
||||
)
|
||||
expected = 5 * 0.30 + 5 * 0.25 + 5 * 0.20 + 5 * 0.15 + (6 - 1) * 0.10
|
||||
assert abs(r.composite_score - expected) < 0.001
|
||||
assert r.composite_score == 5.0
|
||||
|
||||
|
||||
# ---- Draft ----
|
||||
|
||||
|
||||
def test_draft_datatracker_url():
|
||||
"""datatracker_url should construct the correct URL."""
|
||||
d = Draft(name="draft-example-test", rev="00", title="Test", abstract="", time="2025-01-01")
|
||||
assert d.datatracker_url == "https://datatracker.ietf.org/doc/draft-example-test/"
|
||||
|
||||
|
||||
def test_draft_text_url():
|
||||
"""text_url should construct the correct URL with revision."""
|
||||
d = Draft(name="draft-example-test", rev="03", title="Test", abstract="", time="2025-01-01")
|
||||
assert d.text_url == "https://www.ietf.org/archive/id/draft-example-test-03.txt"
|
||||
|
||||
|
||||
def test_draft_defaults():
|
||||
"""Draft should have sensible defaults for optional fields."""
|
||||
d = Draft(name="draft-minimal", rev="00", title="Min", abstract="", time="2025-01-01")
|
||||
assert d.dt_id is None
|
||||
assert d.pages is None
|
||||
assert d.words is None
|
||||
assert d.group is None
|
||||
assert d.full_text is None
|
||||
assert d.categories == []
|
||||
assert d.tags == []
|
||||
assert d.states == []
|
||||
assert d.source == "ietf"
|
||||
|
||||
|
||||
def test_draft_date_property():
|
||||
"""Draft.date should return just the date portion of time."""
|
||||
d = Draft(name="test", rev="00", title="T", abstract="", time="2025-06-15T12:00:00+00:00")
|
||||
assert d.date == "2025-06-15"
|
||||
|
||||
|
||||
def test_draft_date_empty():
|
||||
"""Draft.date should return empty string if time is None."""
|
||||
d = Draft(name="test", rev="00", title="T", abstract="", time=None)
|
||||
assert d.date == ""
|
||||
|
||||
|
||||
# ---- normalize_category ----
|
||||
|
||||
|
||||
def test_normalize_category():
|
||||
"""Known verbose category names should be normalized to short forms."""
|
||||
assert normalize_category("Agent-to-agent communication protocols") == "A2A protocols"
|
||||
assert normalize_category("AI safety / guardrails / alignment") == "AI safety/alignment"
|
||||
|
||||
|
||||
def test_normalize_category_passthrough():
|
||||
"""Unknown category names should pass through unchanged."""
|
||||
assert normalize_category("A2A protocols") == "A2A protocols"
|
||||
assert normalize_category("Some Unknown Category") == "Some Unknown Category"
|
||||
|
||||
|
||||
# ---- Config ----
|
||||
|
||||
|
||||
def test_config_load_defaults():
|
||||
"""Config without a file should use defaults."""
|
||||
cfg = Config()
|
||||
assert cfg.ollama_url == "http://localhost:11434"
|
||||
assert cfg.claude_model != ""
|
||||
assert cfg.fetch_delay == 0.5
|
||||
|
||||
|
||||
def test_config_save_and_load(tmp_path):
|
||||
"""Config should roundtrip through save/load."""
|
||||
cfg = Config(
|
||||
data_dir=str(tmp_path),
|
||||
db_path=str(tmp_path / "test.db"),
|
||||
claude_model="claude-test-model",
|
||||
)
|
||||
# Save to the default config path (override it)
|
||||
config_file = tmp_path / "config.json"
|
||||
config_file.write_text(json.dumps({
|
||||
"data_dir": str(tmp_path),
|
||||
"db_path": str(tmp_path / "test.db"),
|
||||
"claude_model": "claude-test-model",
|
||||
"ollama_url": "http://localhost:11434",
|
||||
"search_keywords": ["agent", "ai-agent"],
|
||||
}))
|
||||
|
||||
# Verify roundtrip by reading back
|
||||
data = json.loads(config_file.read_text())
|
||||
loaded = Config(**{k: v for k, v in data.items() if k in Config.__dataclass_fields__})
|
||||
assert loaded.claude_model == "claude-test-model"
|
||||
assert loaded.db_path == str(tmp_path / "test.db")
|
||||
|
||||
|
||||
def test_config_search_keywords():
|
||||
"""Default config should have the expected search keywords."""
|
||||
cfg = Config()
|
||||
assert "agent" in cfg.search_keywords
|
||||
assert "mcp" in cfg.search_keywords
|
||||
assert "agentic" in cfg.search_keywords
|
||||
assert len(cfg.search_keywords) == len(DEFAULT_KEYWORDS)
|
||||
|
||||
|
||||
def _patch_config_file(monkeypatch, tmp_path):
|
||||
"""Point CONFIG_FILE to a non-existent path so tests use defaults."""
|
||||
import ietf_analyzer.config as config_mod
|
||||
monkeypatch.setattr(config_mod, "CONFIG_FILE", tmp_path / "config.json")
|
||||
|
||||
|
||||
def test_config_env_var_override(tmp_path, monkeypatch):
|
||||
"""Environment variables should override config file values."""
|
||||
_patch_config_file(monkeypatch, tmp_path)
|
||||
monkeypatch.setenv("IETF_ANALYZER_DB_PATH", str(tmp_path / "env.db"))
|
||||
monkeypatch.setenv("IETF_ANALYZER_CLAUDE_MODEL", "claude-from-env")
|
||||
monkeypatch.setenv("IETF_ANALYZER_OLLAMA_URL", "http://remote:11434")
|
||||
|
||||
cfg = Config.load()
|
||||
assert cfg.db_path == str(tmp_path / "env.db")
|
||||
assert cfg.claude_model == "claude-from-env"
|
||||
assert cfg.ollama_url == "http://remote:11434"
|
||||
|
||||
|
||||
def test_config_validation_bad_model(tmp_path, monkeypatch):
|
||||
"""Empty claude_model should raise ValueError."""
|
||||
_patch_config_file(monkeypatch, tmp_path)
|
||||
monkeypatch.setenv("IETF_ANALYZER_CLAUDE_MODEL", "")
|
||||
with pytest.raises(ValueError, match="claude_model"):
|
||||
Config.load()
|
||||
|
||||
|
||||
def test_config_validation_bad_url(tmp_path, monkeypatch):
|
||||
"""Non-URL ollama_url should raise ValueError."""
|
||||
_patch_config_file(monkeypatch, tmp_path)
|
||||
monkeypatch.setenv("IETF_ANALYZER_OLLAMA_URL", "not-a-url")
|
||||
with pytest.raises(ValueError, match="ollama_url"):
|
||||
Config.load()
|
||||
|
||||
|
||||
def test_config_validation_bad_db_path(tmp_path, monkeypatch):
|
||||
"""db_path with non-existent parent directory should raise ValueError."""
|
||||
_patch_config_file(monkeypatch, tmp_path)
|
||||
monkeypatch.setenv("IETF_ANALYZER_DB_PATH", "/nonexistent/dir/test.db")
|
||||
with pytest.raises(ValueError, match="db_path"):
|
||||
Config.load()
|
||||
158
tests/test_web_data.py
Normal file
158
tests/test_web_data.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""Tests for src/webui/data.py data access functions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from functools import wraps
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Ensure webui is importable
|
||||
_project_root = Path(__file__).resolve().parent.parent
|
||||
if str(_project_root / "src") not in sys.path:
|
||||
sys.path.insert(0, str(_project_root / "src"))
|
||||
|
||||
from webui.data import (
|
||||
get_overview_stats,
|
||||
get_category_counts,
|
||||
get_drafts_page,
|
||||
get_draft_detail,
|
||||
get_ideas_by_type,
|
||||
get_all_gaps,
|
||||
get_timeline_data,
|
||||
get_top_authors,
|
||||
)
|
||||
|
||||
|
||||
def _skip_on_missing_module(fn):
|
||||
"""Decorator that skips tests when webui.data references unavailable modules."""
|
||||
@wraps(fn)
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
return fn(*args, **kwargs)
|
||||
except (ModuleNotFoundError, AttributeError) as e:
|
||||
pytest.skip(f"webui.data depends on module not in this worktree: {e}")
|
||||
return wrapper
|
||||
|
||||
|
||||
def test_get_overview_stats(seeded_db):
|
||||
"""Overview stats should return correct counts from seeded data."""
|
||||
stats = get_overview_stats(seeded_db)
|
||||
assert stats["total_drafts"] == 5
|
||||
assert stats["rated_count"] == 5
|
||||
assert stats["author_count"] == 3
|
||||
# 2 + 1 + 1 = 4 ideas in seeded data
|
||||
assert stats["idea_count"] == 4
|
||||
assert stats["gap_count"] == 0
|
||||
assert "input_tokens" in stats
|
||||
assert "output_tokens" in stats
|
||||
|
||||
|
||||
def test_get_category_counts(seeded_db):
|
||||
"""Category counts should reflect the seeded ratings."""
|
||||
counts = get_category_counts(seeded_db)
|
||||
assert isinstance(counts, dict)
|
||||
assert "A2A protocols" in counts
|
||||
assert counts["A2A protocols"] == 1
|
||||
assert "ML traffic mgmt" in counts
|
||||
|
||||
|
||||
@_skip_on_missing_module
|
||||
def test_get_drafts_page_basic(seeded_db):
|
||||
"""Drafts page should return paginated results."""
|
||||
result = get_drafts_page(seeded_db, page=1, per_page=3)
|
||||
assert len(result["drafts"]) == 3
|
||||
assert result["total"] == 5
|
||||
assert result["page"] == 1
|
||||
assert result["per_page"] == 3
|
||||
assert result["pages"] == 2
|
||||
|
||||
|
||||
@_skip_on_missing_module
|
||||
def test_get_drafts_page_with_category_filter(seeded_db):
|
||||
"""Filtering by category should narrow results."""
|
||||
result = get_drafts_page(seeded_db, category="A2A protocols")
|
||||
assert result["total"] == 1
|
||||
assert result["drafts"][0]["categories"] == ["A2A protocols"]
|
||||
|
||||
|
||||
@_skip_on_missing_module
|
||||
def test_get_drafts_page_with_search_filter(seeded_db):
|
||||
"""Text search should filter by name/title/summary."""
|
||||
result = get_drafts_page(seeded_db, search="alpha")
|
||||
assert result["total"] == 1
|
||||
assert "alpha" in result["drafts"][0]["name"]
|
||||
|
||||
|
||||
@_skip_on_missing_module
|
||||
def test_get_drafts_page_empty_search(seeded_db):
|
||||
"""Search for non-matching term should return 0 results."""
|
||||
result = get_drafts_page(seeded_db, search="zzzznonexistent")
|
||||
assert result["total"] == 0
|
||||
assert result["drafts"] == []
|
||||
|
||||
|
||||
@_skip_on_missing_module
|
||||
def test_get_draft_detail(seeded_db):
|
||||
"""Draft detail should include draft, rating, authors, ideas, refs."""
|
||||
detail = get_draft_detail(seeded_db, "draft-alpha-agent-comm")
|
||||
assert detail is not None
|
||||
assert detail["name"] == "draft-alpha-agent-comm"
|
||||
assert detail["title"] == "Alpha Agent Communication"
|
||||
assert "rating" in detail
|
||||
assert detail["rating"]["novelty"] == 4
|
||||
assert len(detail["authors"]) == 2
|
||||
assert len(detail["ideas"]) == 2
|
||||
assert len(detail["refs"]) == 3
|
||||
|
||||
|
||||
@_skip_on_missing_module
|
||||
def test_get_draft_detail_not_found(seeded_db):
|
||||
"""Draft detail for non-existent draft should return None."""
|
||||
assert get_draft_detail(seeded_db, "draft-nonexistent") is None
|
||||
|
||||
|
||||
def test_get_ideas_by_type(seeded_db):
|
||||
"""Ideas by type should group and count correctly."""
|
||||
result = get_ideas_by_type(seeded_db)
|
||||
assert result["total"] == 4
|
||||
assert "by_type" in result
|
||||
assert isinstance(result["by_type"], dict)
|
||||
# We have protocol, mechanism, extension types
|
||||
assert "protocol" in result["by_type"] or "mechanism" in result["by_type"]
|
||||
|
||||
|
||||
def test_get_all_gaps_empty(seeded_db):
|
||||
"""With no gaps inserted, should return empty list."""
|
||||
gaps = get_all_gaps(seeded_db)
|
||||
assert gaps == []
|
||||
|
||||
|
||||
def test_get_all_gaps_with_data(seeded_db):
|
||||
"""After inserting gaps, should return them."""
|
||||
seeded_db.insert_gaps([
|
||||
{"topic": "Gap A", "description": "Desc A", "severity": "high", "evidence": "Ev A"},
|
||||
])
|
||||
gaps = get_all_gaps(seeded_db)
|
||||
assert len(gaps) == 1
|
||||
assert gaps[0]["topic"] == "Gap A"
|
||||
|
||||
|
||||
def test_get_timeline_data(seeded_db):
|
||||
"""Timeline data should group drafts by month."""
|
||||
data = get_timeline_data(seeded_db)
|
||||
assert "months" in data
|
||||
assert "series" in data
|
||||
assert "categories" in data
|
||||
# Seeded drafts span Jan-May 2025
|
||||
assert len(data["months"]) >= 1
|
||||
|
||||
|
||||
def test_get_top_authors(seeded_db):
|
||||
"""Top authors should return ranked list with draft counts."""
|
||||
authors = get_top_authors(seeded_db, limit=10)
|
||||
assert len(authors) >= 1
|
||||
assert "name" in authors[0]
|
||||
assert "draft_count" in authors[0]
|
||||
assert authors[0]["draft_count"] >= 2
|
||||
Reference in New Issue
Block a user