Files
ietf-draft-analyzer/tests/test_db.py
Christian Nennemann 757b781c67 Platform upgrade: semantic search, citations, readiness, tests, Docker
Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation,
resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 20:52:56 +01:00

288 lines
9.4 KiB
Python

"""Tests for ietf_analyzer.db.Database."""
from __future__ import annotations
import json
from datetime import datetime, timezone
import numpy as np
import pytest
from ietf_analyzer.db import Database
from ietf_analyzer.models import Author, Draft, Rating
# ---- Table creation ----
def test_ensure_tables_creates_all(tmp_db):
    """A freshly initialised database contains every table the tests rely on.

    Table names are read from ``sqlite_master``. The check is a subset test,
    so tables beyond the expected set do not cause a failure.
    """
    expected = {
        "drafts",
        "ratings",
        "embeddings",
        "llm_cache",
        "authors",
        "draft_authors",
        "ideas",
        "gaps",
        "draft_refs",
        "generated_drafts",
        "generation_runs",
        "sources",
        "observatory_snapshots",
        "gap_history",
        "annotations",
        "monitor_runs",
    }

    cursor = tmp_db.conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
    )
    table_names = {row["name"] for row in cursor.fetchall()}

    # On failure the message lists exactly which tables are absent.
    assert expected <= table_names, f"Missing tables: {expected - table_names}"
# ---- Drafts ----
def test_upsert_draft_insert(tmp_db, sample_draft):
    """A draft written with ``upsert_draft`` can be read back via ``get_draft``.

    The round trip is verified on name, title, rev, pages and categories.
    """
    tmp_db.upsert_draft(sample_draft)

    stored = tmp_db.get_draft(sample_draft.name)

    assert stored is not None
    # Compare field by field so a failure names the attribute that differs.
    for field in ("name", "title", "rev", "pages", "categories"):
        assert getattr(stored, field) == getattr(sample_draft, field), field
def test_upsert_draft_update(tmp_db, sample_draft):
    """Upserting a draft whose name already exists updates the stored fields.

    The draft is stored once, its ``title`` and ``rev`` are changed on the same
    object, and it is upserted again. The copy read back must carry the new
    values, and ``count_drafts`` must still report exactly one draft.
    """
    tmp_db.upsert_draft(sample_draft)

    sample_draft.title = "Updated Title"
    sample_draft.rev = "03"
    tmp_db.upsert_draft(sample_draft)

    retrieved = tmp_db.get_draft(sample_draft.name)
    # Guard first so a missing row fails as a clear assertion rather than an
    # AttributeError on ``None`` (mirrors test_upsert_draft_insert).
    assert retrieved is not None
    assert retrieved.title == "Updated Title"
    assert retrieved.rev == "03"
    # Should still be only one draft
    assert tmp_db.count_drafts() == 1
def test_search_drafts_fts5(tmp_db, sample_draft):
    """``search_drafts`` finds a stored draft whose text matches the query terms.

    The first hit is expected to be the sample draft itself.
    """
    tmp_db.upsert_draft(sample_draft)

    query = "autonomous agents communicate"
    hits = tmp_db.search_drafts(query)

    assert len(hits) > 0
    best_hit = hits[0]
    assert best_hit.name == sample_draft.name
def test_search_drafts_no_results(tmp_db, sample_draft):
    """``search_drafts`` returns an empty list when nothing matches the query."""
    tmp_db.upsert_draft(sample_draft)

    unmatched_query = "quantum blockchain hyperledger"

    assert tmp_db.search_drafts(unmatched_query) == []
def test_list_drafts_pagination(seeded_db):
    """``list_drafts`` honours both ``limit`` and ``order_by``.

    With a generous limit all five seeded drafts are expected; with
    ``limit=2`` only the two alphabetically first names are expected.
    """
    ordering = "name ASC"

    everything = seeded_db.list_drafts(limit=100, order_by=ordering)
    assert len(everything) == 5

    first_page = seeded_db.list_drafts(limit=2, order_by=ordering)
    assert len(first_page) == 2
    assert [draft.name for draft in first_page] == [
        "draft-alpha-agent-comm",
        "draft-beta-ml-traffic",
    ]
def test_count_drafts(seeded_db):
    """``count_drafts`` reports the five drafts expected in the seeded database."""
    total = seeded_db.count_drafts()
    assert total == 5
# ---- Ratings ----
def test_upsert_rating(tmp_db, sample_draft, sample_rating):
    """A rating stored with ``upsert_rating`` can be read back via ``get_rating``.

    The draft is upserted before its rating. The novelty, relevance and
    category values asserted here are the ones the ``sample_rating`` fixture
    is expected to carry.
    """
    tmp_db.upsert_draft(sample_draft)
    tmp_db.upsert_rating(sample_rating)

    stored = tmp_db.get_rating(sample_rating.draft_name)

    assert stored is not None
    assert (stored.novelty, stored.relevance) == (4, 5)
    assert "A2A protocols" in stored.categories
def test_drafts_with_ratings(seeded_db):
    """``drafts_with_ratings`` yields matching ``(Draft, Rating)`` pairs.

    Five pairs are expected from the seeded database, and within each pair the
    rating must refer to the draft it is paired with.
    """
    rated = seeded_db.drafts_with_ratings(limit=100)

    assert len(rated) == 5
    for entry in rated:
        draft_obj, rating_obj = entry
        assert isinstance(draft_obj, Draft)
        assert isinstance(rating_obj, Rating)
        assert rating_obj.draft_name == draft_obj.name
def test_drafts_without_text(tmp_db):
    """``drafts_without_text`` lists only drafts whose ``full_text`` is ``None``.

    Two drafts are stored that differ in whether ``full_text`` is set; only the
    one without text may appear in the result.
    """
    shared_fields = {"rev": "00", "abstract": "Abs", "time": "2025-01-01"}
    with_text = Draft(
        name="draft-has-text",
        title="Has Text",
        full_text="Some text here",
        **shared_fields,
    )
    without_text = Draft(
        name="draft-no-text",
        title="No Text",
        full_text=None,
        **shared_fields,
    )
    for draft in (with_text, without_text):
        tmp_db.upsert_draft(draft)

    missing_names = {draft.name for draft in tmp_db.drafts_without_text()}

    assert "draft-no-text" in missing_names
    assert "draft-has-text" not in missing_names
# ---- Ideas ----
def test_insert_ideas(seeded_db):
    """Ideas inserted in bulk for a draft are returned by ``get_ideas_for_draft``.

    Two ideas are inserted for one seeded draft; both must come back, with the
    first inserted idea in first position.
    """
    target = "draft-epsilon-discovery"
    new_ideas = [
        {"title": "New Idea A", "description": "Desc A", "type": "mechanism"},
        {"title": "New Idea B", "description": "Desc B", "type": "protocol"},
    ]

    seeded_db.insert_ideas(target, new_ideas)
    stored = seeded_db.get_ideas_for_draft(target)

    assert len(stored) == 2
    first = stored[0]
    assert first["title"] == "New Idea A"
def test_get_ideas_for_draft(seeded_db):
    """``get_ideas_for_draft`` returns the ideas belonging to the named draft.

    The seeded draft is expected to have exactly two ideas with known titles.
    """
    found = seeded_db.get_ideas_for_draft("draft-alpha-agent-comm")

    assert len(found) == 2
    found_titles = {idea["title"] for idea in found}
    assert {"Agent Handshake", "Capability Negotiation"} <= found_titles
def test_insert_ideas_replaces_existing(seeded_db):
    """``insert_ideas`` replaces, rather than appends to, a draft's ideas.

    After inserting a single idea for a draft that already has seeded ideas,
    only the new idea may remain.
    """
    target = "draft-alpha-agent-comm"
    replacement = {
        "title": "Replacement Idea",
        "description": "Replaced",
        "type": "pattern",
    }

    seeded_db.insert_ideas(target, [replacement])
    remaining = seeded_db.get_ideas_for_draft(target)

    assert len(remaining) == 1
    assert remaining[0]["title"] == "Replacement Idea"
# ---- Gaps ----
def test_insert_gaps(tmp_db):
    """Gaps passed to ``insert_gaps`` are all returned by ``all_gaps``.

    Verifies both the number of stored gaps and that every inserted topic is
    present in the result.
    """
    gaps = [
        {"topic": "Agent Auth Gap", "description": "No standard auth for agents",
         "category": "Agent identity/auth", "severity": "critical",
         "evidence": "Only 2 drafts address this"},
        {"topic": "Monitoring Gap", "description": "No agent monitoring standard",
         "category": "Autonomous netops", "severity": "high",
         "evidence": "Zero drafts cover monitoring"},
    ]
    tmp_db.insert_gaps(gaps)
    retrieved = tmp_db.all_gaps()
    assert len(retrieved) == 2
    # A count check alone would still pass for duplicated or wrong rows, so
    # compare the topics as well. Sets keep the check independent of the order
    # in which all_gaps returns its rows.
    assert {g["topic"] for g in retrieved} == {g["topic"] for g in gaps}
def test_all_gaps(tmp_db):
    """``all_gaps`` returns an inserted gap with its fields intact.

    A single gap is inserted and its topic, severity and evidence are checked
    on the one row that comes back.
    """
    tmp_db.insert_gaps([
        {
            "topic": "Test Gap",
            "description": "Test description",
            "category": "Other",
            "severity": "medium",
            "evidence": "Test evidence",
        },
    ])

    stored = tmp_db.all_gaps()

    assert len(stored) == 1
    only_gap = stored[0]
    assert only_gap["topic"] == "Test Gap"
    assert only_gap["severity"] == "medium"
    assert only_gap["evidence"] == "Test evidence"
# ---- Embeddings ----
def test_store_embedding(tmp_db, sample_draft):
    """A vector saved with ``store_embedding`` is returned by ``get_embedding``.

    The comparison uses NumPy's almost-equal check rather than exact equality.
    """
    tmp_db.upsert_draft(sample_draft)
    original = np.array([0.1, 0.2, 0.3, 0.4, 0.5], dtype=np.float32)

    tmp_db.store_embedding(sample_draft.name, "test-model", original)
    loaded = tmp_db.get_embedding(sample_draft.name)

    assert loaded is not None
    np.testing.assert_array_almost_equal(loaded, original)
def test_all_embeddings(tmp_db, sample_draft):
    """``all_embeddings`` maps each draft name to its stored vector."""
    tmp_db.upsert_draft(sample_draft)
    original = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    tmp_db.store_embedding(sample_draft.name, "test-model", original)

    by_name = tmp_db.all_embeddings()

    assert sample_draft.name in by_name
    loaded = by_name[sample_draft.name]
    np.testing.assert_array_almost_equal(loaded, original)
# ---- LLM Cache ----
def test_cache_response(tmp_db):
    """A cached LLM response can be fetched again by draft name and hash.

    The stored payload is a JSON string; it is decoded before comparison.
    """
    draft_name, prompt_hash = "draft-test", "abc123hash"
    payload = '{"result": "ok"}'

    tmp_db.cache_response(
        draft_name, prompt_hash, "claude-test", "prompt text", payload, 100, 50,
    )
    hit = tmp_db.get_cached_response(draft_name, prompt_hash)

    assert hit is not None
    assert json.loads(hit) == {"result": "ok"}
def test_cache_response_miss(tmp_db):
    """Looking up a response that was never cached yields ``None``."""
    assert tmp_db.get_cached_response("nonexistent", "badhash") is None
# ---- Refs ----
def test_insert_refs(seeded_db):
    """References stored for a draft are queryable via ``get_refs_for_draft``.

    This test performs no insertion itself; it reads the references already
    present in the seeded database. Three references are expected for the
    draft, covering both the ``rfc`` and ``draft`` reference types.
    """
    stored_refs = seeded_db.get_refs_for_draft("draft-alpha-agent-comm")

    assert len(stored_refs) == 3
    # The first element of each returned row is the reference type.
    kinds = {ref[0] for ref in stored_refs}
    assert {"rfc", "draft"} <= kinds
def test_top_refs(seeded_db):
    """``top_referenced`` puts the most frequently cited RFC first.

    In the seeded data RFC 8259 is expected to lead with three citing drafts.
    """
    ranking = seeded_db.top_referenced(ref_type="rfc", limit=5)

    assert len(ranking) > 0
    leader = ranking[0]
    # Row layout used here: index 0 is the reference id, index 1 its count.
    assert leader[0] == "8259"
    assert leader[1] == 3
# ---- Authors ----
def test_get_authors_for_draft(seeded_db):
    """``get_authors_for_draft`` returns the authors linked to the named draft.

    The seeded draft is expected to have exactly two authors with known names.
    """
    linked = seeded_db.get_authors_for_draft("draft-alpha-agent-comm")

    assert len(linked) == 2
    author_names = {author.name for author in linked}
    assert {"Alice Researcher", "Bob Engineer"} <= author_names
def test_author_count(seeded_db):
    """``author_count`` reports the three unique authors expected in the seed data."""
    unique_authors = seeded_db.author_count()
    assert unique_authors == 3
def test_top_authors(seeded_db):
    """top_authors should return authors sorted by draft count, highest first.

    Each returned row is unpacked as a 4-tuple whose third element is the
    author's draft count; only that count is inspected here.
    """
    top = seeded_db.top_authors(limit=10)
    assert len(top) > 0
    # Underscore-prefixed names mark the row fields this test does not use.
    counts = [count for _name, _aff, count, _draft_names in top]
    # First author should have most drafts
    assert counts[0] >= 2
    # Verify the ordering the docstring promises: counts never increase.
    assert counts == sorted(counts, reverse=True)