Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug, source-aware analysis prompts, config env var overrides + validation, resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
288 lines
9.4 KiB
Python
288 lines
9.4 KiB
Python
"""Tests for ietf_analyzer.db.Database."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from datetime import datetime, timezone
|
|
|
|
import numpy as np
|
|
import pytest
|
|
|
|
from ietf_analyzer.db import Database
|
|
from ietf_analyzer.models import Author, Draft, Rating
|
|
|
|
|
|
# ---- Table creation ----
|
|
|
|
|
|
def test_ensure_tables_creates_all(tmp_db):
    """Check that every expected table is present in the schema.

    The table list is read from ``sqlite_master`` through the fixture's
    connection; extra tables are tolerated, missing ones fail the test.
    """
    required = {
        "drafts", "ratings", "embeddings", "llm_cache",
        "authors", "draft_authors", "ideas", "gaps",
        "draft_refs", "generated_drafts", "generation_runs",
        "sources", "observatory_snapshots", "gap_history",
        "annotations", "monitor_runs",
    }
    cursor = tmp_db.conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
    )
    present = {row["name"] for row in cursor.fetchall()}
    # Report exactly which tables are absent rather than a bare failure.
    missing = required - present
    assert not missing, f"Missing tables: {missing}"
|
|
|
|
|
|
# ---- Drafts ----
|
|
|
|
|
|
def test_upsert_draft_insert(tmp_db, sample_draft):
    """A newly upserted draft can be read back with its fields intact."""
    tmp_db.upsert_draft(sample_draft)

    stored = tmp_db.get_draft(sample_draft.name)
    assert stored is not None
    # Compare the round-tripped fields one by one against the fixture.
    for field in ("name", "title", "rev", "pages", "categories"):
        assert getattr(stored, field) == getattr(sample_draft, field)
|
|
|
|
|
|
def test_upsert_draft_update(tmp_db, sample_draft):
    """Upserting an existing draft should update its fields.

    The draft is stored, modified, and stored again under the same name; the
    second upsert must overwrite the existing row rather than add a new one.
    """
    tmp_db.upsert_draft(sample_draft)

    # Change the fixture object and write it back under the same name.
    sample_draft.title = "Updated Title"
    sample_draft.rev = "03"
    tmp_db.upsert_draft(sample_draft)

    retrieved = tmp_db.get_draft(sample_draft.name)
    # Guard first so a missing row fails as a clear assertion, not as an
    # AttributeError on None (mirrors test_upsert_draft_insert).
    assert retrieved is not None
    assert retrieved.title == "Updated Title"
    assert retrieved.rev == "03"
    # Should still be only one draft
    assert tmp_db.count_drafts() == 1
|
|
|
|
|
|
def test_search_drafts_fts5(tmp_db, sample_draft):
    """Searching for terms matching the stored draft returns it first."""
    tmp_db.upsert_draft(sample_draft)

    query = "autonomous agents communicate"
    hits = tmp_db.search_drafts(query)

    assert len(hits) >= 1
    top_hit = hits[0]
    assert top_hit.name == sample_draft.name
|
|
|
|
|
|
def test_search_drafts_no_results(tmp_db, sample_draft):
    """A query that matches nothing yields an empty list."""
    tmp_db.upsert_draft(sample_draft)

    query = "quantum blockchain hyperledger"
    hits = tmp_db.search_drafts(query)

    assert hits == []
|
|
|
|
|
|
def test_list_drafts_pagination(seeded_db):
    """list_drafts honours both ``limit`` and ``order_by``.

    Expects the seeded fixture to hold five drafts, with the two
    alphabetically first names asserted below.
    """
    everything = seeded_db.list_drafts(limit=100, order_by="name ASC")
    assert len(everything) == 5

    page = seeded_db.list_drafts(limit=2, order_by="name ASC")
    assert len(page) == 2
    # The length check above makes this two-way unpacking safe.
    first, second = page
    assert first.name == "draft-alpha-agent-comm"
    assert second.name == "draft-beta-ml-traffic"
|
|
|
|
|
|
def test_count_drafts(seeded_db):
    """count_drafts reports the number of drafts in the seeded database."""
    total = seeded_db.count_drafts()
    assert total == 5
|
|
|
|
|
|
# ---- Ratings ----
|
|
|
|
|
|
def test_upsert_rating(tmp_db, sample_draft, sample_rating):
    """A stored rating can be fetched again by its draft name."""
    # The draft is stored before the rating that refers to it.
    tmp_db.upsert_draft(sample_draft)
    tmp_db.upsert_rating(sample_rating)

    stored = tmp_db.get_rating(sample_rating.draft_name)

    assert stored is not None
    assert stored.novelty == 4
    assert stored.relevance == 5
    assert "A2A protocols" in stored.categories
|
|
|
|
|
|
def test_drafts_with_ratings(seeded_db):
    """drafts_with_ratings yields matching (Draft, Rating) pairs."""
    rated = seeded_db.drafts_with_ratings(limit=100)
    assert len(rated) == 5

    for a_draft, its_rating in rated:
        assert isinstance(a_draft, Draft)
        assert isinstance(its_rating, Rating)
        # Each rating must belong to the draft it is paired with.
        assert a_draft.name == its_rating.draft_name
|
|
|
|
|
|
def test_drafts_without_text(tmp_db):
    """drafts_without_text lists only drafts whose full_text is None."""
    with_text = Draft(name="draft-has-text", rev="00", title="Has Text", abstract="Abs",
                      time="2025-01-01", full_text="Some text here")
    without_text = Draft(name="draft-no-text", rev="00", title="No Text", abstract="Abs",
                         time="2025-01-01", full_text=None)
    for draft in (with_text, without_text):
        tmp_db.upsert_draft(draft)

    missing_names = {d.name for d in tmp_db.drafts_without_text()}

    assert "draft-no-text" in missing_names
    assert "draft-has-text" not in missing_names
|
|
|
|
|
|
# ---- Ideas ----
|
|
|
|
|
|
def test_insert_ideas(seeded_db):
    """Inserting several ideas at once stores all of them for the draft."""
    target = "draft-epsilon-discovery"
    new_ideas = [
        {"title": "New Idea A", "description": "Desc A", "type": "mechanism"},
        {"title": "New Idea B", "description": "Desc B", "type": "protocol"},
    ]

    seeded_db.insert_ideas(target, new_ideas)
    stored = seeded_db.get_ideas_for_draft(target)

    assert len(stored) == 2
    first_idea = stored[0]
    assert first_idea["title"] == "New Idea A"
|
|
|
|
|
|
def test_get_ideas_for_draft(seeded_db):
    """Ideas seeded for a given draft are returned for that draft."""
    found = seeded_db.get_ideas_for_draft("draft-alpha-agent-comm")
    assert len(found) == 2

    found_titles = {idea["title"] for idea in found}
    # Both seeded titles must be present.
    assert {"Agent Handshake", "Capability Negotiation"} <= found_titles
|
|
|
|
|
|
def test_insert_ideas_replaces_existing(seeded_db):
    """A second insert_ideas call for a draft replaces its earlier ideas."""
    target = "draft-alpha-agent-comm"
    replacement = [
        {"title": "Replacement Idea", "description": "Replaced", "type": "pattern"},
    ]

    seeded_db.insert_ideas(target, replacement)
    remaining = seeded_db.get_ideas_for_draft(target)

    # Only the replacement should be left for this draft.
    assert len(remaining) == 1
    assert remaining[0]["title"] == "Replacement Idea"
|
|
|
|
|
|
# ---- Gaps ----
|
|
|
|
|
|
def test_insert_gaps(tmp_db):
    """Gap insertion should store every supplied gap.

    Checks both the number of rows and that the stored topics are exactly
    the ones inserted, so a miscounted or mangled insert is caught.
    """
    gaps = [
        {"topic": "Agent Auth Gap", "description": "No standard auth for agents",
         "category": "Agent identity/auth", "severity": "critical", "evidence": "Only 2 drafts address this"},
        {"topic": "Monitoring Gap", "description": "No agent monitoring standard",
         "category": "Autonomous netops", "severity": "high", "evidence": "Zero drafts cover monitoring"},
    ]
    tmp_db.insert_gaps(gaps)

    retrieved = tmp_db.all_gaps()
    assert len(retrieved) == 2
    # A matching row count alone does not prove the right rows were written;
    # compare topics as a set so the check does not depend on return order.
    assert {g["topic"] for g in retrieved} == {"Agent Auth Gap", "Monitoring Gap"}
|
|
|
|
|
|
def test_all_gaps(tmp_db):
    """all_gaps returns the inserted gap with its fields preserved."""
    tmp_db.insert_gaps([
        {"topic": "Test Gap", "description": "Test description",
         "category": "Other", "severity": "medium", "evidence": "Test evidence"},
    ])

    stored = tmp_db.all_gaps()
    assert len(stored) == 1

    row = stored[0]
    checked_fields = {
        "topic": "Test Gap",
        "severity": "medium",
        "evidence": "Test evidence",
    }
    for key, wanted in checked_fields.items():
        assert row[key] == wanted
|
|
|
|
|
|
# ---- Embeddings ----
|
|
|
|
|
|
def test_store_embedding(tmp_db, sample_draft):
    """A stored embedding vector reads back with (almost) equal values."""
    tmp_db.upsert_draft(sample_draft)
    original = np.array([0.1, 0.2, 0.3, 0.4, 0.5], dtype=np.float32)

    tmp_db.store_embedding(sample_draft.name, "test-model", original)
    loaded = tmp_db.get_embedding(sample_draft.name)

    assert loaded is not None
    np.testing.assert_array_almost_equal(loaded, original)
|
|
|
|
|
|
def test_all_embeddings(tmp_db, sample_draft):
    """all_embeddings maps the draft name to its stored vector."""
    tmp_db.upsert_draft(sample_draft)
    original = np.array([1.0, 2.0, 3.0], dtype=np.float32)
    tmp_db.store_embedding(sample_draft.name, "test-model", original)

    by_name = tmp_db.all_embeddings()

    assert sample_draft.name in by_name
    loaded = by_name[sample_draft.name]
    np.testing.assert_array_almost_equal(loaded, original)
|
|
|
|
|
|
# ---- LLM Cache ----
|
|
|
|
|
|
def test_cache_response(tmp_db):
    """A cached LLM response is found again by draft name and hash."""
    draft_name = "draft-test"
    prompt_hash = "abc123hash"
    payload = '{"result": "ok"}'

    # Arguments are passed positionally, exactly as the cache API is called.
    tmp_db.cache_response(
        draft_name, prompt_hash, "claude-test",
        "prompt text", payload, 100, 50,
    )
    hit = tmp_db.get_cached_response(draft_name, prompt_hash)

    assert hit is not None
    assert json.loads(hit) == {"result": "ok"}
|
|
|
|
|
|
def test_cache_response_miss(tmp_db):
    """Looking up an entry that was never cached returns None."""
    miss = tmp_db.get_cached_response("nonexistent", "badhash")
    assert miss is None
|
|
|
|
|
|
# ---- Refs ----
|
|
|
|
|
|
def test_insert_refs(seeded_db):
    """References for a seeded draft can be queried back.

    This test only reads; the references are presumably inserted by the
    ``seeded_db`` fixture (not visible here).
    """
    found = seeded_db.get_refs_for_draft("draft-alpha-agent-comm")
    assert len(found) == 3

    # The first element of each row is the reference type.
    kinds = {row[0] for row in found}
    assert {"rfc", "draft"} <= kinds
|
|
|
|
|
|
def test_top_refs(seeded_db):
    """top_referenced puts the most-cited RFC first with its citation count."""
    ranking = seeded_db.top_referenced(ref_type="rfc", limit=5)
    assert len(ranking) > 0

    # RFC 8259 is referenced by 3 drafts
    leader = ranking[0]
    assert leader[0] == "8259"
    assert leader[1] == 3
|
|
|
|
|
|
# ---- Authors ----
|
|
|
|
|
|
def test_get_authors_for_draft(seeded_db):
    """The two seeded authors of a draft are returned for it."""
    found = seeded_db.get_authors_for_draft("draft-alpha-agent-comm")
    assert len(found) == 2

    found_names = {author.name for author in found}
    assert {"Alice Researcher", "Bob Engineer"} <= found_names
|
|
|
|
|
|
def test_author_count(seeded_db):
    """author_count reports the number of unique authors in the seed data."""
    total = seeded_db.author_count()
    assert total == 3
|
|
|
|
|
|
def test_top_authors(seeded_db):
    """top_authors should return authors sorted by draft count.

    Each row is unpacked as ``(name, affiliation, count, draft_names)``.
    Besides checking the leading row, the test verifies that counts never
    increase down the list, which is the ordering this test is named for.
    """
    top = seeded_db.top_authors(limit=10)
    # Per the original note on the seed data: Alice and Bob each have 2
    # drafts, Carol has 2 as well.
    assert len(top) > 0

    # First author should have most drafts. The four-way unpacking also
    # pins the row shape; only the count is used here.
    _name, _affiliation, count, _draft_names = top[0]
    assert count >= 2

    # Ties are allowed, so only require a non-increasing sequence.
    counts = [row[2] for row in top]
    assert counts == sorted(counts, reverse=True)
|