"""Tests for ietf_analyzer.db.Database.""" from __future__ import annotations import json import numpy as np from ietf_analyzer.models import Draft, Rating # ---- Table creation ---- def test_ensure_tables_creates_all(tmp_db): """All expected tables should exist after Database initialization.""" rows = tmp_db.conn.execute( "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" ).fetchall() table_names = {r["name"] for r in rows} expected = { "drafts", "ratings", "embeddings", "llm_cache", "authors", "draft_authors", "ideas", "gaps", "draft_refs", "generated_drafts", "generation_runs", "sources", "observatory_snapshots", "gap_history", "annotations", "monitor_runs", } assert expected.issubset(table_names), f"Missing tables: {expected - table_names}" # ---- Drafts ---- def test_upsert_draft_insert(tmp_db, sample_draft): """Inserting a new draft should make it retrievable.""" tmp_db.upsert_draft(sample_draft) retrieved = tmp_db.get_draft(sample_draft.name) assert retrieved is not None assert retrieved.name == sample_draft.name assert retrieved.title == sample_draft.title assert retrieved.rev == sample_draft.rev assert retrieved.pages == sample_draft.pages assert retrieved.categories == sample_draft.categories def test_upsert_draft_update(tmp_db, sample_draft): """Upserting an existing draft should update its fields.""" tmp_db.upsert_draft(sample_draft) sample_draft.title = "Updated Title" sample_draft.rev = "03" tmp_db.upsert_draft(sample_draft) retrieved = tmp_db.get_draft(sample_draft.name) assert retrieved.title == "Updated Title" assert retrieved.rev == "03" # Should still be only one draft assert tmp_db.count_drafts() == 1 def test_search_drafts_fts5(tmp_db, sample_draft): """FTS5 search should find drafts matching query terms.""" tmp_db.upsert_draft(sample_draft) results = tmp_db.search_drafts("autonomous agents communicate") assert len(results) >= 1 assert results[0].name == sample_draft.name def test_search_drafts_no_results(tmp_db, sample_draft): """FTS5 search with non-matching query should return empty list.""" tmp_db.upsert_draft(sample_draft) results = tmp_db.search_drafts("quantum blockchain hyperledger") assert results == [] def test_list_drafts_pagination(seeded_db): """list_drafts should respect limit and order_by.""" all_drafts = seeded_db.list_drafts(limit=100, order_by="name ASC") assert len(all_drafts) == 5 first_two = seeded_db.list_drafts(limit=2, order_by="name ASC") assert len(first_two) == 2 assert first_two[0].name == "draft-alpha-agent-comm" assert first_two[1].name == "draft-beta-ml-traffic" def test_count_drafts(seeded_db): """count_drafts should return accurate count.""" assert seeded_db.count_drafts() == 5 # ---- Ratings ---- def test_upsert_rating(tmp_db, sample_draft, sample_rating): """Inserting a rating should make it retrievable.""" tmp_db.upsert_draft(sample_draft) tmp_db.upsert_rating(sample_rating) retrieved = tmp_db.get_rating(sample_rating.draft_name) assert retrieved is not None assert retrieved.novelty == 4 assert retrieved.relevance == 5 assert "A2A protocols" in retrieved.categories def test_drafts_with_ratings(seeded_db): """drafts_with_ratings should return (Draft, Rating) pairs.""" pairs = seeded_db.drafts_with_ratings(limit=100) assert len(pairs) == 5 for draft, rating in pairs: assert isinstance(draft, Draft) assert isinstance(rating, Rating) assert draft.name == rating.draft_name def test_drafts_without_text(tmp_db): """drafts_without_text should return drafts where full_text is None.""" d1 = Draft(name="draft-has-text", rev="00", title="Has Text", abstract="Abs", time="2025-01-01", full_text="Some text here") d2 = Draft(name="draft-no-text", rev="00", title="No Text", abstract="Abs", time="2025-01-01", full_text=None) tmp_db.upsert_draft(d1) tmp_db.upsert_draft(d2) missing = tmp_db.drafts_without_text() names = [d.name for d in missing] assert "draft-no-text" in names assert "draft-has-text" not in names # ---- Ideas ---- def test_insert_ideas(seeded_db): """Bulk idea insertion should work correctly.""" ideas = [ {"title": "New Idea A", "description": "Desc A", "type": "mechanism"}, {"title": "New Idea B", "description": "Desc B", "type": "protocol"}, ] seeded_db.insert_ideas("draft-epsilon-discovery", ideas) retrieved = seeded_db.get_ideas_for_draft("draft-epsilon-discovery") assert len(retrieved) == 2 assert retrieved[0]["title"] == "New Idea A" def test_get_ideas_for_draft(seeded_db): """Retrieving ideas for a specific draft should return correct data.""" ideas = seeded_db.get_ideas_for_draft("draft-alpha-agent-comm") assert len(ideas) == 2 titles = {i["title"] for i in ideas} assert "Agent Handshake" in titles assert "Capability Negotiation" in titles def test_insert_ideas_replaces_existing(seeded_db): """Inserting ideas for a draft should replace existing ideas.""" seeded_db.insert_ideas("draft-alpha-agent-comm", [ {"title": "Replacement Idea", "description": "Replaced", "type": "pattern"}, ]) ideas = seeded_db.get_ideas_for_draft("draft-alpha-agent-comm") assert len(ideas) == 1 assert ideas[0]["title"] == "Replacement Idea" # ---- Gaps ---- def test_insert_gaps(tmp_db): """Gap insertion should work correctly.""" gaps = [ {"topic": "Agent Auth Gap", "description": "No standard auth for agents", "category": "Agent identity/auth", "severity": "critical", "evidence": "Only 2 drafts address this"}, {"topic": "Monitoring Gap", "description": "No agent monitoring standard", "category": "Autonomous netops", "severity": "high", "evidence": "Zero drafts cover monitoring"}, ] tmp_db.insert_gaps(gaps) retrieved = tmp_db.all_gaps() assert len(retrieved) == 2 def test_all_gaps(tmp_db): """all_gaps should return all inserted gaps with correct fields.""" gaps = [ {"topic": "Test Gap", "description": "Test description", "category": "Other", "severity": "medium", "evidence": "Test evidence"}, ] tmp_db.insert_gaps(gaps) result = tmp_db.all_gaps() assert len(result) == 1 assert result[0]["topic"] == "Test Gap" assert result[0]["severity"] == "medium" assert result[0]["evidence"] == "Test evidence" # ---- Embeddings ---- def test_store_embedding(tmp_db, sample_draft): """Storing an embedding should persist the numpy vector.""" tmp_db.upsert_draft(sample_draft) vec = np.array([0.1, 0.2, 0.3, 0.4, 0.5], dtype=np.float32) tmp_db.store_embedding(sample_draft.name, "test-model", vec) retrieved = tmp_db.get_embedding(sample_draft.name) assert retrieved is not None np.testing.assert_array_almost_equal(retrieved, vec) def test_all_embeddings(tmp_db, sample_draft): """all_embeddings should return dict of {name: ndarray}.""" tmp_db.upsert_draft(sample_draft) vec = np.array([1.0, 2.0, 3.0], dtype=np.float32) tmp_db.store_embedding(sample_draft.name, "test-model", vec) all_emb = tmp_db.all_embeddings() assert sample_draft.name in all_emb np.testing.assert_array_almost_equal(all_emb[sample_draft.name], vec) # ---- LLM Cache ---- def test_cache_response(tmp_db): """Caching an LLM response should be retrievable by draft_name + hash.""" tmp_db.cache_response( "draft-test", "abc123hash", "claude-test", "prompt text", '{"result": "ok"}', 100, 50, ) cached = tmp_db.get_cached_response("draft-test", "abc123hash") assert cached is not None assert json.loads(cached) == {"result": "ok"} def test_cache_response_miss(tmp_db): """Cache miss should return None.""" result = tmp_db.get_cached_response("nonexistent", "badhash") assert result is None # ---- Refs ---- def test_insert_refs(seeded_db): """Reference insertion should work and be queryable.""" refs = seeded_db.get_refs_for_draft("draft-alpha-agent-comm") assert len(refs) == 3 ref_types = {r[0] for r in refs} assert "rfc" in ref_types assert "draft" in ref_types def test_top_refs(seeded_db): """top_referenced should return most commonly cited RFCs.""" top = seeded_db.top_referenced(ref_type="rfc", limit=5) # RFC 8259 is referenced by 3 drafts assert len(top) > 0 assert top[0][0] == "8259" assert top[0][1] == 3 # ---- Authors ---- def test_get_authors_for_draft(seeded_db): """Getting authors for a draft should return correct Author objects.""" authors = seeded_db.get_authors_for_draft("draft-alpha-agent-comm") assert len(authors) == 2 names = {a.name for a in authors} assert "Alice Researcher" in names assert "Bob Engineer" in names def test_author_count(seeded_db): """author_count should return the total number of unique authors.""" assert seeded_db.author_count() == 3 def test_top_authors(seeded_db): """top_authors should return authors sorted by draft count.""" top = seeded_db.top_authors(limit=10) # Alice and Bob each have 2 drafts, Carol has 2 as well assert len(top) > 0 # First author should have most drafts name, aff, count, draft_names = top[0] assert count >= 2