Files
ietf-draft-analyzer/tests/test_analyzer.py
Christian Nennemann 20c45a7eba Complete remaining medium/low issues: performance, CLI, types, CI, tests
Performance:
- Batch readiness computation (~200 queries → ~6 per page)
- Batch draft lookup in author network (N+1 → single query)
- File-based similarity matrix cache (.npy + metadata sidecar)
- 5-minute TTL embedding cache for search queries

CLI quality:
- Add pass_cfg_db decorator, convert ~30 commands to shared config/db lifecycle
- Add --dry-run to analyze, embed, embed-ideas, ideas, gaps commands
- Move 15+ in-function imports to top of data.py

Types & documentation:
- Add 16 TypedDicts to data.py, annotate 12 function return types
- Add ethics section to Post 06 (premature standardization, power asymmetry)
- Add EU AI Act Article 43 conformity mapping to Post 06
- Add NIS2 and CRA references to Post 04

CI & testing:
- Add GitHub Actions CI workflow (Python 3.11+3.12, ruff, pytest)
- Add API documentation for all 20 endpoints (data/reports/api-docs.md)
- Add 41 new tests (test_analyzer.py, test_search.py) — 64 total pass

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 14:06:54 +01:00

167 lines
5.4 KiB
Python

"""Tests for pure functions in ietf_analyzer.analyzer (no API calls)."""
from __future__ import annotations
import json
import pytest
from ietf_analyzer.analyzer import Analyzer
from ietf_analyzer.models import Rating
# ---- _extract_json ----
class TestExtractJson:
    """Exercise ``Analyzer._extract_json`` on fenced and unfenced JSON text."""

    @staticmethod
    def _extract(text: str) -> str:
        """Run ``Analyzer._extract_json`` on *text* without a real instance."""
        # _extract_json is an instance method that does not rely on self, so
        # None stands in for the instance. This avoids constructing a full
        # Analyzer, which needs an API key.
        extract_json = Analyzer._extract_json
        return extract_json(None, text)

    def test_plain_json(self):
        expected = '{"key": "value"}'
        assert self._extract('{"key": "value"}') == expected

    def test_json_with_fences(self):
        expected = '{"key": "value"}'
        assert self._extract('```json\n{"key": "value"}\n```') == expected

    def test_json_with_plain_fences(self):
        expected = '{"key": "value"}'
        assert self._extract('```\n{"key": "value"}\n```') == expected

    def test_json_with_whitespace(self):
        expected = '{"key": "value"}'
        assert self._extract(' \n {"key": "value"} \n ') == expected

    def test_json_array_with_fences(self):
        extracted = self._extract('```json\n[{"a": 1}, {"b": 2}]\n```')
        # Compare the decoded value so the check does not depend on the
        # exact characters left around the JSON payload.
        assert json.loads(extracted) == [{"a": 1}, {"b": 2}]

    def test_multiline_json_with_fences(self):
        extracted = self._extract(
            '```json\n{\n "key": "value",\n "num": 42\n}\n```'
        )
        decoded = json.loads(extracted)
        assert decoded == {"key": "value", "num": 42}

    def test_no_fences_passthrough(self):
        expected = '[1, 2, 3]'
        assert self._extract('[1, 2, 3]') == expected

    def test_empty_string(self):
        extracted = self._extract('')
        assert extracted == ''

    def test_fences_with_trailing_whitespace(self):
        extracted = self._extract('```json\n{"ok": true}\n``` \n')
        assert json.loads(extracted) == {"ok": True}
# ---- _clamp_rating ----
class TestClampRating:
    """Check ``Analyzer._clamp_rating`` bounds, coercion and fallback values."""

    def test_normal_values(self):
        # Values already inside the default range come back unchanged.
        for value in (3, 1, 5):
            assert Analyzer._clamp_rating(value) == value

    def test_clamp_high(self):
        for too_big in (10, 99):
            assert Analyzer._clamp_rating(too_big) == 5

    def test_clamp_low(self):
        for too_small in (0, -5):
            assert Analyzer._clamp_rating(too_small) == 1

    def test_float_truncated(self):
        # Floats are expected to lose their fractional part, not be rounded.
        for given, wanted in ((3.7, 3), (4.9, 4)):
            assert Analyzer._clamp_rating(given) == wanted

    def test_string_number(self):
        for text, wanted in (("4", 4), ("1", 1)):
            assert Analyzer._clamp_rating(text) == wanted

    def test_invalid_returns_default(self):
        # Anything that cannot be read as a number yields the default of 3.
        for junk in ("abc", None, []):
            assert Analyzer._clamp_rating(junk) == 3

    def test_custom_default(self):
        fallback = Analyzer._clamp_rating("abc", default=2)
        assert fallback == 2

    def test_custom_range(self):
        in_range = Analyzer._clamp_rating(8, lo=1, hi=10)
        assert in_range == 8
        above_range = Analyzer._clamp_rating(15, lo=1, hi=10)
        assert above_range == 10
# ---- _parse_rating ----
class TestParseRating:
    """Check ``_parse_rating`` against compact and verbose key formats."""

    @staticmethod
    def _parse(draft_name: str, data: dict) -> Rating:
        """Call ``_parse_rating`` on an Analyzer created without ``__init__``."""
        # _parse_rating calls self._clamp_rating, so a real instance is
        # required. object.__new__ allocates one without running __init__.
        bare_analyzer = object.__new__(Analyzer)
        return bare_analyzer._parse_rating(draft_name, data)

    def test_compact_keys(self):
        compact = {
            "s": "A summary",
            "n": 4,
            "nn": "novel approach",
            "m": 3,
            "mn": "early stage",
            "o": 2,
            "on": "minor overlap",
            "mo": 5,
            "mon": "strong momentum",
            "r": 4,
            "rn": "relevant",
            "c": ["A2A protocols"],
        }
        parsed = self._parse("draft-test", compact)
        assert parsed.draft_name == "draft-test"
        scores = (
            parsed.novelty,
            parsed.maturity,
            parsed.overlap,
            parsed.momentum,
            parsed.relevance,
        )
        assert scores == (4, 3, 2, 5, 4)
        assert parsed.summary == "A summary"
        assert parsed.categories == ["A2A protocols"]

    def test_verbose_keys(self):
        verbose = {
            "summary": "A summary",
            "novelty": 3,
            "novelty_note": "ok",
            "maturity": 2,
            "maturity_note": "early",
            "overlap": 1,
            "overlap_note": "unique",
            "momentum": 4,
            "momentum_note": "active",
            "relevance": 5,
            "relevance_note": "core",
            "categories": ["AI safety/alignment"],
        }
        parsed = self._parse("draft-test-2", verbose)
        assert (parsed.novelty, parsed.relevance) == (3, 5)
        assert parsed.categories == ["AI safety/alignment"]

    def test_missing_keys_use_defaults(self):
        parsed = self._parse("draft-empty", {})
        # With no keys present, the scores checked here fall back to 3.
        assert parsed.novelty == 3  # default
        assert parsed.maturity == 3
        assert parsed.summary == ""
        assert parsed.categories == []

    def test_out_of_range_clamped(self):
        out_of_range = {"n": 99, "m": -1, "o": 0, "mo": 10, "r": 6}
        parsed = self._parse("draft-clamp", out_of_range)
        scores = (
            parsed.novelty,
            parsed.maturity,
            parsed.overlap,
            parsed.momentum,
            parsed.relevance,
        )
        # Every score is pulled back into the 1..5 range.
        assert scores == (5, 1, 1, 5, 5)