Files
ietf-draft-analyzer/src/ietf_analyzer/config.py
Christian Nennemann 757b781c67 Platform upgrade: semantic search, citations, readiness, tests, Docker
Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug,
source-aware analysis prompts, config env var overrides + validation,
resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 20:52:56 +01:00

109 lines
3.8 KiB
Python

"""Configuration management."""
from __future__ import annotations
import json
import os
from dataclasses import dataclass, field, asdict
from pathlib import Path
# Default on-disk locations: a "data" directory three levels above this
# module file, and the JSON config file stored inside that directory.
DEFAULT_DATA_DIR = Path(__file__).resolve().parent.parent.parent.joinpath("data")
CONFIG_FILE = DEFAULT_DATA_DIR.joinpath("config.json")
# Search terms used as the default value of Config.search_keywords.
DEFAULT_KEYWORDS: list[str] = [
    "agent", "ai-agent", "llm", "autonomous",
    "machine-learning", "artificial-intelligence", "mcp", "agentic",
    "inference", "generative", "intelligent", "aipref",
]
# Maps each supported environment variable to the Config field it overrides.
_ENV_OVERRIDES: dict[str, str] = {
    "IETF_ANALYZER_DB_PATH": "db_path",
    "IETF_ANALYZER_CLAUDE_MODEL": "claude_model",
    "IETF_ANALYZER_OLLAMA_URL": "ollama_url",
}
@dataclass
class Config:
    """Analyzer settings, persisted as JSON in ``CONFIG_FILE``.

    Build instances with :meth:`load`, which reads ``CONFIG_FILE`` when it
    exists, applies the environment variables listed in ``_ENV_OVERRIDES`` on
    top, and validates the result.  :meth:`save` writes the current field
    values back to ``CONFIG_FILE``.
    """

    data_dir: str = str(DEFAULT_DATA_DIR)
    db_path: str = str(DEFAULT_DATA_DIR / "drafts.db")
    ollama_url: str = "http://localhost:11434"
    ollama_embed_model: str = "nomic-embed-text"
    claude_model: str = "claude-sonnet-4-20250514"
    claude_model_cheap: str = "claude-haiku-4-5-20251001"
    # Each instance gets its own copy so edits never touch DEFAULT_KEYWORDS.
    search_keywords: list[str] = field(default_factory=lambda: list(DEFAULT_KEYWORDS))
    # Only fetch drafts newer than this (ISO date string)
    fetch_since: str = "2024-01-01"
    # Polite delay between API requests (seconds)
    fetch_delay: float = 0.5
    # Pipeline
    generation_max_tokens: int = 4096
    # defaults to claude_model (the fallback is not applied inside this class)
    generation_model: str = ""
    # Observatory — add "w3c" to enable W3C spec tracking:
    #   ietf observatory update --source w3c   (one-off)
    #   or set observatory_sources to ["ietf", "w3c"] in config.json
    observatory_sources: list[str] = field(default_factory=lambda: ["ietf"])
    dashboard_dir: str = str(DEFAULT_DATA_DIR.parent / "docs")
    w3c_groups: list[str] = field(default_factory=lambda: [
        "webmachinelearning", "wot", "credentials", "did", "vc"
    ])

    def save(self) -> None:
        """Write every field to ``CONFIG_FILE`` as indented UTF-8 JSON.

        Both ``data_dir`` and the directory holding ``CONFIG_FILE`` are
        created first.  They are the same directory by default, but differ
        once ``data_dir`` has been customised, and the write would otherwise
        fail if the config directory were missing.
        """
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.write_text(
            json.dumps(asdict(self), indent=2), encoding="utf-8"
        )

    def env_sources(self) -> dict[str, str]:
        """Return {field_name: env_var_name} for fields overridden by env vars.

        Only variables that are set to a non-empty value are reported, which
        is the same rule :meth:`load` uses when applying overrides.
        """
        return {
            field_name: env_var
            for env_var, field_name in _ENV_OVERRIDES.items()
            if os.environ.get(env_var)
        }

    @classmethod
    def _validate(cls, cfg: Config) -> None:
        """Validate config values, raise ValueError on problems.

        Checks that ``claude_model`` is a non-blank string, that
        ``ollama_url`` is a string starting with ``http://`` or ``https://``,
        and that the parent directory of ``db_path`` exists.  Values of the
        wrong type (e.g. ``null`` in the JSON file) are reported as
        ``ValueError`` as well rather than leaking ``AttributeError`` or
        ``TypeError``.

        Raises:
            ValueError: if any of the checks above fails.
        """
        model = cfg.claude_model
        if not isinstance(model, str) or not model.strip():
            raise ValueError(
                "claude_model must be a non-empty string. "
                "Check your config file or IETF_ANALYZER_CLAUDE_MODEL env var."
            )

        url = cfg.ollama_url
        if not isinstance(url, str) or not url.startswith(("http://", "https://")):
            raise ValueError(
                f"ollama_url must be an HTTP(S) URL, got: '{cfg.ollama_url}'. "
                "Check your config file or IETF_ANALYZER_OLLAMA_URL env var."
            )

        try:
            db_parent = Path(cfg.db_path).parent
        except TypeError as err:
            # Path() rejects values that are neither str nor os.PathLike.
            raise ValueError(
                f"db_path must be a filesystem path, got: {cfg.db_path!r}. "
                "Check your config file or IETF_ANALYZER_DB_PATH env var."
            ) from err
        if not db_parent.exists():
            raise ValueError(
                f"db_path parent directory does not exist: '{db_parent}'. "
                "Check your config file or IETF_ANALYZER_DB_PATH env var."
            )

    @classmethod
    def load(cls) -> Config:
        """Build a validated Config from ``CONFIG_FILE`` and the environment.

        Precedence, lowest to highest: dataclass defaults, values stored in
        ``CONFIG_FILE`` (keys that are not Config fields are ignored), then
        the environment variables listed in ``_ENV_OVERRIDES``.  An
        environment variable that is set but empty is treated as unset, so it
        never blanks out a configured value.

        Returns:
            The loaded and validated configuration.

        Raises:
            ValueError: if ``CONFIG_FILE`` is not valid JSON (json raises a
                ValueError subclass), does not contain a JSON object, or the
                resulting values fail validation.
        """
        try:
            raw = CONFIG_FILE.read_text(encoding="utf-8")
        except FileNotFoundError:
            # No saved configuration yet: start from the defaults.
            cfg = cls()
        else:
            data = json.loads(raw)
            if not isinstance(data, dict):
                raise ValueError(
                    f"{CONFIG_FILE} must contain a JSON object, "
                    f"got {type(data).__name__}."
                )
            known = cls.__dataclass_fields__
            cfg = cls(**{k: v for k, v in data.items() if k in known})

        # Apply environment variable overrides (env vars take precedence)
        for env_var, field_name in _ENV_OVERRIDES.items():
            env_val = os.environ.get(env_var)
            if env_val:
                setattr(cfg, field_name, env_val)

        cls._validate(cfg)
        return cfg