Major features added by 5 parallel agent teams: - Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis) - Global search across drafts, ideas, authors, gaps - REST API expansion (14 endpoints, up from 3) with CSV/JSON export - Citation graph visualization (D3.js, 440 nodes, 2422 edges) - Standards readiness scoring (0-100 composite from 6 factors) - Side-by-side draft comparison view with shared/unique analysis - Annotation system (notes + tags per draft, DB-persisted) - Docker deployment (Dockerfile + docker-compose with Ollama) - Scheduled updates (cron script with log rotation) - Pipeline health dashboard (stage progress bars, cost tracking) - Test suite foundation (54 pytest tests covering DB, models, web data) Fixes: compare_drafts() stubbed→working, get_authors_for_draft() bug, source-aware analysis prompts, config env var overrides + validation, resilient batch error handling with --retry-failed, observatory --dry-run Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
109 lines
3.8 KiB
Python
109 lines
3.8 KiB
Python
"""Configuration management."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from dataclasses import dataclass, field, asdict
|
|
from pathlib import Path
|
|
|
|
# "data" directory located under the third ancestor of this file's resolved path.
DEFAULT_DATA_DIR = Path(__file__).resolve().parent.parent.parent.joinpath("data")
# JSON file that Config.save() writes and Config.load() reads.
CONFIG_FILE = DEFAULT_DATA_DIR.joinpath("config.json")
|
|
|
|
# Default search terms; Config.search_keywords starts out as a copy of this list.
DEFAULT_KEYWORDS = [
    "agent", "ai-agent", "llm", "autonomous",
    "machine-learning", "artificial-intelligence", "mcp", "agentic",
    "inference", "generative", "intelligent", "aipref",
]
|
|
|
|
# Maps each supported environment variable to the Config field it overrides.
# Config.load() applies these after reading the config file, so the
# environment wins over file values.
_ENV_OVERRIDES: dict[str, str] = {
    "IETF_ANALYZER_DB_PATH": "db_path",
    "IETF_ANALYZER_CLAUDE_MODEL": "claude_model",
    "IETF_ANALYZER_OLLAMA_URL": "ollama_url",
}
|
|
|
|
|
|
@dataclass
class Config:
    """Analyzer settings, persisted as JSON in ``CONFIG_FILE``.

    Use :meth:`load` to build an instance from the config file (falling back
    to the defaults below) with environment-variable overrides applied, and
    :meth:`save` to write the current values back to disk.
    """

    data_dir: str = str(DEFAULT_DATA_DIR)
    db_path: str = str(DEFAULT_DATA_DIR / "drafts.db")
    ollama_url: str = "http://localhost:11434"
    ollama_embed_model: str = "nomic-embed-text"
    claude_model: str = "claude-sonnet-4-20250514"
    claude_model_cheap: str = "claude-haiku-4-5-20251001"
    # Copy the module-level list so instances never share (or mutate) it.
    search_keywords: list[str] = field(default_factory=lambda: list(DEFAULT_KEYWORDS))
    # Only fetch drafts newer than this (ISO date string)
    fetch_since: str = "2024-01-01"
    # Polite delay between API requests (seconds)
    fetch_delay: float = 0.5
    # Pipeline
    generation_max_tokens: int = 4096
    generation_model: str = ""  # defaults to claude_model
    # Observatory — add "w3c" to enable W3C spec tracking:
    #   ietf observatory update --source w3c (one-off)
    #   or set observatory_sources to ["ietf", "w3c"] in config.json
    observatory_sources: list[str] = field(default_factory=lambda: ["ietf"])
    dashboard_dir: str = str(DEFAULT_DATA_DIR.parent / "docs")
    w3c_groups: list[str] = field(default_factory=lambda: [
        "webmachinelearning", "wot", "credentials", "did", "vc"
    ])

    def save(self) -> None:
        """Write all fields to ``CONFIG_FILE`` as indented JSON.

        Creates ``data_dir`` and the directory holding ``CONFIG_FILE`` if
        they do not exist yet.
        """
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        # CONFIG_FILE is anchored at DEFAULT_DATA_DIR, which need not be the
        # same directory as a customised data_dir, so ensure it exists too.
        CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.write_text(json.dumps(asdict(self), indent=2), encoding="utf-8")

    def env_sources(self) -> dict[str, str]:
        """Return {field_name: env_var_name} for fields overridden by env vars.

        A variable counts as an override whenever it is present in the
        environment, even when empty, because that is the condition
        :meth:`load` uses when applying overrides.
        """
        return {
            field_name: env_var
            for env_var, field_name in _ENV_OVERRIDES.items()
            if env_var in os.environ
        }

    @classmethod
    def _validate(cls, cfg: Config) -> None:
        """Validate config values, raise ValueError on problems.

        Raises:
            ValueError: if ``claude_model`` is empty or whitespace-only, if
                ``ollama_url`` does not start with ``http://`` or
                ``https://``, or if the parent directory of ``db_path``
                does not exist.
        """
        if not cfg.claude_model or not cfg.claude_model.strip():
            raise ValueError(
                "claude_model must be a non-empty string. "
                "Check your config file or IETF_ANALYZER_CLAUDE_MODEL env var."
            )
        if not cfg.ollama_url.startswith(("http://", "https://")):
            raise ValueError(
                f"ollama_url must be an HTTP(S) URL, got: '{cfg.ollama_url}'. "
                "Check your config file or IETF_ANALYZER_OLLAMA_URL env var."
            )
        db_parent = Path(cfg.db_path).parent
        if not db_parent.exists():
            raise ValueError(
                f"db_path parent directory does not exist: '{db_parent}'. "
                "Check your config file or IETF_ANALYZER_DB_PATH env var."
            )

    @classmethod
    def load(cls) -> Config:
        """Build a validated Config from ``CONFIG_FILE`` and the environment.

        Values come from ``CONFIG_FILE`` when it exists (keys that are not
        dataclass fields are ignored), otherwise from the field defaults.
        Environment variables listed in ``_ENV_OVERRIDES`` are applied on top.

        Returns:
            The validated configuration.

        Raises:
            ValueError: if the config file is not valid JSON, its top-level
                value is not an object, or validation fails.
        """
        if CONFIG_FILE.exists():
            data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                raise ValueError(
                    f"Config file '{CONFIG_FILE}' must contain a JSON object, "
                    f"got {type(data).__name__}."
                )
            # Ignore unknown keys so stale or hand-edited files still load.
            cfg = cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__})
        else:
            cfg = cls()

        # Apply environment variable overrides (env vars take precedence)
        for env_var, field_name in _ENV_OVERRIDES.items():
            env_val = os.environ.get(env_var)
            if env_val is not None:
                setattr(cfg, field_name, env_val)

        cls._validate(cfg)
        return cfg
|