"""Configuration management."""

from __future__ import annotations

import json
import os
from dataclasses import dataclass, field, asdict
from pathlib import Path

# Default data directory: "data" under the directory three levels above this file.
DEFAULT_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
# The config file location is fixed; it does not follow Config.data_dir.
CONFIG_FILE = DEFAULT_DATA_DIR / "config.json"

DEFAULT_KEYWORDS = [
    "agent",
    "ai-agent",
    "agentic",
    "autonomous",
    "mcp",
    "inference",
    "generative",
    "intelligent",
    "large language model",
    "multi-agent",
    "trustworth",
]

# Environment variable overrides (env var name -> config field name)
_ENV_OVERRIDES = {
    "IETF_ANALYZER_DB_PATH": "db_path",
    "IETF_ANALYZER_CLAUDE_MODEL": "claude_model",
    "IETF_ANALYZER_OLLAMA_URL": "ollama_url",
}


@dataclass
class Config:
    """Application settings, persisted as JSON in ``CONFIG_FILE``.

    Use :meth:`load` to build an instance from the config file plus
    environment variable overrides, and :meth:`save` to write the current
    values back to disk.
    """

    data_dir: str = str(DEFAULT_DATA_DIR)
    db_path: str = str(DEFAULT_DATA_DIR / "drafts.db")
    ollama_url: str = "http://localhost:11434"
    ollama_embed_model: str = "nomic-embed-text"
    ollama_classify_model: str = "llama3.2"
    claude_model: str = "claude-sonnet-4-20250514"
    claude_model_cheap: str = "claude-haiku-4-5-20251001"
    # Copy the module-level list so instances never share (or mutate) it.
    search_keywords: list[str] = field(default_factory=lambda: list(DEFAULT_KEYWORDS))
    # Only fetch drafts newer than this (ISO date string)
    fetch_since: str = "2024-01-01"
    # Polite delay between API requests (seconds)
    fetch_delay: float = 0.5

    # Pipeline
    generation_max_tokens: int = 4096
    # Empty string: defaults to claude_model (the fallback is not applied in this module).
    generation_model: str = ""

    # Observatory — sources to track. The default list already includes "w3c";
    # edit observatory_sources in config.json to change the set, or run a
    # one-off update for a single source:
    #   ietf observatory update --source w3c
    observatory_sources: list[str] = field(
        default_factory=lambda: ["ietf", "w3c", "etsi", "itu", "iso"]
    )
    dashboard_dir: str = str(DEFAULT_DATA_DIR.parent / "docs")
    w3c_groups: list[str] = field(default_factory=lambda: [
        "webmachinelearning", "wot", "credentials", "did", "vc"
    ])

    def save(self) -> None:
        """Write this configuration to ``CONFIG_FILE`` as indented JSON.

        Creates ``data_dir`` and the directory holding ``CONFIG_FILE`` first.
        The two can differ when ``data_dir`` has been customised, and the
        write would fail if the config file's own directory were missing.
        """
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.write_text(json.dumps(asdict(self), indent=2), encoding="utf-8")

    def env_sources(self) -> dict[str, str]:
        """Return {field_name: env_var_name} for fields overridden by env vars.

        A variable counts as an override whenever it is set, even to an empty
        string, which is the same rule :meth:`load` uses when applying it.
        """
        return {
            field_name: env_var
            for env_var, field_name in _ENV_OVERRIDES.items()
            if env_var in os.environ
        }

    @classmethod
    def _validate(cls, cfg: Config) -> None:
        """Validate config values, raise ValueError on problems.

        Checks that ``claude_model`` is a non-blank string, that
        ``ollama_url`` is an HTTP(S) URL, and that the parent directory of
        ``db_path`` exists.

        Raises:
            ValueError: if any of the checks above fails.
        """
        # Values loaded from JSON may not be strings; report those through the
        # same ValueError instead of failing with AttributeError.
        if not isinstance(cfg.claude_model, str) or not cfg.claude_model.strip():
            raise ValueError(
                "claude_model must be a non-empty string. "
                "Check your config file or IETF_ANALYZER_CLAUDE_MODEL env var."
            )
        if not isinstance(cfg.ollama_url, str) or not cfg.ollama_url.startswith(
            ("http://", "https://")
        ):
            raise ValueError(
                f"ollama_url must be an HTTP(S) URL, got: '{cfg.ollama_url}'. "
                "Check your config file or IETF_ANALYZER_OLLAMA_URL env var."
            )
        db_parent = Path(cfg.db_path).parent
        if not db_parent.exists():
            raise ValueError(
                f"db_path parent directory does not exist: '{db_parent}'. "
                "Check your config file or IETF_ANALYZER_DB_PATH env var."
            )

    @classmethod
    def load(cls) -> Config:
        """Build a validated Config from ``CONFIG_FILE`` and the environment.

        Values come from the JSON config file when it exists (keys that are
        not dataclass fields are ignored), otherwise from the field defaults.
        Variables listed in ``_ENV_OVERRIDES`` are then applied on top.

        Returns:
            The validated configuration.

        Raises:
            ValueError: if the config file does not hold a JSON object, is not
                valid JSON (``json.JSONDecodeError`` is a ``ValueError``
                subclass), or the resulting values fail validation.
        """
        if CONFIG_FILE.exists():
            data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                raise ValueError(
                    f"Config file '{CONFIG_FILE}' must contain a JSON object, "
                    f"got {type(data).__name__}."
                )
            known_fields = cls.__dataclass_fields__
            cfg = cls(**{k: v for k, v in data.items() if k in known_fields})
        else:
            cfg = cls()

        # Apply environment variable overrides (env vars take precedence)
        for env_var, field_name in _ENV_OVERRIDES.items():
            env_val = os.environ.get(env_var)
            if env_val is not None:
                setattr(cfg, field_name, env_val)

        cls._validate(cfg)
        return cfg