Release prep:
- Version bump to 0.3.0 (pyproject.toml, cli.py)
- Rewrite README.md with current stats (475 drafts, 713 authors, 501 ideas)
- Add CONTRIBUTING.md with dev setup and code conventions

Blog site:
- Add scripts/build-site.py (markdown → HTML with clean CSS, dark mode, nav)
- Generate static site in docs/blog/ (10 pages)
- Ready for GitHub Pages deployment

Academic paper (paper/main.tex):
- Update all counts: 474→475 drafts, 557→710 authors, 1907→462 ideas, 11→12 gaps
- Add false-positive filtering methodology (113 excluded, 361 relevant)
- Add cross-org convergence analysis (132 ideas, 33% rate)
- Add GDPR compliance gap to gap table
- Add LLM-as-judge caveats to rating methodology and limitations
- Add FIPA, IEEE P3394, W3C WoT to related work with bibliography entries
- Fix safety ratio to show monthly variation (1.5:1 to 21:1)

Pipeline:
- Fetch 1 new draft (475 total), 3 new authors (713 total)
- Fix 16 ruff lint errors across test files
- All 106 tests pass

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
109 lines
3.9 KiB
Python
109 lines
3.9 KiB
Python
"""Configuration management."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
from dataclasses import dataclass, field, asdict
|
|
from pathlib import Path
|
|
|
|
# Data directory: "data/" three directory levels above this module
# (presumably the repository root — confirm against the package layout).
DEFAULT_DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"
# The JSON config file always lives in the default data directory.
CONFIG_FILE = DEFAULT_DATA_DIR / "config.json"
|
|
|
|
# Default search terms; copied into each Config instance as ``search_keywords``.
DEFAULT_KEYWORDS = [
    "agent",
    "ai-agent",
    "agentic",
    "autonomous",
    "mcp",
    "inference",
    "generative",
    "intelligent",
    "large language model",
    "multi-agent",
    # NOTE(review): looks like a deliberate stem (would cover "trustworthy" and
    # "trustworthiness" under substring matching) — confirm it is not a typo.
    "trustworth",
]
|
|
|
|
# Environment variable overrides (env var name -> config field name).
# Config.load() applies these after reading the config file, so a set
# environment variable takes precedence over the stored value.
_ENV_OVERRIDES = {
    "IETF_ANALYZER_DB_PATH": "db_path",
    "IETF_ANALYZER_CLAUDE_MODEL": "claude_model",
    "IETF_ANALYZER_OLLAMA_URL": "ollama_url",
}
|
|
|
|
|
|
@dataclass
class Config:
    """Application settings, persisted as JSON in ``CONFIG_FILE``.

    Use :meth:`load` to build an instance from the config file (if present)
    plus environment-variable overrides, and :meth:`save` to write the
    current values back to disk.
    """

    data_dir: str = str(DEFAULT_DATA_DIR)
    db_path: str = str(DEFAULT_DATA_DIR / "drafts.db")
    ollama_url: str = "http://localhost:11434"
    ollama_embed_model: str = "nomic-embed-text"
    ollama_classify_model: str = "llama3.2"
    claude_model: str = "claude-sonnet-4-20250514"
    claude_model_cheap: str = "claude-haiku-4-5-20251001"
    # Each instance gets its own copy so edits never touch DEFAULT_KEYWORDS.
    search_keywords: list[str] = field(default_factory=lambda: list(DEFAULT_KEYWORDS))
    # Only fetch drafts newer than this (ISO date string)
    fetch_since: str = "2024-01-01"
    # Polite delay between API requests (seconds)
    fetch_delay: float = 0.5
    # Pipeline
    generation_max_tokens: int = 4096
    # Empty string means "use claude_model"; that fallback is not applied in this class.
    generation_model: str = ""
    # Observatory — sources tracked by default. To change the set, override
    # observatory_sources in config.json; for a one-off run of a single source:
    #   ietf observatory update --source w3c
    observatory_sources: list[str] = field(
        default_factory=lambda: ["ietf", "w3c", "etsi", "itu", "iso"]
    )
    dashboard_dir: str = str(DEFAULT_DATA_DIR.parent / "docs")
    w3c_groups: list[str] = field(
        default_factory=lambda: [
            "webmachinelearning", "wot", "credentials", "did", "vc"
        ]
    )

    def save(self) -> None:
        """Write all fields to ``CONFIG_FILE`` as indented JSON.

        Creates ``data_dir`` and the directory that holds ``CONFIG_FILE``
        if they do not exist yet.
        """
        Path(self.data_dir).mkdir(parents=True, exist_ok=True)
        # CONFIG_FILE sits under DEFAULT_DATA_DIR, which is a different
        # directory whenever data_dir has been customised.
        CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
        CONFIG_FILE.write_text(json.dumps(asdict(self), indent=2), encoding="utf-8")

    def env_sources(self) -> dict[str, str]:
        """Return {field_name: env_var_name} for fields overridden by env vars.

        A variable counts as an override whenever it is present in the
        environment (even if empty), matching what :meth:`load` applies.
        """
        return {
            field_name: env_var
            for env_var, field_name in _ENV_OVERRIDES.items()
            if env_var in os.environ
        }

    @classmethod
    def _validate(cls, cfg: Config) -> None:
        """Validate config values, raise ValueError on problems.

        Checks that ``claude_model`` is a non-blank string, ``ollama_url`` is
        an HTTP(S) URL, and the parent directory of ``db_path`` exists.
        """
        # isinstance guards: values read from JSON may not be strings.
        if (
            not isinstance(cfg.claude_model, str)
            or not cfg.claude_model.strip()
        ):
            raise ValueError(
                "claude_model must be a non-empty string. "
                "Check your config file or IETF_ANALYZER_CLAUDE_MODEL env var."
            )
        if (
            not isinstance(cfg.ollama_url, str)
            or not cfg.ollama_url.startswith(("http://", "https://"))
        ):
            raise ValueError(
                f"ollama_url must be an HTTP(S) URL, got: '{cfg.ollama_url}'. "
                "Check your config file or IETF_ANALYZER_OLLAMA_URL env var."
            )
        if not isinstance(cfg.db_path, str):
            raise ValueError(
                f"db_path must be a string path, got: {cfg.db_path!r}. "
                "Check your config file or IETF_ANALYZER_DB_PATH env var."
            )
        db_parent = Path(cfg.db_path).parent
        if not db_parent.exists():
            raise ValueError(
                f"db_path parent directory does not exist: '{db_parent}'. "
                "Check your config file or IETF_ANALYZER_DB_PATH env var."
            )

    @classmethod
    def load(cls) -> Config:
        """Build a validated Config from ``CONFIG_FILE`` and the environment.

        Keys in the file that are not dataclass fields are ignored. If the
        file does not exist, defaults are used. Environment variables listed
        in ``_ENV_OVERRIDES`` are applied last and take precedence.

        Raises:
            ValueError: if the file's top-level JSON value is not an object,
                or if validation fails. Malformed JSON propagates as
                ``json.JSONDecodeError`` (itself a ``ValueError``).
        """
        if CONFIG_FILE.exists():
            data = json.loads(CONFIG_FILE.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                raise ValueError(
                    f"Config file '{CONFIG_FILE}' must contain a JSON object, "
                    f"got {type(data).__name__}."
                )
            known = {k: v for k, v in data.items() if k in cls.__dataclass_fields__}
            cfg = cls(**known)
        else:
            cfg = cls()

        # Apply environment variable overrides (env vars take precedence)
        for env_var, field_name in _ENV_OVERRIDES.items():
            env_val = os.environ.get(env_var)
            if env_val is not None:
                setattr(cfg, field_name, env_val)

        cls._validate(cfg)
        return cfg
|