v0.3.0: Publication-ready release with blog site, paper update, and polish
Release prep: - Version bump to 0.3.0 (pyproject.toml, cli.py) - Rewrite README.md with current stats (475 drafts, 713 authors, 501 ideas) - Add CONTRIBUTING.md with dev setup and code conventions Blog site: - Add scripts/build-site.py (markdown → HTML with clean CSS, dark mode, nav) - Generate static site in docs/blog/ (10 pages) - Ready for GitHub Pages deployment Academic paper (paper/main.tex): - Update all counts: 474→475 drafts, 557→710 authors, 1907→462 ideas, 11→12 gaps - Add false-positive filtering methodology (113 excluded, 361 relevant) - Add cross-org convergence analysis (132 ideas, 33% rate) - Add GDPR compliance gap to gap table - Add LLM-as-judge caveats to rating methodology and limitations - Add FIPA, IEEE P3394, W3C WoT to related work with bibliography entries - Fix safety ratio to show monthly variation (1.5:1 to 21:1) Pipeline: - Fetch 1 new draft (475 total), 3 new authors (713 total) - Fix 16 ruff lint errors across test files - All 106 tests pass Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
73
scripts/fetch-all-sources.py
Normal file
73
scripts/fetch-all-sources.py
Normal file
@@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env python3
"""Fetch from all 5 sources and import into DB.

Flat maintenance script: pulls documents from the configured standards
sources, imports any not already present into the local database, then
prints per-source counts.  Run from the repository root so the
``sys.path`` shim below can find the ``src/`` package layout.
"""

import sqlite3
import sys
import traceback
from contextlib import closing

sys.path.insert(0, "src")  # allow running without installing the package

from ietf_analyzer.config import Config
from ietf_analyzer.sources import FETCHERS, get_fetcher
from ietf_analyzer.db import Database
from ietf_analyzer.models import Draft
from rich.console import Console

console = Console()
cfg = Config.load()
db = Database(cfg)

# Only fetch from new sources (IETF and W3C already done recently)
sources_to_fetch = ["etsi", "itu", "iso"]

total_new = 0
for source_name in sources_to_fetch:
    console.print(f"\n[bold blue]{'='*60}[/]")
    console.print(f"[bold blue]Fetching from {source_name.upper()}...[/]")
    console.print(f"[bold blue]{'='*60}[/]")

    fetcher = get_fetcher(source_name, cfg)
    try:
        docs = fetcher.search(cfg.search_keywords)
        console.print(f" Found {len(docs)} documents")

        new_count = 0
        for doc in docs:
            # Skip documents already imported on a previous run.
            if db.get_draft(doc.name):
                continue
            new_count += 1

            # Convert the source-specific document into the common Draft model.
            # rev is hard-coded: external sources have no IETF-style revision.
            draft = Draft(
                name=doc.name,
                rev="01",
                title=doc.title,
                abstract=doc.abstract,
                source=doc.source,
                source_id=doc.source_id,
                source_url=doc.source_url,
                time=doc.time,
                doc_status=doc.doc_status,
                full_text=doc.full_text,
            )
            db.upsert_draft(draft)

        console.print(f" [green]Imported {new_count} new documents[/]")
        total_new += new_count
    except Exception as e:
        # Best-effort per source: report the failure and keep going so one
        # flaky upstream does not abort the whole fetch run.
        console.print(f" [red]Error: {e}[/]")
        traceback.print_exc()
    finally:
        fetcher.close()

console.print(f"\n[bold green]Total new documents: {total_new}[/]")

# Final stats — read the raw table directly; closing() guarantees the
# connection is released even if a query raises.
with closing(sqlite3.connect(cfg.db_path)) as conn:
    rows = conn.execute("SELECT source, COUNT(*) FROM drafts GROUP BY source ORDER BY source").fetchall()
    console.print("\n[bold]Database by source:[/]")
    for source, count in rows:
        console.print(f" {source}: {count}")
    total = conn.execute("SELECT COUNT(*) FROM drafts").fetchone()[0]
    console.print(f" [bold]Total: {total}[/]")
|
||||
Reference in New Issue
Block a user