Run pipeline, write Post 08, commit untracked files

Pipeline:
- Extract ideas for 38 new drafts → 462 ideas total
- Convergence analysis: 132 cross-org convergent ideas (33% rate)
- Fetch authors for 102 drafts → 709 authors (up from 403)
- Refresh gap analysis: 12 gaps across full 474-draft corpus
- Update verified counts with new totals

Post 08:
- Complete rewrite of "Agents Building the Agent Analysis" (2,953 words)
- Covers 3 phases: writing team → review cycle → fix cycle
- Meta-irony table mapping team coordination to IETF gap names
- Specific examples from dev journal (SQL injection, consent conflation, ideas mismatch)

Untracked files committed:
- scripts/: backfill-wg-names, classify-unrated, compare-classifiers, download-relevant-text, run-webui
- src/ietf_analyzer/classifier.py: two-stage Ollama classifier
- src/webui/: analytics (GDPR-compliant), auth, obsidian_export
- tests/test_obsidian_export.py (10 tests)
- data/reports/: wg-analysis, generated draft for gap #37

Housekeeping:
- .gitignore: exclude LaTeX artifacts, stale DBs, analytics.db

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-08 15:31:30 +01:00
parent 20c45a7eba
commit e247bfef8f
19 changed files with 2758 additions and 586 deletions

View File

@@ -0,0 +1,200 @@
"""Tests for the Obsidian vault export.
If this test breaks, the export is out of sync with the data model.
Fix obsidian_export.py to match whatever changed.
"""
from __future__ import annotations
import io
import sys
import zipfile
from pathlib import Path
import pytest
# Put the repository's ``src`` directory at the front of ``sys.path`` (only if
# it is not listed yet) before the ``webui`` import that follows.
_project_root = Path(__file__).resolve().parent.parent
if str(_project_root.joinpath("src")) not in sys.path:
    sys.path.insert(0, str(_project_root.joinpath("src")))
from webui.obsidian_export import build_obsidian_vault
def test_vault_structure(seeded_db):
    """The exported archive must carry the core notes, config and analysis pages."""
    payload = build_obsidian_vault(seeded_db)
    assert len(payload) > 0

    archive = zipfile.ZipFile(io.BytesIO(payload))
    members = archive.namelist()

    # Structural entries that every export is expected to contain.
    required_entries = (
        "IETF-AI-Agent-Drafts/Dashboard.md",
        "IETF-AI-Agent-Drafts/Authors/index.md",
        "IETF-AI-Agent-Drafts/Categories/index.md",
        "IETF-AI-Agent-Drafts/.obsidian/graph.json",
    )
    for entry in required_entries:
        assert entry in members

    # At least three analysis notes are expected
    # (Score Distribution, Top Rated, Ideas Overview).
    analysis_notes = [member for member in members if "/Analysis/" in member]
    assert len(analysis_notes) >= 3
def test_vault_has_all_drafts(seeded_db):
    """Each draft stored in the database must appear as a note under Drafts/."""
    archive = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    note_stems = [
        Path(member).stem
        for member in archive.namelist()
        if "/Drafts/" in member
    ]

    # The seeded fixture is expected to hold five drafts.
    assert len(note_stems) == 5

    # Spot-check two drafts by their note name (file name without extension).
    for expected in ("draft-alpha-agent-comm", "draft-gamma-agent-id"):
        assert expected in note_stems
def test_draft_note_has_frontmatter(seeded_db):
    """A draft note opens with a frontmatter fence and shows tidy numbers."""
    import re

    archive = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    raw = archive.read("IETF-AI-Agent-Drafts/Drafts/draft-alpha-agent-comm.md")
    note = raw.decode()

    # The note has to begin with the YAML frontmatter delimiter ...
    assert note.startswith("---")
    # ... and each of these keys must occur somewhere in the note text.
    for key in ("score:", "novelty:", "maturity:", "categories:", "tags:"):
        assert key in note

    # Guard against floating-point noise (e.g., 3.4000000000000004): no number
    # in the note may carry four or more decimal places.
    long_floats = re.findall(r"\d+\.\d{4,}", note)
    assert not long_floats, f"Unformatted floats found: {long_floats}"
def test_draft_note_has_wikilinks(seeded_db):
    """A draft note references its authors and categories via [[wikilinks]]."""
    vault = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    raw = vault.read("IETF-AI-Agent-Drafts/Drafts/draft-alpha-agent-comm.md")
    text = raw.decode()

    author_links = ("[[Alice Researcher]]", "[[Bob Engineer]]")
    category_links = ("[[A2A protocols]]",)
    for link in author_links + category_links:
        assert link in text
def test_draft_note_has_ideas(seeded_db):
    """A draft note carries an ideas section naming the ideas extracted from it."""
    vault = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    raw = vault.read("IETF-AI-Agent-Drafts/Drafts/draft-alpha-agent-comm.md")
    text = raw.decode()

    # Section heading first, then the two idea titles expected for this draft.
    for fragment in ("Extracted Ideas", "Agent Handshake", "Capability Negotiation"):
        assert fragment in text
def test_draft_note_has_rating_bars(seeded_db):
    """A draft note renders its scores as block-character bars on a 5.0 scale."""
    vault = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    raw = vault.read("IETF-AI-Agent-Drafts/Drafts/draft-alpha-agent-comm.md")
    text = raw.decode()

    filled_block = "\u2588"
    empty_block = "\u2591"
    # A bar is drawn from filled and empty block characters; both must occur.
    assert filled_block in text
    assert empty_block in text
    # The numeric score is shown against the maximum.
    assert "/5.0" in text
def test_author_notes(seeded_db):
    """An author note has frontmatter, an affiliation and links to that author's drafts."""
    vault = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    text = vault.read("IETF-AI-Agent-Drafts/Authors/Alice Researcher.md").decode()

    assert text.startswith("---")

    # Affiliation key and value, then the opening part of each draft wikilink
    # (left unclosed so any alias or suffix after the name is tolerated).
    expected_fragments = (
        "affiliation:",
        "ExampleCorp",
        "[[draft-alpha-agent-comm",
        "[[draft-gamma-agent-id",
    )
    for fragment in expected_fragments:
        assert fragment in text
def test_category_notes(seeded_db):
    """Category notes must exist per category and link back to their drafts.

    The vault is built from the ``seeded_db`` fixture (defined outside this
    file). The test counts the notes under ``Categories/`` other than the
    ``index.md`` page, then inspects the "A2A protocols" note.
    """
    data = build_obsidian_vault(seeded_db)
    with zipfile.ZipFile(io.BytesIO(data)) as z:
        # Exclude only the index page itself. A substring test on "index"
        # would also drop any category whose path happens to contain it.
        cat_files = [
            n
            for n in z.namelist()
            if "/Categories/" in n and Path(n).name != "index.md"
        ]
        # seeded_db is said to have 5 distinct categories; only a lower bound
        # of 4 is enforced here.
        # NOTE(review): confirm whether this should be tightened to == 5.
        assert len(cat_files) >= 4

        # Check one category note: it links to a draft and records a count.
        content = z.read("IETF-AI-Agent-Drafts/Categories/A2A protocols.md").decode()
    assert "[[draft-alpha-agent-comm" in content
    assert "draft_count:" in content
def test_dashboard_has_mermaid(seeded_db):
    """The dashboard note embeds a Mermaid pie chart and a key-stats section."""
    vault = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    dashboard = vault.read("IETF-AI-Agent-Drafts/Dashboard.md").decode()

    # Fenced Mermaid block with a pie chart title.
    for marker in ("```mermaid", "pie title"):
        assert marker in dashboard

    # Headline statistics.
    for label in ("Key Stats", "Total Drafts"):
        assert label in dashboard
def test_vault_has_glossary(seeded_db):
    """The vault ships a Glossary note that explains the scoring vocabulary."""
    glossary_path = "IETF-AI-Agent-Drafts/Analysis/Glossary.md"
    archive = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    assert glossary_path in archive.namelist()

    text = archive.read(glossary_path).decode()

    # Each of the five rating dimensions has to be mentioned.
    for dim in ("Novelty", "Maturity", "Overlap", "Momentum", "Relevance"):
        assert dim in text, f"Glossary missing dimension: {dim}"

    # General terms the glossary must also mention.
    for term in ("Composite Score", "Internet-Draft"):
        assert term in text
def test_top_rated_uses_full_names(seeded_db):
    """The Top Rated note spells out dimension names instead of abbreviating them."""
    vault = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))
    top_rated = vault.read("IETF-AI-Agent-Drafts/Analysis/Top Rated.md").decode()

    for full_name in ("Novelty", "Maturity"):
        assert full_name in top_rated

    # An abbreviated table header cell must not appear.
    abbreviated_cell = "| Nov |"
    assert abbreviated_cell not in top_rated
def test_vault_is_valid_zip(seeded_db):
    """The export must be an intact ZIP whose Markdown notes decode as UTF-8."""
    archive = zipfile.ZipFile(io.BytesIO(build_obsidian_vault(seeded_db)))

    # testzip() reports the first member that fails its integrity check,
    # or None when every member is fine.
    bad = archive.testzip()
    assert bad is None, f"Corrupt file in ZIP: {bad}"

    # Only the .md members are decoded; invalid bytes raise and fail the test.
    markdown_members = [m for m in archive.namelist() if m.endswith(".md")]
    for member in markdown_members:
        archive.read(member).decode("utf-8")