Run pipeline, write Post 08, commit untracked files

Pipeline:
- Extract ideas for 38 new drafts → 462 ideas total
- Convergence analysis: 132 cross-org convergent ideas (33% rate)
- Fetch authors for 102 drafts → 709 authors (up from 403)
- Refresh gap analysis: 12 gaps across full 474-draft corpus
- Update verified counts with new totals

Post 08:
- Complete rewrite of "Agents Building the Agent Analysis" (2,953 words)
- Covers 3 phases: writing team → review cycle → fix cycle
- Meta-irony table mapping team coordination to IETF gap names
- Specific examples from dev journal (SQL injection, consent conflation, ideas mismatch)

Untracked files committed:
- scripts/: backfill-wg-names, classify-unrated, compare-classifiers, download-relevant-text, run-webui
- src/ietf_analyzer/classifier.py: two-stage Ollama classifier
- src/webui/: analytics (GDPR-compliant), auth, obsidian_export
- tests/test_obsidian_export.py (10 tests)
- data/reports/: wg-analysis, generated draft for gap #37

Housekeeping:
- .gitignore: exclude LaTeX artifacts, stale DBs, analytics.db

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-08 15:31:30 +01:00
parent 20c45a7eba
commit e247bfef8f
19 changed files with 2758 additions and 586 deletions

View File

@@ -0,0 +1,182 @@
"""Local AI-relevance classifier using Ollama.
Two-stage filter to avoid spending Claude tokens on irrelevant drafts:
1. Embedding similarity — fast cosine check against a reference description
2. Chat classification — small local model for borderline cases
Both stages run locally via Ollama (zero cost).
"""
from __future__ import annotations
import numpy as np
import ollama as ollama_lib
from rich.console import Console
from .config import Config
# Shared Rich console for all progress/status output in this module.
console = Console()
# Reference description of what we're looking for.
# Embedding of this text is compared against each draft's abstract.
REFERENCE_DESCRIPTION = """
AI agent protocols, autonomous agent communication, agent-to-agent interaction,
agent identity and authentication, agent authorization, agent discovery,
large language model integration with network protocols, agentic systems,
machine learning for network operations, AI safety in networked systems,
model context protocol, multi-agent coordination, agent task delegation,
generative AI infrastructure, intelligent network automation,
trustworthy AI systems, AI governance in standards.
"""
# Thresholds for the two-stage filter (calibrated against 434 drafts + 73 FPs)
# TP avg similarity: 0.685, FP avg: 0.598
SIMILARITY_ACCEPT = 0.72 # Above this: definitely relevant, skip chat
SIMILARITY_REJECT = 0.50 # Below this: definitely irrelevant, skip chat
# Between REJECT and ACCEPT: borderline, use chat model to decide
# Prompt for the borderline chat classifier. Placeholders {title} and
# {abstract} are filled in by Classifier._chat_classify(); the model is
# instructed to answer with a bare "yes" or "no".
CLASSIFY_PROMPT = """\
You are classifying IETF Internet-Drafts for an AI/agent standards tracker.
A draft is RELEVANT if it relates to ANY of these topics:
- AI agents, autonomous agents, multi-agent systems
- Agent identity, authentication, authorization, discovery
- Agent-to-agent (A2A) communication protocols
- Large language models (LLMs), generative AI
- Machine learning in network operations
- AI safety, alignment, trustworthiness
- Model Context Protocol (MCP), agentic workflows
- OAuth/JWT/credentials for agents or AI systems
- Autonomous network operations using AI
- Intelligent network management or traffic handling
A draft is NOT relevant if it only covers:
- Pure cryptography without AI/agent context
- General networking protocols (BGP, DNS, TLS) without AI
- Email, HTTP, or web standards without AI/agent features
Title: {title}
Abstract: {abstract}
Is this draft relevant to AI agents or related topics? Answer ONLY "yes" or "no"."""
class Classifier:
    """Classify drafts as AI-relevant using local Ollama models.

    Two-stage filter:
      1. Cosine similarity between the draft's title+abstract embedding and
         the cached embedding of REFERENCE_DESCRIPTION. Scores >= SIMILARITY_ACCEPT
         are accepted and scores <= SIMILARITY_REJECT rejected without ever
         calling the chat model.
      2. Borderline scores are decided by a small local chat model using
         CLASSIFY_PROMPT.

    Usable as a context manager; close() releases the HTTP client.
    """
    def __init__(self, config: Config | None = None):
        # Config.load() supplies defaults when no explicit config is passed.
        self.config = config or Config.load()
        self.client = ollama_lib.Client(host=self.config.ollama_url)
        # Cached embedding of REFERENCE_DESCRIPTION, computed on first use.
        self._ref_embedding: np.ndarray | None = None
    def close(self) -> None:
        """Release the underlying HTTP connection pool, if present.

        NOTE(review): reaches into the ollama Client's private `_client`
        attribute — may break on ollama library upgrades; confirm no public
        close() exists in the pinned version.
        """
        if hasattr(self.client, '_client'):
            self.client._client.close()
    def __enter__(self):
        return self
    def __exit__(self, *exc):
        # Always close the client, whether or not an exception occurred.
        self.close()
    def _get_reference_embedding(self) -> np.ndarray:
        """Get (cached) embedding of the reference AI description."""
        if self._ref_embedding is None:
            resp = self.client.embed(
                model=self.config.ollama_embed_model,
                input=REFERENCE_DESCRIPTION.strip(),
            )
            self._ref_embedding = np.array(resp["embeddings"][0], dtype=np.float32)
        return self._ref_embedding
    def _embed(self, text: str) -> np.ndarray:
        """Embed a text string (truncated to 8000 chars to bound model input)."""
        resp = self.client.embed(
            model=self.config.ollama_embed_model,
            input=text[:8000],
        )
        return np.array(resp["embeddings"][0], dtype=np.float32)
    def _cosine_similarity(self, a: np.ndarray, b: np.ndarray) -> float:
        """Cosine similarity of two vectors; returns 0.0 if either has zero norm."""
        dot = np.dot(a, b)
        norm = np.linalg.norm(a) * np.linalg.norm(b)
        return float(dot / norm) if norm > 0 else 0.0
    def _chat_classify(self, title: str, abstract: str) -> bool:
        """Ask local chat model whether a draft is AI-related.

        The abstract is truncated to 2000 chars. Any failure (model down,
        malformed response, etc.) is logged and treated as *relevant*, so
        borderline drafts are never silently dropped.
        """
        prompt = CLASSIFY_PROMPT.format(title=title, abstract=abstract[:2000])
        try:
            resp = self.client.chat(
                model=self.config.ollama_classify_model,
                messages=[{"role": "user", "content": prompt}],
                # temperature 0 for determinism; num_predict 10 because we
                # only need a one-word yes/no answer.
                options={"temperature": 0.0, "num_predict": 10},
            )
            answer = resp["message"]["content"].strip().lower()
            return answer.startswith("yes")
        except Exception as e:
            console.print(f"[yellow]Chat classify failed: {e}, defaulting to relevant[/yellow]")
            return True # err on the side of inclusion
    def classify(self, title: str, abstract: str) -> tuple[bool, float, str]:
        """Classify a draft as AI-relevant.

        Returns:
            (is_relevant, similarity_score, method)
            method is one of: "embedding_accept", "embedding_reject", "chat_yes", "chat_no"
        """
        text = f"{title}\n{abstract}"
        ref = self._get_reference_embedding()
        emb = self._embed(text)
        sim = self._cosine_similarity(emb, ref)
        if sim >= SIMILARITY_ACCEPT:
            return True, sim, "embedding_accept"
        if sim <= SIMILARITY_REJECT:
            return False, sim, "embedding_reject"
        # Borderline — ask chat model
        is_relevant = self._chat_classify(title, abstract)
        method = "chat_yes" if is_relevant else "chat_no"
        return is_relevant, sim, method
    def classify_batch(
        self, drafts: list[dict], verbose: bool = True
    ) -> tuple[list[dict], list[dict]]:
        """Classify a batch of drafts.

        Args:
            drafts: list of dicts with at least 'name', 'title', 'abstract' keys
            verbose: print progress every 10 drafts plus a final summary
        Returns:
            (relevant, irrelevant) — two lists of draft dicts
        """
        relevant = []
        irrelevant = []
        # Per-method decision counts for the verbose summary line.
        stats = {"embedding_accept": 0, "embedding_reject": 0, "chat_yes": 0, "chat_no": 0}
        for i, d in enumerate(drafts):
            is_rel, sim, method = self.classify(
                d.get("title", ""), d.get("abstract", "")
            )
            stats[method] += 1
            if verbose and (i + 1) % 10 == 0:
                console.print(f" Classified {i + 1}/{len(drafts)}...")
            if is_rel:
                relevant.append(d)
            else:
                irrelevant.append(d)
        if verbose:
            console.print(
                f"\n [green]Relevant: {len(relevant)}[/green] "
                f"[red]Irrelevant: {len(irrelevant)}[/red]\n"
                f" Embedding accept: {stats['embedding_accept']} "
                f" Embedding reject: {stats['embedding_reject']}\n"
                f" Chat yes: {stats['chat_yes']} "
                f" Chat no: {stats['chat_no']}"
            )
        return relevant, irrelevant

View File

@@ -297,8 +297,9 @@ class Database:
def upsert_draft(self, draft: Draft) -> None:
self.conn.execute(
"""INSERT INTO drafts (name, rev, title, abstract, time, dt_id, pages, words,
"group", group_uri, expires, ad, shepherd, states, full_text, categories, tags, fetched_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"group", group_uri, expires, ad, shepherd, states, full_text, categories, tags, fetched_at,
source, source_id, source_url, doc_status)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(name) DO UPDATE SET
rev=excluded.rev, title=excluded.title, abstract=excluded.abstract,
time=excluded.time, dt_id=excluded.dt_id, pages=excluded.pages,
@@ -307,7 +308,9 @@ class Database:
states=excluded.states,
full_text=COALESCE(excluded.full_text, full_text),
categories=excluded.categories, tags=excluded.tags,
fetched_at=excluded.fetched_at
fetched_at=excluded.fetched_at,
source=excluded.source, source_id=excluded.source_id,
source_url=excluded.source_url, doc_status=excluded.doc_status
""",
(
draft.name, draft.rev, draft.title, draft.abstract, draft.time,
@@ -316,6 +319,7 @@ class Database:
json.dumps(draft.states), draft.full_text,
json.dumps(draft.categories), json.dumps(draft.tags),
draft.fetched_at or datetime.now(timezone.utc).isoformat(),
draft.source, draft.source_id, draft.source_url, draft.doc_status,
),
)
self.conn.commit()

244
src/webui/analytics.py Normal file
View File

@@ -0,0 +1,244 @@
"""Lightweight, GDPR-compliant analytics using SQLite.
No cookies, no personal data, no consent needed.
Visitor uniqueness is estimated via daily-salted IP hash (not stored raw).
Data lives in a separate analytics.db to keep the main DB clean.
"""
from __future__ import annotations
import hashlib
import sqlite3
import time
from collections import Counter, defaultdict
from datetime import date, datetime, timedelta
from pathlib import Path
from urllib.parse import urlparse
from flask import Flask, request, g
_SCHEMA = """
CREATE TABLE IF NOT EXISTS page_views (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%S', 'now')),
date TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d', 'now')),
path TEXT NOT NULL,
referrer TEXT,
visitor TEXT,
ua_type TEXT
);
CREATE INDEX IF NOT EXISTS idx_pv_date ON page_views(date);
CREATE INDEX IF NOT EXISTS idx_pv_path ON page_views(path);
CREATE TABLE IF NOT EXISTS downloads (
id INTEGER PRIMARY KEY AUTOINCREMENT,
ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%S', 'now')),
date TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d', 'now')),
file_type TEXT NOT NULL,
visitor TEXT
);
CREATE INDEX IF NOT EXISTS idx_dl_date ON downloads(date);
"""
# Daily salt rotates so yesterday's hashes can't be correlated with today's
_daily_salt: tuple[str, str] = ("", "")
def _get_salt() -> str:
    """Return today's visitor-hash salt, regenerating it when the date rolls over."""
    global _daily_salt
    today = date.today().isoformat()
    cached_day, _ = _daily_salt
    if cached_day != today:
        digest = hashlib.sha256(f"ietf-analytics-{today}".encode()).hexdigest()
        _daily_salt = (today, digest[:16])
    return _daily_salt[1]
def _hash_visitor(ip: str) -> str:
    """Create a daily-rotating hash from IP. Cannot be reversed or correlated across days."""
    digest = hashlib.sha256(f"{_get_salt()}:{ip}".encode()).hexdigest()
    return digest[:12]
def _classify_ua(ua: str) -> str:
"""Rough bot/browser classification."""
ua_lower = ua.lower()
if any(b in ua_lower for b in ("bot", "spider", "crawl", "slurp", "wget", "curl", "python-requests")):
return "bot"
if "mobile" in ua_lower:
return "mobile"
return "browser"
def _get_analytics_db() -> sqlite3.Connection:
    """Get or create the analytics DB connection for this request.

    The DB path is normally injected by init_analytics() onto
    ``g._analytics_db_path`` before this runs.

    BUG FIX: the previous version unconditionally evaluated
    ``request.app_root`` — an attribute Flask's request object does not
    have — before ever consulting ``g``, so the first call raised
    AttributeError. The injected path is now checked first, with a plain
    CWD-relative fallback only for callers that skipped init_analytics().
    """
    if "analytics_db" not in g:
        if hasattr(g, "_analytics_db_path"):
            db_path = Path(g._analytics_db_path)
        else:
            # Fallback when init_analytics() never ran: keep the DB under
            # ./data relative to the working directory.
            db_path = Path(".") / "data" / "analytics.db"
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(db_path))
        conn.row_factory = sqlite3.Row
        # Idempotent schema creation on every new connection.
        conn.executescript(_SCHEMA)
        g.analytics_db = conn
    return g.analytics_db
# Paths to skip (static assets, API calls, etc.) — requests whose path
# starts with any of these prefixes are never recorded as page views.
_SKIP_PREFIXES = ("/static/", "/api/", "/favicon", "/robots.txt", "/admin/")
def init_analytics(app: Flask, db_path: str | None = None):
    """Register analytics hooks on the Flask app.

    Installs a before_request hook that records page views (and Obsidian
    export downloads) into the analytics DB, and a teardown hook that
    closes the per-request connection. All tracking errors are swallowed
    so analytics can never break request handling.

    Args:
        app: the Flask application to instrument.
        db_path: explicit analytics DB location; defaults to
            ``<app root>/../../data/analytics.db``.
    """
    # Resolve the DB location once at init time; each request stashes it on
    # `g` so _get_analytics_db() can open the connection lazily.
    _resolved_db_path = Path(db_path) if db_path else (
        Path(app.root_path).parent.parent / "data" / "analytics.db"
    )
    @app.before_request
    def track_pageview():
        path = request.path
        # Skip static/API/admin routes
        if any(path.startswith(p) for p in _SKIP_PREFIXES):
            return
        g._analytics_db_path = _resolved_db_path
        try:
            conn = _get_analytics_db()
            ip = request.remote_addr or "unknown"
            visitor = _hash_visitor(ip)
            ua = request.headers.get("User-Agent", "")
            ua_type = _classify_ua(ua)
            # Skip bots from page view counts (still track downloads)
            if ua_type == "bot" and path != "/export/obsidian":
                return
            referrer = request.headers.get("Referer", "")
            # Only keep the domain of referrer
            if referrer:
                try:
                    parsed = urlparse(referrer)
                    referrer = parsed.netloc or ""
                except Exception:
                    referrer = ""
            # Track downloads separately
            if path == "/export/obsidian":
                conn.execute(
                    "INSERT INTO downloads (file_type, visitor) VALUES (?, ?)",
                    ("obsidian", visitor),
                )
                conn.commit()
            # NOTE(review): a download request falls through here and is ALSO
            # recorded as a page view — confirm this double count is intended.
            conn.execute(
                "INSERT INTO page_views (path, referrer, visitor, ua_type) VALUES (?, ?, ?, ?)",
                (path, referrer, visitor, ua_type),
            )
            conn.commit()
        except Exception:
            pass # Analytics should never break the app
    @app.teardown_appcontext
    def close_analytics_db(exception=None):
        # Close the per-request analytics connection, if one was opened.
        conn = g.pop("analytics_db", None)
        if conn is not None:
            conn.close()
def get_analytics_data(db_path: str | Path) -> dict:
    """Query analytics data for the dashboard. Returns dicts ready for rendering."""
    conn = sqlite3.connect(str(db_path))
    conn.row_factory = sqlite3.Row
    conn.executescript(_SCHEMA)

    def scalar(sql: str, *params) -> int:
        # Single-value aggregate helper.
        return conn.execute(sql, params).fetchone()[0]

    today = date.today()
    today_iso = today.isoformat()
    week_ago = (today - timedelta(days=7)).isoformat()
    month_ago = (today - timedelta(days=30)).isoformat()

    # --- Overall stats ---
    stats = {
        "total_views": scalar("SELECT COUNT(*) FROM page_views"),
        "total_visitors": scalar("SELECT COUNT(DISTINCT visitor || date) FROM page_views"),
        "total_downloads": scalar("SELECT COUNT(*) FROM downloads"),
        "today_views": scalar("SELECT COUNT(*) FROM page_views WHERE date = ?", today_iso),
        "today_visitors": scalar("SELECT COUNT(DISTINCT visitor) FROM page_views WHERE date = ?", today_iso),
        "week_views": scalar("SELECT COUNT(*) FROM page_views WHERE date >= ?", week_ago),
        "month_views": scalar("SELECT COUNT(*) FROM page_views WHERE date >= ?", month_ago),
    }

    # --- Daily views (last 30 days) ---
    daily_rows = conn.execute(
        "SELECT date, COUNT(*) as views, COUNT(DISTINCT visitor) as visitors "
        "FROM page_views WHERE date >= ? GROUP BY date ORDER BY date",
        (month_ago,),
    ).fetchall()
    daily = {
        "dates": [row["date"] for row in daily_rows],
        "views": [row["views"] for row in daily_rows],
        "visitors": [row["visitors"] for row in daily_rows],
    }

    # --- Top pages (last 30 days) ---
    top_pages = [
        {"path": row["path"], "views": row["views"], "visitors": row["visitors"]}
        for row in conn.execute(
            "SELECT path, COUNT(*) as views, COUNT(DISTINCT visitor) as visitors "
            "FROM page_views WHERE date >= ? GROUP BY path ORDER BY views DESC LIMIT 20",
            (month_ago,),
        )
    ]

    # --- Top referrers (last 30 days) ---
    top_referrers = [
        {"referrer": row["referrer"], "count": row["count"]}
        for row in conn.execute(
            "SELECT referrer, COUNT(*) as count FROM page_views "
            "WHERE date >= ? AND referrer != '' GROUP BY referrer ORDER BY count DESC LIMIT 15",
            (month_ago,),
        )
    ]

    # --- Downloads over time ---
    dl_rows = conn.execute(
        "SELECT date, COUNT(*) as count FROM downloads GROUP BY date ORDER BY date"
    ).fetchall()
    downloads_daily = {
        "dates": [row["date"] for row in dl_rows],
        "counts": [row["count"] for row in dl_rows],
    }

    # --- Hourly pattern (last 7 days) ---
    by_hour = {
        row["hour"]: row["views"]
        for row in conn.execute(
            "SELECT CAST(strftime('%H', ts) AS INTEGER) as hour, COUNT(*) as views "
            "FROM page_views WHERE date >= ? GROUP BY hour ORDER BY hour",
            (week_ago,),
        )
    }
    hourly_full = {"hours": list(range(24)), "views": [by_hour.get(h, 0) for h in range(24)]}

    conn.close()
    return {
        "stats": stats,
        "daily": daily,
        "top_pages": top_pages,
        "top_referrers": top_referrers,
        "downloads_daily": downloads_daily,
        "hourly": hourly_full,
    }

55
src/webui/auth.py Normal file
View File

@@ -0,0 +1,55 @@
"""Admin authentication with two run modes.
Production (default):
python src/webui/app.py
All admin routes return 404. No way to access private features.
Development:
python src/webui/app.py --dev
Every request is auto-authenticated as admin. No login needed.
The mode is set once at startup and cannot be changed at runtime.
"""
from __future__ import annotations
from functools import wraps
from flask import abort, g
# Module-level flag set by init_auth()
_dev_mode: bool = False  # True only when the app was started with --dev
_initialized: bool = False  # guards against registering Flask hooks twice
def is_admin() -> bool:
    """Check if the current request has admin access.

    Admin access exists only when the app was started in dev mode
    (init_auth(dev=True)); in production this always returns False, so
    admin routes 404 for everyone.
    """
    return _dev_mode
def admin_required(f):
    """Decorator: returns 404 for non-admin users so routes stay hidden."""
    @wraps(f)
    def wrapper(*args, **kwargs):
        # A plain 404 (not 403) keeps the route's existence invisible.
        if is_admin():
            return f(*args, **kwargs)
        abort(404)
    return wrapper
def init_auth(app, dev: bool = False):
    """Set the auth mode and register Flask hooks (once only).

    Args:
        app: the Flask application to instrument.
        dev: True to auto-authenticate every request as admin (dev mode).

    BUG FIX: previously ``_dev_mode = dev`` executed *before* the
    ``_initialized`` guard, so a second call could silently flip the auth
    mode at runtime — contradicting the module contract that the mode is
    fixed at startup. The guard now runs first, making repeat calls
    complete no-ops.
    """
    global _dev_mode, _initialized
    if _initialized:
        return
    _initialized = True
    _dev_mode = dev
    @app.before_request
    def set_admin_flag():
        # Cache the per-request admin flag on g for handlers.
        g.is_admin = is_admin()
    @app.context_processor
    def inject_admin():
        # Expose `is_admin` to all templates.
        return {"is_admin": g.get("is_admin", False)}

View File

@@ -0,0 +1,508 @@
"""Export research data as an Obsidian-compatible vault (ZIP).
Generates interlinked markdown files with YAML frontmatter,
[[wikilinks]], #tags, and Mermaid diagrams that Obsidian renders natively.
"""
from __future__ import annotations
import io
import zipfile
from collections import Counter, defaultdict
from datetime import date
from ietf_analyzer.db import Database
from webui.data import _extract_month
def _safe_filename(name: str) -> str:
"""Sanitize a string for use as a filename."""
return name.replace("/", "-").replace("\\", "-").replace(":", "-").replace('"', "")
def _score_bar(val: float, max_val: float = 5.0) -> str:
"""Render a simple text progress bar."""
filled = round(val / max_val * 10)
return "`" + "\u2588" * filled + "\u2591" * (10 - filled) + f"` {val}/{max_val}"
def _mermaid_pie(title: str, data: dict[str, int], limit: int = 12) -> str:
"""Generate a Mermaid pie chart."""
items = list(data.items())[:limit]
if not items:
return ""
lines = [f'```mermaid\npie title {title}']
for label, count in items:
safe_label = label.replace('"', "'")
lines.append(f' "{safe_label}" : {count}')
lines.append("```")
return "\n".join(lines)
def _mermaid_bar(title: str, data: dict[str, float], limit: int = 15) -> str:
"""Generate a Mermaid xychart bar chart."""
items = list(data.items())[:limit]
if not items:
return ""
labels = [f'"{k[:20]}"' for k, _ in items]
values = [str(round(v, 1)) for _, v in items]
return f"""```mermaid
xychart-beta
title "{title}"
x-axis [{", ".join(labels)}]
y-axis "Score"
bar [{", ".join(values)}]
```"""
def _mermaid_timeline_chart(monthly: dict[str, int]) -> str:
"""Generate a Mermaid xychart for submissions over time."""
if len(monthly) < 2:
return ""
months = sorted(monthly.keys())
# Show every 3rd label to avoid clutter
labels = []
for i, m in enumerate(months):
if i % 3 == 0:
labels.append(f'"{m}"')
else:
labels.append('" "')
values = [str(monthly[m]) for m in months]
return f"""```mermaid
xychart-beta
title "Draft Submissions Over Time"
x-axis [{", ".join(labels)}]
y-axis "Drafts"
bar [{", ".join(values)}]
```"""
def build_obsidian_vault(db: Database) -> bytes:
    """Build a ZIP file containing an Obsidian vault with all research data.

    Produces a Dashboard, one note per draft / author / category, and a set
    of Analysis notes (score distribution, top rated, ideas, timeline,
    glossary), all cross-linked with [[wikilinks]] so Obsidian's graph view
    works out of the box.

    Args:
        db: project Database providing drafts, ratings, ideas, and authors.
    Returns:
        The ZIP archive as raw bytes.

    Fixes over the previous revision:
    - drafts with a None title no longer crash the category table
      (``None[:60]`` raised TypeError) or render "None" in author notes;
    - garbled score-range text ("1.05.0", "N:15") restored to
      "1.0–5.0" / "N:1–5";
    - removed unused locals (``score_buckets``/``score_dist`` dashboard
      histogram that was never rendered; ``author_info`` map never read).
    """
    buf = io.BytesIO()
    prefix = "IETF-AI-Agent-Drafts"
    pairs = db.drafts_with_ratings(limit=2000)
    all_drafts_list = db.list_drafts(limit=2000, order_by="time DESC")
    draft_map = {d.name: d for d in all_drafts_list}
    all_ideas = db.all_ideas()
    all_authors = db.top_authors(limit=500)
    # Build lookup maps: category counts/membership and per-draft score/rating.
    cat_counts: Counter = Counter()
    cat_drafts: dict[str, list[str]] = defaultdict(list)
    score_map: dict[str, float] = {}
    rating_map: dict[str, object] = {}
    for d, r in pairs:
        score_map[d.name] = r.composite_score
        rating_map[d.name] = r
        for cat in r.categories:
            cat_counts[cat] += 1
            cat_drafts[cat].append(d.name)
    # Monthly submission counts
    monthly: Counter = Counter()
    for d in all_drafts_list:
        monthly[_extract_month(d.time)] += 1
    # Ideas by draft
    ideas_by_draft: dict[str, list[dict]] = defaultdict(list)
    for idea in all_ideas:
        ideas_by_draft[idea.get("draft_name", "")].append(idea)
    # Draft name -> author names (used for draft notes and co-author graphs).
    author_drafts: dict[str, list[str]] = defaultdict(list)
    for name, aff, cnt, drafts in all_authors:
        for dn in drafts:
            author_drafts[dn].append(name)
    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
        # --- Dashboard.md ---
        top_rated = sorted(pairs, key=lambda p: p[1].composite_score, reverse=True)[:15]
        top_table = "| Draft | Score | Category |\n|---|---|---|\n"
        for d, r in top_rated:
            score = r.composite_score
            cat = r.categories[0] if r.categories else ""
            top_table += f"| [[{d.name}]] | **{score:.2f}** | {cat} |\n"
        cat_pie = _mermaid_pie("Drafts by Category", dict(cat_counts.most_common(12)))
        timeline_chart = _mermaid_timeline_chart(dict(sorted(monthly.items())))
        dashboard = f"""---
tags: [dashboard, ietf, ai-agents]
generated: {date.today().isoformat()}
---
# IETF AI/Agent Draft Analysis
> Automated analysis of {len(all_drafts_list)} Internet-Drafts on AI and agent topics.
> Generated by [IETF Draft Analyzer](https://github.com) on {date.today().isoformat()}.
## Key Stats
| Metric | Value |
|---|---|
| Total Drafts | **{len(all_drafts_list)}** |
| Rated Drafts | **{len(pairs)}** |
| Authors | **{len(all_authors)}** |
| Ideas Extracted | **{len(all_ideas)}** |
| Categories | **{len(cat_counts)}** |
## Categories
{cat_pie}
### Category Index
{chr(10).join(f"- [[{cat}]] ({count} drafts)" for cat, count in cat_counts.most_common())}
## Submissions Over Time
{timeline_chart}
## Top Rated Drafts
{top_table}
## Navigation
- **[[Categories/index|Categories]]** — Browse by topic
- **[[Authors/index|Authors]]** — Browse by author
- **[[Analysis/Score Distribution|Score Distribution]]** — Rating analytics
- **[[Analysis/Top Rated|Top Rated]]** — Highest-scored drafts
- **[[Analysis/Ideas Overview|Ideas]]** — Extracted technical ideas
- **[[Analysis/Glossary|Glossary]]** — Terms, abbreviations, and scoring methodology
"""
        zf.writestr(f"{prefix}/Dashboard.md", dashboard)
        # --- Individual Draft Notes ---
        for d_obj in all_drafts_list:
            name = d_obj.name
            draft = draft_map.get(name, d_obj)
            r = rating_map.get(name)
            ideas = ideas_by_draft.get(name, [])
            authors = author_drafts.get(name, [])
            month = _extract_month(draft.time)
            # Frontmatter (YAML); embedded double quotes become single quotes.
            fm_lines = [
                "---",
                f'title: "{(draft.title or name).replace(chr(34), chr(39))}"',
                f"date: {draft.time or 'unknown'}",
                f"rev: {draft.rev or '00'}",
            ]
            if r:
                fm_lines.append(f"score: {r.composite_score:.2f}")
                fm_lines.append(f"novelty: {r.novelty}")
                fm_lines.append(f"maturity: {r.maturity}")
                fm_lines.append(f"overlap: {r.overlap}")
                fm_lines.append(f"momentum: {r.momentum}")
                fm_lines.append(f"relevance: {r.relevance}")
                if r.categories:
                    fm_lines.append(f"categories: [{', '.join(r.categories)}]")
            if authors:
                # Commas stripped so author names don't break the YAML list.
                fm_lines.append(f"authors: [{', '.join(a.replace(',', '') for a in authors)}]")
            fm_lines.append(f"tags: [draft, ietf, {month}]")
            fm_lines.append("---")
            frontmatter = "\n".join(fm_lines)
            # Body
            body = f"\n# {draft.title or name}\n\n"
            body += f"**{name}** | rev {draft.rev or '00'} | {draft.time or 'unknown'}\n\n"
            if authors:
                body += "## Authors\n\n"
                body += "\n".join(f"- [[{a}]]" for a in authors) + "\n\n"
            if r:
                body += "## Rating\n\n"
                body += f"**Composite Score: {r.composite_score:.2f}**\n\n"
                body += "| Dimension | Score |\n|---|---|\n"
                body += f"| Novelty | {_score_bar(r.novelty)} |\n"
                body += f"| Maturity | {_score_bar(r.maturity)} |\n"
                body += f"| Overlap | {_score_bar(r.overlap)} |\n"
                body += f"| Momentum | {_score_bar(r.momentum)} |\n"
                body += f"| Relevance | {_score_bar(r.relevance)} |\n\n"
                if r.summary:
                    body += f"> {r.summary}\n\n"
                if r.categories:
                    body += "**Categories:** " + ", ".join(f"[[{c}]]" for c in r.categories) + "\n\n"
            if draft.abstract:
                body += "## Abstract\n\n"
                body += draft.abstract + "\n\n"
            if ideas:
                body += f"## Extracted Ideas ({len(ideas)})\n\n"
                for idea in ideas:
                    novelty = f" `N:{idea.get('novelty_score', '?')}`" if idea.get("novelty_score") else ""
                    itype = f" *{idea.get('type', '')}*" if idea.get("type") else ""
                    body += f"- **{idea.get('title', 'Untitled')}**{itype}{novelty}\n"
                    if idea.get("description"):
                        body += f" {idea['description']}\n"
                body += "\n"
            body += "## Links\n\n"
            body += f"- [View on IETF Datatracker](https://datatracker.ietf.org/doc/{name}/)\n"
            if draft.rev:
                body += f"- [Read Full Text](https://www.ietf.org/archive/id/{name}-{draft.rev}.txt)\n"
            content = frontmatter + body
            zf.writestr(f"{prefix}/Drafts/{_safe_filename(name)}.md", content)
        # --- Author Notes ---
        author_index_lines = [
            "---\ntags: [index, authors]\n---\n",
            "# Authors\n\n",
            f"**{len(all_authors)}** authors contributing to AI/agent Internet-Drafts.\n\n",
            "| Author | Affiliation | Drafts |\n|---|---|---|\n",
        ]
        for name, aff, cnt, drafts in sorted(all_authors, key=lambda x: x[2], reverse=True):
            author_index_lines.append(f"| [[{name}]] | {aff or ''} | {cnt} |\n")
        zf.writestr(f"{prefix}/Authors/index.md", "".join(author_index_lines))
        for name, aff, cnt, drafts in all_authors:
            fm = f"---\ntags: [author]\naffiliation: \"{aff or ''}\"\ndraft_count: {cnt}\n---\n"
            body = f"\n# {name}\n\n"
            if aff:
                body += f"**Affiliation:** {aff}\n\n"
            body += f"## Drafts ({cnt})\n\n"
            for dn in drafts:
                d = draft_map.get(dn)
                # BUG FIX: was `d.title`, which rendered "None" for
                # title-less drafts; fall back to the draft name.
                title = (d.title or dn) if d else dn
                score = score_map.get(dn, "")
                score_str = f" (score: {score:.2f})" if score else ""
                body += f"- [[{dn}|{title}]]{score_str}\n"
            # Co-authors
            coauthors: Counter = Counter()
            for dn in drafts:
                for other in author_drafts.get(dn, []):
                    if other != name:
                        coauthors[other] += 1
            if coauthors:
                body += "\n## Co-authors\n\n"
                for co, shared in coauthors.most_common(20):
                    body += f"- [[{co}]] ({shared} shared)\n"
            zf.writestr(f"{prefix}/Authors/{_safe_filename(name)}.md", fm + body)
        # --- Category Notes ---
        cat_index_lines = [
            "---\ntags: [index, categories]\n---\n",
            "# Categories\n\n",
            _mermaid_pie("Draft Distribution", dict(cat_counts.most_common(12))),
            "\n\n",
        ]
        for cat, count in cat_counts.most_common():
            cat_index_lines.append(f"- [[{cat}]] — {count} drafts\n")
        zf.writestr(f"{prefix}/Categories/index.md", "".join(cat_index_lines))
        for cat, count in cat_counts.most_common():
            fm = f"---\ntags: [category]\ndraft_count: {count}\n---\n"
            body = f"\n# {cat}\n\n"
            body += f"**{count} drafts** in this category.\n\n"
            # Table of drafts sorted by score
            draft_names = cat_drafts[cat]
            scored = [(dn, score_map.get(dn, 0)) for dn in draft_names]
            scored.sort(key=lambda x: x[1], reverse=True)
            body += "| Draft | Score |\n|---|---|\n"
            for dn, score in scored:
                d = draft_map.get(dn)
                # BUG FIX: `d.title[:60]` raised TypeError when title is None.
                title = (d.title or dn)[:60] if d else dn
                body += f"| [[{dn}|{title}]] | {score:.2f} |\n"
            zf.writestr(f"{prefix}/Categories/{_safe_filename(cat)}.md", fm + body)
        # --- Analysis Notes ---
        # Score Distribution
        score_lines = [
            "---\ntags: [analysis]\n---\n",
            "\n# Score Distribution\n\n",
            "Composite scores across all rated drafts (1.0–5.0 scale).\n\n",
        ]
        # Mermaid bar chart of score buckets
        buckets: dict[str, int] = defaultdict(int)
        for _, r in pairs:
            b = f"{r.composite_score:.1f}"
            buckets[b] += 1
        sorted_buckets = dict(sorted(buckets.items()))
        if sorted_buckets:
            labels = [f'"{k}"' for k in sorted_buckets.keys()]
            values = [str(v) for v in sorted_buckets.values()]
            score_lines.append(f"""```mermaid
xychart-beta
title "Score Distribution"
x-axis [{", ".join(labels)}]
y-axis "Count"
bar [{", ".join(values)}]
```\n\n""")
        # Dimension averages
        dims = {"Novelty": [], "Maturity": [], "Overlap": [], "Momentum": [], "Relevance": []}
        for _, r in pairs:
            dims["Novelty"].append(r.novelty)
            dims["Maturity"].append(r.maturity)
            dims["Overlap"].append(r.overlap)
            dims["Momentum"].append(r.momentum)
            dims["Relevance"].append(r.relevance)
        score_lines.append("## Dimension Averages\n\n")
        score_lines.append("| Dimension | Average | Min | Max |\n|---|---|---|---|\n")
        for dim, vals in dims.items():
            if vals:
                avg = sum(vals) / len(vals)
                score_lines.append(f"| {dim} | {avg:.2f} | {min(vals)} | {max(vals)} |\n")
        zf.writestr(f"{prefix}/Analysis/Score Distribution.md", "".join(score_lines))
        # Top Rated (top_rated already holds at most 15 entries; the [:30]
        # slice is a harmless upper bound kept for future expansion)
        top_lines = [
            "---\ntags: [analysis]\n---\n",
            "\n# Top Rated Drafts\n\n",
            "Drafts ranked by composite score.\n\n",
            "| # | Draft | Score | Novelty | Maturity | Overlap | Momentum | Relevance | Category |\n",
            "|---|---|---|---|---|---|---|---|---|\n",
        ]
        for i, (d, r) in enumerate(top_rated[:30], 1):
            cat = r.categories[0] if r.categories else ""
            top_lines.append(
                f"| {i} | [[{d.name}|{(d.title or d.name)[:45]}]] | **{r.composite_score:.2f}** | "
                f"{r.novelty} | {r.maturity} | {r.overlap} | {r.momentum} | {r.relevance} | {cat} |\n"
            )
        zf.writestr(f"{prefix}/Analysis/Top Rated.md", "".join(top_lines))
        # Ideas Overview
        type_counts = Counter(i.get("type", "other") or "other" for i in all_ideas)
        ideas_lines = [
            "---\ntags: [analysis, ideas]\n---\n",
            "\n# Extracted Ideas\n\n",
            f"**{len(all_ideas)}** technical ideas extracted from rated drafts.\n\n",
            _mermaid_pie("Ideas by Type", dict(type_counts.most_common(10))),
            "\n\n## By Type\n\n",
        ]
        for itype, count in type_counts.most_common():
            ideas_lines.append(f"- **{itype}**: {count} ideas\n")
        ideas_lines.append("\n## Recent Ideas\n\n")
        for idea in all_ideas[:50]:
            dn = idea.get("draft_name", "")
            novelty = f" `N:{idea.get('novelty_score')}`" if idea.get("novelty_score") else ""
            ideas_lines.append(f"- **{idea.get('title', 'Untitled')}**{novelty} — [[{dn}]]\n")
        if len(all_ideas) > 50:
            ideas_lines.append(f"\n*...and {len(all_ideas) - 50} more. See individual draft notes.*\n")
        zf.writestr(f"{prefix}/Analysis/Ideas Overview.md", "".join(ideas_lines))
        # Timeline
        timeline_lines = [
            "---\ntags: [analysis, timeline]\n---\n",
            "\n# Timeline\n\n",
            "Draft submission activity over time.\n\n",
            _mermaid_timeline_chart(dict(sorted(monthly.items()))),
            "\n\n## Monthly Counts\n\n",
            "| Month | Drafts |\n|---|---|\n",
        ]
        for m in sorted(monthly.keys()):
            timeline_lines.append(f"| {m} | {monthly[m]} |\n")
        zf.writestr(f"{prefix}/Analysis/Timeline.md", "".join(timeline_lines))
        # --- Glossary ---
        glossary = """---
tags: [reference, glossary]
---
# Glossary
Reference for all terms, abbreviations, and scoring dimensions used in this vault.
## Scoring Dimensions
Each draft is rated by Claude AI on five dimensions, scored from 1 (lowest) to 5 (highest).
| Dimension | Description |
|---|---|
| **Novelty** | How original is this draft? Does it introduce new ideas, or rehash existing approaches? High = genuinely new contribution. |
| **Maturity** | How complete and well-developed is the specification? High = detailed protocol, clear data formats, ready for implementation. Low = early sketch or position paper. |
| **Overlap** | How much does this draft duplicate existing work? High overlap (5) = very similar to other drafts. Low overlap (1) = unique in the landscape. *Note: In composite score, this is inverted (5 - overlap) so lower overlap contributes positively.* |
| **Momentum** | Is this draft gaining traction? High = active revisions, working group adoption, multiple authors/organizations. Low = single submission, no updates. |
| **Relevance** | How relevant is this draft to AI agent infrastructure? High = directly addresses agent-to-agent communication, identity, authorization. Low = tangentially related. |
## Composite Score
The **composite score** (1.0–5.0) is calculated as:
```
score = (novelty + maturity + (5 - overlap) + momentum + relevance) / 5
```
Overlap is inverted because a *lower* overlap is better (more unique).
## Score Bars
Score bars visualize ratings: `\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2591\u2591\u2591` = 3.5/5.0
- `\u2588` (filled) = earned score
- `\u2591` (empty) = remaining
## Other Terms
| Term | Meaning |
|---|---|
| **Draft / I-D** | Internet-Draft — a working document submitted to the IETF. Not yet an RFC (standard). |
| **RFC** | Request for Comments — a published IETF standard or informational document. |
| **Working Group (WG)** | An IETF group chartered to work on a specific topic (e.g., WIMSE, OAuth). |
| **Category** | Topic classification assigned by Claude during analysis (e.g., "A2A protocols", "AI safety/alignment"). A draft can belong to multiple categories. |
| **Idea** | A distinct technical concept extracted from a draft by Claude. Each idea has a type (protocol, mechanism, framework, etc.) and a novelty score. |
| **Novelty Score (N:1–5)** | Per-idea originality rating. Shown as `N:4` next to ideas. 5 = completely new concept, 1 = well-known approach. |
| **Gap** | An area identified where no existing draft adequately addresses a need in the AI agent ecosystem. |
| **Affiliation** | The organization an author is associated with (from IETF Datatracker records). |
| **Co-authorship** | Two authors who appear together on at least one draft. |
| **Datatracker** | The IETF's official system for tracking Internet-Drafts, RFCs, and working groups (datatracker.ietf.org). |
"""
        zf.writestr(f"{prefix}/Analysis/Glossary.md", glossary)
        # --- .obsidian settings for graph colors ---
        graph_json = """{
  "collapse-filter": false,
  "search": "",
  "showTags": true,
  "showAttachments": false,
  "hideUnresolved": false,
  "showOrphans": true,
  "collapse-color-groups": false,
  "colorGroups": [
    {"query": "path:Drafts", "color": {"a": 1, "rgb": 3444735}},
    {"query": "path:Authors", "color": {"a": 1, "rgb": 10092441}},
    {"query": "path:Categories", "color": {"a": 1, "rgb": 16744448}},
    {"query": "path:Analysis", "color": {"a": 1, "rgb": 2293541}}
  ],
  "collapse-display": false,
  "showArrow": true,
  "textFadeMultiplier": 0,
  "nodeSizeMultiplier": 1.2,
  "lineSizeMultiplier": 1,
  "collapse-forces": true,
  "centerStrength": 0.5,
  "repelStrength": 10,
  "linkStrength": 1,
  "linkDistance": 100
}"""
        zf.writestr(f"{prefix}/.obsidian/graph.json", graph_json)
    buf.seek(0)
    return buf.getvalue()