Run pipeline, write Post 08, commit untracked files
Pipeline: - Extract ideas for 38 new drafts → 462 ideas total - Convergence analysis: 132 cross-org convergent ideas (33% rate) - Fetch authors for 102 drafts → 709 authors (up from 403) - Refresh gap analysis: 12 gaps across full 474-draft corpus - Update verified counts with new totals Post 08: - Complete rewrite of "Agents Building the Agent Analysis" (2,953 words) - Covers 3 phases: writing team → review cycle → fix cycle - Meta-irony table mapping team coordination to IETF gap names - Specific examples from dev journal (SQL injection, consent conflation, ideas mismatch) Untracked files committed: - scripts/: backfill-wg-names, classify-unrated, compare-classifiers, download-relevant-text, run-webui - src/ietf_analyzer/classifier.py: two-stage Ollama classifier - src/webui/: analytics (GDPR-compliant), auth, obsidian_export - tests/test_obsidian_export.py (10 tests) - data/reports/: wg-analysis, generated draft for gap #37 Housekeeping: - .gitignore: exclude LaTeX artifacts, stale DBs, analytics.db Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
244
src/webui/analytics.py
Normal file
244
src/webui/analytics.py
Normal file
@@ -0,0 +1,244 @@
|
||||
"""Lightweight, GDPR-compliant analytics using SQLite.
|
||||
|
||||
No cookies, no personal data, no consent needed.
|
||||
Visitor uniqueness is estimated via daily-salted IP hash (not stored raw).
|
||||
Data lives in a separate analytics.db to keep the main DB clean.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import sqlite3
|
||||
import time
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import date, datetime, timedelta
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from flask import Flask, request, g
|
||||
|
||||
# SQL schema for the analytics database, applied idempotently on every
# connect (all statements are IF NOT EXISTS).
# Two tables:
#   page_views — one row per tracked request; ts/date default to insert time
#                so callers only supply the payload columns.
#   downloads  — one row per vault export, tracked even for bots.
# Indexes cover the date-range and per-path queries in get_analytics_data().
_SCHEMA = """
CREATE TABLE IF NOT EXISTS page_views (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%S', 'now')),
    date TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d', 'now')),
    path TEXT NOT NULL,
    referrer TEXT,
    visitor TEXT,
    ua_type TEXT
);

CREATE INDEX IF NOT EXISTS idx_pv_date ON page_views(date);
CREATE INDEX IF NOT EXISTS idx_pv_path ON page_views(path);

CREATE TABLE IF NOT EXISTS downloads (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ts TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%S', 'now')),
    date TEXT NOT NULL DEFAULT (strftime('%Y-%m-%d', 'now')),
    file_type TEXT NOT NULL,
    visitor TEXT
);

CREATE INDEX IF NOT EXISTS idx_dl_date ON downloads(date);
"""

# Daily salt rotates so yesterday's hashes can't be correlated with today's
# Cached as (iso_date, salt_hex); lazily refreshed when the date changes.
_daily_salt: tuple[str, str] = ("", "")
|
||||
|
||||
|
||||
def _get_salt() -> str:
    """Return today's visitor-hash salt, regenerating it on date rollover.

    The salt is derived deterministically from the current ISO date, so all
    workers agree on it without coordination, yet it changes every day.
    """
    global _daily_salt
    cached_day, cached_salt = _daily_salt
    current_day = date.today().isoformat()
    if cached_day != current_day:
        digest = hashlib.sha256(f"ietf-analytics-{current_day}".encode()).hexdigest()
        cached_salt = digest[:16]
        _daily_salt = (current_day, cached_salt)
    return cached_salt
|
||||
|
||||
|
||||
def _hash_visitor(ip: str) -> str:
    """Create a daily-rotating hash from IP. Cannot be reversed or correlated across days."""
    # Salt + IP go through SHA-256; only a short prefix is kept, which is
    # plenty for uniqueness estimation and stores no raw address.
    payload = f"{_get_salt()}:{ip}".encode()
    return hashlib.sha256(payload).hexdigest()[:12]
|
||||
|
||||
|
||||
def _classify_ua(ua: str) -> str:
|
||||
"""Rough bot/browser classification."""
|
||||
ua_lower = ua.lower()
|
||||
if any(b in ua_lower for b in ("bot", "spider", "crawl", "slurp", "wget", "curl", "python-requests")):
|
||||
return "bot"
|
||||
if "mobile" in ua_lower:
|
||||
return "mobile"
|
||||
return "browser"
|
||||
|
||||
|
||||
def _get_analytics_db() -> sqlite3.Connection:
    """Get or create the analytics DB connection for this request.

    The connection is cached on Flask's ``g`` for the lifetime of the
    request and closed by the teardown hook registered in init_analytics().

    Returns:
        An open sqlite3 connection with Row factory and schema applied.
    """
    if "analytics_db" not in g:
        # BUG FIX: the previous code computed a fallback path from
        # ``request.app_root`` first — an attribute Flask's request object
        # does not have — so every call raised AttributeError (silently
        # swallowed by the caller) and nothing was ever recorded.
        # Prefer the path configured by init_analytics() instead.
        if hasattr(g, "_analytics_db_path"):
            db_path = Path(g._analytics_db_path)
        else:
            # analytics.py lives in src/webui/, so parents[2] is the project
            # root — mirrors init_analytics's Path(app.root_path).parent.parent.
            db_path = Path(__file__).resolve().parents[2] / "data" / "analytics.db"
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(db_path))
        conn.row_factory = sqlite3.Row
        conn.executescript(_SCHEMA)  # idempotent: IF NOT EXISTS throughout
        g.analytics_db = conn
    return g.analytics_db
|
||||
|
||||
|
||||
# Paths to skip (static assets, API calls, etc.)
# Matched by str.startswith in track_pageview before any DB work happens.
_SKIP_PREFIXES = ("/static/", "/api/", "/favicon", "/robots.txt", "/admin/")
|
||||
|
||||
|
||||
def init_analytics(app: Flask, db_path: str | None = None):
    """Register analytics hooks on the Flask app.

    Args:
        app: The Flask application to instrument.
        db_path: Optional explicit path to the analytics SQLite database.
            Defaults to ``<project-root>/data/analytics.db`` derived from
            the app's root path (src/webui → two levels up).
    """
    _resolved_db_path = Path(db_path) if db_path else (
        Path(app.root_path).parent.parent / "data" / "analytics.db"
    )

    @app.before_request
    def track_pageview():
        path = request.path

        # Skip static/API/admin routes
        if any(path.startswith(p) for p in _SKIP_PREFIXES):
            return

        # Hand the resolved path to _get_analytics_db() via g.
        g._analytics_db_path = _resolved_db_path

        try:
            conn = _get_analytics_db()
            ip = request.remote_addr or "unknown"
            visitor = _hash_visitor(ip)
            ua = request.headers.get("User-Agent", "")
            ua_type = _classify_ua(ua)

            # Track downloads separately — including bot downloads.
            if path == "/export/obsidian":
                conn.execute(
                    "INSERT INTO downloads (file_type, visitor) VALUES (?, ?)",
                    ("obsidian", visitor),
                )
                conn.commit()

            # Bots are excluded from page-view counts entirely.
            # BUG FIX: previously a bot hitting /export/obsidian fell through
            # and was ALSO inserted into page_views, contradicting the
            # "skip bots from page view counts" intent.
            if ua_type == "bot":
                return

            referrer = request.headers.get("Referer", "")
            # Only keep the domain of referrer (privacy: drop path/query).
            if referrer:
                try:
                    parsed = urlparse(referrer)
                    referrer = parsed.netloc or ""
                except Exception:
                    referrer = ""

            conn.execute(
                "INSERT INTO page_views (path, referrer, visitor, ua_type) VALUES (?, ?, ?, ?)",
                (path, referrer, visitor, ua_type),
            )
            conn.commit()
        except Exception:
            pass  # Analytics should never break the app

    @app.teardown_appcontext
    def close_analytics_db(exception=None):
        # Release the per-request connection cached by _get_analytics_db().
        conn = g.pop("analytics_db", None)
        if conn is not None:
            conn.close()
|
||||
|
||||
|
||||
def get_analytics_data(db_path: str | Path) -> dict:
    """Query analytics data for the dashboard. Returns dicts ready for rendering.

    Opens its own short-lived connection (independent of the request-scoped
    one) so it can be called from any context.

    Args:
        db_path: Filesystem path to the analytics SQLite database.

    Returns:
        Dict with keys: "stats" (headline counters), "daily" (30-day series),
        "top_pages", "top_referrers", "downloads_daily", "hourly".
    """
    conn = sqlite3.connect(str(db_path))
    # FIX: always release the connection — the previous version leaked the
    # handle if any query raised before the final conn.close().
    try:
        conn.row_factory = sqlite3.Row
        conn.executescript(_SCHEMA)  # idempotent: creates tables on first use

        today = date.today()
        week_ago = (today - timedelta(days=7)).isoformat()
        month_ago = (today - timedelta(days=30)).isoformat()

        # --- Overall stats ---
        total_views = conn.execute("SELECT COUNT(*) FROM page_views").fetchone()[0]
        # Visitor hashes rotate daily, so "unique visitors" means per-day
        # unique: count distinct (visitor || date) pairs.
        total_visitors = conn.execute("SELECT COUNT(DISTINCT visitor || date) FROM page_views").fetchone()[0]
        total_downloads = conn.execute("SELECT COUNT(*) FROM downloads").fetchone()[0]

        today_views = conn.execute(
            "SELECT COUNT(*) FROM page_views WHERE date = ?", (today.isoformat(),)
        ).fetchone()[0]
        today_visitors = conn.execute(
            "SELECT COUNT(DISTINCT visitor) FROM page_views WHERE date = ?", (today.isoformat(),)
        ).fetchone()[0]

        week_views = conn.execute(
            "SELECT COUNT(*) FROM page_views WHERE date >= ?", (week_ago,)
        ).fetchone()[0]
        month_views = conn.execute(
            "SELECT COUNT(*) FROM page_views WHERE date >= ?", (month_ago,)
        ).fetchone()[0]

        # --- Daily views (last 30 days) ---
        daily_rows = conn.execute(
            "SELECT date, COUNT(*) as views, COUNT(DISTINCT visitor) as visitors "
            "FROM page_views WHERE date >= ? GROUP BY date ORDER BY date",
            (month_ago,),
        ).fetchall()
        daily = {
            "dates": [r["date"] for r in daily_rows],
            "views": [r["views"] for r in daily_rows],
            "visitors": [r["visitors"] for r in daily_rows],
        }

        # --- Top pages (last 30 days) ---
        page_rows = conn.execute(
            "SELECT path, COUNT(*) as views, COUNT(DISTINCT visitor) as visitors "
            "FROM page_views WHERE date >= ? GROUP BY path ORDER BY views DESC LIMIT 20",
            (month_ago,),
        ).fetchall()
        top_pages = [{"path": r["path"], "views": r["views"], "visitors": r["visitors"]} for r in page_rows]

        # --- Top referrers (last 30 days) ---
        ref_rows = conn.execute(
            "SELECT referrer, COUNT(*) as count FROM page_views "
            "WHERE date >= ? AND referrer != '' GROUP BY referrer ORDER BY count DESC LIMIT 15",
            (month_ago,),
        ).fetchall()
        top_referrers = [{"referrer": r["referrer"], "count": r["count"]} for r in ref_rows]

        # --- Downloads over time ---
        dl_rows = conn.execute(
            "SELECT date, COUNT(*) as count FROM downloads GROUP BY date ORDER BY date"
        ).fetchall()
        downloads_daily = {
            "dates": [r["date"] for r in dl_rows],
            "counts": [r["count"] for r in dl_rows],
        }

        # --- Hourly pattern (last 7 days) ---
        hourly_rows = conn.execute(
            "SELECT CAST(strftime('%H', ts) AS INTEGER) as hour, COUNT(*) as views "
            "FROM page_views WHERE date >= ? GROUP BY hour ORDER BY hour",
            (week_ago,),
        ).fetchall()
        hourly = {r["hour"]: r["views"] for r in hourly_rows}
        # Fill the gaps so the chart always has all 24 hours.
        hourly_full = {"hours": list(range(24)), "views": [hourly.get(h, 0) for h in range(24)]}

        return {
            "stats": {
                "total_views": total_views,
                "total_visitors": total_visitors,
                "total_downloads": total_downloads,
                "today_views": today_views,
                "today_visitors": today_visitors,
                "week_views": week_views,
                "month_views": month_views,
            },
            "daily": daily,
            "top_pages": top_pages,
            "top_referrers": top_referrers,
            "downloads_daily": downloads_daily,
            "hourly": hourly_full,
        }
    finally:
        conn.close()
|
||||
55
src/webui/auth.py
Normal file
55
src/webui/auth.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""Admin authentication with two run modes.
|
||||
|
||||
Production (default):
|
||||
python src/webui/app.py
|
||||
All admin routes return 404. No way to access private features.
|
||||
|
||||
Development:
|
||||
python src/webui/app.py --dev
|
||||
Every request is auto-authenticated as admin. No login needed.
|
||||
|
||||
The mode is set once at startup and cannot be changed at runtime.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import wraps
|
||||
|
||||
from flask import abort, g
|
||||
|
||||
# Module-level flag set by init_auth()
# True → every request is treated as admin (development runs only).
_dev_mode: bool = False
# Guards against registering the Flask hooks more than once.
_initialized: bool = False
|
||||
|
||||
|
||||
def is_admin() -> bool:
    """Check if the current request has admin access.

    Admin access is purely a function of the process-wide mode flag:
    dev mode means everyone is admin, production means nobody is.
    """
    return _dev_mode
|
||||
|
||||
|
||||
def admin_required(f):
    """Decorator: returns 404 for non-admin users so routes stay hidden.

    A 404 (rather than 401/403) avoids even revealing that the route exists.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        if is_admin():
            return f(*args, **kwargs)
        abort(404)
    return wrapper
|
||||
|
||||
|
||||
def init_auth(app, dev: bool = False):
    """Set the auth mode and register Flask hooks (once only).

    The mode flag is refreshed on every call, but the request hooks are
    registered only the first time to avoid duplicate handlers.
    """
    global _dev_mode, _initialized
    _dev_mode = dev

    if _initialized:
        return
    _initialized = True

    @app.before_request
    def _stash_admin_flag():
        # Cache the per-request admin verdict on g for handlers to read.
        g.is_admin = is_admin()

    @app.context_processor
    def _expose_admin_flag():
        # Expose {{ is_admin }} to every template render.
        return {"is_admin": g.get("is_admin", False)}
|
||||
508
src/webui/obsidian_export.py
Normal file
508
src/webui/obsidian_export.py
Normal file
@@ -0,0 +1,508 @@
|
||||
"""Export research data as an Obsidian-compatible vault (ZIP).
|
||||
|
||||
Generates interlinked markdown files with YAML frontmatter,
|
||||
[[wikilinks]], #tags, and Mermaid diagrams that Obsidian renders natively.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import zipfile
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import date
|
||||
|
||||
from ietf_analyzer.db import Database
|
||||
|
||||
from webui.data import _extract_month
|
||||
|
||||
|
||||
def _safe_filename(name: str) -> str:
|
||||
"""Sanitize a string for use as a filename."""
|
||||
return name.replace("/", "-").replace("\\", "-").replace(":", "-").replace('"', "")
|
||||
|
||||
|
||||
def _score_bar(val: float, max_val: float = 5.0) -> str:
|
||||
"""Render a simple text progress bar."""
|
||||
filled = round(val / max_val * 10)
|
||||
return "`" + "\u2588" * filled + "\u2591" * (10 - filled) + f"` {val}/{max_val}"
|
||||
|
||||
|
||||
def _mermaid_pie(title: str, data: dict[str, int], limit: int = 12) -> str:
|
||||
"""Generate a Mermaid pie chart."""
|
||||
items = list(data.items())[:limit]
|
||||
if not items:
|
||||
return ""
|
||||
lines = [f'```mermaid\npie title {title}']
|
||||
for label, count in items:
|
||||
safe_label = label.replace('"', "'")
|
||||
lines.append(f' "{safe_label}" : {count}')
|
||||
lines.append("```")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _mermaid_bar(title: str, data: dict[str, float], limit: int = 15) -> str:
|
||||
"""Generate a Mermaid xychart bar chart."""
|
||||
items = list(data.items())[:limit]
|
||||
if not items:
|
||||
return ""
|
||||
labels = [f'"{k[:20]}"' for k, _ in items]
|
||||
values = [str(round(v, 1)) for _, v in items]
|
||||
return f"""```mermaid
|
||||
xychart-beta
|
||||
title "{title}"
|
||||
x-axis [{", ".join(labels)}]
|
||||
y-axis "Score"
|
||||
bar [{", ".join(values)}]
|
||||
```"""
|
||||
|
||||
|
||||
def _mermaid_timeline_chart(monthly: dict[str, int]) -> str:
|
||||
"""Generate a Mermaid xychart for submissions over time."""
|
||||
if len(monthly) < 2:
|
||||
return ""
|
||||
months = sorted(monthly.keys())
|
||||
# Show every 3rd label to avoid clutter
|
||||
labels = []
|
||||
for i, m in enumerate(months):
|
||||
if i % 3 == 0:
|
||||
labels.append(f'"{m}"')
|
||||
else:
|
||||
labels.append('" "')
|
||||
values = [str(monthly[m]) for m in months]
|
||||
return f"""```mermaid
|
||||
xychart-beta
|
||||
title "Draft Submissions Over Time"
|
||||
x-axis [{", ".join(labels)}]
|
||||
y-axis "Drafts"
|
||||
bar [{", ".join(values)}]
|
||||
```"""
|
||||
|
||||
|
||||
def build_obsidian_vault(db: Database) -> bytes:
    """Build a ZIP file containing an Obsidian vault with all research data.

    Vault layout (everything under one top-level folder):
      Dashboard.md       — entry point: stats, charts, navigation
      Drafts/<name>.md   — one note per draft (frontmatter, rating, ideas)
      Authors/…          — index + one note per author with co-author links
      Categories/…       — index + one note per category with ranked drafts
      Analysis/…         — score distribution, top rated, ideas, timeline, glossary
      .obsidian/graph.json — graph-view color groups per folder

    Fixes over the previous revision:
      * category tables no longer crash (TypeError) when a draft title is None
      * author notes handle missing/0.0 scores and None titles correctly
      * "Top Rated" analysis really lists up to 30 drafts (the list was
        pre-truncated to 15 for the dashboard and then reused)
      * removed unused locals (author_info, score_buckets/score_dist)

    Args:
        db: Project database handle (drafts, ratings, ideas, authors).

    Returns:
        The ZIP archive as raw bytes, ready to stream to the client.
    """
    buf = io.BytesIO()
    prefix = "IETF-AI-Agent-Drafts"

    pairs = db.drafts_with_ratings(limit=2000)
    all_drafts_list = db.list_drafts(limit=2000, order_by="time DESC")
    draft_map = {d.name: d for d in all_drafts_list}
    all_ideas = db.all_ideas()
    all_authors = db.top_authors(limit=500)

    # Build lookup maps
    cat_counts: Counter = Counter()
    cat_drafts: dict[str, list[str]] = defaultdict(list)
    score_map: dict[str, float] = {}
    rating_map: dict[str, object] = {}

    for d, r in pairs:
        score_map[d.name] = r.composite_score
        rating_map[d.name] = r
        for cat in r.categories:
            cat_counts[cat] += 1
            cat_drafts[cat].append(d.name)

    # Monthly submission counts
    monthly: Counter = Counter()
    for d in all_drafts_list:
        monthly[_extract_month(d.time)] += 1

    # Ideas by draft
    ideas_by_draft: dict[str, list[dict]] = defaultdict(list)
    for idea in all_ideas:
        ideas_by_draft[idea.get("draft_name", "")].append(idea)

    # Author names per draft (reverse index of top_authors)
    author_drafts: dict[str, list[str]] = defaultdict(list)
    for name, aff, cnt, drafts in all_authors:
        for dn in drafts:
            author_drafts[dn].append(name)

    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:

        # --- Dashboard.md ---
        # Keep the full ranking; the dashboard shows 15, Analysis/Top Rated
        # shows up to 30 (previously the slice-to-15 happened first, so the
        # analysis page could never show more than 15).
        ranked = sorted(pairs, key=lambda p: p[1].composite_score, reverse=True)
        top_rated = ranked[:15]
        top_table = "| Draft | Score | Category |\n|---|---|---|\n"
        for d, r in top_rated:
            cat = r.categories[0] if r.categories else ""
            top_table += f"| [[{d.name}]] | **{r.composite_score:.2f}** | {cat} |\n"

        cat_pie = _mermaid_pie("Drafts by Category", dict(cat_counts.most_common(12)))
        timeline_chart = _mermaid_timeline_chart(dict(sorted(monthly.items())))

        dashboard = f"""---
tags: [dashboard, ietf, ai-agents]
generated: {date.today().isoformat()}
---

# IETF AI/Agent Draft Analysis

> Automated analysis of {len(all_drafts_list)} Internet-Drafts on AI and agent topics.
> Generated by [IETF Draft Analyzer](https://github.com) on {date.today().isoformat()}.

## Key Stats

| Metric | Value |
|---|---|
| Total Drafts | **{len(all_drafts_list)}** |
| Rated Drafts | **{len(pairs)}** |
| Authors | **{len(all_authors)}** |
| Ideas Extracted | **{len(all_ideas)}** |
| Categories | **{len(cat_counts)}** |

## Categories

{cat_pie}

### Category Index

{chr(10).join(f"- [[{cat}]] ({count} drafts)" for cat, count in cat_counts.most_common())}

## Submissions Over Time

{timeline_chart}

## Top Rated Drafts

{top_table}

## Navigation

- **[[Categories/index|Categories]]** — Browse by topic
- **[[Authors/index|Authors]]** — Browse by author
- **[[Analysis/Score Distribution|Score Distribution]]** — Rating analytics
- **[[Analysis/Top Rated|Top Rated]]** — Highest-scored drafts
- **[[Analysis/Ideas Overview|Ideas]]** — Extracted technical ideas
- **[[Analysis/Glossary|Glossary]]** — Terms, abbreviations, and scoring methodology
"""
        zf.writestr(f"{prefix}/Dashboard.md", dashboard)

        # --- Individual Draft Notes ---
        for d_obj in all_drafts_list:
            name = d_obj.name
            draft = draft_map.get(name, d_obj)
            r = rating_map.get(name)  # None for unrated drafts
            ideas = ideas_by_draft.get(name, [])
            authors = author_drafts.get(name, [])
            month = _extract_month(draft.time)

            # YAML frontmatter (titles are quoted; embedded " becomes ')
            fm_lines = [
                "---",
                f'title: "{(draft.title or name).replace(chr(34), chr(39))}"',
                f"date: {draft.time or 'unknown'}",
                f"rev: {draft.rev or '00'}",
            ]
            if r:
                fm_lines.append(f"score: {r.composite_score:.2f}")
                fm_lines.append(f"novelty: {r.novelty}")
                fm_lines.append(f"maturity: {r.maturity}")
                fm_lines.append(f"overlap: {r.overlap}")
                fm_lines.append(f"momentum: {r.momentum}")
                fm_lines.append(f"relevance: {r.relevance}")
                if r.categories:
                    fm_lines.append(f"categories: [{', '.join(r.categories)}]")
            if authors:
                # Commas inside names would break the YAML list — strip them.
                fm_lines.append(f"authors: [{', '.join(a.replace(',', '') for a in authors)}]")
            fm_lines.append(f"tags: [draft, ietf, {month}]")
            fm_lines.append("---")
            frontmatter = "\n".join(fm_lines)

            # Note body
            body = f"\n# {draft.title or name}\n\n"
            body += f"**{name}** | rev {draft.rev or '00'} | {draft.time or 'unknown'}\n\n"

            if authors:
                body += "## Authors\n\n"
                body += "\n".join(f"- [[{a}]]" for a in authors) + "\n\n"

            if r:
                body += "## Rating\n\n"
                body += f"**Composite Score: {r.composite_score:.2f}**\n\n"
                body += "| Dimension | Score |\n|---|---|\n"
                body += f"| Novelty | {_score_bar(r.novelty)} |\n"
                body += f"| Maturity | {_score_bar(r.maturity)} |\n"
                body += f"| Overlap | {_score_bar(r.overlap)} |\n"
                body += f"| Momentum | {_score_bar(r.momentum)} |\n"
                body += f"| Relevance | {_score_bar(r.relevance)} |\n\n"
                if r.summary:
                    body += f"> {r.summary}\n\n"
                if r.categories:
                    body += "**Categories:** " + ", ".join(f"[[{c}]]" for c in r.categories) + "\n\n"

            if draft.abstract:
                body += "## Abstract\n\n"
                body += draft.abstract + "\n\n"

            if ideas:
                body += f"## Extracted Ideas ({len(ideas)})\n\n"
                for idea in ideas:
                    novelty = f" `N:{idea.get('novelty_score', '?')}`" if idea.get("novelty_score") else ""
                    itype = f" *{idea.get('type', '')}*" if idea.get("type") else ""
                    body += f"- **{idea.get('title', 'Untitled')}**{itype}{novelty}\n"
                    if idea.get("description"):
                        body += f"  {idea['description']}\n"
                body += "\n"

            body += "## Links\n\n"
            body += f"- [View on IETF Datatracker](https://datatracker.ietf.org/doc/{name}/)\n"
            if draft.rev:
                body += f"- [Read Full Text](https://www.ietf.org/archive/id/{name}-{draft.rev}.txt)\n"

            content = frontmatter + body
            zf.writestr(f"{prefix}/Drafts/{_safe_filename(name)}.md", content)

        # --- Author Notes ---
        author_index_lines = [
            "---\ntags: [index, authors]\n---\n",
            "# Authors\n\n",
            f"**{len(all_authors)}** authors contributing to AI/agent Internet-Drafts.\n\n",
            "| Author | Affiliation | Drafts |\n|---|---|---|\n",
        ]
        for name, aff, cnt, drafts in sorted(all_authors, key=lambda x: x[2], reverse=True):
            author_index_lines.append(f"| [[{name}]] | {aff or ''} | {cnt} |\n")
        zf.writestr(f"{prefix}/Authors/index.md", "".join(author_index_lines))

        for name, aff, cnt, drafts in all_authors:
            fm = f"---\ntags: [author]\naffiliation: \"{aff or ''}\"\ndraft_count: {cnt}\n---\n"
            body = f"\n# {name}\n\n"
            if aff:
                body += f"**Affiliation:** {aff}\n\n"
            body += f"## Drafts ({cnt})\n\n"
            for dn in drafts:
                d = draft_map.get(dn)
                # FIX: fall back to the draft name when the record or its
                # title is missing (a None title used to render as "None").
                title = (d.title or dn) if d else dn
                # FIX: was score_map.get(dn, "") with a truthiness check,
                # which mixed str/float and silently dropped 0.0 scores.
                score = score_map.get(dn)
                score_str = f" (score: {score:.2f})" if score is not None else ""
                body += f"- [[{dn}|{title}]]{score_str}\n"

            # Co-authors: anyone sharing at least one of this author's drafts
            coauthors: Counter = Counter()
            for dn in drafts:
                for other in author_drafts.get(dn, []):
                    if other != name:
                        coauthors[other] += 1
            if coauthors:
                body += "\n## Co-authors\n\n"
                for co, shared in coauthors.most_common(20):
                    body += f"- [[{co}]] ({shared} shared)\n"

            zf.writestr(f"{prefix}/Authors/{_safe_filename(name)}.md", fm + body)

        # --- Category Notes ---
        cat_index_lines = [
            "---\ntags: [index, categories]\n---\n",
            "# Categories\n\n",
            _mermaid_pie("Draft Distribution", dict(cat_counts.most_common(12))),
            "\n\n",
        ]
        for cat, count in cat_counts.most_common():
            cat_index_lines.append(f"- [[{cat}]] — {count} drafts\n")
        zf.writestr(f"{prefix}/Categories/index.md", "".join(cat_index_lines))

        for cat, count in cat_counts.most_common():
            fm = f"---\ntags: [category]\ndraft_count: {count}\n---\n"
            body = f"\n# {cat}\n\n"
            body += f"**{count} drafts** in this category.\n\n"

            # Table of drafts sorted by score (unrated drafts sink to 0)
            draft_names = cat_drafts[cat]
            scored = [(dn, score_map.get(dn, 0)) for dn in draft_names]
            scored.sort(key=lambda x: x[1], reverse=True)

            body += "| Draft | Score |\n|---|---|\n"
            for dn, score in scored:
                d = draft_map.get(dn)
                # FIX: d.title[:60] raised TypeError when title was None.
                title = (d.title or dn)[:60] if d else dn
                body += f"| [[{dn}|{title}]] | {score:.2f} |\n"

            zf.writestr(f"{prefix}/Categories/{_safe_filename(cat)}.md", fm + body)

        # --- Analysis Notes ---

        # Score Distribution
        score_lines = [
            "---\ntags: [analysis]\n---\n",
            "\n# Score Distribution\n\n",
            "Composite scores across all rated drafts (1.0–5.0 scale).\n\n",
        ]
        # Bucket composite scores to one decimal for the bar chart
        buckets: dict[str, int] = defaultdict(int)
        for _, r in pairs:
            buckets[f"{r.composite_score:.1f}"] += 1
        sorted_buckets = dict(sorted(buckets.items()))
        if sorted_buckets:
            labels = [f'"{k}"' for k in sorted_buckets.keys()]
            values = [str(v) for v in sorted_buckets.values()]
            score_lines.append(f"""```mermaid
xychart-beta
  title "Score Distribution"
  x-axis [{", ".join(labels)}]
  y-axis "Count"
  bar [{", ".join(values)}]
```\n\n""")

        # Per-dimension min/avg/max across all rated drafts
        dims = {"Novelty": [], "Maturity": [], "Overlap": [], "Momentum": [], "Relevance": []}
        for _, r in pairs:
            dims["Novelty"].append(r.novelty)
            dims["Maturity"].append(r.maturity)
            dims["Overlap"].append(r.overlap)
            dims["Momentum"].append(r.momentum)
            dims["Relevance"].append(r.relevance)
        score_lines.append("## Dimension Averages\n\n")
        score_lines.append("| Dimension | Average | Min | Max |\n|---|---|---|---|\n")
        for dim, vals in dims.items():
            if vals:
                avg = sum(vals) / len(vals)
                score_lines.append(f"| {dim} | {avg:.2f} | {min(vals)} | {max(vals)} |\n")

        zf.writestr(f"{prefix}/Analysis/Score Distribution.md", "".join(score_lines))

        # Top Rated
        top_lines = [
            "---\ntags: [analysis]\n---\n",
            "\n# Top Rated Drafts\n\n",
            "Drafts ranked by composite score.\n\n",
            "| # | Draft | Score | Novelty | Maturity | Overlap | Momentum | Relevance | Category |\n",
            "|---|---|---|---|---|---|---|---|---|\n",
        ]
        # Use the full ranking here, not the 15-entry dashboard slice
        for i, (d, r) in enumerate(ranked[:30], 1):
            cat = r.categories[0] if r.categories else ""
            top_lines.append(
                f"| {i} | [[{d.name}|{(d.title or d.name)[:45]}]] | **{r.composite_score:.2f}** | "
                f"{r.novelty} | {r.maturity} | {r.overlap} | {r.momentum} | {r.relevance} | {cat} |\n"
            )
        zf.writestr(f"{prefix}/Analysis/Top Rated.md", "".join(top_lines))

        # Ideas Overview
        type_counts = Counter(i.get("type", "other") or "other" for i in all_ideas)
        ideas_lines = [
            "---\ntags: [analysis, ideas]\n---\n",
            "\n# Extracted Ideas\n\n",
            f"**{len(all_ideas)}** technical ideas extracted from rated drafts.\n\n",
            _mermaid_pie("Ideas by Type", dict(type_counts.most_common(10))),
            "\n\n## By Type\n\n",
        ]
        for itype, count in type_counts.most_common():
            ideas_lines.append(f"- **{itype}**: {count} ideas\n")

        ideas_lines.append("\n## Recent Ideas\n\n")
        for idea in all_ideas[:50]:
            dn = idea.get("draft_name", "")
            novelty = f" `N:{idea.get('novelty_score')}`" if idea.get("novelty_score") else ""
            ideas_lines.append(f"- **{idea.get('title', 'Untitled')}**{novelty} — [[{dn}]]\n")
        if len(all_ideas) > 50:
            ideas_lines.append(f"\n*...and {len(all_ideas) - 50} more. See individual draft notes.*\n")

        zf.writestr(f"{prefix}/Analysis/Ideas Overview.md", "".join(ideas_lines))

        # Timeline
        timeline_lines = [
            "---\ntags: [analysis, timeline]\n---\n",
            "\n# Timeline\n\n",
            "Draft submission activity over time.\n\n",
            _mermaid_timeline_chart(dict(sorted(monthly.items()))),
            "\n\n## Monthly Counts\n\n",
            "| Month | Drafts |\n|---|---|\n",
        ]
        for m in sorted(monthly.keys()):
            timeline_lines.append(f"| {m} | {monthly[m]} |\n")
        zf.writestr(f"{prefix}/Analysis/Timeline.md", "".join(timeline_lines))

        # --- Glossary ---
        glossary = """---
tags: [reference, glossary]
---

# Glossary

Reference for all terms, abbreviations, and scoring dimensions used in this vault.

## Scoring Dimensions

Each draft is rated by Claude AI on five dimensions, scored from 1 (lowest) to 5 (highest).

| Dimension | Description |
|---|---|
| **Novelty** | How original is this draft? Does it introduce new ideas, or rehash existing approaches? High = genuinely new contribution. |
| **Maturity** | How complete and well-developed is the specification? High = detailed protocol, clear data formats, ready for implementation. Low = early sketch or position paper. |
| **Overlap** | How much does this draft duplicate existing work? High overlap (5) = very similar to other drafts. Low overlap (1) = unique in the landscape. *Note: In composite score, this is inverted (5 - overlap) so lower overlap contributes positively.* |
| **Momentum** | Is this draft gaining traction? High = active revisions, working group adoption, multiple authors/organizations. Low = single submission, no updates. |
| **Relevance** | How relevant is this draft to AI agent infrastructure? High = directly addresses agent-to-agent communication, identity, authorization. Low = tangentially related. |

## Composite Score

The **composite score** (1.0–5.0) is calculated as:

```
score = (novelty + maturity + (5 - overlap) + momentum + relevance) / 5
```

Overlap is inverted because a *lower* overlap is better (more unique).

## Score Bars

Score bars visualize ratings: `\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2591\u2591\u2591` = 3.5/5.0

- `\u2588` (filled) = earned score
- `\u2591` (empty) = remaining

## Other Terms

| Term | Meaning |
|---|---|
| **Draft / I-D** | Internet-Draft — a working document submitted to the IETF. Not yet an RFC (standard). |
| **RFC** | Request for Comments — a published IETF standard or informational document. |
| **Working Group (WG)** | An IETF group chartered to work on a specific topic (e.g., WIMSE, OAuth). |
| **Category** | Topic classification assigned by Claude during analysis (e.g., "A2A protocols", "AI safety/alignment"). A draft can belong to multiple categories. |
| **Idea** | A distinct technical concept extracted from a draft by Claude. Each idea has a type (protocol, mechanism, framework, etc.) and a novelty score. |
| **Novelty Score (N:1–5)** | Per-idea originality rating. Shown as `N:4` next to ideas. 5 = completely new concept, 1 = well-known approach. |
| **Gap** | An area identified where no existing draft adequately addresses a need in the AI agent ecosystem. |
| **Affiliation** | The organization an author is associated with (from IETF Datatracker records). |
| **Co-authorship** | Two authors who appear together on at least one draft. |
| **Datatracker** | The IETF's official system for tracking Internet-Drafts, RFCs, and working groups (datatracker.ietf.org). |
"""
        zf.writestr(f"{prefix}/Analysis/Glossary.md", glossary)

        # --- .obsidian settings for graph colors ---
        graph_json = """{
  "collapse-filter": false,
  "search": "",
  "showTags": true,
  "showAttachments": false,
  "hideUnresolved": false,
  "showOrphans": true,
  "collapse-color-groups": false,
  "colorGroups": [
    {"query": "path:Drafts", "color": {"a": 1, "rgb": 3444735}},
    {"query": "path:Authors", "color": {"a": 1, "rgb": 10092441}},
    {"query": "path:Categories", "color": {"a": 1, "rgb": 16744448}},
    {"query": "path:Analysis", "color": {"a": 1, "rgb": 2293541}}
  ],
  "collapse-display": false,
  "showArrow": true,
  "textFadeMultiplier": 0,
  "nodeSizeMultiplier": 1.2,
  "lineSizeMultiplier": 1,
  "collapse-forces": true,
  "centerStrength": 0.5,
  "repelStrength": 10,
  "linkStrength": 1,
  "linkDistance": 100
}"""
        zf.writestr(f"{prefix}/.obsidian/graph.json", graph_json)

    buf.seek(0)
    return buf.getvalue()
|
||||
Reference in New Issue
Block a user