v0.3.0: Gap-to-Draft pipeline, Living Standards Observatory, blog series
Gap-to-Draft Pipeline (ietf pipeline):
- Context builder assembles ideas, RFC foundations, similar drafts, ecosystem vision
- Generator produces outlines + sections using rich context with Claude
- Quality gates: novelty (embedding similarity), references, format, self-rating
- Family coordinator generates 5-draft ecosystem (AEM/ATD/HITL/AEPB/APAE)
- I-D formatter with proper headers, references, 72-char wrapping

Living Standards Observatory (ietf observatory):
- Source abstraction with IETF + W3C fetchers
- 7-step update pipeline: snapshot, fetch, analyze, embed, ideas, gaps, record
- Static GitHub Pages dashboard (explorer, gap tracker, timeline)
- Weekly CI/CD automation via GitHub Actions

Also includes:
- 361 drafts (expanded from 260 with 6 new keywords), 403 authors, 1,262 ideas, 12 gaps
- Blog series (8 posts planned), reports, arXiv paper figures
- Agent team infrastructure (CLAUDE.md, scripts, dev journal)
- 5 new DB tables, schema migration, ~15 new query methods

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
228
scripts/team-blocs-report.py
Executable file
228
scripts/team-blocs-report.py
Executable file
@@ -0,0 +1,228 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate a detailed team blocs report.
|
||||
|
||||
Usage:
|
||||
PYTHONPATH=src python scripts/team-blocs-report.py
|
||||
PYTHONPATH=src python scripts/team-blocs-report.py --min-shared 3 --threshold 0.80
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from ietf_analyzer.config import Config
|
||||
from ietf_analyzer.db import Database
|
||||
from ietf_analyzer.orgs import detect_blocs, normalize_org, top_orgs_normalized
|
||||
|
||||
|
||||
def _size_bucket(n):
    """Map a bloc member count onto its histogram bucket label."""
    if n >= 10:
        return "10+"
    if n >= 5:
        return "5-9"
    if n >= 3:
        return "3-4"
    return "2"


def _coauthor_pairs(db):
    """Return (name_a, pid_a, name_b, pid_b, shared) for every author pair
    with >= 2 shared drafts, ordered most-shared first.

    The `da1.person_id < da2.person_id` join condition keeps each pair
    unique (no self-pairs, no mirrored duplicates).
    """
    return db.conn.execute(
        """SELECT a1.name, da1.person_id, a2.name, da2.person_id, COUNT(*) as shared
        FROM draft_authors da1
        JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
        AND da1.person_id < da2.person_id
        JOIN authors a1 ON da1.person_id = a1.person_id
        JOIN authors a2 ON da2.person_id = a2.person_id
        GROUP BY da1.person_id, da2.person_id
        HAVING shared >= 2
        ORDER BY shared DESC"""
    ).fetchall()


def _build_report(db, args):
    """Render the full Markdown report; return (report_text, bloc_count).

    Sections: summary table, bloc size distribution, per-org breakdown,
    detailed per-bloc profiles, and cross-bloc bridge authors.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    draft_sets = db.author_draft_sets()
    draft_counts = db.author_draft_counts()
    total_authors = db.author_count()
    total_drafts = db.count_drafts()

    # Rating lookup so the shared-drafts tables can show score/categories.
    rating_map = {
        draft.name: rating for draft, rating in db.drafts_with_ratings(limit=500)
    }

    blocs = detect_blocs(
        db,
        cohesion_threshold=args.threshold,
        min_size=args.min_size,
        min_shared_drafts=args.min_shared,
    )

    # Overall stats: how much of the author population sits inside a bloc.
    bloc_authors = set()
    for bloc in blocs:
        bloc_authors |= bloc.member_pids
    pct_in_blocs = len(bloc_authors) / total_authors * 100 if total_authors else 0

    lines = [
        "# Team Bloc Analysis",
        f"*Generated {now} — {total_authors} authors, {total_drafts} drafts*",
        f"*Parameters: cohesion >= {args.threshold:.0%}, min shared drafts >= {args.min_shared}, min size >= {args.min_size}*\n",
        "## Summary\n",
        "| Metric | Value |",
        "|--------|------:|",
        f"| Total blocs detected | {len(blocs)} |",
        f"| Authors in blocs | {len(bloc_authors)} / {total_authors} ({pct_in_blocs:.0f}%) |",
    ]
    # detect_blocs output is assumed ordered largest/most-shared first —
    # blocs[0] supplies the superlatives below (TODO confirm against detect_blocs).
    if blocs:
        lines.append(f"| Largest bloc | {len(blocs[0].members)} members |")
        lines.append(f"| Most shared drafts | {blocs[0].shared_drafts} |")
    else:
        lines.append("| Largest bloc | — |")
        lines.append("| Most shared drafts | — |")
    lines.append("")

    # Bloc size distribution histogram.
    size_dist: dict[str, int] = defaultdict(int)
    for bloc in blocs:
        size_dist[_size_bucket(len(bloc.members))] += 1

    lines.extend([
        "### Bloc Size Distribution\n",
        "| Size | Count |",
        "|------|------:|",
    ])
    for label in ("10+", "5-9", "3-4", "2"):
        if label in size_dist:
            lines.append(f"| {label} members | {size_dist[label]} |")
    lines.append("")

    # Per-organization breakdown, sorted by total shared drafts descending.
    org_blocs: dict[str, list] = defaultdict(list)
    for bloc in blocs:
        org_blocs[bloc.primary_org].append(bloc)

    lines.extend([
        "### Blocs by Organization\n",
        "| Organization | Blocs | Total Members | Total Shared Drafts |",
        "|-------------|------:|--------------:|--------------------:|",
    ])
    org_summary = sorted(
        org_blocs.items(), key=lambda kv: -sum(b.shared_drafts for b in kv[1])
    )
    for org, obs in org_summary:
        total_m = sum(len(b.members) for b in obs)
        total_s = sum(b.shared_drafts for b in obs)
        lines.append(f"| {org} | {len(obs)} | {total_m} | {total_s} |")
    lines.append("")

    # One detailed profile per bloc.
    lines.extend([
        "---\n",
        "## Detailed Bloc Profiles\n",
    ])

    for idx, bloc in enumerate(blocs, start=1):
        lines.append(f"### {idx}. {bloc.label}")
        lines.append("")
        lines.append("| | |")
        lines.append("|---|---|")
        lines.append(f"| **Members** | {len(bloc.members)} |")
        lines.append(f"| **Shared Drafts** | {bloc.shared_drafts} |")
        lines.append(f"| **Cohesion** | {bloc.cohesion:.0%} |")
        lines.append(f"| **Primary Org** | {bloc.primary_org} |")

        # Only list all orgs when the bloc actually spans more than one.
        orgs = {org for _, _, org in bloc.members if org}
        if len(orgs) > 1:
            lines.append(f"| **All Orgs** | {', '.join(sorted(orgs))} |")
        lines.append("")

        # Member table: per-author draft totals and in-bloc collaboration count.
        lines.append("**Members:**\n")
        lines.append("| Author | Organization | Drafts | In-Bloc Drafts |")
        lines.append("|--------|-------------|-------:|--------------:|")
        for pid, name, org in bloc.members:
            own_drafts = draft_sets.get(pid, set())
            others = bloc.member_pids - {pid}
            # A draft counts as "in-bloc" when at least one other bloc
            # member is also an author of it.
            in_bloc = sum(
                1
                for d in own_drafts
                if any(d in draft_sets.get(other, set()) for other in others)
            )
            lines.append(
                f"| {name} | {org} | {draft_counts.get(pid, 0)} | {in_bloc} |"
            )
        lines.append("")

        # Shared drafts: every draft touched by >= 2 members, most members first.
        participation: dict[str, int] = defaultdict(int)
        for pid in bloc.member_pids:
            for d in draft_sets.get(pid, set()):
                participation[d] += 1
        shared_list = sorted(
            ((d, cnt) for d, cnt in participation.items() if cnt >= 2),
            key=lambda x: -x[1],
        )

        if shared_list:
            lines.append("**Shared Drafts:**\n")
            lines.append("| Draft | Co-authors | Score | Categories |")
            lines.append("|-------|----------:|------:|------------|")
            for d, cnt in shared_list:
                rating = rating_map.get(d)
                score = f"{rating.composite_score:.1f}" if rating else "—"
                cats = ", ".join(rating.categories[:2]) if rating else ""
                lines.append(
                    f"| [{d}](https://datatracker.ietf.org/doc/{d}/) "
                    f"| {cnt}/{len(bloc.members)} | {score} | {cats} |"
                )
            lines.append("")

        lines.append("---\n")

    # Cross-bloc connections: bloc members collaborating across bloc lines
    # or with independent (non-bloc) authors.
    lines.extend([
        "## Cross-Bloc Connections\n",
        "*Authors who bridge between different blocs or connect blocs to the wider community.*\n",
    ])

    pid_bloc: dict[int, int] = {}
    for bi, bloc in enumerate(blocs):
        for pid in bloc.member_pids:
            pid_bloc[pid] = bi

    bridges = []
    for name_a, pid_a, name_b, pid_b, shared in _coauthor_pairs(db):
        bloc_a = pid_bloc.get(pid_a)
        bloc_b = pid_bloc.get(pid_b)
        if bloc_a is not None and bloc_b is not None and bloc_a != bloc_b:
            bridges.append(
                (name_a, blocs[bloc_a].label, name_b, blocs[bloc_b].label, shared)
            )
        elif bloc_a is not None and bloc_b is None:
            bridges.append(
                (name_a, blocs[bloc_a].label, name_b, "(independent)", shared)
            )
        elif bloc_b is not None and bloc_a is None:
            # Swap so the bloc member always occupies the first column.
            bridges.append(
                (name_b, blocs[bloc_b].label, name_a, "(independent)", shared)
            )

    if bridges:
        # Fixed header: the fourth column holds the partner's bloc label (or
        # "(independent)"), not their affiliation.
        lines.append("| Bloc Author | Bloc | External Author | Their Bloc | Shared |")
        lines.append("|-------------|------|-----------------|------------|-------:|")
        for a, bloc_label, b, other_label, shared in bridges[:30]:
            lines.append(f"| {a} | {bloc_label} | {b} | {other_label} | {shared} |")
    else:
        lines.append("No cross-bloc connections found with >= 2 shared drafts.")

    lines.append("")
    return "\n".join(lines), len(blocs)


def main():
    """Parse CLI options, build the team-blocs report, and write it to disk.

    Options: --min-shared (bloc edge threshold), --threshold (cohesion),
    --min-size (members per bloc), -o/--output (report path, defaults to
    data/reports/team-blocs.md under the configured data dir).
    """
    parser = argparse.ArgumentParser(description="Team blocs report")
    parser.add_argument("--min-shared", type=int, default=2, help="Min shared drafts to form a bloc edge (default: 2)")
    parser.add_argument("--threshold", type=float, default=0.70, help="Cohesion threshold 0-1 (default: 0.70)")
    parser.add_argument("--min-size", type=int, default=2, help="Min members per bloc (default: 2)")
    parser.add_argument("-o", "--output", default=None, help="Output path (default: data/reports/team-blocs.md)")
    args = parser.parse_args()

    cfg = Config()
    db = Database(cfg)
    out = Path(args.output) if args.output else Path(cfg.data_dir) / "reports" / "team-blocs.md"

    try:
        report, n_blocs = _build_report(db, args)
    finally:
        # Close the DB even if report generation raises.
        db.close()

    out.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: the report contains em dashes, which would crash
    # write_text() under a non-UTF-8 locale default (e.g. cp1252 on Windows).
    out.write_text(report, encoding="utf-8")
    print(f"Report written to {out} ({n_blocs} blocs)")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user