Files
ietf-draft-analyzer/scripts/team-blocs-report.py
Christian Nennemann d6beb9c0a0 v0.3.0: Gap-to-Draft pipeline, Living Standards Observatory, blog series
Gap-to-Draft Pipeline (ietf pipeline):
- Context builder assembles ideas, RFC foundations, similar drafts, ecosystem vision
- Generator produces outlines + sections using rich context with Claude
- Quality gates: novelty (embedding similarity), references, format, self-rating
- Family coordinator generates 5-draft ecosystem (AEM/ATD/HITL/AEPB/APAE)
- I-D formatter with proper headers, references, 72-char wrapping

Living Standards Observatory (ietf observatory):
- Source abstraction with IETF + W3C fetchers
- 7-step update pipeline: snapshot, fetch, analyze, embed, ideas, gaps, record
- Static GitHub Pages dashboard (explorer, gap tracker, timeline)
- Weekly CI/CD automation via GitHub Actions

Also includes:
- 361 drafts (expanded from 260 with 6 new keywords), 403 authors, 1,262 ideas, 12 gaps
- Blog series (8 posts planned), reports, arXiv paper figures
- Agent team infrastructure (CLAUDE.md, scripts, dev journal)
- 5 new DB tables, schema migration, ~15 new query methods

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 00:48:57 +01:00

229 lines
8.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""Generate a detailed team blocs report.
Usage:
PYTHONPATH=src python scripts/team-blocs-report.py
PYTHONPATH=src python scripts/team-blocs-report.py --min-shared 3 --threshold 0.80
"""
import argparse
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
from ietf_analyzer.config import Config
from ietf_analyzer.db import Database
from ietf_analyzer.orgs import detect_blocs, normalize_org, top_orgs_normalized
def main() -> None:
    """Generate the team-blocs markdown report and write it to disk.

    Reads author/draft co-authorship data from the project database,
    detects cohesive author blocs via ``detect_blocs``, and renders a
    markdown report containing summary statistics, a size distribution,
    an organization breakdown, per-bloc member/draft profiles, and a
    table of "bridge" authors connecting blocs. Prints the output path
    on success. Exits via argparse on invalid CLI arguments.
    """
    parser = argparse.ArgumentParser(description="Team blocs report")
    parser.add_argument("--min-shared", type=int, default=2, help="Min shared drafts to form a bloc edge (default: 2)")
    parser.add_argument("--threshold", type=float, default=0.70, help="Cohesion threshold 0-1 (default: 0.70)")
    parser.add_argument("--min-size", type=int, default=2, help="Min members per bloc (default: 2)")
    parser.add_argument("-o", "--output", default=None, help="Output path (default: data/reports/team-blocs.md)")
    args = parser.parse_args()

    cfg = Config()
    db = Database(cfg)
    out = Path(args.output) if args.output else Path(cfg.data_dir) / "reports" / "team-blocs.md"
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    draft_sets = db.author_draft_sets()
    draft_counts = db.author_draft_counts()
    total_authors = db.author_count()
    total_drafts = db.count_drafts()

    # Build rating lookup for categories (keyed by draft name).
    pairs_data = db.drafts_with_ratings(limit=500)
    rating_map = {draft.name: rating for draft, rating in pairs_data}

    blocs = detect_blocs(
        db,
        cohesion_threshold=args.threshold,
        min_size=args.min_size,
        min_shared_drafts=args.min_shared,
    )

    # Stats: union of all bloc member person-ids.
    bloc_authors = set()
    for b in blocs:
        bloc_authors |= b.member_pids
    pct_in_blocs = len(bloc_authors) / total_authors * 100 if total_authors else 0

    lines = [
        "# Team Bloc Analysis",
        # Fix: a separator was missing between the timestamp and the author
        # count ("{now}{total_authors}"), mashing them together in output.
        f"*Generated {now} — {total_authors} authors, {total_drafts} drafts*",
        f"*Parameters: cohesion >= {args.threshold:.0%}, min shared drafts >= {args.min_shared}, min size >= {args.min_size}*\n",
        "## Summary\n",
        "| Metric | Value |",
        "|--------|------:|",
        f"| Total blocs detected | {len(blocs)} |",
        f"| Authors in blocs | {len(bloc_authors)} / {total_authors} ({pct_in_blocs:.0f}%) |",
        # blocs is sorted with the largest/most-connected bloc first (index 0).
        f"| Largest bloc | {len(blocs[0].members)} members |" if blocs else "| Largest bloc | — |",
        f"| Most shared drafts | {blocs[0].shared_drafts} |" if blocs else "| Most shared drafts | — |",
        "",
    ]

    # Bloc size distribution, bucketed into fixed ranges.
    size_dist: dict[str, int] = defaultdict(int)
    for b in blocs:
        n = len(b.members)
        if n >= 10:
            size_dist["10+"] += 1
        elif n >= 5:
            size_dist["5-9"] += 1
        elif n >= 3:
            size_dist["3-4"] += 1
        else:
            size_dist["2"] += 1
    lines.extend([
        "### Bloc Size Distribution\n",
        "| Size | Count |",
        "|------|------:|",
    ])
    for label in ["10+", "5-9", "3-4", "2"]:
        if label in size_dist:
            lines.append(f"| {label} members | {size_dist[label]} |")
    lines.append("")

    # Org breakdown: group blocs by their primary organization.
    org_blocs: dict[str, list] = defaultdict(list)
    for b in blocs:
        org_blocs[b.primary_org].append(b)
    lines.extend([
        "### Blocs by Organization\n",
        "| Organization | Blocs | Total Members | Total Shared Drafts |",
        "|-------------|------:|--------------:|--------------------:|",
    ])
    # Sort organizations by total shared drafts across their blocs, descending.
    org_summary = sorted(org_blocs.items(), key=lambda x: -sum(b.shared_drafts for b in x[1]))
    for org, obs in org_summary:
        total_m = sum(len(b.members) for b in obs)
        total_s = sum(b.shared_drafts for b in obs)
        lines.append(f"| {org} | {len(obs)} | {total_m} | {total_s} |")
    lines.append("")

    # Detailed per-bloc profiles.
    lines.extend([
        "---\n",
        "## Detailed Bloc Profiles\n",
    ])
    for i, bloc in enumerate(blocs):
        lines.append(f"### {i + 1}. {bloc.label}")
        lines.append("")
        lines.append("| | |")
        lines.append("|---|---|")
        lines.append(f"| **Members** | {len(bloc.members)} |")
        lines.append(f"| **Shared Drafts** | {bloc.shared_drafts} |")
        lines.append(f"| **Cohesion** | {bloc.cohesion:.0%} |")
        lines.append(f"| **Primary Org** | {bloc.primary_org} |")
        # List all orgs only when the bloc spans more than one.
        orgs = {org for _, _, org in bloc.members if org}
        if len(orgs) > 1:
            lines.append(f"| **All Orgs** | {', '.join(sorted(orgs))} |")
        lines.append("")

        # Member table: per-author totals and in-bloc overlap.
        lines.append("**Members:**\n")
        lines.append("| Author | Organization | Drafts | In-Bloc Drafts |")
        lines.append("|--------|-------------|-------:|--------------:|")
        for pid, name, org in bloc.members:
            total_d = draft_counts.get(pid, 0)
            my_drafts = draft_sets.get(pid, set())
            # Count how many of this person's drafts have another bloc member.
            bloc_other_pids = bloc.member_pids - {pid}
            in_bloc = sum(
                1 for d in my_drafts
                if any(d in draft_sets.get(other, set()) for other in bloc_other_pids)
            )
            lines.append(f"| {name} | {org} | {total_d} | {in_bloc} |")
        lines.append("")

        # Shared drafts list: drafts with >= 2 bloc members, most-shared first.
        all_drafts: dict[str, int] = defaultdict(int)
        for pid in bloc.member_pids:
            for d in draft_sets.get(pid, set()):
                all_drafts[d] += 1
        shared_list = sorted(
            [(d, cnt) for d, cnt in all_drafts.items() if cnt >= 2],
            key=lambda x: -x[1],
        )
        if shared_list:
            lines.append("**Shared Drafts:**\n")
            lines.append("| Draft | Co-authors | Score | Categories |")
            lines.append("|-------|----------:|------:|------------|")
            for d, cnt in shared_list:
                r = rating_map.get(d)
                score = f"{r.composite_score:.1f}" if r else ""
                cats = ", ".join(r.categories[:2]) if r else ""
                lines.append(
                    f"| [{d}](https://datatracker.ietf.org/doc/{d}/) "
                    f"| {cnt}/{len(bloc.members)} | {score} | {cats} |"
                )
            lines.append("")
        lines.append("---\n")

    # Cross-bloc connections.
    lines.extend([
        "## Cross-Bloc Connections\n",
        "*Authors who bridge between different blocs or connect blocs to the wider community.*\n",
    ])
    # Find co-author pairs with >= 2 shared drafts (each pair counted once
    # via the person_id ordering in the self-join).
    coauthor_rows = db.conn.execute(
        """SELECT a1.name, da1.person_id, a2.name, da2.person_id, COUNT(*) as shared
        FROM draft_authors da1
        JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
        AND da1.person_id < da2.person_id
        JOIN authors a1 ON da1.person_id = a1.person_id
        JOIN authors a2 ON da2.person_id = a2.person_id
        GROUP BY da1.person_id, da2.person_id
        HAVING shared >= 2
        ORDER BY shared DESC"""
    ).fetchall()

    # Map pid -> bloc index for fast membership lookup.
    pid_bloc: dict[int, int] = {}
    for bi, b in enumerate(blocs):
        for pid in b.member_pids:
            pid_bloc[pid] = bi

    bridges = []
    for row in coauthor_rows:
        name_a, pid_a, name_b, pid_b, shared = row
        bloc_a = pid_bloc.get(pid_a)
        bloc_b = pid_bloc.get(pid_b)
        # One in a bloc, other not — or in different blocs.
        if bloc_a is not None and bloc_b is not None and bloc_a != bloc_b:
            bridges.append((name_a, blocs[bloc_a].label, name_b, blocs[bloc_b].label, shared))
        elif bloc_a is not None and bloc_b is None:
            bridges.append((name_a, blocs[bloc_a].label, name_b, "(independent)", shared))
        elif bloc_b is not None and bloc_a is None:
            # Swap so the bloc member is always the first column.
            bridges.append((name_b, blocs[bloc_b].label, name_a, "(independent)", shared))

    if bridges:
        lines.append("| Bloc Author | Bloc | External Author | Their Affiliation | Shared |")
        lines.append("|-------------|------|-----------------|-------------------|-------:|")
        for a, bloc_label, b, other_label, shared in bridges[:30]:
            lines.append(f"| {a} | {bloc_label} | {b} | {other_label} | {shared} |")
    else:
        lines.append("No cross-bloc connections found with >= 2 shared drafts.")
    lines.append("")

    report = "\n".join(lines)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(report)
    db.close()
    print(f"Report written to {out} ({len(blocs)} blocs)")
# Script entry point: run the report generator when executed directly.
if __name__ == "__main__":
    main()