v0.3.0: Gap-to-Draft pipeline, Living Standards Observatory, blog series
Gap-to-Draft Pipeline (ietf pipeline):
- Context builder assembles ideas, RFC foundations, similar drafts, ecosystem vision
- Generator produces outlines + sections using rich context with Claude
- Quality gates: novelty (embedding similarity), references, format, self-rating
- Family coordinator generates 5-draft ecosystem (AEM/ATD/HITL/AEPB/APAE)
- I-D formatter with proper headers, references, 72-char wrapping

Living Standards Observatory (ietf observatory):
- Source abstraction with IETF + W3C fetchers
- 7-step update pipeline: snapshot, fetch, analyze, embed, ideas, gaps, record
- Static GitHub Pages dashboard (explorer, gap tracker, timeline)
- Weekly CI/CD automation via GitHub Actions

Also includes:
- 361 drafts (expanded from 260 with 6 new keywords), 403 authors, 1,262 ideas, 12 gaps
- Blog series (8 posts planned), reports, arXiv paper figures
- Agent team infrastructure (CLAUDE.md, scripts, dev journal)
- 5 new DB tables, schema migration, ~15 new query methods

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
228
scripts/team-blocs-report.py
Executable file
228
scripts/team-blocs-report.py
Executable file
@@ -0,0 +1,228 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate a detailed team blocs report.
|
||||
|
||||
Usage:
|
||||
PYTHONPATH=src python scripts/team-blocs-report.py
|
||||
PYTHONPATH=src python scripts/team-blocs-report.py --min-shared 3 --threshold 0.80
|
||||
"""
|
||||
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from ietf_analyzer.config import Config
|
||||
from ietf_analyzer.db import Database
|
||||
from ietf_analyzer.orgs import detect_blocs, normalize_org, top_orgs_normalized
|
||||
|
||||
|
||||
def _size_bucket(n):
    """Map a bloc member count onto its histogram bucket label."""
    if n >= 10:
        return "10+"
    if n >= 5:
        return "5-9"
    if n >= 3:
        return "3-4"
    return "2"


def _coauthor_pairs(db):
    """Return (name_a, pid_a, name_b, pid_b, shared) for every author pair
    with >= 2 shared drafts, ordered most-shared first.

    The `da1.person_id < da2.person_id` join condition keeps each pair
    unique (no self-pairs, no mirrored duplicates).
    """
    return db.conn.execute(
        """SELECT a1.name, da1.person_id, a2.name, da2.person_id, COUNT(*) as shared
        FROM draft_authors da1
        JOIN draft_authors da2 ON da1.draft_name = da2.draft_name
        AND da1.person_id < da2.person_id
        JOIN authors a1 ON da1.person_id = a1.person_id
        JOIN authors a2 ON da2.person_id = a2.person_id
        GROUP BY da1.person_id, da2.person_id
        HAVING shared >= 2
        ORDER BY shared DESC"""
    ).fetchall()


def _build_report(db, args):
    """Render the full Markdown report; return (report_text, bloc_count).

    Sections: summary table, bloc size distribution, per-org breakdown,
    detailed per-bloc profiles, and cross-bloc bridge authors.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    draft_sets = db.author_draft_sets()
    draft_counts = db.author_draft_counts()
    total_authors = db.author_count()
    total_drafts = db.count_drafts()

    # Rating lookup so the shared-drafts tables can show score/categories.
    rating_map = {
        draft.name: rating for draft, rating in db.drafts_with_ratings(limit=500)
    }

    blocs = detect_blocs(
        db,
        cohesion_threshold=args.threshold,
        min_size=args.min_size,
        min_shared_drafts=args.min_shared,
    )

    # Overall stats: how much of the author population sits inside a bloc.
    bloc_authors = set()
    for bloc in blocs:
        bloc_authors |= bloc.member_pids
    pct_in_blocs = len(bloc_authors) / total_authors * 100 if total_authors else 0

    lines = [
        "# Team Bloc Analysis",
        f"*Generated {now} — {total_authors} authors, {total_drafts} drafts*",
        f"*Parameters: cohesion >= {args.threshold:.0%}, min shared drafts >= {args.min_shared}, min size >= {args.min_size}*\n",
        "## Summary\n",
        "| Metric | Value |",
        "|--------|------:|",
        f"| Total blocs detected | {len(blocs)} |",
        f"| Authors in blocs | {len(bloc_authors)} / {total_authors} ({pct_in_blocs:.0f}%) |",
    ]
    # detect_blocs output is assumed ordered largest/most-shared first —
    # blocs[0] supplies the superlatives below (TODO confirm against detect_blocs).
    if blocs:
        lines.append(f"| Largest bloc | {len(blocs[0].members)} members |")
        lines.append(f"| Most shared drafts | {blocs[0].shared_drafts} |")
    else:
        lines.append("| Largest bloc | — |")
        lines.append("| Most shared drafts | — |")
    lines.append("")

    # Bloc size distribution histogram.
    size_dist: dict[str, int] = defaultdict(int)
    for bloc in blocs:
        size_dist[_size_bucket(len(bloc.members))] += 1

    lines.extend([
        "### Bloc Size Distribution\n",
        "| Size | Count |",
        "|------|------:|",
    ])
    for label in ("10+", "5-9", "3-4", "2"):
        if label in size_dist:
            lines.append(f"| {label} members | {size_dist[label]} |")
    lines.append("")

    # Per-organization breakdown, sorted by total shared drafts descending.
    org_blocs: dict[str, list] = defaultdict(list)
    for bloc in blocs:
        org_blocs[bloc.primary_org].append(bloc)

    lines.extend([
        "### Blocs by Organization\n",
        "| Organization | Blocs | Total Members | Total Shared Drafts |",
        "|-------------|------:|--------------:|--------------------:|",
    ])
    org_summary = sorted(
        org_blocs.items(), key=lambda kv: -sum(b.shared_drafts for b in kv[1])
    )
    for org, obs in org_summary:
        total_m = sum(len(b.members) for b in obs)
        total_s = sum(b.shared_drafts for b in obs)
        lines.append(f"| {org} | {len(obs)} | {total_m} | {total_s} |")
    lines.append("")

    # One detailed profile per bloc.
    lines.extend([
        "---\n",
        "## Detailed Bloc Profiles\n",
    ])

    for idx, bloc in enumerate(blocs, start=1):
        lines.append(f"### {idx}. {bloc.label}")
        lines.append("")
        lines.append("| | |")
        lines.append("|---|---|")
        lines.append(f"| **Members** | {len(bloc.members)} |")
        lines.append(f"| **Shared Drafts** | {bloc.shared_drafts} |")
        lines.append(f"| **Cohesion** | {bloc.cohesion:.0%} |")
        lines.append(f"| **Primary Org** | {bloc.primary_org} |")

        # Only list all orgs when the bloc actually spans more than one.
        orgs = {org for _, _, org in bloc.members if org}
        if len(orgs) > 1:
            lines.append(f"| **All Orgs** | {', '.join(sorted(orgs))} |")
        lines.append("")

        # Member table: per-author draft totals and in-bloc collaboration count.
        lines.append("**Members:**\n")
        lines.append("| Author | Organization | Drafts | In-Bloc Drafts |")
        lines.append("|--------|-------------|-------:|--------------:|")
        for pid, name, org in bloc.members:
            own_drafts = draft_sets.get(pid, set())
            others = bloc.member_pids - {pid}
            # A draft counts as "in-bloc" when at least one other bloc
            # member is also an author of it.
            in_bloc = sum(
                1
                for d in own_drafts
                if any(d in draft_sets.get(other, set()) for other in others)
            )
            lines.append(
                f"| {name} | {org} | {draft_counts.get(pid, 0)} | {in_bloc} |"
            )
        lines.append("")

        # Shared drafts: every draft touched by >= 2 members, most members first.
        participation: dict[str, int] = defaultdict(int)
        for pid in bloc.member_pids:
            for d in draft_sets.get(pid, set()):
                participation[d] += 1
        shared_list = sorted(
            ((d, cnt) for d, cnt in participation.items() if cnt >= 2),
            key=lambda x: -x[1],
        )

        if shared_list:
            lines.append("**Shared Drafts:**\n")
            lines.append("| Draft | Co-authors | Score | Categories |")
            lines.append("|-------|----------:|------:|------------|")
            for d, cnt in shared_list:
                rating = rating_map.get(d)
                score = f"{rating.composite_score:.1f}" if rating else "—"
                cats = ", ".join(rating.categories[:2]) if rating else ""
                lines.append(
                    f"| [{d}](https://datatracker.ietf.org/doc/{d}/) "
                    f"| {cnt}/{len(bloc.members)} | {score} | {cats} |"
                )
            lines.append("")

        lines.append("---\n")

    # Cross-bloc connections: bloc members collaborating across bloc lines
    # or with independent (non-bloc) authors.
    lines.extend([
        "## Cross-Bloc Connections\n",
        "*Authors who bridge between different blocs or connect blocs to the wider community.*\n",
    ])

    pid_bloc: dict[int, int] = {}
    for bi, bloc in enumerate(blocs):
        for pid in bloc.member_pids:
            pid_bloc[pid] = bi

    bridges = []
    for name_a, pid_a, name_b, pid_b, shared in _coauthor_pairs(db):
        bloc_a = pid_bloc.get(pid_a)
        bloc_b = pid_bloc.get(pid_b)
        if bloc_a is not None and bloc_b is not None and bloc_a != bloc_b:
            bridges.append(
                (name_a, blocs[bloc_a].label, name_b, blocs[bloc_b].label, shared)
            )
        elif bloc_a is not None and bloc_b is None:
            bridges.append(
                (name_a, blocs[bloc_a].label, name_b, "(independent)", shared)
            )
        elif bloc_b is not None and bloc_a is None:
            # Swap so the bloc member always occupies the first column.
            bridges.append(
                (name_b, blocs[bloc_b].label, name_a, "(independent)", shared)
            )

    if bridges:
        # Fixed header: the fourth column holds the partner's bloc label (or
        # "(independent)"), not their affiliation.
        lines.append("| Bloc Author | Bloc | External Author | Their Bloc | Shared |")
        lines.append("|-------------|------|-----------------|------------|-------:|")
        for a, bloc_label, b, other_label, shared in bridges[:30]:
            lines.append(f"| {a} | {bloc_label} | {b} | {other_label} | {shared} |")
    else:
        lines.append("No cross-bloc connections found with >= 2 shared drafts.")

    lines.append("")
    return "\n".join(lines), len(blocs)


def main():
    """Parse CLI options, build the team-blocs report, and write it to disk.

    Options: --min-shared (bloc edge threshold), --threshold (cohesion),
    --min-size (members per bloc), -o/--output (report path, defaults to
    data/reports/team-blocs.md under the configured data dir).
    """
    parser = argparse.ArgumentParser(description="Team blocs report")
    parser.add_argument("--min-shared", type=int, default=2, help="Min shared drafts to form a bloc edge (default: 2)")
    parser.add_argument("--threshold", type=float, default=0.70, help="Cohesion threshold 0-1 (default: 0.70)")
    parser.add_argument("--min-size", type=int, default=2, help="Min members per bloc (default: 2)")
    parser.add_argument("-o", "--output", default=None, help="Output path (default: data/reports/team-blocs.md)")
    args = parser.parse_args()

    cfg = Config()
    db = Database(cfg)
    out = Path(args.output) if args.output else Path(cfg.data_dir) / "reports" / "team-blocs.md"

    try:
        report, n_blocs = _build_report(db, args)
    finally:
        # Close the DB even if report generation raises.
        db.close()

    out.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: the report contains em dashes, which would crash
    # write_text() under a non-UTF-8 locale default (e.g. cp1252 on Windows).
    out.write_text(report, encoding="utf-8")
    print(f"Report written to {out} ({n_blocs} blocs)")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user