diff --git a/src/ietf_analyzer/cli.py b/src/ietf_analyzer/cli.py index 301bb33..0025bea 100644 --- a/src/ietf_analyzer/cli.py +++ b/src/ietf_analyzer/cli.py @@ -1,22 +1,14 @@ -"""CLI entry point — all user-facing commands.""" +"""CLI entry point — slim wrapper that registers command modules.""" from __future__ import annotations -from pathlib import Path - import click -from rich.console import Console -from rich.table import Table from .config import Config from .db import Database -console = Console() - - -def _get_config() -> Config: - cfg = Config.load() - return cfg +# Re-export shared utilities for backward compatibility +from .commands.common import console, _get_config, pass_cfg_db @click.group() @@ -32,3398 +24,6 @@ def main(ctx): ctx.call_on_close(db.close) -def pass_cfg_db(f): - """Decorator that extracts cfg and db from Click context and passes them as arguments. - - Usage: place @pass_cfg_db after all @click decorators. The decorated function - should accept (cfg, db, ...) instead of manually calling _get_config()/Database(). 
- """ - import functools - - @click.pass_context - @functools.wraps(f) - def wrapper(ctx, **kwargs): - return f(ctx.obj["cfg"], ctx.obj["db"], **kwargs) - return wrapper - - -# ── fetch ──────────────────────────────────────────────────────────────────── - - -@main.command() -@click.option("--keywords", "-k", multiple=True, help="Extra keywords to search for") -@click.option("--since", "-s", help="Only fetch drafts newer than this date (YYYY-MM-DD)") -@click.option("--download-text/--no-download-text", default=True, help="Download full text of drafts") -@click.option("--classify/--no-classify", default=True, help="Pre-filter with local Ollama classifier (saves Claude tokens)") -@pass_cfg_db -def fetch(cfg, db, keywords: tuple[str, ...], since: str | None, download_text: bool, classify: bool): - """Fetch AI/agent drafts from IETF Datatracker.""" - from .fetcher import Fetcher - - fetcher = Fetcher(cfg) - - kw_list = list(cfg.search_keywords) - if keywords: - kw_list.extend(keywords) - - try: - drafts = fetcher.search_drafts(keywords=kw_list, since=since) - console.print(f"Found [bold]{len(drafts)}[/] drafts from Datatracker") - - # Pre-filter with local classifier to avoid storing irrelevant drafts - if classify and drafts: - try: - from .classifier import Classifier - console.print("\n[bold]Running local AI-relevance classifier (Ollama)...[/]") - clf = Classifier(cfg) - draft_dicts = [{"name": d.name, "title": d.title, "abstract": d.abstract} for d in drafts] - relevant, irrelevant = clf.classify_batch(draft_dicts, verbose=True) - relevant_names = {d["name"] for d in relevant} - before = len(drafts) - drafts = [d for d in drafts if d.name in relevant_names] - console.print(f"\n Kept [green]{len(drafts)}[/green] / {before} drafts after classification") - clf.close() - except Exception as e: - console.print(f"[yellow]Classifier unavailable ({e}), storing all drafts[/yellow]") - - for draft in drafts: - db.upsert_draft(draft) - console.print(f"Stored [bold 
green]{len(drafts)}[/] drafts in database") - - if download_text: - missing = db.drafts_without_text() - if missing: - console.print(f"Downloading text for [bold]{len(missing)}[/] drafts...") - texts = fetcher.download_texts(missing) - for name, text in texts.items(): - draft = db.get_draft(name) - if draft: - draft.full_text = text - db.upsert_draft(draft) - finally: - fetcher.close() - - -# ── classify ───────────────────────────────────────────────────────────────── - - -@main.command() -@click.option("--unrated", is_flag=True, help="Classify only unrated drafts") -@click.option("--all", "all_drafts", is_flag=True, help="Classify all drafts (checks accuracy against existing ratings)") -@click.option("--remove", is_flag=True, help="Actually remove drafts classified as irrelevant (use with --unrated)") -@pass_cfg_db -def classify(cfg, db, unrated: bool, all_drafts: bool, remove: bool): - """Pre-classify drafts as AI-relevant using local Ollama model. - - Runs a two-stage filter (embedding similarity + chat model) to identify - irrelevant drafts before spending Claude tokens on rating. 
- - Examples: - - ietf classify --unrated # preview irrelevant unrated drafts - - ietf classify --unrated --remove # remove them from DB - - ietf classify --all # accuracy check against existing ratings - """ - from .classifier import Classifier - - clf = Classifier(cfg) - - if all_drafts: - # Accuracy check mode: compare against existing FP flags - console.print("[bold]Accuracy check: classifying all rated drafts...[/]\n") - tp_rows = db.conn.execute( - "SELECT d.name, d.title, d.abstract FROM drafts d " - "JOIN ratings r ON d.name = r.draft_name WHERE r.false_positive = 0" - ).fetchall() - fp_rows = db.conn.execute( - "SELECT d.name, d.title, d.abstract FROM drafts d " - "JOIN ratings r ON d.name = r.draft_name WHERE r.false_positive = 1" - ).fetchall() - - tp_ok, tp_miss, fp_ok, fp_miss = 0, 0, 0, 0 - for row in tp_rows: - rel, sim, method = clf.classify(row["title"], row["abstract"]) - if rel: - tp_ok += 1 - else: - tp_miss += 1 - for row in fp_rows: - rel, sim, method = clf.classify(row["title"], row["abstract"]) - if not rel: - fp_ok += 1 - else: - fp_miss += 1 - - total_tp = len(tp_rows) - total_fp = len(fp_rows) - precision = tp_ok / (tp_ok + fp_miss) if (tp_ok + fp_miss) else 0 - recall = tp_ok / total_tp if total_tp else 0 - console.print(f"True Positives: [green]{tp_ok}[/]/{total_tp} kept ({tp_miss} missed)") - console.print(f"False Positives: [red]{fp_ok}[/]/{total_fp} filtered ({fp_miss} slipped)") - console.print(f"Precision: [bold]{precision:.1%}[/] Recall: [bold]{recall:.1%}[/]") - - elif unrated: - drafts = db.unrated_drafts(limit=5000) - if not drafts: - console.print("No unrated drafts to classify.") - clf.close() - return - - console.print(f"[bold]Classifying {len(drafts)} unrated drafts...[/]\n") - draft_dicts = [{"name": d.name, "title": d.title, "abstract": d.abstract} for d in drafts] - relevant, irrelevant = clf.classify_batch(draft_dicts, verbose=True) - - if irrelevant: - console.print(f"\n[bold red]Irrelevant drafts 
({len(irrelevant)}):[/]") - table = Table() - table.add_column("Name", style="cyan", max_width=50) - table.add_column("Title", max_width=50) - for d in irrelevant: - table.add_row(d["name"], d.get("title", "")[:50]) - console.print(table) - - if remove: - for d in irrelevant: - db.conn.execute("DELETE FROM drafts WHERE name = ?", (d["name"],)) - db.conn.commit() - console.print(f"\n[bold red]Removed {len(irrelevant)} irrelevant drafts from database[/]") - else: - console.print(f"\n[dim]Use --remove to delete these from the DB[/]") - else: - console.print("\nAll unrated drafts appear relevant.") - else: - console.print("Use --unrated or --all. See: ietf classify --help") - - clf.close() - - -# ── list ───────────────────────────────────────────────────────────────────── - - -@main.command("list") -@click.option("--limit", "-n", default=30, help="Number of drafts to show") -@click.option("--sort", "-s", default="time DESC", help="Sort order (e.g. 'time DESC', 'name ASC')") -@pass_cfg_db -def list_drafts(cfg, db, limit: int, sort: str): - """List tracked drafts.""" - drafts = db.list_drafts(limit=limit, order_by=sort) - total = db.count_drafts() - - table = Table(title=f"Tracked Drafts ({total} total, showing {len(drafts)})") - table.add_column("Date", style="dim", width=10) - table.add_column("Name", style="cyan", max_width=55) - table.add_column("Title", max_width=50) - table.add_column("Pg", justify="right", width=4) - table.add_column("Text", justify="center", width=4) - table.add_column("Rated", justify="center", width=5) - - for d in drafts: - has_text = "\u2713" if d.full_text else "" - rated = "\u2713" if db.get_rating(d.name) else "" - table.add_row(d.date, d.name, d.title[:50], str(d.pages or ""), has_text, rated) - - console.print(table) - - -# ── search ─────────────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("query") -@click.option("--limit", "-n", default=20, help="Max results") -@pass_cfg_db -def 
search(cfg, db, query: str, limit: int): - """Full-text search across stored drafts.""" - results = db.search_drafts(query, limit=limit) - if not results: - console.print(f"No results for [bold]{query}[/]") - return - - table = Table(title=f"Search: {query} ({len(results)} results)") - table.add_column("Date", style="dim", width=10) - table.add_column("Name", style="cyan") - table.add_column("Title") - - for d in results: - table.add_row(d.date, d.name, d.title[:60]) - - console.print(table) - - -# ── show ───────────────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("name") -@pass_cfg_db -def show(cfg, db, name: str): - """Show detailed info for a draft.""" - from .reports import Reporter - from .readiness import compute_readiness - - reporter = Reporter(cfg, db) - draft = db.get_draft(name) - if draft is None: - console.print(f"[red]Draft not found: {name}[/]") - return - - rating = db.get_rating(name) - - console.print(f"\n[bold]{draft.title}[/]") - console.print(f"[dim]{draft.name}[/] rev {draft.rev} | {draft.date} | {draft.pages or '?'} pages") - console.print(f"Group: {draft.group or 'individual'} | {draft.datatracker_url}") - console.print(f"\n[italic]{draft.abstract}[/]\n") - - if rating: - console.print("[bold]AI Assessment[/]") - console.print(f" Score: [bold green]{rating.composite_score:.1f}[/]") - console.print(f" Summary: {rating.summary}\n") - - table = Table(show_header=True) - table.add_column("Dimension", width=12) - table.add_column("Score", justify="center", width=7) - table.add_column("Notes") - table.add_row("Novelty", f"{rating.novelty}/5", rating.novelty_note) - table.add_row("Maturity", f"{rating.maturity}/5", rating.maturity_note) - table.add_row("Overlap", f"{rating.overlap}/5", rating.overlap_note) - table.add_row("Momentum", f"{rating.momentum}/5", rating.momentum_note) - table.add_row("Relevance", f"{rating.relevance}/5", rating.relevance_note) - console.print(table) - - if 
rating.categories: - console.print(f"\nCategories: {', '.join(rating.categories)}") - else: - console.print("[dim]Not yet rated — run: ietf analyze {name}[/]") - - # Readiness score - readiness = compute_readiness(db, name) - if readiness["score"] > 0: - console.print(f"\n[bold]Standards Readiness: [cyan]{readiness['score']}/100[/][/]") - rtable = Table(show_header=True) - rtable.add_column("Factor", width=20) - rtable.add_column("Value", justify="center", width=10) - rtable.add_column("Points", justify="right", width=8) - rtable.add_column("Detail") - for key, f in readiness["factors"].items(): - rtable.add_row(f["label"], f"{f['value']:.2f}", f"+{f['contribution']}", f["detail"]) - console.print(rtable) - - # Save detailed report too - path = reporter.draft_detail(name) - if path: - console.print(f"\n[dim]Report saved: {path}[/]") - - -# ── annotate ───────────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("draft_name") -@click.option("--note", "-n", default=None, help="Set/update the note text") -@click.option("--tag", "-t", multiple=True, help="Add a tag (can be used multiple times)") -@click.option("--remove-tag", "-r", multiple=True, help="Remove a tag (can be used multiple times)") -@pass_cfg_db -def annotate(cfg, db, draft_name: str, note: str | None, tag: tuple[str, ...], remove_tag: tuple[str, ...]): - """Add or view annotations (notes & tags) for a draft.""" - draft = db.get_draft(draft_name) - if draft is None: - console.print(f"[red]Draft not found: {draft_name}[/]") - return - - # If no options, display current annotation - if note is None and not tag and not remove_tag: - ann = db.get_annotation(draft_name) - if ann: - console.print(f"\n[bold]Annotation for {draft_name}[/]") - console.print(f" Note: {ann['note'] or '(empty)'}") - console.print(f" Tags: {', '.join(ann['tags']) if ann['tags'] else '(none)'}") - console.print(f" Updated: {ann['updated_at']}") - else: - console.print(f"[dim]No annotation for 
{draft_name}. Use --note or --tag to add one.[/]") - return - - # Fetch existing tags for add/remove operations - existing = db.get_annotation(draft_name) - current_tags = existing["tags"] if existing else [] - - for t in tag: - if t not in current_tags: - current_tags.append(t) - for t in remove_tag: - if t in current_tags: - current_tags.remove(t) - - db.upsert_annotation(draft_name, note=note, tags=current_tags) - ann = db.get_annotation(draft_name) - console.print(f"[green]Annotation updated for {draft_name}[/]") - console.print(f" Note: {ann['note'] or '(empty)'}") - console.print(f" Tags: {', '.join(ann['tags']) if ann['tags'] else '(none)'}") - - -# ── analyze ────────────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("name", required=False) -@click.option("--all", "analyze_all", is_flag=True, help="Analyze all unrated drafts") -@click.option("--limit", "-n", default=50, help="Max drafts to analyze (with --all)") -@click.option("--retry-failed", is_flag=True, help="Re-analyze drafts that previously failed (clears cache)") -@click.option("--dry-run", is_flag=True, help="Show what would be analyzed without making changes") -@click.option("--pre-classify/--no-pre-classify", "pre_classify", default=False, - help="Pre-filter unrated drafts with local Ollama classifier before Claude") -@pass_cfg_db -def analyze(cfg, db, name: str | None, analyze_all: bool, limit: int, retry_failed: bool, dry_run: bool, pre_classify: bool): - """Analyze and rate drafts using Claude. - - Use --pre-classify to run the local Ollama classifier first, removing - irrelevant drafts before spending Claude tokens. Saves ~40% of API costs. 
- """ - from .analyzer import Analyzer - - if dry_run: - if retry_failed: - unrated = db.unrated_drafts(limit=limit) - retryable = [] - for draft in unrated: - row = db.conn.execute( - "SELECT COUNT(*) FROM llm_cache WHERE draft_name = ?", - (draft.name,), - ).fetchone() - if row[0] > 0: - retryable.append(draft) - console.print(f"[bold yellow]DRY RUN[/]: Would retry [bold]{len(retryable)}[/] previously failed drafts") - for d in retryable[:20]: - console.print(f" - {d.name}") - if len(retryable) > 20: - console.print(f" ... and {len(retryable) - 20} more") - elif analyze_all: - unrated = db.unrated_drafts(limit=limit) - console.print(f"[bold yellow]DRY RUN[/]: Would analyze [bold]{len(unrated)}[/] unrated drafts") - for d in unrated[:20]: - console.print(f" - {d.name}: {d.title[:60]}") - if len(unrated) > 20: - console.print(f" ... and {len(unrated) - 20} more") - elif name: - existing = db.get_rating(name) - status = "re-analyze (already rated)" if existing else "analyze (not yet rated)" - console.print(f"[bold yellow]DRY RUN[/]: Would {status}: {name}") - else: - console.print("Provide a draft name or use --all") - return - - analyzer = Analyzer(cfg, db) - - if retry_failed: - # Find drafts that have cache entries but no ratings (failed analyses) - unrated = db.unrated_drafts(limit=limit) - retryable = [] - for draft in unrated: - # Check if there's a cache entry for this draft (it was attempted) - row = db.conn.execute( - "SELECT COUNT(*) FROM llm_cache WHERE draft_name = ?", - (draft.name,), - ).fetchone() - if row[0] > 0: - retryable.append(draft) - if not retryable: - console.print("No previously failed drafts to retry.") - else: - console.print(f"Retrying [bold]{len(retryable)}[/] previously failed drafts...") - count = 0 - for draft in retryable: - rating = analyzer.rate_draft(draft.name, use_cache=False) - if rating: - count += 1 - console.print(f"Successfully re-analyzed [bold green]{count}[/] of {len(retryable)} drafts") - elif analyze_all: - if 
pre_classify: - # Pre-filter with local Ollama classifier - try: - from .classifier import Classifier - unrated = db.unrated_drafts(limit=limit) - if unrated: - console.print(f"\n[bold]Pre-classifying {len(unrated)} unrated drafts with Ollama...[/]") - clf = Classifier(cfg) - draft_dicts = [{"name": d.name, "title": d.title, "abstract": d.abstract} for d in unrated] - relevant, irrelevant = clf.classify_batch(draft_dicts, verbose=True) - clf.close() - - if irrelevant: - console.print(f"\n Removing [red]{len(irrelevant)}[/] irrelevant drafts from DB...") - for d in irrelevant: - db.conn.execute("DELETE FROM drafts WHERE name = ?", (d["name"],)) - db.conn.commit() - console.print(f" Removed. {len(relevant)} drafts remain for Claude analysis.\n") - except Exception as e: - console.print(f"[yellow]Classifier unavailable ({e}), analyzing all[/yellow]") - - count = analyzer.rate_all_unrated(limit=limit) - console.print(f"Analyzed [bold green]{count}[/] drafts") - elif name: - rating = analyzer.rate_draft(name) - if rating: - console.print(f"\n[bold green]Rating for {name}:[/]") - console.print(f" Score: {rating.composite_score:.1f}") - console.print(f" Summary: {rating.summary}") - console.print(f" Novelty={rating.novelty} Maturity={rating.maturity} " - f"Overlap={rating.overlap} Momentum={rating.momentum} " - f"Relevance={rating.relevance}") - else: - console.print("[red]Analysis failed[/]") - else: - console.print("Provide a draft name or use --all") - - -# ── ask ────────────────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("question") -@click.option("--top", "-n", default=5, help="Number of source drafts to use") -@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)") -@pass_cfg_db -def ask(cfg, db, question: str, top: int, cheap: bool): - """Ask a natural language question about the drafts. - - Examples: - ietf ask "Which drafts address agent authentication?" 
- ietf ask "What are the competing approaches to agent delegation?" --top 10 - ietf ask "How do safety mechanisms work?" --cheap - """ - from .search import HybridSearch - - searcher = HybridSearch(cfg, db) - console.print(f"\n[dim]Searching for relevant drafts...[/]") - result = searcher.ask(question, top_k=top, cheap=cheap) - - # Display the answer - console.print() - console.print("[bold cyan]Answer[/]") - console.print("[dim]" + "-" * 60 + "[/]") - console.print(result["answer"]) - console.print() - - # Display source drafts table - if result["sources"]: - table = Table(title="Source Drafts") - table.add_column("#", style="dim", width=3) - table.add_column("Draft", style="cyan", max_width=50) - table.add_column("Title", max_width=45) - table.add_column("Match", width=10) - table.add_column("Score", justify="right", width=8) - - for i, src in enumerate(result["sources"], 1): - score_str = f"{src['similarity']:.3f}" if src.get("similarity") else "-" - table.add_row( - str(i), - src["name"], - src["title"][:45], - src.get("match_type", ""), - score_str, - ) - - console.print(table) - - -# ── compare ────────────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("names", nargs=-1, required=True) -@pass_cfg_db -def compare(cfg, db, names: tuple[str, ...]): - """Compare multiple drafts for overlap and unique contributions.""" - from .analyzer import Analyzer - - analyzer = Analyzer(cfg, db) - - result = analyzer.compare_drafts(list(names)) - if "error" in result: - console.print(f"[red]{result['error']}[/]") - else: - console.print(f"\n[bold cyan]Comparison of {len(result['drafts'])} drafts[/]") - console.print("[dim]" + "-" * 60 + "[/]") - console.print(result["text"]) - - -# ── embed ──────────────────────────────────────────────────────────────────── - - -@main.command() -@click.option("--dry-run", is_flag=True, help="Show what would be embedded without making changes") -@pass_cfg_db -def embed(cfg, db, dry_run: bool): - 
"""Generate embeddings for all drafts (requires Ollama).""" - if dry_run: - missing = db.drafts_without_embeddings(limit=10000) - console.print(f"[bold yellow]DRY RUN[/]: Would embed [bold]{len(missing)}[/] drafts") - for name in missing[:20]: - console.print(f" - {name}") - if len(missing) > 20: - console.print(f" ... and {len(missing) - 20} more") - return - - from .embeddings import Embedder - - embedder = Embedder(cfg, db) - count = embedder.embed_all_missing() - console.print(f"Embedded [bold green]{count}[/] drafts") - - -# ── embed-ideas ────────────────────────────────────────────────────────────── - - -@main.command("embed-ideas") -@click.option("--limit", default=0, help="Max ideas to embed (0=all)") -@click.option("--batch-size", default=50, help="Batch size for Ollama") -@click.option("--dry-run", is_flag=True, help="Show what would be embedded without making changes") -@pass_cfg_db -def embed_ideas(cfg, db, limit: int, batch_size: int, dry_run: bool): - """Generate embeddings for extracted ideas via Ollama.""" - missing = db.ideas_without_embeddings(limit=limit if limit > 0 else 10000) - if not missing: - console.print("All ideas already have embeddings.") - return - - if dry_run: - console.print(f"[bold yellow]DRY RUN[/]: Would embed [bold]{len(missing)}[/] ideas in batches of {batch_size}") - for idea in missing[:20]: - console.print(f" - [{idea.get('id', '?')}] {idea['title'][:60]}") - if len(missing) > 20: - console.print(f" ... 
and {len(missing) - 20} more") - return - - import numpy as np - import ollama as ollama_lib - from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn - - client = ollama_lib.Client(host=cfg.ollama_url) - total = len(missing) - console.print(f"Embedding [bold]{total}[/] ideas in batches of {batch_size}...") - - count = 0 - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - MofNCompleteColumn(), - console=console, - ) as progress: - task = progress.add_task("Embedding ideas...", total=total) - for start in range(0, total, batch_size): - batch = missing[start:start + batch_size] - texts = [f"{idea['title']}. {idea['description']}" for idea in batch] - try: - resp = client.embed(model=cfg.ollama_embed_model, input=texts) - for i, idea in enumerate(batch): - vec = np.array(resp["embeddings"][i], dtype=np.float32) - db.store_idea_embedding(idea["id"], cfg.ollama_embed_model, vec) - count += 1 - progress.advance(task) - except Exception as e: - console.print(f"[red]Batch failed: {e}[/]") - for _ in batch: - progress.advance(task) - - console.print(f"Embedded [bold green]{count}[/] ideas") - - -# ── similar ────────────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("name") -@click.option("--top", "-n", default=10, help="Number of similar drafts to show") -@pass_cfg_db -def similar(cfg, db, name: str, top: int): - """Find drafts most similar to a given draft.""" - from .embeddings import Embedder - - embedder = Embedder(cfg, db) - results = embedder.find_similar(name, top_n=top) - if not results: - console.print(f"[yellow]No similar drafts found (need embeddings — run `ietf embed` first)[/]") - return - - table = Table(title=f"Drafts similar to {name}") - table.add_column("Similarity", justify="right", width=10) - table.add_column("Draft", style="cyan") - table.add_column("Title") - - for sim_name, score in results: - draft = 
db.get_draft(sim_name) - title = draft.title[:60] if draft else "" - table.add_row(f"{score:.3f}", sim_name, title) - - console.print(table) - - -# ── clusters ───────────────────────────────────────────────────────────────── - - -@main.command() -@click.option("--threshold", "-t", default=0.85, help="Similarity threshold for clustering") -@pass_cfg_db -def clusters(cfg, db, threshold: float): - """Find clusters of highly similar (potentially overlapping) drafts.""" - from .embeddings import Embedder - - embedder = Embedder(cfg, db) - cluster_list = embedder.find_clusters(threshold=threshold) - if not cluster_list: - console.print("No clusters found at this threshold.") - return - - console.print(f"\n[bold]Found {len(cluster_list)} clusters[/] (threshold={threshold})\n") - for i, cluster in enumerate(cluster_list, 1): - console.print(f"[bold cyan]Cluster {i}[/] ({len(cluster)} drafts):") - for name in cluster: - draft = db.get_draft(name) - title = draft.title[:60] if draft else "" - console.print(f" - {name} [dim]{title}[/]") - console.print() - - -# ── report ─────────────────────────────────────────────────────────────────── - - -@main.group() -def report(): - """Generate markdown reports.""" - pass - - -@report.command() -@pass_cfg_db -def overview(cfg, db): - """Overview table of all rated drafts.""" - from .reports import Reporter - path = Reporter(cfg, db).overview() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command() -@pass_cfg_db -def landscape(cfg, db): - """Category-grouped landscape view.""" - from .reports import Reporter - path = Reporter(cfg, db).landscape() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command() -@click.option("--days", "-d", default=7, help="Look back N days") -@pass_cfg_db -def digest(cfg, db, days: int): - """What's new digest.""" - from .reports import Reporter - path = Reporter(cfg, db).digest(since_days=days) - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command() 
-@pass_cfg_db -def timeline(cfg, db): - """Timeline of draft submissions by month and category.""" - from .reports import Reporter - path = Reporter(cfg, db).timeline() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("overlap-matrix") -@pass_cfg_db -def overlap_matrix(cfg, db): - """Full pairwise overlap matrix report.""" - from .embeddings import Embedder - from .reports import Reporter - embedder = Embedder(cfg, db) - n_drafts = len(db.all_drafts()) - console.print(f"Computing {n_drafts}x{n_drafts} similarity matrix...") - path = Reporter(cfg, db).overlap_matrix(embedder) - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("authors") -@pass_cfg_db -def authors_report(cfg, db): - """Author and organization network report.""" - from .reports import Reporter - path = Reporter(cfg, db).authors_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("ideas") -@pass_cfg_db -def ideas_report(cfg, db): - """Report on extracted technical ideas.""" - from .reports import Reporter - path = Reporter(cfg, db).ideas_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("refs") -@pass_cfg_db -def refs_report(cfg, db): - """Cross-reference report — which standards the ecosystem builds on.""" - from .reports import Reporter - path = Reporter(cfg, db).refs_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("trends") -@pass_cfg_db -def trends_report(cfg, db): - """Category trend analysis report (markdown).""" - from .reports import Reporter - path = Reporter(cfg, db).trends_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("idea-overlap") -@pass_cfg_db -def idea_overlap_report(cfg, db): - """Cross-organization idea overlap report.""" - from .reports import Reporter - path = Reporter(cfg, db).idea_overlap_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("status") -@pass_cfg_db -def status_report(cfg, 
db): - """WG adoption status report.""" - from .reports import Reporter - path = Reporter(cfg, db).status_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("revisions") -@pass_cfg_db -def revisions_report(cfg, db): - """Draft revision velocity report.""" - from .reports import Reporter - path = Reporter(cfg, db).revisions_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("centrality") -@pass_cfg_db -def centrality_report(cfg, db): - """Author network centrality report.""" - from .reports import Reporter - path = Reporter(cfg, db).centrality_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("co-occurrence") -@pass_cfg_db -def co_occurrence_report(cfg, db): - """Category co-occurrence matrix report.""" - from .reports import Reporter - path = Reporter(cfg, db).co_occurrence_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("wg") -@pass_cfg_db -def wg_report(cfg, db): - """Working group analysis report — overlaps, alignment, submission targets.""" - from .reports import Reporter - path = Reporter(cfg, db).wg_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("sources") -@pass_cfg_db -def sources_report(cfg, db): - """Cross-source comparison report — ratings and categories by standards body.""" - from .reports import Reporter - path = Reporter(cfg, db).sources_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("false-positives") -@pass_cfg_db -def false_positives_report(cfg, db): - """False positive profiling report — what makes drafts look AI-related but not be.""" - from .reports import Reporter - path = Reporter(cfg, db).false_positives_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("citations") -@pass_cfg_db -def citations_report(cfg, db): - """Citation influence and BCP dependency analysis.""" - from .reports import Reporter - path = Reporter(cfg, 
db).citations_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("complexity") -@pass_cfg_db -def complexity_report(cfg, db): - """Draft complexity matrix: correlations between structural complexity and ratings.""" - from .reports import Reporter - path = Reporter(cfg, db).complexity_report() - console.print(f"Report saved: [bold]{path}[/]") - - -@report.command("idea-analysis") -@pass_cfg_db -def idea_analysis_report(cfg, db): - """Idea novelty deep dive — distribution, types, top ideas, cross-draft patterns.""" - from .reports import Reporter - path = Reporter(cfg, db).idea_analysis() - console.print(f"Report saved: [bold]{path}[/]") - - -# ── wg (working group analysis) ───────────────────────────────────────── - - -@main.group() -def wg(): - """Working group analysis — overlaps, alignment opportunities, submission targets.""" - pass - - -@wg.command("list") -@click.option("--min-drafts", default=1, help="Minimum drafts to show a WG") -@pass_cfg_db -def wg_list(cfg, db, min_drafts: int): - """List working groups with draft counts and average scores.""" - summaries = db.wg_summary() - if not summaries: - console.print("[yellow]No WG data. 
Run: python scripts/backfill-wg-names.py[/]") - return - - summaries = [s for s in summaries if s["draft_count"] >= min_drafts] - - table = Table(title=f"Working Groups ({len(summaries)} with >= {min_drafts} drafts)") - table.add_column("WG", style="cyan", width=12) - table.add_column("#", justify="right", width=4) - table.add_column("Ideas", justify="right", width=5) - table.add_column("Nov", justify="center", width=4) - table.add_column("Mat", justify="center", width=4) - table.add_column("Ovl", justify="center", width=4) - table.add_column("Mom", justify="center", width=4) - table.add_column("Rel", justify="center", width=4) - table.add_column("Top Categories") - - for s in summaries: - top_cats = sorted(s["categories"].items(), key=lambda x: x[1], reverse=True)[:3] - cats_str = ", ".join(f"{c}({n})" for c, n in top_cats) if top_cats else "-" - table.add_row( - s["wg"], str(s["draft_count"]), str(s["idea_count"]), - str(s["avg_novelty"]), str(s["avg_maturity"]), - str(s["avg_overlap"]), str(s["avg_momentum"]), - str(s["avg_relevance"]), cats_str, - ) - - console.print(table) - - # Also show individual submission count - indiv = db.conn.execute( - 'SELECT COUNT(*) FROM drafts WHERE "group" = \'none\' OR "group" IS NULL' - ).fetchone()[0] - console.print(f"\n[dim]Individual submissions (no WG): {indiv}[/]") - - -@wg.command("show") -@click.argument("name") -@pass_cfg_db -def wg_show(cfg, db, name: str): - """Show details for a specific working group.""" - drafts = db.wg_drafts(name) - if not drafts: - console.print(f"[red]No drafts found for WG: {name}[/]") - return - - console.print(f"\n[bold]Working Group: {name}[/] ({len(drafts)} drafts)\n") - - table = Table() - table.add_column("Date", style="dim", width=10) - table.add_column("Name", style="cyan") - table.add_column("Title", max_width=50) - table.add_column("Score", justify="right", width=6) - - for d in drafts: - rating = db.get_rating(d.name) - score = f"{rating.composite_score:.1f}" if rating else "-" - 
table.add_row(d.date, d.name, d.title[:50], score) - - console.print(table) - - # Show ideas for this WG - ideas = [] - for d in drafts: - ideas.extend(db.get_ideas_for_draft(d.name)) - if ideas: - console.print(f"\n[bold]Ideas ({len(ideas)}):[/]") - for idea in ideas[:15]: - console.print(f" - [cyan]{idea['title']}[/]: {idea['description'][:80]}") - if len(ideas) > 15: - console.print(f" [dim]... and {len(ideas) - 15} more[/]") - - -@wg.command("overlaps") -@click.option("--min-wgs", default=2, help="Minimum WGs sharing a category to show") -@pass_cfg_db -def wg_overlaps(cfg, db, min_wgs: int): - """Find categories and ideas that span multiple WGs — alignment opportunities.""" - # Category spread across WGs - spread = db.category_wg_spread() - multi = [s for s in spread if s["wg_count"] >= min_wgs - and not all(w["wg"] == "none" for w in s["wgs"])] - - if multi: - console.print(f"\n[bold]Categories spanning {min_wgs}+ WGs[/]\n") - for s in multi: - wg_strs = [f"{w['wg']}({w['count']})" for w in s["wgs"] if w["wg"] != "none"] - if wg_strs: - console.print(f" [cyan]{s['category']}[/] — {s['total_drafts']} drafts across {s['wg_count']} WGs") - console.print(f" WGs: {', '.join(wg_strs)}") - - # Idea overlap across WGs - idea_overlaps = db.wg_idea_overlap() - cross_wg = [o for o in idea_overlaps - if not all(w == "none" for w in o["wg_names"])] - - if cross_wg: - console.print(f"\n[bold]Ideas appearing in {min_wgs}+ WGs ({len(cross_wg)} found)[/]\n") - for o in cross_wg[:20]: - real_wgs = [w for w in o["wg_names"] if w != "none"] - console.print(f" [cyan]{o['idea_title']}[/] — WGs: {', '.join(real_wgs)}") - for entry in o["wgs"]: - if entry["wg"] != "none": - console.print(f" - [{entry['wg']}] {entry['draft_name']}") - if len(cross_wg) > 20: - console.print(f"\n [dim]... 
and {len(cross_wg) - 20} more[/]") - - if not multi and not cross_wg: - console.print("[yellow]No cross-WG overlaps found.[/]") - - -@wg.command("alignment") -@pass_cfg_db -def wg_alignment(cfg, db): - """Identify where individual drafts should be consolidated into WG standards.""" - # Compare individual vs WG category distribution - dist = db.individual_vs_wg_categories() - indiv = dist["individual"] - adopted = dist["wg_adopted"] - - console.print("\n[bold]Individual vs WG-Adopted Category Distribution[/]\n") - - table = Table() - table.add_column("Category", width=25) - table.add_column("Individual", justify="right", width=10) - table.add_column("WG-Adopted", justify="right", width=10) - table.add_column("Signal", width=40) - - all_cats = sorted(set(list(indiv.keys()) + list(adopted.keys()))) - for cat in all_cats: - i_count = indiv.get(cat, 0) - w_count = adopted.get(cat, 0) - signal = "" - if i_count >= 5 and w_count == 0: - signal = "[yellow]High individual activity, no WG — needs WG?[/]" - elif i_count >= 3 and w_count >= 1: - signal = "[green]WG exists, individual drafts could target it[/]" - elif w_count > i_count and i_count > 0: - signal = "[dim]WG leading, some individual work[/]" - table.add_row(cat, str(i_count), str(w_count), signal) - - console.print(table) - - # Find overlap clusters within individual submissions that might warrant a WG - console.print("\n[bold]Consolidation Candidates[/]") - console.print("[dim]Categories with many individual drafts but no WG adoption — " - "potential for new WG or BoF[/]\n") - - candidates = [] - for cat in all_cats: - i_count = indiv.get(cat, 0) - w_count = adopted.get(cat, 0) - if i_count >= 5 and w_count == 0: - candidates.append((cat, i_count)) - - if candidates: - for cat, count in sorted(candidates, key=lambda x: x[1], reverse=True): - console.print(f" [yellow]{cat}[/]: {count} individual drafts, no WG home") - # Show sample drafts - rows = db.conn.execute(""" - SELECT d.name, d.title FROM drafts d - JOIN 
ratings r ON d.name = r.draft_name - WHERE (d."group" = 'none' OR d."group" IS NULL) - AND r.categories LIKE ? - ORDER BY (r.novelty * 0.30 + r.relevance * 0.25 + r.maturity * 0.20 - + r.momentum * 0.15 + (6 - r.overlap) * 0.10) DESC - LIMIT 5 - """, (f"%{cat}%",)).fetchall() - for row in rows: - console.print(f" - {row['name']}: {row['title'][:60]}") - console.print() - else: - console.print(" [green]All active categories have WG representation.[/]") - - -@wg.command("targets") -@pass_cfg_db -def wg_targets(cfg, db): - """Suggest best WGs for submitting new work in each category.""" - spread = db.category_wg_spread() - summaries = {s["wg"]: s for s in db.wg_summary()} - - console.print("\n[bold]Recommended Submission Targets by Category[/]\n") - - for s in spread: - cat = s["category"] - # Filter to real WGs (not 'none') - real_wgs = [w for w in s["wgs"] if w["wg"] != "none"] - if not real_wgs: - console.print(f" [cyan]{cat}[/]: [yellow]No active WG — individual submission[/]") - continue - - best = real_wgs[0] - wg_info = summaries.get(best["wg"], {}) - console.print( - f" [cyan]{cat}[/]: [bold green]{best['wg']}[/] " - f"({best['count']} drafts" - f"{', avg relevance ' + str(wg_info.get('avg_relevance', '?')) if wg_info else ''})" - ) - if len(real_wgs) > 1: - alts = ", ".join(f"{w['wg']}({w['count']})" for w in real_wgs[1:3]) - console.print(f" Also: {alts}") - - console.print() - - -# ── visualize ──────────────────────────────────────────────────────────── - - -@main.group() -def viz(): - """Generate interactive visualizations (HTML/PNG).""" - pass - - -@viz.command("all") -@pass_cfg_db -def viz_all(cfg, db): - """Generate all available visualizations.""" - from .visualize import Visualizer - v = Visualizer(cfg, db) - paths = v.generate_all() - console.print(f"\n[bold green]{len(paths)} visualizations[/] saved to {v.output_dir}/") - - -@viz.command("landscape") -@click.option("--method", "-m", default="tsne", type=click.Choice(["umap", "tsne"]), - 
help="Dimensionality reduction method") -@pass_cfg_db -def viz_landscape(cfg, db, method: str): - """2D scatter of draft embeddings colored by category.""" - from .visualize import Visualizer - path = Visualizer(cfg, db).landscape_scatter(method=method) - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("heatmap") -@pass_cfg_db -def viz_heatmap(cfg, db): - """Clustered similarity heatmap (PNG).""" - from .visualize import Visualizer - path = Visualizer(cfg, db).similarity_heatmap() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("distributions") -@pass_cfg_db -def viz_distributions(cfg, db): - """Rating dimension distributions by category (PNG).""" - from .visualize import Visualizer - path = Visualizer(cfg, db).score_distributions() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("timeline") -@pass_cfg_db -def viz_timeline(cfg, db): - """Stacked area chart of monthly submissions.""" - from .visualize import Visualizer - path = Visualizer(cfg, db).timeline_chart() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("bubble") -@pass_cfg_db -def viz_bubble(cfg, db): - """Interactive bubble chart: novelty vs maturity.""" - from .visualize import Visualizer - path = Visualizer(cfg, db).bubble_explorer() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("radar") -@pass_cfg_db -def viz_radar(cfg, db): - """Radar chart of average category rating profiles.""" - from .visualize import Visualizer - path = Visualizer(cfg, db).category_radar() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("network") -@click.option("--min-shared", "-n", default=2, help="Minimum shared drafts for an edge") -@pass_cfg_db -def viz_network(cfg, db, min_shared: int): - """Interactive author collaboration network graph.""" - from .visualize import Visualizer - path = Visualizer(cfg, db).author_network(min_shared=min_shared) - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("treemap") -@pass_cfg_db -def viz_treemap(cfg, 
db): - """Category treemap colored by average score.""" - from .visualize import Visualizer - path = Visualizer(cfg, db).category_treemap() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("quality") -@pass_cfg_db -def viz_quality(cfg, db): - """Score vs uniqueness scatter (quality vs redundancy).""" - from .visualize import Visualizer - path = Visualizer(cfg, db).score_vs_overlap() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("orgs") -@pass_cfg_db -def viz_orgs(cfg, db): - """Organization contribution bar chart.""" - from .visualize import Visualizer - path = Visualizer(cfg, db).org_contributions() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("ideas") -@pass_cfg_db -def viz_ideas(cfg, db): - """Ideas frequency chart by type.""" - from .visualize import Visualizer - path = Visualizer(cfg, db).ideas_chart() - console.print(f"Saved: [bold]{path}[/]") - - -@viz.command("browser") -@pass_cfg_db -def viz_browser(cfg, db): - """Interactive filterable draft browser (standalone HTML).""" - from .visualize import Visualizer - path = Visualizer(cfg, db).draft_browser() - console.print(f"Saved: [bold]{path}[/]") - - -# ── authors ───────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("name", required=False) -@click.option("--fetch/--no-fetch", default=False, help="Fetch author data from Datatracker first") -@click.option("--limit", "-n", default=20, help="Number of top authors to show") -@pass_cfg_db -def authors(cfg, db, name: str | None, fetch: bool, limit: int): - """Show authors for a draft, or top authors overall.""" - from .authors import AuthorNetwork - - author_network = AuthorNetwork(cfg, db) - - if fetch: - count = author_network.fetch_all_authors() - console.print(f"Fetched authors for [bold green]{count}[/] drafts") - - if name: - draft_authors = db.get_authors_for_draft(name) - if not draft_authors: - console.print(f"[yellow]No author data for {name}. 
Run `ietf authors --fetch` first.[/]") - return - console.print(f"\n[bold]Authors of {name}:[/]") - for a in draft_authors: - console.print(f" - {a.name} ({a.affiliation or 'no affiliation'})") - else: - top = db.top_authors(limit=limit) - if not top: - console.print("[yellow]No author data. Run `ietf authors --fetch` first.[/]") - return - table = Table(title=f"Top {limit} Authors") - table.add_column("#", justify="right", width=4) - table.add_column("Author", style="cyan") - table.add_column("Organization") - table.add_column("Drafts", justify="right", width=6) - for rank, (aname, aff, cnt, _) in enumerate(top, 1): - table.add_row(str(rank), aname, aff, str(cnt)) - console.print(table) - - -@main.command() -@click.option("--top", "-n", default=20, help="Top N to show") -@pass_cfg_db -def network(cfg, db, top: int): - """Show author collaboration network.""" - console.print("\n[bold]Top Organizations[/]") - orgs = db.top_orgs(limit=top) - if orgs: - table = Table() - table.add_column("#", justify="right", width=4) - table.add_column("Organization", style="cyan") - table.add_column("Authors", justify="right", width=8) - table.add_column("Drafts", justify="right", width=6) - for rank, (org, auth_cnt, draft_cnt) in enumerate(orgs, 1): - table.add_row(str(rank), org, str(auth_cnt), str(draft_cnt)) - console.print(table) - - console.print("\n[bold]Cross-Org Collaboration[/]") - cross = db.cross_org_collaborations(limit=top) - if cross: - table = Table() - table.add_column("Org A", style="cyan") - table.add_column("Org B", style="cyan") - table.add_column("Shared Drafts", justify="right", width=8) - for org_a, org_b, shared in cross: - table.add_row(org_a, org_b, str(shared)) - console.print(table) - else: - console.print("[yellow]No author data. 
Run `ietf authors --fetch` first.[/]") - - -# ── ideas ─────────────────────────────────────────────────────────────── - - -@main.group(invoke_without_command=True) -@click.option("--name", default=None, help="Extract ideas from a specific draft") -@click.option("--all", "extract_all", is_flag=True, help="Extract ideas from all drafts") -@click.option("--limit", "-n", default=50, help="Max drafts to extract (with --all)") -@click.option("--batch", "-b", default=5, help="Drafts per API call (default 5, set 1 for individual)") -@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)") -@click.option("--reextract", is_flag=True, help="Clear existing ideas and re-extract with current prompt") -@click.option("--draft", "reextract_draft", default=None, help="Specific draft to re-extract (with --reextract)") -@click.option("--dry-run", is_flag=True, help="Show what would be extracted without making changes") -@click.pass_context -def ideas(ctx, name: str | None, extract_all: bool, limit: int, batch: int, cheap: bool, - reextract: bool, reextract_draft: str | None, dry_run: bool): - """Extract, score, and filter technical ideas from drafts.""" - if ctx.invoked_subcommand is not None: - return - - cfg = ctx.obj["cfg"] - db = ctx.obj["db"] - - if dry_run: - if reextract: - existing = db.idea_count() - if reextract_draft: - ideas_for = db.get_ideas_for_draft(reextract_draft) - console.print(f"[bold yellow]DRY RUN[/]: Would clear [bold]{len(ideas_for)}[/] ideas for {reextract_draft} and re-extract") - else: - console.print(f"[bold yellow]DRY RUN[/]: Would clear all [bold]{existing}[/] ideas and re-extract from up to {limit} drafts") - elif extract_all: - missing = db.drafts_without_ideas(limit=limit) - console.print(f"[bold yellow]DRY RUN[/]: Would extract ideas from [bold]{len(missing)}[/] drafts (batch={batch}, {'cheap' if cheap else 'quality'})") - for d in missing[:20]: - console.print(f" - {d}") - if len(missing) > 20: - 
console.print(f" ... and {len(missing) - 20} more") - elif name: - existing = db.get_ideas_for_draft(name) - console.print(f"[bold yellow]DRY RUN[/]: Would extract ideas from {name} (currently has {len(existing)} ideas)") - else: - console.print("Use --name DRAFT, --all, or a subcommand: ideas score / ideas filter") - return - - from .analyzer import Analyzer - analyzer = Analyzer(cfg, db) - - if reextract: - # Clear existing ideas, then re-extract - deleted = db.delete_ideas(draft_name=reextract_draft) - if reextract_draft: - console.print(f"Cleared [bold]{deleted}[/] ideas for {reextract_draft}") - idea_list = analyzer.extract_ideas(reextract_draft, use_cache=True) - if idea_list: - console.print(f"Re-extracted [bold green]{len(idea_list)}[/] ideas:") - for idea in idea_list: - console.print(f" [{idea.get('type', '?')}] [bold]{idea['title']}[/]") - console.print(f" {idea['description']}\n") - else: - console.print("[red]Re-extraction failed or no ideas found[/]") - else: - console.print(f"Cleared [bold]{deleted}[/] ideas from all drafts") - count = analyzer.extract_all_ideas(limit=limit, batch_size=batch, cheap=cheap) - console.print(f"Re-extracted ideas from [bold green]{count}[/] drafts") - elif extract_all: - count = analyzer.extract_all_ideas(limit=limit, batch_size=batch, cheap=cheap) - console.print(f"Extracted ideas from [bold green]{count}[/] drafts") - elif name: - idea_list = analyzer.extract_ideas(name) - if idea_list: - console.print(f"\n[bold]Ideas from {name}:[/]\n") - for idea in idea_list: - console.print(f" [{idea.get('type', '?')}] [bold]{idea['title']}[/]") - console.print(f" {idea['description']}\n") - else: - console.print("[red]Extraction failed or no ideas found[/]") - else: - console.print("Use --name DRAFT, --all, or a subcommand: ideas score / ideas filter") - - -@ideas.command("score") -@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)") -@click.option("--batch", "-b", default=20, help="Ideas 
per API call (default 20)") -@pass_cfg_db -def ideas_score(cfg, db, cheap: bool, batch: int): - """Score ideas for novelty (1=generic, 5=genuinely novel).""" - from .analyzer import Analyzer - - analyzer = Analyzer(cfg, db) - stats = analyzer.score_idea_novelty(batch_size=batch, cheap=cheap) - - if stats["scored_count"] == 0: - return - - # Show distribution table - dist = db.idea_score_distribution() - table = Table(title="Novelty Score Distribution") - table.add_column("Score", style="bold", justify="center") - table.add_column("Label", style="dim") - table.add_column("Count", justify="right") - table.add_column("Bar", min_width=30) - - labels = { - 1: "Generic building block", - 2: "Obvious extension", - 3: "Useful but expected", - 4: "Interesting contribution", - 5: "Genuinely novel", - } - max_count = max(dist.values()) if dist else 1 - for score in range(1, 6): - count = dist.get(score, 0) - bar_len = int(30 * count / max_count) if max_count > 0 else 0 - table.add_row( - str(score), labels[score], str(count), - "[green]" + "#" * bar_len + "[/]" - ) - - total = sum(dist.values()) - unscored = db.idea_count() - total - console.print(table) - console.print(f"\nTotal scored: [bold]{total}[/] | Unscored: {unscored} | Avg: [bold]{stats['avg_score']:.1f}[/]") - - -@ideas.command("filter") -@click.option("--min-score", "-m", default=2, help="Remove ideas below this score (default 2)") -@click.option("--dry-run/--execute", default=True, help="Preview (default) or actually delete") -@pass_cfg_db -def ideas_filter(cfg, db, min_score: int, dry_run: bool): - """Filter out low-novelty ideas by score threshold.""" - candidates = db.ideas_below_score(min_score) - if not candidates: - console.print(f"No ideas with novelty_score < {min_score}.") - return - - # Show what would be removed - table = Table( - title=f"Ideas with novelty_score < {min_score} " - f"({'DRY RUN' if dry_run else 'WILL DELETE'})" - ) - table.add_column("Score", style="bold", justify="center") - 
table.add_column("Idea", style="cyan", max_width=40) - table.add_column("Draft", max_width=50) - table.add_column("Description", max_width=60) - - for idea in candidates[:50]: # Show first 50 - table.add_row( - str(idea["novelty_score"]), - idea["title"], - idea["draft_title"], - idea["description"][:60] + ("..." if len(idea["description"]) > 60 else ""), - ) - - console.print(table) - - if len(candidates) > 50: - console.print(f" ... and {len(candidates) - 50} more") - - console.print(f"\nTotal to remove: [bold red]{len(candidates)}[/] / {db.idea_count()} ideas") - - if not dry_run: - deleted = db.delete_low_score_ideas(min_score) - console.print(f"[bold red]Deleted {deleted} low-novelty ideas.[/]") - console.print(f"Remaining ideas: [bold green]{db.idea_count()}[/]") - else: - console.print("[dim]Use --execute to actually delete.[/]") - - -@ideas.command("convergence") -@click.option("--threshold", "-t", default=0.75, help="SequenceMatcher ratio threshold (0-1)") -@click.option("--limit", "-n", default=50, help="Max results to show") -@click.option("--list-all", is_flag=True, help="List all convergent idea pairs") -@pass_cfg_db -def ideas_convergence(cfg, db, threshold: float, limit: int, list_all: bool): - """Find cross-org convergent ideas using SequenceMatcher fuzzy matching. - - Groups ideas by fuzzy title similarity, then filters to ideas where - 2+ distinct organizations independently propose similar concepts. - """ - from collections import defaultdict - from difflib import SequenceMatcher - from .orgs import normalize_org - - all_ideas = db.all_ideas() - if not all_ideas: - console.print("[yellow]No ideas extracted yet. 
Run `ietf ideas --all` first.[/]") - return - - # Build draft -> org mapping - draft_orgs: dict[str, set[str]] = defaultdict(set) - rows = db.conn.execute( - """SELECT da.draft_name, a.affiliation - FROM draft_authors da - JOIN authors a ON da.person_id = a.person_id - WHERE a.affiliation != ''""" - ).fetchall() - for r in rows: - org = normalize_org(r["affiliation"]) - if org and org != "Independent": - draft_orgs[r["draft_name"]].add(org) - - # Group similar ideas by fuzzy title matching - idea_groups: list[dict] = [] - for idea in all_ideas: - title_lower = idea["title"].lower().strip() - matched = False - for group in idea_groups: - ratio = SequenceMatcher(None, title_lower, group["canonical"]).ratio() - if ratio >= threshold: - group["ideas"].append(idea) - group["drafts"].add(idea["draft_name"]) - group["orgs"].update(draft_orgs.get(idea["draft_name"], set())) - matched = True - break - if not matched: - idea_groups.append({ - "canonical": title_lower, - "title": idea["title"], - "ideas": [idea], - "drafts": {idea["draft_name"]}, - "orgs": set(draft_orgs.get(idea["draft_name"], set())), - }) - - # Filter to cross-org ideas (2+ orgs) - cross_org = [g for g in idea_groups if len(g["orgs"]) >= 2] - cross_org.sort(key=lambda g: (-len(g["orgs"]), -len(g["drafts"]))) - - console.print(f"\n[bold]Cross-Organization Idea Convergence[/]") - console.print(f"Threshold: {threshold} | {len(all_ideas)} ideas | " - f"{len(idea_groups)} unique clusters | " - f"[bold green]{len(cross_org)}[/] cross-org convergent\n") - - if not cross_org: - console.print("[yellow]No cross-org convergent ideas at this threshold.[/]") - return - - show_n = len(cross_org) if list_all else min(limit, len(cross_org)) - table = Table(title=f"Cross-Org Convergent Ideas (showing {show_n} of {len(cross_org)})") - table.add_column("#", justify="right", width=4) - table.add_column("Idea", style="bold", max_width=40) - table.add_column("Orgs", justify="right", width=5) - table.add_column("Drafts", 
justify="right", width=6) - table.add_column("Organizations", max_width=50) - - for rank, g in enumerate(cross_org[:show_n], 1): - org_list = ", ".join(sorted(g["orgs"])[:5]) - if len(g["orgs"]) > 5: - org_list += f" +{len(g['orgs']) - 5}" - table.add_row( - str(rank), g["title"][:40], str(len(g["orgs"])), - str(len(g["drafts"])), org_list, - ) - - console.print(table) - console.print(f"\n[bold]Summary[/]: {len(cross_org)} cross-org convergent ideas " - f"out of {len(idea_groups)} unique clusters " - f"({100 * len(cross_org) / len(idea_groups):.0f}%)") - - -# ── dedup-ideas ───────────────────────────────────────────────────────── - - -@main.command("dedup-ideas") -@click.option("--threshold", "-t", default=0.85, type=float, - help="Cosine similarity threshold for merging (default 0.85)") -@click.option("--dry-run/--execute", default=True, - help="Preview merges (default) vs actually delete duplicates") -@click.option("--draft", "draft_name", default=None, - help="Limit to a single draft name") -@pass_cfg_db -def dedup_ideas(cfg, db, threshold: float, dry_run: bool, draft_name: str | None): - """Deduplicate similar ideas within each draft using embedding similarity.""" - from .analyzer import Analyzer - - analyzer = Analyzer(cfg, db) - mode = "[bold yellow]DRY RUN[/]" if dry_run else "[bold red]EXECUTE[/]" - console.print(f"\n{mode} — Deduplicating ideas (threshold={threshold})") - if draft_name: - console.print(f"Limiting to draft: [bold]{draft_name}[/]") - console.print() - - result = analyzer.dedup_ideas( - threshold=threshold, dry_run=dry_run, draft_name=draft_name - ) - - if result["examples"]: - table = Table(title="Merge Candidates" if dry_run else "Merged Ideas") - table.add_column("Draft", style="dim", max_width=40) - table.add_column("Keep", style="green") - table.add_column("Drop", style="red") - table.add_column("Similarity", justify="right") - - for ex in result["examples"]: - table.add_row( - ex["draft"].split("/")[-1][:40], - ex["keep"], - 
ex["drop"], - f"{ex['similarity']:.3f}", - ) - console.print(table) - console.print() - - action = "Would remove" if dry_run else "Removed" - console.print( - f"Ideas before: [bold]{result['total_before']}[/] | " - f"{action}: [bold]{result['merged_count']}[/] | " - f"After: [bold]{result['total_after']}[/]" - ) - - if dry_run and result["merged_count"] > 0: - console.print( - "\n[dim]Run with --execute to apply these merges.[/]" - ) - - -# ── gaps ──────────────────────────────────────────────────────────────── - - -@main.command() -@click.option("--refresh", is_flag=True, help="Re-run gap analysis even if cached") -@click.option("--dry-run", is_flag=True, help="Show existing gaps without running analysis") -@pass_cfg_db -def gaps(cfg, db, refresh: bool, dry_run: bool): - """Identify gaps in the current draft landscape using Claude.""" - from .reports import Reporter - - if dry_run: - existing = db.all_gaps() - console.print(f"[bold yellow]DRY RUN[/]: {len(existing)} gaps currently identified") - if refresh: - console.print(" Would re-run gap analysis via Claude API") - for i, gap in enumerate(existing if existing and isinstance(existing[0], dict) else [], 1): - sev = gap.get("severity", "medium").upper() - console.print(f" [bold]{i}. {gap['topic']}[/] [{sev}]") - return - - from .analyzer import Analyzer - analyzer = Analyzer(cfg, db) - reporter = Reporter(cfg, db) - - existing = db.all_gaps() - if existing and not refresh: - console.print(f"[bold]{len(existing)} gaps[/] already identified (use --refresh to re-run)\n") - else: - gap_list = analyzer.gap_analysis() - console.print(f"\nIdentified [bold green]{len(gap_list)}[/] gaps\n") - existing = gap_list - - for i, gap in enumerate(existing if isinstance(existing[0], dict) else [], 1): - sev = gap.get("severity", "medium").upper() - console.print(f" [bold]{i}. 
{gap['topic']}[/] [{sev}]") - console.print(f" {gap['description'][:100]}\n") - - path = reporter.gaps_report() - console.print(f"Report saved: [bold]{path}[/]") - - -# ── refs ──────────────────────────────────────────────────────────────── - - -@main.command() -@click.argument("name", required=False) -@click.option("--extract/--no-extract", default=False, help="Extract refs from all drafts with text") -@click.option("--top", "-n", default=30, help="Number of top-referenced items to show") -@click.option("--type", "ref_type", default="rfc", type=click.Choice(["rfc", "draft", "bcp"]), - help="Reference type to show top results for") -@pass_cfg_db -def refs(cfg, db, name: str | None, extract: bool, top: int, ref_type: str): - """Parse and show cross-references (RFCs, drafts, BCPs) in draft texts.""" - import re - - if extract: - missing = db.drafts_without_refs() - if not missing: - console.print("[green]All drafts with text already have refs extracted.[/]") - else: - console.print(f"Extracting refs from [bold]{len(missing)}[/] drafts...") - extracted = 0 - for draft_name in missing: - draft = db.get_draft(draft_name) - if not draft or not draft.full_text: - continue - found_refs = _extract_refs(draft.full_text, draft.name) - if found_refs: - db.insert_refs(draft_name, found_refs) - extracted += 1 - console.print(f"Extracted refs from [bold green]{extracted}[/] drafts") - - if name: - # Show refs for a specific draft - draft_refs = db.get_refs_for_draft(name) - if not draft_refs: - console.print(f"[yellow]No refs found for {name}. Run `ietf refs --extract` first.[/]") - return - table = Table(title=f"References in {name}") - table.add_column("Type", style="dim", width=6) - table.add_column("Reference", style="cyan") - for rt, rid in sorted(draft_refs): - table.add_row(rt.upper(), rid) - console.print(table) - else: - # Show top-referenced items - stats = db.ref_stats() - if stats["total_refs"] == 0: - console.print("[yellow]No refs extracted yet. 
Run `ietf refs --extract` first.[/]") - return - - console.print(f"\n[bold]Reference Stats[/]: {stats['drafts_with_refs']} drafts, " - f"{stats['total_refs']} total refs " - f"({stats['rfc_refs']} RFC, {stats['draft_refs']} draft, {stats['bcp_refs']} BCP)\n") - - top_items = db.top_referenced(ref_type=ref_type, limit=top) - table = Table(title=f"Top {len(top_items)} Most-Referenced {ref_type.upper()}s") - table.add_column("#", justify="right", width=4) - table.add_column("Reference", style="cyan", width=30) - table.add_column("Count", justify="right", width=6) - table.add_column("Referenced By", max_width=60) - for rank, (rid, cnt, drafts) in enumerate(top_items, 1): - label = f"RFC {rid}" if ref_type == "rfc" else rid - draft_list = ", ".join(d.replace("draft-", "")[:25] for d in drafts[:4]) - if len(drafts) > 4: - draft_list += f" +{len(drafts) - 4}" - table.add_row(str(rank), label, str(cnt), draft_list) - console.print(table) - - -def _extract_refs(text: str, self_name: str) -> list[tuple[str, str]]: - """Extract RFC, draft, and BCP references from draft full text.""" - import re - - refs: set[tuple[str, str]] = set() - - # RFC references: RFC 1234, RFC1234, [RFC1234], [RFC 1234] - for m in re.finditer(r'\[?RFC\s*(\d{4,})\]?', text, re.IGNORECASE): - refs.add(("rfc", m.group(1))) - - # BCP references: BCP 14, BCP14, [BCP14] - for m in re.finditer(r'\[?BCP\s*(\d+)\]?', text, re.IGNORECASE): - refs.add(("bcp", m.group(1))) - - # Draft references: draft-ietf-something-name - for m in re.finditer(r'(draft-[\w][\w-]+[\w])', text, re.IGNORECASE): - draft_ref = m.group(1).lower() - # Strip trailing version numbers (e.g., draft-foo-bar-03 -> draft-foo-bar) - draft_ref = re.sub(r'-\d{2,}$', '', draft_ref) - # Don't reference self - if draft_ref != self_name: - refs.add(("draft", draft_ref)) - - return list(refs) - - -# ── trends ───────────────────────────────────────────────────────────── - - -@main.command() -@click.option("--category", "-c", help="Filter to a 
specific category") -@click.option("--json-out", is_flag=True, help="Also output JSON for visualization") -@pass_cfg_db -def trends(cfg, db, category: str | None, json_out: bool): - """Show category trend analysis — monthly breakdown with growth rates.""" - import json as json_mod - from collections import defaultdict - - pairs = db.drafts_with_ratings(limit=500) - all_drafts = db.list_drafts(limit=500, order_by="time ASC") - - if not pairs: - console.print("[yellow]No rated drafts. Run `ietf analyze --all` first.[/]") - return - - # Build rating lookup - rating_map = {draft.name: rating for draft, rating in pairs} - - # Collect monthly counts per category - monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int)) - all_cats: set[str] = set() - for d in all_drafts: - month = d.time[:7] if d.time else "unknown" - r = rating_map.get(d.name) - if r: - for c in r.categories: - if category and c.lower() != category.lower(): - continue - monthly[month][c] += 1 - all_cats.add(c) - - if not all_cats: - console.print(f"[yellow]No data for category '{category}'[/]" if category - else "[yellow]No category data found.[/]") - return - - months = sorted(m for m in monthly.keys() if m != "unknown") - cats = sorted(all_cats) - - # Compute cumulative and growth - rows_data = [] - cumulative: dict[str, int] = defaultdict(int) - prev_count: dict[str, int] = defaultdict(int) - - for month in months: - for cat in cats: - count = monthly[month].get(cat, 0) - cumulative[cat] += count - growth = 0.0 - if prev_count[cat] > 0: - growth = ((count - prev_count[cat]) / prev_count[cat]) * 100 - rows_data.append({ - "month": month, - "category": cat, - "count": count, - "cumulative": cumulative[cat], - "growth_rate": growth, - }) - prev_count[cat] = count - - # Display summary table - console.print(f"\n[bold]Category Trends[/] — {len(months)} months, {len(cats)} categories\n") - - # Show per-category totals and recent momentum - table = Table(title="Category Growth Summary") - 
table.add_column("Category", style="cyan") - table.add_column("Total", justify="right", width=6) - table.add_column("Last 3mo", justify="right", width=8) - table.add_column("Prev 3mo", justify="right", width=8) - table.add_column("Growth", justify="right", width=8) - - recent_months = months[-3:] if len(months) >= 3 else months - prev_months = months[-6:-3] if len(months) >= 6 else [] - - for cat in cats: - total = cumulative[cat] - recent = sum(monthly[m].get(cat, 0) for m in recent_months) - prev = sum(monthly[m].get(cat, 0) for m in prev_months) if prev_months else 0 - if prev > 0: - growth_str = f"{((recent - prev) / prev) * 100:+.0f}%" - elif recent > 0: - growth_str = "new" - else: - growth_str = "-" - table.add_row(cat, str(total), str(recent), str(prev) if prev_months else "-", growth_str) - - console.print(table) - - # Monthly detail - console.print(f"\n[bold]Monthly Breakdown[/]\n") - detail_table = Table() - detail_table.add_column("Month", style="dim", width=8) - for cat in cats: - detail_table.add_column(cat[:14], justify="right", width=max(6, len(cat[:14]))) - detail_table.add_column("Total", justify="right", width=6, style="bold") - - for month in months: - row = [month] - total = 0 - for cat in cats: - c = monthly[month].get(cat, 0) - total += c - row.append(str(c) if c else "") - row.append(str(total)) - detail_table.add_row(*row) - - console.print(detail_table) - - # Optional JSON output - if json_out: - out_path = Path(cfg.data_dir) / "reports" / "trends.json" - out_path.write_text(json_mod.dumps(rows_data, indent=2)) - console.print(f"\nJSON saved: [bold]{out_path}[/]") - - -# ── status ────────────────────────────────────────────────────────────── - - -@main.command() -@click.option("--wg", "-w", help="Filter to a specific WG") -def status(wg: str | None): - """Show WG adoption status — which drafts have institutional backing.""" - import json as json_mod - from collections import defaultdict - - cfg = _get_config() - db = Database(cfg) - - 
try: - all_status = db.draft_adoption_status() - total = len(all_status) - adopted = [s for s in all_status if s["wg_adopted"]] - individual = [s for s in all_status if not s["wg_adopted"]] - irtf = [s for s in all_status if s["stream"] == "irtf"] - - console.print(f"\n[bold]Draft Adoption Status[/]: {total} total drafts\n") - console.print(f" WG-adopted (draft-ietf-*): [bold green]{len(adopted)}[/] ({len(adopted)/total*100:.1f}%)") - console.print(f" IRTF (draft-irtf-*): [bold blue]{len(irtf)}[/]") - console.print(f" Individual: [bold]{len(individual)}[/] ({len(individual)/total*100:.1f}%)\n") - - # WG breakdown - wg_groups: dict[str, list[dict]] = defaultdict(list) - for s in adopted: - wg_groups[s["wg_name"]].append(s) - - if wg: - # Show drafts for a specific WG - wg_drafts = wg_groups.get(wg, []) - if not wg_drafts: - console.print(f"[yellow]No WG-adopted drafts for '{wg}'[/]") - return - table = Table(title=f"WG '{wg}' Drafts ({len(wg_drafts)})") - table.add_column("Date", style="dim", width=10) - table.add_column("Name", style="cyan") - table.add_column("Title", max_width=50) - for s in sorted(wg_drafts, key=lambda x: x["time"] or ""): - table.add_row(s["time"][:10] if s["time"] else "", s["name"], s["title"][:50]) - console.print(table) - else: - # Show WG summary - table = Table(title=f"Working Groups with AI/Agent Drafts ({len(wg_groups)} WGs)") - table.add_column("#", justify="right", width=4) - table.add_column("WG", style="cyan", width=12) - table.add_column("Drafts", justify="right", width=6) - table.add_column("Draft Names", max_width=60) - - for rank, (wg_name, drafts) in enumerate( - sorted(wg_groups.items(), key=lambda x: -len(x[1])), 1 - ): - draft_list = ", ".join(d["name"].replace("draft-ietf-", "")[:30] for d in drafts[:4]) - if len(drafts) > 4: - draft_list += f" +{len(drafts) - 4}" - table.add_row(str(rank), wg_name, str(len(drafts)), draft_list) - - console.print(table) - - # Score comparison - pairs = db.drafts_with_ratings(limit=500) - if 
pairs: - adopted_names = {s["name"] for s in adopted} - adopted_scores = [r.composite_score for d, r in pairs if d.name in adopted_names] - individual_scores = [r.composite_score for d, r in pairs if d.name not in adopted_names] - - if adopted_scores and individual_scores: - console.print(f"\n[bold]Score Comparison[/]:") - avg_adopted = sum(adopted_scores) / len(adopted_scores) - avg_individual = sum(individual_scores) / len(individual_scores) - console.print(f" WG-adopted avg score: [bold green]{avg_adopted:.2f}[/] ({len(adopted_scores)} rated)") - console.print(f" Individual avg score: [bold]{avg_individual:.2f}[/] ({len(individual_scores)} rated)") - - # Check gap coverage - gaps = db.all_gaps() - if gaps: - gap_cats = {g["category"].lower() for g in gaps} - adopted_cats: set[str] = set() - for d, r in pairs: - if d.name in adopted_names: - for c in r.categories: - adopted_cats.add(c.lower()) - - covered = gap_cats & adopted_cats - uncovered = gap_cats - adopted_cats - console.print(f"\n[bold]Gap Coverage by WG-Adopted Work[/]:") - console.print(f" Gap categories with WG backing: {len(covered)}") - if covered: - console.print(f" {', '.join(sorted(covered))}") - console.print(f" Gap categories without WG backing: {len(uncovered)}") - if uncovered: - console.print(f" [yellow]{', '.join(sorted(uncovered))}[/]") - - finally: - db.close() - - -# ── revisions ────────────────────────────────────────────────────────── - - -@main.command() -@click.option("--org", "-o", help="Filter to a specific organization") -@click.option("--top", "-n", default=20, help="Number of orgs to show") -def revisions(org: str | None, top: int): - """Analyze draft revision velocity — who iterates vs fire-and-forget.""" - from collections import defaultdict - from .orgs import normalize_org - - cfg = _get_config() - db = Database(cfg) - - try: - all_revs = db.revision_velocity() - total = len(all_revs) - at_00 = sum(1 for r in all_revs if r["rev_int"] == 0) - avg_rev = sum(r["rev_int"] for r 
in all_revs) / total if total else 0 - - console.print(f"\n[bold]Draft Revision Velocity[/]: {total} drafts\n") - console.print(f" Average revision: [bold]{avg_rev:.2f}[/]") - console.print(f" At -00 (first draft): [bold]{at_00}[/] ({at_00/total*100:.1f}%)") - console.print(f" Iterated (rev >= 01): [bold]{total - at_00}[/] ({(total-at_00)/total*100:.1f}%)") - console.print(f" Highly iterated (rev >= 05): [bold]{sum(1 for r in all_revs if r['rev_int'] >= 5)}[/]\n") - - # Get per-org stats using normalized org names - aff_rows = db.draft_affiliation_pairs() - - # Map draft -> rev - draft_rev = {r["name"]: r["rev_int"] for r in all_revs} - - # Group drafts by normalized org (deduped) - org_drafts: dict[str, set[str]] = defaultdict(set) - for draft_name, affiliation in aff_rows: - norm = normalize_org(affiliation) - if norm: - org_drafts[norm].add(draft_name) - - if org: - # Show drafts for a specific org - drafts = org_drafts.get(org, set()) - if not drafts: - console.print(f"[yellow]No drafts for '{org}'[/]") - return - table = Table(title=f"'{org}' Drafts by Revision ({len(drafts)})") - table.add_column("Rev", justify="right", width=4) - table.add_column("Name", style="cyan", max_width=50) - table.add_column("Title", max_width=40) - draft_details = [(d, draft_rev.get(d, 0)) for d in drafts] - for name, rev in sorted(draft_details, key=lambda x: -x[1]): - title_row = next((r["title"] for r in all_revs if r["name"] == name), "") - table.add_row(f"-{rev:02d}", name, title_row[:40]) - console.print(table) - else: - # Show org summary - org_stats = [] - for org_name, drafts in org_drafts.items(): - if len(drafts) < 3: - continue - revs = [draft_rev.get(d, 0) for d in drafts] - n_00 = sum(1 for r in revs if r == 0) - org_stats.append({ - "org": org_name, - "drafts": len(drafts), - "avg_rev": sum(revs) / len(revs), - "at_00": n_00, - "pct_00": n_00 / len(drafts) * 100, - "max_rev": max(revs), - }) - - org_stats.sort(key=lambda x: -x["drafts"]) - - table = 
Table(title=f"Revision Velocity by Organization (>= 3 drafts, top {top})") - table.add_column("#", justify="right", width=4) - table.add_column("Organization", style="cyan", width=28) - table.add_column("Drafts", justify="right", width=6) - table.add_column("Avg Rev", justify="right", width=8) - table.add_column("At -00", justify="right", width=6) - table.add_column("%-00", justify="right", width=6) - table.add_column("Max", justify="right", width=4) - - for rank, s in enumerate(org_stats[:top], 1): - table.add_row( - str(rank), s["org"][:28], str(s["drafts"]), - f"{s['avg_rev']:.2f}", str(s["at_00"]), - f"{s['pct_00']:.0f}%", str(s["max_rev"]), - ) - console.print(table) - - # Highlight the fire-and-forget vs iterators narrative - high_00 = [s for s in org_stats if s["pct_00"] >= 70 and s["drafts"] >= 5] - iterators = [s for s in org_stats if s["avg_rev"] >= 3.0 and s["drafts"] >= 3] - - if high_00: - console.print("\n[bold]Fire-and-Forget[/] (>= 70% at -00, >= 5 drafts):") - for s in high_00: - console.print(f" {s['org']}: {s['at_00']}/{s['drafts']} at -00 ({s['pct_00']:.0f}%)") - - if iterators: - console.print("\n[bold]Active Iterators[/] (avg revision >= 3.0):") - for s in iterators: - console.print(f" {s['org']}: avg rev {s['avg_rev']:.1f}, max -{s['max_rev']:02d}") - - # Generate report - from .reports import Reporter - reporter = Reporter(cfg, db) - path = reporter.revisions_report() - console.print(f"\nReport saved: [bold]{path}[/]") - - finally: - db.close() - - -# ── idea-overlap ──────────────────────────────────────────────────────── - - -@main.command("idea-overlap") -@click.option("--threshold", "-t", default=0.75, help="Title similarity threshold (0-1)") -@click.option("--limit", "-n", default=50, help="Max results to show") -def idea_overlap(threshold: float, limit: int): - """Find ideas that appear across multiple organizations.""" - from collections import defaultdict - from difflib import SequenceMatcher - from .orgs import normalize_org - - cfg 
= _get_config() - db = Database(cfg) - - try: - all_ideas = db.all_ideas() - if not all_ideas: - console.print("[yellow]No ideas extracted yet. Run `ietf ideas --all` first.[/]") - return - - # Build draft -> org mapping - draft_orgs: dict[str, set[str]] = defaultdict(set) - rows = db.conn.execute( - """SELECT da.draft_name, a.affiliation - FROM draft_authors da - JOIN authors a ON da.person_id = a.person_id - WHERE a.affiliation != ''""" - ).fetchall() - for r in rows: - org = normalize_org(r["affiliation"]) - if org and org != "Independent": - draft_orgs[r["draft_name"]].add(org) - - # Group similar ideas (same logic as ideas_report but tracking orgs) - idea_groups: list[dict] = [] - for idea in all_ideas: - title_lower = idea["title"].lower().strip() - matched = False - for group in idea_groups: - ratio = SequenceMatcher(None, title_lower, group["canonical"]).ratio() - if ratio >= threshold: - group["ideas"].append(idea) - group["drafts"].add(idea["draft_name"]) - group["orgs"].update(draft_orgs.get(idea["draft_name"], set())) - matched = True - break - if not matched: - idea_groups.append({ - "canonical": title_lower, - "title": idea["title"], - "ideas": [idea], - "drafts": {idea["draft_name"]}, - "orgs": set(draft_orgs.get(idea["draft_name"], set())), - }) - - # Filter to cross-org ideas (2+ orgs) - cross_org = [g for g in idea_groups if len(g["orgs"]) >= 2] - cross_org.sort(key=lambda g: (-len(g["orgs"]), -len(g["drafts"]))) - - console.print(f"\n[bold]Cross-Organization Idea Overlap[/]") - console.print(f"{len(all_ideas)} ideas, {len(idea_groups)} unique, " - f"[bold green]{len(cross_org)}[/] appear across 2+ orgs\n") - - if not cross_org: - console.print("[yellow]No cross-org idea overlap found at this threshold.[/]") - return - - table = Table(title=f"Ideas Shared Across Organizations (top {min(limit, len(cross_org))})") - table.add_column("#", justify="right", width=4) - table.add_column("Idea", style="bold", max_width=40) - table.add_column("Orgs", 
justify="right", width=5) - table.add_column("Drafts", justify="right", width=6) - table.add_column("Organizations", max_width=50) - - for rank, g in enumerate(cross_org[:limit], 1): - org_list = ", ".join(sorted(g["orgs"])[:5]) - if len(g["orgs"]) > 5: - org_list += f" +{len(g['orgs']) - 5}" - table.add_row( - str(rank), g["title"][:40], str(len(g["orgs"])), - str(len(g["drafts"])), org_list, - ) - - console.print(table) - - # Also generate the report - from .reports import Reporter - reporter = Reporter(cfg, db) - path = reporter.idea_overlap_report() - console.print(f"\nReport saved: [bold]{path}[/]") - - finally: - db.close() - - -# ── co-occurrence ────────────────────────────────────────────────────── - - -@main.command("co-occurrence") -def co_occurrence(): - """Category co-occurrence matrix — which categories appear together.""" - from collections import defaultdict - - cfg = _get_config() - db = Database(cfg) - - try: - pairs = db.drafts_with_ratings(limit=500) - total = len(pairs) - multi_cat = sum(1 for d, r in pairs if len(r.categories) > 1) - - console.print(f"\n[bold]Category Co-occurrence Analysis[/]: {total} drafts\n") - console.print(f" Multi-category drafts: [bold]{multi_cat}[/] ({multi_cat/total*100:.1f}%)\n") - - # Build counts - cat_counts: dict[str, int] = defaultdict(int) - cooccur: dict[tuple[str, str], int] = defaultdict(int) - for d, r in pairs: - for c in r.categories: - cat_counts[c] += 1 - for i, c1 in enumerate(r.categories): - for c2 in r.categories[i + 1:]: - key = tuple(sorted([c1, c2])) - cooccur[key] += 1 - - # Top co-occurrences - table = Table(title="Top 15 Category Co-occurrences") - table.add_column("#", justify="right", width=4) - table.add_column("Category A", style="cyan", width=22) - table.add_column("Category B", style="cyan", width=22) - table.add_column("Count", justify="right", width=6) - - top_pairs = sorted(cooccur.items(), key=lambda x: -x[1])[:15] - for rank, ((c1, c2), n) in enumerate(top_pairs, 1): - 
table.add_row(str(rank), c1, c2, str(n)) - console.print(table) - - # AI safety isolation check - safety_cooccur = {k: v for k, v in cooccur.items() if "AI safety/alignment" in k} - if safety_cooccur: - console.print("\n[bold]AI Safety/Alignment Co-occurrences[/]:") - for (c1, c2), n in sorted(safety_cooccur.items(), key=lambda x: -x[1]): - other = c2 if c1 == "AI safety/alignment" else c1 - console.print(f" {n:>3d} + {other}") - - # Generate report - from .reports import Reporter - reporter = Reporter(cfg, db) - path = reporter.co_occurrence_report() - console.print(f"\nReport saved: [bold]{path}[/]") - - finally: - db.close() - - -# ── centrality ───────────────────────────────────────────────────────── - - -@main.command() -@click.option("--top", "-n", default=20, help="Number of results to show") -def centrality(top: int): - """Author network centrality — bridge-builders and key connectors.""" - import networkx as nx - from collections import defaultdict - from .orgs import normalize_org - - cfg = _get_config() - db = Database(cfg) - - try: - # Build co-authorship graph - rows = db.conn.execute( - """SELECT da1.person_id as p1, da2.person_id as p2, COUNT(*) as shared - FROM draft_authors da1 - JOIN draft_authors da2 ON da1.draft_name = da2.draft_name - AND da1.person_id < da2.person_id - GROUP BY da1.person_id, da2.person_id""" - ).fetchall() - - G = nx.Graph() - for r in rows: - G.add_edge(r[0], r[1], weight=r[2]) - - persons = db.all_persons_info() - person_info = {pid: (name, normalize_org(aff)) for pid, name, aff in persons} - - console.print(f"\n[bold]Author Network Analysis[/]: {G.number_of_nodes()} authors, {G.number_of_edges()} co-authorship edges\n") - - # Cross-org vs intra-org edges - chinese_orgs = { - "Huawei", "China Mobile", "China Telecom", "China Unicom", - "ZTE Corporation", "Tsinghua University", "BUPT", - "Pengcheng Laboratory", "CAICT", "AsiaInfo", - "Zhongguancun Laboratory", "CNIC, CAS", - "Tsinghua Shenzhen International Graduate School 
& Pengcheng Laboratory", - "Huazhong University of Science and Technology", - } - cross_org = intra_org = cross_divide = 0 - for u, v in G.edges(): - _, org_u = person_info.get(u, ("?", "")) - _, org_v = person_info.get(v, ("?", "")) - if org_u and org_v: - if org_u == org_v: - intra_org += 1 - else: - cross_org += 1 - if (org_u in chinese_orgs) != (org_v in chinese_orgs): - cross_divide += 1 - - total_edges = cross_org + intra_org - console.print(f" Intra-org edges: [bold]{intra_org}[/] ({intra_org/total_edges*100:.1f}%)") - console.print(f" Cross-org edges: [bold]{cross_org}[/] ({cross_org/total_edges*100:.1f}%)") - console.print(f" Cross Chinese-Western edges: [bold]{cross_divide}[/]") - - avg_clustering = nx.average_clustering(G) - components = list(nx.connected_components(G)) - console.print(f" Clustering coefficient: [bold]{avg_clustering:.3f}[/]") - console.print(f" Connected components: [bold]{len(components)}[/], largest: {len(max(components, key=len))}\n") - - # Betweenness centrality - bc = nx.betweenness_centrality(G) - - table = Table(title=f"Top {top} Authors by Betweenness Centrality") - table.add_column("#", justify="right", width=4) - table.add_column("Author", style="bold", width=28) - table.add_column("Organization", style="cyan", width=20) - table.add_column("BC Score", justify="right", width=8) - table.add_column("Degree", justify="right", width=6) - table.add_column("CN/West", justify="center", width=8) - - top_bc = sorted(bc.items(), key=lambda x: -x[1])[:top] - for rank, (pid, score) in enumerate(top_bc, 1): - name, org = person_info.get(pid, ("?", "?")) - degree = G.degree(pid) - cn = sum(1 for n in G.neighbors(pid) if person_info.get(n, ("", ""))[1] in chinese_orgs) - west = sum(1 for n in G.neighbors(pid) if person_info.get(n, ("", ""))[1] not in chinese_orgs and person_info.get(n, ("", ""))[1]) - table.add_row(str(rank), name[:28], org[:20], f"{score:.4f}", str(degree), f"{cn}/{west}") - - console.print(table) - - # Bridge-builders - 
bridges = [] - for pid in G.nodes(): - name, org = person_info.get(pid, ("?", "")) - cn = sum(1 for n in G.neighbors(pid) if person_info.get(n, ("", ""))[1] in chinese_orgs) - west = sum(1 for n in G.neighbors(pid) if person_info.get(n, ("", ""))[1] not in chinese_orgs and person_info.get(n, ("", ""))[1]) - if cn > 0 and west > 0: - bridges.append((pid, name, org, bc.get(pid, 0), cn, west)) - - bridges.sort(key=lambda x: -x[3]) - console.print(f"\n[bold]Cross-Divide Bridge-Builders[/] ({len(bridges)} people with neighbors in both blocs):\n") - for pid, name, org, bc_score, cn, west in bridges[:10]: - console.print(f" [bold]{name}[/] ({org}): BC={bc_score:.4f}, CN neighbors={cn}, Western={west}") - - # Generate report - from .reports import Reporter - reporter = Reporter(cfg, db) - path = reporter.centrality_report() - console.print(f"\nReport saved: [bold]{path}[/]") - - finally: - db.close() - - -# ── draft-gen ─────────────────────────────────────────────────────────── - - -@main.command("draft-gen") -@click.argument("gap_topic") -@click.option("--output", "-o", help="Output file path") -def draft_gen(gap_topic: str, output: str | None): - """Generate an Internet-Draft addressing a landscape gap.""" - from .draftgen import DraftGenerator - from .analyzer import Analyzer - - cfg = _get_config() - db = Database(cfg) - analyzer = Analyzer(cfg, db) - generator = DraftGenerator(cfg, db, analyzer) - - try: - out_path = output or str(Path(cfg.data_dir) / "reports" / "generated-draft.txt") - console.print(f"Generating Internet-Draft on: [bold]{gap_topic}[/]") - path = generator.generate(gap_topic, output_path=out_path) - console.print(f"\nDraft saved: [bold green]{path}[/]") - finally: - db.close() - - -# ── proposal intake ────────────────────────────────────────────────────────── - - -@main.command("intake") -@click.argument("input_text", required=False) -@click.option("--file", "-f", type=click.Path(exists=True), help="Read input from a file") 
-@click.option("--dry-run", is_flag=True, help="Parse and show proposals without storing") -def intake(input_text: str | None, file: str | None, dry_run: bool): - """Generate draft proposals from text/URLs. - - Paste article text, URLs, or notes. Claude analyzes against all gaps - and generates structured IETF draft proposals automatically. - - Examples: - - ietf intake "https://arxiv.org/abs/2503.18813" - - ietf intake -f notes.txt - - echo "interesting paper about agent security" | ietf intake - - """ - from .proposal_intake import ProposalIntake - - if input_text == "-": - import sys - input_text = sys.stdin.read() - elif file: - input_text = Path(file).read_text() - elif not input_text: - # Interactive: read from stdin until EOF - console.print("[dim]Paste text/URLs, then Ctrl+D to submit:[/]") - import sys - input_text = sys.stdin.read() - - if not input_text or not input_text.strip(): - console.print("[red]No input provided.[/]") - raise SystemExit(1) - - cfg = _get_config() - db = Database(cfg) - try: - pipeline = ProposalIntake(cfg, db) - proposals, usage = pipeline.process(input_text, dry_run=dry_run) - - if proposals: - console.print(f"\n[bold green]{len(proposals)} proposal(s) generated[/]") - for p in proposals: - pid = p.get("id", "—") - gaps = ", ".join(f"#{g}" for g in p.get("gap_ids", [])) - console.print(f" [blue]#{pid}[/] {p['title']} [dim]gaps: {gaps}[/]") - if not dry_run: - console.print(f"\nView in web UI: [bold]http://localhost:5000/proposals[/]") - else: - console.print("[yellow]No proposals generated from this input.[/]") - finally: - db.close() - - -# ── config ─────────────────────────────────────────────────────────────────── - - -@main.command("config") -@click.option("--set", "set_key", nargs=2, help="Set a config key (e.g. 
--set claude_model claude-opus-4-20250514)") -@click.option("--show", is_flag=True, help="Show effective config with env var sources noted") -def config_cmd(set_key: tuple[str, str] | None, show: bool): - """Show or modify configuration.""" - from dataclasses import asdict - cfg = _get_config() - - if set_key: - key, value = set_key - if hasattr(cfg, key): - # Coerce types - current = getattr(cfg, key) - if isinstance(current, float): - value = float(value) - elif isinstance(current, int): - value = int(value) - elif isinstance(current, list): - import json - value = json.loads(value) - setattr(cfg, key, value) - cfg.save() - console.print(f"Set [bold]{key}[/] = {value}") - else: - console.print(f"[red]Unknown config key: {key}[/]") - else: - from dataclasses import asdict - env_sources = cfg.env_sources() - for key, val in asdict(cfg).items(): - source_note = "" - if key in env_sources: - source_note = f" [yellow](from ${env_sources[key]})[/]" - console.print(f" [bold]{key}:[/] {val}{source_note}") - if env_sources: - console.print(f"\n [dim]({len(env_sources)} value(s) overridden by environment variables)[/]") - # Note about ANTHROPIC_API_KEY - import os - if os.environ.get("ANTHROPIC_API_KEY"): - console.print(" [dim]ANTHROPIC_API_KEY is set in environment[/]") - else: - console.print(" [dim]ANTHROPIC_API_KEY is NOT set in environment[/]") - - -# ── pipeline ──────────────────────────────────────────────────────────────── - - -@main.group() -def pipeline(): - """Gap-to-Draft generation pipeline.""" - pass - - -@pipeline.command("context") -@click.argument("gap_topic") -def pipeline_context(gap_topic: str): - """Preview assembled context for a gap topic (dry run).""" - from .pipeline import ContextBuilder - - cfg = _get_config() - db = Database(cfg) - try: - builder = ContextBuilder(cfg, db) - ctx = builder.build_context(gap_topic) - console.print(f"\n[bold]Context for gap: {gap_topic}[/]\n") - - gap = ctx.get("gap") - if gap: - console.print(f"[cyan]Gap:[/] 
{gap.get('topic', '?')}") - console.print(f" {gap.get('description', '')[:200]}") - console.print(f" Severity: {gap.get('severity', '?')}") - - ideas = ctx.get("ideas", []) - console.print(f"\n[cyan]Convergent ideas:[/] {len(ideas)}") - for idea in ideas[:10]: - console.print(f" - {idea.get('title', '?')}: {idea.get('description', '')[:80]}") - - rfcs = ctx.get("rfc_foundations", []) - console.print(f"\n[cyan]RFC foundations:[/] {len(rfcs)}") - for ref_id, count in rfcs[:10]: - console.print(f" - RFC {ref_id} (cited by {count} drafts)") - - similar = ctx.get("similar_drafts", []) - console.print(f"\n[cyan]Similar existing drafts:[/] {len(similar)}") - for name, score in similar[:8]: - console.print(f" - {name} (similarity: {score:.3f})") - - top_rated = ctx.get("top_rated", []) - console.print(f"\n[cyan]Top-rated in category:[/] {len(top_rated)}") - - wg_ctx = ctx.get("wg_context", []) - adopted = [w for w in wg_ctx if w.get("wg_adopted")] - console.print(f"\n[cyan]WG context:[/] {len(adopted)} WG-adopted drafts") - - vision = ctx.get("ecosystem_vision", "") - if vision: - console.print(f"\n[cyan]Ecosystem vision:[/] {len(vision)} chars loaded") - finally: - db.close() - - -@pipeline.command("generate") -@click.argument("gap_topic") -@click.option("--cheap/--quality", default=False, help="Use Haiku (cheap) or Sonnet (quality)") -@click.option("--dry-run", is_flag=True, help="Show outline only, don't generate sections") -@click.option("--family", "family_name", default="", help="Family name for multi-draft generation") -def pipeline_generate(gap_topic: str, cheap: bool, dry_run: bool, family_name: str): - """Generate a single draft from a gap topic.""" - from .analyzer import Analyzer - from .pipeline import PipelineGenerator, ContextBuilder - - cfg = _get_config() - db = Database(cfg) - analyzer = Analyzer(cfg, db) - - try: - builder = ContextBuilder(cfg, db) - generator = PipelineGenerator(cfg, db, analyzer) - ctx = builder.build_context(gap_topic) - - 
console.print(f"[bold]Generating draft for gap: {gap_topic}[/]") - - outline = generator.generate_outline(ctx, cheap=cheap) - console.print(f" Title: [cyan]{outline.get('title', '?')}[/]") - console.print(f" Sections: {len(outline.get('sections', []))}") - console.print(f" Target WG: {outline.get('target_wg', '?')}") - - if dry_run: - import json - console.print("\n[bold]Outline (dry run):[/]") - console.print(json.dumps(outline, indent=2)) - return - - result = generator.generate_full(gap_topic, cheap=cheap) - console.print(f"\n[bold green]Draft generated![/]") - console.print(f" ID: {result.get('id', '?')}") - console.print(f" Draft name: {result.get('draft_name', '?')}") - - # Export text file - output_dir = Path(cfg.data_dir) / "reports" / "generated-drafts" - output_dir.mkdir(parents=True, exist_ok=True) - draft_name = result.get("draft_name", "draft-unknown") - out_path = output_dir / f"{draft_name}.txt" - if result.get("full_text"): - out_path.write_text(result["full_text"]) - console.print(f" Saved: {out_path}") - finally: - db.close() - - -@pipeline.command("family") -@click.option("--name", "family_name", default="agent-ecosystem", help="Family name") -@click.option("--cheap/--quality", default=False, help="Use Haiku (cheap) or Sonnet (quality)") -def pipeline_family(family_name: str, cheap: bool): - """Generate the full 5-draft ecosystem family.""" - from .analyzer import Analyzer - from .pipeline import FamilyCoordinator - - cfg = _get_config() - db = Database(cfg) - analyzer = Analyzer(cfg, db) - - try: - coordinator = FamilyCoordinator(cfg, db, analyzer) - console.print(f"[bold]Generating draft family: {family_name}[/]\n") - results = coordinator.generate_family(family_name=family_name, cheap=cheap) - console.print(f"\n[bold green]Generated {len(results)} drafts![/]") - - # Export all - output_dir = Path(cfg.data_dir) / "reports" / "generated-drafts" - output_dir.mkdir(parents=True, exist_ok=True) - - for r in results: - draft_name = 
r.get("draft_name", "draft-unknown") - if r.get("full_text"): - out_path = output_dir / f"{draft_name}.txt" - out_path.write_text(r["full_text"]) - console.print(f" [green]{r.get('family_role', '?')}[/] → {out_path}") - - # Family summary - summary_path = output_dir / "family-summary.md" - lines = [f"# Draft Family: {family_name}\n"] - for r in results: - lines.append(f"## {r.get('family_role', '?')}: {r.get('title', '?')}") - lines.append(f"- Draft: `{r.get('draft_name', '?')}`") - lines.append(f"- Gap: {r.get('gap_topic', '?')}") - lines.append(f"- Sections: {len(r.get('sections', []))}") - lines.append("") - summary_path.write_text("\n".join(lines)) - console.print(f"\n Summary: {summary_path}") - - # Consistency check - consistency = coordinator.check_consistency(family_name) - if consistency.get("issues"): - console.print(f"\n[yellow]Consistency issues:[/]") - for issue in consistency["issues"]: - console.print(f" - {issue}") - else: - console.print(f"\n[green]No consistency issues found[/]") - finally: - db.close() - - -@pipeline.command("quality") -@click.argument("draft_id", type=int) -def pipeline_quality(draft_id: int): - """Run quality gates on a generated draft.""" - from .analyzer import Analyzer - from .pipeline import QualityGates - - cfg = _get_config() - db = Database(cfg) - analyzer = Analyzer(cfg, db) - - try: - gates = QualityGates(cfg, db, analyzer) - console.print(f"[bold]Running quality gates on draft #{draft_id}[/]\n") - results = gates.run_all(draft_id) - - for gate_name, result in results.items(): - status = "[green]PASS[/]" if result["passed"] else "[red]FAIL[/]" - console.print(f" {status} {gate_name}: {result.get('details', '')[:100]}") - if "score" in result: - console.print(f" Score: {result['score']:.2f}") - finally: - db.close() - - -@pipeline.command("status") -def pipeline_status(): - """Show pipeline health: processing stages, generated drafts, and API cost.""" - cfg = _get_config() - db = Database(cfg) - try: - # Pipeline health 
overview - total = db.count_drafts() - rated_count = len(db.drafts_with_ratings(limit=10000)) - unrated = len(db.unrated_drafts(limit=10000)) - unembedded = len(db.drafts_without_embeddings(limit=10000)) - embedded_count = total - unembedded - no_ideas = len(db.drafts_without_ideas(limit=10000)) - ideas_count = total - no_ideas - idea_total = db.idea_count() - gap_count = len(db.all_gaps()) - input_tok, output_tok = db.total_tokens_used() - est_cost = (input_tok * 3.0 / 1_000_000) + (output_tok * 15.0 / 1_000_000) - - # Last update - snapshots = db.get_snapshots(limit=1) - last_update = snapshots[0]["snapshot_at"][:19] if snapshots else "never" - - console.print("\n[bold]Pipeline Status[/]\n") - console.print(f" Total documents: [bold]{total}[/]") - console.print(f" Last update: {last_update}") - console.print() - - # Stage table - stage_table = Table(title="Processing Stages") - stage_table.add_column("Stage", width=20) - stage_table.add_column("Done", justify="right", width=8) - stage_table.add_column("Missing", justify="right", width=8) - stage_table.add_column("Progress", width=20) - - def bar(done, total_n): - pct = int(done / total_n * 100) if total_n > 0 else 0 - filled = pct // 5 - return f"[green]{'#' * filled}[/][dim]{'.' 
* (20 - filled)}[/] {pct}%" - - stage_table.add_row("Rated", str(rated_count), str(unrated), bar(rated_count, total)) - stage_table.add_row("Embedded", str(embedded_count), str(unembedded), bar(embedded_count, total)) - stage_table.add_row("Ideas extracted", str(ideas_count), str(no_ideas), bar(ideas_count, total)) - - console.print(stage_table) - - console.print(f"\n Total ideas: [bold]{idea_total}[/]") - console.print(f" Gaps identified: [bold]{gap_count}[/]") - console.print(f"\n API tokens: {input_tok:,} in + {output_tok:,} out") - console.print(f" Estimated cost: [bold]${est_cost:.2f}[/]") - - # Generated drafts - gen_drafts = db.get_generated_drafts() - if gen_drafts: - console.print() - table = Table(title=f"Generated Drafts ({len(gen_drafts)})") - table.add_column("ID", justify="right", width=4) - table.add_column("Draft Name", style="cyan") - table.add_column("Gap Topic") - table.add_column("Family", width=15) - table.add_column("Status", width=10) - table.add_column("Quality", justify="right", width=7) - table.add_column("Created", width=10) - - for d in gen_drafts: - table.add_row( - str(d["id"]), - d["draft_name"], - d["gap_topic"][:30], - d.get("family_name", ""), - d.get("status", "?"), - f"{d.get('quality_score', 0):.1f}" if d.get("quality_score") else "-", - (d.get("created_at") or "")[:10], - ) - console.print(table) - finally: - db.close() - - -@pipeline.command("export") -@click.argument("draft_id", type=int) -@click.option("--output", "-o", help="Output file path") -def pipeline_export(draft_id: int, output: str | None): - """Export a generated draft as I-D text.""" - cfg = _get_config() - db = Database(cfg) - try: - draft = db.get_generated_draft(draft_id) - if not draft: - console.print(f"[red]Draft #{draft_id} not found[/]") - return - - text = draft.get("full_text", "") - if not text: - console.print(f"[red]Draft #{draft_id} has no generated text[/]") - return - - if output: - out_path = Path(output) - else: - output_dir = Path(cfg.data_dir) 
/ "reports" / "generated-drafts" - output_dir.mkdir(parents=True, exist_ok=True) - out_path = output_dir / f"{draft['draft_name']}.txt" - - out_path.write_text(text) - console.print(f"Exported: [bold green]{out_path}[/]") - finally: - db.close() - - -# ── observatory ───────────────────────────────────────────────────────────── - - -@main.group() -def observatory(): - """Living Standards Observatory — monitor AI standards across bodies.""" - pass - - -@observatory.command("update") -@click.option("--source", "-s", default=None, help="Comma-separated sources (e.g. ietf,w3c)") -@click.option("--full/--delta", default=False, help="Full refresh or delta only") -@click.option("--dry-run", is_flag=True, default=False, help="Show what would happen without making changes") -def observatory_update(source: str | None, full: bool, dry_run: bool): - """Fetch, analyze, and update the observatory.""" - from .observatory import Observatory - - cfg = _get_config() - db = Database(cfg) - - try: - if dry_run: - obs = Observatory(cfg, db) - else: - from .analyzer import Analyzer - analyzer = Analyzer(cfg, db) - obs = Observatory(cfg, db, analyzer) - sources = source.split(",") if source else None - mode = "full" if full else "delta" - console.print(f"[bold]Observatory update[/] ({mode}{' [DRY RUN]' if dry_run else ''})") - result = obs.update(sources=sources, full=full, dry_run=dry_run) - - if not dry_run: - console.print(f"\n[bold green]Update complete![/]") - console.print(f" New docs: {result.get('new_docs', 0)}") - console.print(f" Analyzed: {result.get('analyzed', 0)}") - console.print(f" Embedded: {result.get('embedded', 0)}") - console.print(f" Ideas extracted: {result.get('ideas', 0)}") - if result.get("gaps_changed"): - console.print(f" Gaps re-analyzed: yes") - if result.get("errors"): - console.print(f"\n [yellow]Errors ({len(result['errors'])}):[/]") - for err in result["errors"]: - console.print(f" - {err}") - finally: - db.close() - - -@observatory.command("dashboard") 
-def observatory_dashboard(): - """Regenerate the static dashboard site.""" - from .dashboard import DashboardGenerator - - cfg = _get_config() - db = Database(cfg) - - try: - gen = DashboardGenerator(cfg, db) - path = gen.generate() - console.print(f"[bold green]Dashboard generated:[/] {path}") - console.print(f" Open: file://{path}/index.html") - finally: - db.close() - - -@observatory.command("status") -def observatory_status(): - """Show observatory status — doc counts, sources, last update.""" - from .observatory import Observatory - - cfg = _get_config() - db = Database(cfg) - - try: - obs = Observatory(cfg, db) - status = obs.status() - - console.print(f"\n[bold]Observatory Status[/]\n") - console.print(f" Total documents: [bold]{status.get('total_docs', 0)}[/]") - console.print(f" Unrated: {status.get('unrated', 0)}") - console.print(f" Unembedded: {status.get('unembedded', 0)}") - console.print(f" Gaps: {status.get('gaps', 0)}") - - sources = status.get("sources", {}) - if sources: - console.print(f"\n [bold]Sources:[/]") - for name, count in sources.items(): - console.print(f" {name}: {count} docs") - - last_update = status.get("last_update") - if last_update: - console.print(f"\n Last update: {last_update[:10]}") - console.print(f" Snapshots: {status.get('snapshots', 0)}") - finally: - db.close() - - -@observatory.command("snapshot") -def observatory_snapshot(): - """Record current state as a snapshot.""" - cfg = _get_config() - db = Database(cfg) - try: - snap_id = db.create_snapshot() - gaps = db.all_gaps() - if gaps: - db.record_gap_history(snap_id, gaps) - console.print(f"[bold green]Snapshot #{snap_id} created[/] ({db.count_drafts()} docs, {len(gaps)} gaps)") - finally: - db.close() - - -@observatory.command("diff") -@click.option("--since", help="Show changes since this date (YYYY-MM-DD)") -def observatory_diff(since: str | None): - """Show what changed since a date.""" - from .observatory import Observatory - - cfg = _get_config() - db = 
Database(cfg) - - try: - obs = Observatory(cfg, db) - result = obs.diff(since=since) - - console.print(f"\n[bold]Observatory Diff[/]") - if since: - console.print(f" Since: {result.get('since', since)}") - - new_docs = result.get("new_docs", []) - console.print(f" New documents: {result.get('new_doc_count', len(new_docs))}") - gap_changes = result.get("gap_changes", []) - console.print(f" Gap history entries: {len(gap_changes)}") - - if new_docs: - console.print(f"\n [bold]New documents:[/]") - for doc in new_docs[:20]: - d = dict(doc) if not isinstance(doc, dict) else doc - console.print(f" [{d.get('source', '?')}] {d.get('name', '?')}: {d.get('title', '')[:60]}") - finally: - db.close() - - -# ── monitor ───────────────────────────────────────────────────────────── - - -@main.group() -def monitor(): - """Monitor IETF Datatracker for new AI/agent drafts.""" - pass - - -@monitor.command("run") -@click.option("--analyze/--no-analyze", default=True, help="Analyze new drafts") -@click.option("--embed/--no-embed", default=True, help="Generate embeddings") -@click.option("--ideas/--no-ideas", default=True, help="Extract ideas") -def monitor_run(analyze, embed, ideas): - """Run one monitoring cycle: fetch -> analyze -> embed -> ideas.""" - from .analyzer import Analyzer - from .embeddings import Embedder - from .fetcher import Fetcher - - cfg = _get_config() - db = Database(cfg) - run_id = db.start_monitor_run() - stats = { - "new_drafts_found": 0, - "drafts_analyzed": 0, - "drafts_embedded": 0, - "ideas_extracted": 0, - } - - try: - console.print("[bold]Monitor run started[/]") - - # Determine since date from last successful run - last_run = db.get_last_successful_run() - since = last_run["completed_at"][:10] if last_run and last_run.get("completed_at") else cfg.fetch_since - console.print(f" Fetching drafts since: [cyan]{since}[/]") - - # Fetch new drafts - fetcher = Fetcher(cfg) - try: - existing_count = db.count_drafts() - drafts = 
fetcher.search_drafts(keywords=list(cfg.search_keywords), since=since) - for draft in drafts: - db.upsert_draft(draft) - - # Download text for any missing - missing_text = db.drafts_without_text() - if missing_text: - console.print(f" Downloading text for [bold]{len(missing_text)}[/] drafts...") - texts = fetcher.download_texts(missing_text) - for name, text in texts.items(): - draft = db.get_draft(name) - if draft: - draft.full_text = text - db.upsert_draft(draft) - finally: - fetcher.close() - - new_count = db.count_drafts() - existing_count - stats["new_drafts_found"] = max(new_count, 0) - console.print(f" New drafts found: [bold green]{stats['new_drafts_found']}[/]") - - # Analyze unrated drafts - if analyze: - unrated = db.unrated_drafts(limit=200) - if unrated: - console.print(f" Analyzing [bold]{len(unrated)}[/] unrated drafts...") - analyzer = Analyzer(cfg, db) - count = analyzer.rate_all_unrated(limit=200) - stats["drafts_analyzed"] = count - console.print(f" Analyzed: [bold green]{count}[/]") - - # Embed missing drafts - if embed: - missing_embed = db.drafts_without_embeddings(limit=500) - if missing_embed: - console.print(f" Embedding [bold]{len(missing_embed)}[/] drafts...") - embedder = Embedder(cfg, db) - count = embedder.embed_all_missing() - stats["drafts_embedded"] = count - console.print(f" Embedded: [bold green]{count}[/]") - - # Extract ideas - if ideas: - missing_ideas = db.drafts_without_ideas(limit=500) - if missing_ideas: - console.print(f" Extracting ideas from [bold]{len(missing_ideas)}[/] drafts...") - analyzer = Analyzer(cfg, db) - count = analyzer.extract_all_ideas(limit=500, batch_size=5, cheap=True) - stats["ideas_extracted"] = count - console.print(f" Ideas extracted from: [bold green]{count}[/] drafts") - - db.complete_monitor_run(run_id, stats) - console.print("\n[bold green]Monitor run completed successfully[/]") - - except Exception as e: - db.fail_monitor_run(run_id, str(e)) - console.print(f"\n[bold red]Monitor run failed:[/] 
{e}") - raise - finally: - db.close() - - -@monitor.command("status") -def monitor_status(): - """Show monitoring status and recent runs.""" - cfg = _get_config() - db = Database(cfg) - - try: - runs = db.get_monitor_runs(limit=20) - last = db.get_last_successful_run() - - # Unprocessed counts - unrated = len(db.unrated_drafts(limit=9999)) - unembedded = len(db.drafts_without_embeddings(limit=9999)) - no_ideas = len(db.drafts_without_ideas(limit=9999)) - - console.print("\n[bold]Monitor Status[/]\n") - - if last: - console.print(f" Last successful run: [green]{last['completed_at']}[/]") - console.print(f" Duration: {last['duration_seconds']:.1f}s") - console.print(f" New drafts: {last['new_drafts_found']}") - else: - console.print(" [yellow]No successful runs yet[/]") - - console.print(f"\n[bold]Unprocessed[/]") - console.print(f" Unrated: [{'yellow' if unrated > 0 else 'green'}]{unrated}[/]") - console.print(f" Unembedded: [{'yellow' if unembedded > 0 else 'green'}]{unembedded}[/]") - console.print(f" No ideas: [{'yellow' if no_ideas > 0 else 'green'}]{no_ideas}[/]") - - if runs: - console.print(f"\n[bold]Recent Runs[/] ({len(runs)} total)\n") - table = Table() - table.add_column("#", justify="right", width=4) - table.add_column("Started", width=20) - table.add_column("Duration", justify="right", width=8) - table.add_column("Status", width=10) - table.add_column("New", justify="right", width=5) - table.add_column("Analyzed", justify="right", width=8) - table.add_column("Embedded", justify="right", width=8) - table.add_column("Ideas", justify="right", width=6) - for r in runs: - status_style = {"completed": "green", "failed": "red", "running": "yellow"}.get(r["status"], "dim") - table.add_row( - str(r["id"]), - r["started_at"][:19] if r["started_at"] else "", - f"{r['duration_seconds']:.1f}s" if r["duration_seconds"] else "-", - f"[{status_style}]{r['status']}[/{status_style}]", - str(r["new_drafts_found"]), - str(r["drafts_analyzed"]), - str(r["drafts_embedded"]), 
@main.command()
@click.option("--type", "export_type", type=click.Choice(["drafts", "ideas", "gaps", "authors", "ratings"]),
              required=True, help="Type of data to export")
@click.option("--format", "fmt", type=click.Choice(["json", "csv"]), default="json", help="Output format")
@click.option("--output", "-o", "output_file", type=click.Path(), default=None,
              help="Output file (default: stdout)")
def export(export_type: str, fmt: str, output_file: str | None):
    """Export data as JSON or CSV.

    Args:
        export_type: Dataset to export: drafts, ideas, gaps, authors or ratings.
        fmt: Output format, "json" or "csv".
        output_file: Destination path; when omitted the text is echoed to stdout.
    """
    import csv as csv_mod
    import io
    import json

    cfg = _get_config()
    db = Database(cfg)

    try:
        rows: list[dict] = []

        if export_type == "drafts":
            drafts = db.list_drafts(limit=10000, order_by="name ASC")
            for d in drafts:
                rating = db.get_rating(d.name)
                row = {
                    "name": d.name,
                    "title": d.title,
                    "rev": d.rev,
                    "date": d.date,
                    "pages": d.pages or 0,
                    "group": d.group or "",
                }
                # Rating columns exist only for rated drafts, so rows can
                # have different key sets (handled in the CSV branch below).
                if rating:
                    row["score"] = round(rating.composite_score, 2)
                    row["novelty"] = rating.novelty
                    row["maturity"] = rating.maturity
                    row["overlap"] = rating.overlap
                    row["momentum"] = rating.momentum
                    row["relevance"] = rating.relevance
                    row["categories"] = json.dumps(rating.categories)
                    row["summary"] = rating.summary
                rows.append(row)

        elif export_type == "ideas":
            rows = db.all_ideas()

        elif export_type == "gaps":
            rows = db.all_gaps()

        elif export_type == "authors":
            top = db.top_authors(limit=10000)
            for name, aff, cnt, drafts_list in top:
                rows.append({
                    "name": name,
                    "affiliation": aff,
                    "draft_count": cnt,
                    "drafts": json.dumps(drafts_list),
                })

        elif export_type == "ratings":
            pairs = db.drafts_with_ratings(limit=10000)
            for draft, rating in pairs:
                rows.append({
                    "name": draft.name,
                    "title": draft.title,
                    "score": round(rating.composite_score, 2),
                    "novelty": rating.novelty,
                    "maturity": rating.maturity,
                    "overlap": rating.overlap,
                    "momentum": rating.momentum,
                    "relevance": rating.relevance,
                    "categories": json.dumps(rating.categories),
                    "summary": rating.summary,
                })

        if fmt == "json":
            text = json.dumps(rows, indent=2, ensure_ascii=False)
        elif not rows:
            text = ""
        else:
            # Build the header from every row, not just the first one:
            # DictWriter raises ValueError on a key missing from fieldnames,
            # which happened when an unrated draft preceded a rated one.
            # dict.fromkeys keeps first-seen column order.
            fieldnames = list(dict.fromkeys(key for row in rows for key in row.keys()))
            si = io.StringIO()
            writer = csv_mod.DictWriter(si, fieldnames=fieldnames, restval="")
            writer.writeheader()
            writer.writerows(rows)
            text = si.getvalue()

        if output_file:
            Path(output_file).write_text(text, encoding="utf-8")
            console.print(f"Exported [bold green]{len(rows)}[/] {export_type} to [cyan]{output_file}[/] ({fmt})")
        else:
            click.echo(text)

    finally:
        db.close()
- - Examples: - - ietf auto # run full pipeline, auto-approve under $2 - - ietf auto --dry-run # show plan without executing - - ietf auto -s iso # only process ISO drafts - - ietf auto --cost-limit 5 # raise approval threshold to $5 - - ietf auto -y # skip all prompts (for cron) - """ - cfg = Config.load() - db = Database(cfg) - - try: - _auto_heal(cfg, db, cost_limit=cost_limit, yes=yes, dry_run=dry_run, source_filter=source) - finally: - db.close() - - -def _estimate_cost(n_drafts: int, operation: str) -> float: - """Estimate USD cost for an operation. Conservative estimates.""" - # Haiku: ~$0.25/M input, ~$1.25/M output - # Sonnet: ~$3/M input, ~$15/M output - # Average draft abstract: ~500 tokens input, ~200 tokens output - costs = { - "analyze_cheap": n_drafts * 0.0005, # ~$0.50 per 1000 drafts (Haiku) - "analyze_quality": n_drafts * 0.005, # ~$5.00 per 1000 drafts (Sonnet) - "ideas_cheap": n_drafts * 0.001, # ~$1.00 per 1000 drafts (Haiku batch) - "ideas_quality": n_drafts * 0.008, # ~$8.00 per 1000 drafts (Sonnet) - "gaps": 0.05, # single Claude call - "embed": 0.0, # Ollama is free/local - "authors": 0.0, # Datatracker API is free - "fetch": 0.0, # Datatracker API is free - } - return costs.get(operation, 0.0) - - -def _auto_heal(cfg, db, cost_limit: float, yes: bool, dry_run: bool, source_filter: str | None): - """Run the full auto-heal pipeline.""" - import time as _time - - from rich.panel import Panel - - steps: list[dict] = [] - total_cost = 0.0 - - # ── Step 1: Fetch new drafts from all sources ── - sources = [source_filter] if source_filter else cfg.observatory_sources - steps.append({ - "name": f"Fetch new drafts from {', '.join(sources)}", - "sources": sources, - "cost": 0.0, - "action": "fetch", - }) - - # ── Step 2: Analyze unrated drafts ── - unrated = db.unrated_drafts(limit=10000) - if source_filter: - unrated = [d for d in unrated if (d.source or "ietf") == source_filter] - n_unrated = len(unrated) - analyze_cost = _estimate_cost(n_unrated, 
"analyze_cheap") - steps.append({ - "name": f"Analyze {n_unrated} unrated drafts (Haiku)", - "count": n_unrated, - "cost": analyze_cost, - "action": "analyze", - }) - total_cost += analyze_cost - - # ── Step 3: Fetch authors ── - missing_authors = db.conn.execute( - "SELECT COUNT(*) FROM drafts WHERE name NOT IN (SELECT DISTINCT draft_name FROM draft_authors)" - ).fetchone()[0] - steps.append({ - "name": f"Fetch authors for {missing_authors} drafts", - "count": missing_authors, - "cost": 0.0, - "action": "authors", - }) - - # ── Step 4: Embed missing drafts ── - missing_embed = db.drafts_without_embeddings(limit=10000) - if source_filter: - source_names = {row[0] for row in db.conn.execute( - "SELECT name FROM drafts WHERE source = ?", (source_filter,) - ).fetchall()} - missing_embed = [n for n in missing_embed if n in source_names] - n_embed = len(missing_embed) - steps.append({ - "name": f"Embed {n_embed} drafts (Ollama, free)", - "count": n_embed, - "cost": 0.0, - "action": "embed", - }) - - # ── Step 5: Extract ideas ── - missing_ideas = db.drafts_without_ideas(limit=10000) - if source_filter: - if not source_names: - source_names = {row[0] for row in db.conn.execute( - "SELECT name FROM drafts WHERE source = ?", (source_filter,) - ).fetchall()} - missing_ideas = [n for n in missing_ideas if n in source_names] - n_ideas = len(missing_ideas) - ideas_cost = _estimate_cost(n_ideas, "ideas_cheap") - steps.append({ - "name": f"Extract ideas from {n_ideas} drafts (Haiku)", - "count": n_ideas, - "cost": ideas_cost, - "action": "ideas", - }) - total_cost += ideas_cost - - # ── Step 6: Refresh gaps ── - gap_cost = _estimate_cost(0, "gaps") - steps.append({ - "name": "Refresh gap analysis", - "cost": gap_cost, - "action": "gaps", - }) - total_cost += gap_cost - - # ── Show plan ── - plan_lines = [] - for s in steps: - count = s.get("count", 1) - if count == 0: - plan_lines.append(f" [dim]SKIP[/] {s['name']}") - else: - cost_str = f" [yellow]~${s['cost']:.2f}[/]" if 
s["cost"] > 0 else "" - plan_lines.append(f" [green]RUN[/] {s['name']}{cost_str}") - - auto_approved = total_cost <= cost_limit - plan_lines.append(f"\n [bold]Estimated total cost: ${total_cost:.2f}[/]") - if auto_approved: - plan_lines.append(f" [green]Auto-approved (under ${cost_limit:.2f} limit)[/]") - else: - plan_lines.append(f" [yellow]Requires approval (over ${cost_limit:.2f} limit)[/]") - - console.print(Panel("\n".join(plan_lines), title="Auto-Heal Plan")) - - if dry_run: - console.print("[bold yellow]DRY RUN[/] — no changes made.") - return - - # ── Approval ── - if not auto_approved and not yes: - if not click.confirm(f"Estimated cost ${total_cost:.2f} exceeds ${cost_limit:.2f} limit. Proceed?"): - console.print("[yellow]Aborted.[/]") - return - - # ── Execute ── - start = _time.time() - - for step in steps: - action = step["action"] - count = step.get("count", 0) - - if action == "fetch": - console.print(f"\n[bold cyan]>>> Fetching from {step['sources']}...[/]") - from .sources import get_fetcher - from .observatory import _doc_to_draft - for src_name in step["sources"]: - try: - fetcher = get_fetcher(src_name, cfg) - before = db.count_drafts() - results = fetcher.search(keywords=cfg.search_keywords) - for doc in results: - db.upsert_draft(_doc_to_draft(doc)) - after = db.count_drafts() - new = after - before - console.print(f" [{src_name}] +{new} new drafts") - fetcher.close() - except Exception as e: - console.print(f" [{src_name}] [red]Error: {e}[/]") - - elif action == "analyze" and count > 0: - console.print(f"\n[bold cyan]>>> Analyzing {count} drafts (Haiku)...[/]") - from .analyzer import Analyzer - analyzer = Analyzer(cfg, db) - orig_model = cfg.claude_model - cfg.claude_model = cfg.claude_model_cheap - try: - done = analyzer.rate_all_unrated(limit=count) - console.print(f" Analyzed [bold green]{done}[/] drafts") - finally: - cfg.claude_model = orig_model - - elif action == "authors" and count > 0: - console.print(f"\n[bold cyan]>>> Fetching 
authors for {count} drafts...[/]") - from .authors import AuthorNetwork - author_net = AuthorNetwork(cfg, db) - done = author_net.fetch_all_authors() - console.print(f" Fetched authors for [bold green]{done}[/] drafts") - - elif action == "embed" and count > 0: - console.print(f"\n[bold cyan]>>> Embedding {count} drafts (Ollama)...[/]") - from .embeddings import Embedder - with Embedder(cfg, db) as embedder: - done = embedder.embed_all_missing() - console.print(f" Embedded [bold green]{done}[/] drafts") - - elif action == "ideas" and count > 0: - console.print(f"\n[bold cyan]>>> Extracting ideas from {count} drafts (Haiku)...[/]") - from .analyzer import Analyzer - analyzer = Analyzer(cfg, db) - done = analyzer.extract_all_ideas(limit=count, batch_size=5, cheap=True) - console.print(f" Extracted ideas from [bold green]{done}[/] drafts") - - elif action == "gaps": - console.print(f"\n[bold cyan]>>> Refreshing gap analysis...[/]") - from .analyzer import Analyzer - analyzer = Analyzer(cfg, db) - gaps = analyzer.gap_analysis() - if gaps: - console.print(f" Found [bold green]{len(gaps)}[/] gaps") - - elapsed = _time.time() - start - console.print(f"\n[bold green]Auto-heal complete![/] ({elapsed:.1f}s, ~${total_cost:.2f})") - - # Show final counts - total = db.count_drafts() - rated = db.rated_count() - embedded = db.conn.execute("SELECT COUNT(*) FROM embeddings").fetchone()[0] - idea_count = db.idea_count(include_false_positives=True) - gap_count = db.gap_count() - console.print(f" Drafts: {total} | Rated: {rated} | Embedded: {embedded} | Ideas: {idea_count} | Gaps: {gap_count}") - - source_str = " | ".join(f"{s}: {c}" for s, c in db.source_counts()) - console.print(f" Sources: {source_str}") +# Register all command modules +from .commands import register_commands +register_commands(main) diff --git a/src/ietf_analyzer/commands/__init__.py b/src/ietf_analyzer/commands/__init__.py new file mode 100644 index 0000000..bb78505 --- /dev/null +++ 
def _coerce_config_value(current, raw: str):
    """Convert the CLI string *raw* to the type of the existing value *current*.

    Raises:
        ValueError: if *raw* cannot be converted to that type.
    """
    # bool must be checked before int: bool is a subclass of int, so a boolean
    # setting would otherwise go through int() and reject "true"/"false".
    if isinstance(current, bool):
        lowered = raw.strip().lower()
        if lowered in ("1", "true", "yes", "on"):
            return True
        if lowered in ("0", "false", "no", "off"):
            return False
        raise ValueError(f"expected a boolean, got {raw!r}")
    if isinstance(current, float):
        return float(raw)
    if isinstance(current, int):
        return int(raw)
    if isinstance(current, list):
        import json
        parsed = json.loads(raw)
        if not isinstance(parsed, list):
            raise ValueError(f"expected a JSON list, got {raw!r}")
        return parsed
    # Strings and unset (None) values are stored as given.
    return raw


@click.command("config")
@click.option("--set", "set_key", nargs=2, help="Set a config key (e.g. --set claude_model claude-opus-4-20250514)")
@click.option("--show", is_flag=True, help="Show effective config with env var sources noted")
def config_cmd(set_key: tuple[str, str] | None, show: bool):
    """Show or modify configuration.

    With ``--set KEY VALUE`` the value is coerced to the type of the current
    setting, stored and saved. Without ``--set`` the effective configuration is
    printed, noting which values come from environment variables.

    Raises:
        click.BadParameter: if VALUE cannot be converted to the setting's type.
    """
    from dataclasses import asdict, fields

    cfg = _get_config()

    if set_key:
        key, value = set_key
        # Only dataclass fields are settable. hasattr() also matched methods,
        # so "--set save x" replaced cfg.save and the save call then failed.
        if key not in {f.name for f in fields(cfg)}:
            console.print(f"[red]Unknown config key: {key}[/]")
            return
        try:
            value = _coerce_config_value(getattr(cfg, key), value)
        except ValueError as exc:
            # json.JSONDecodeError is a ValueError subclass, so list parsing
            # errors land here too; report a usage error, not a traceback.
            raise click.BadParameter(f"invalid value for {key}: {exc}") from exc
        setattr(cfg, key, value)
        cfg.save()
        console.print(f"Set [bold]{key}[/] = {value}")
    else:
        env_sources = cfg.env_sources()
        for key, val in asdict(cfg).items():
            source_note = ""
            if key in env_sources:
                source_note = f" [yellow](from ${env_sources[key]})[/]"
            console.print(f" [bold]{key}:[/] {val}{source_note}")
        if env_sources:
            console.print(f"\n [dim]({len(env_sources)} value(s) overridden by environment variables)[/]")
        # Note about ANTHROPIC_API_KEY
        import os
        if os.environ.get("ANTHROPIC_API_KEY"):
            console.print(" [dim]ANTHROPIC_API_KEY is set in environment[/]")
        else:
            console.print(" [dim]ANTHROPIC_API_KEY is NOT set in environment[/]")
@pipeline.command("generate")
@click.argument("gap_topic")
@click.option("--cheap/--quality", default=False, help="Use Haiku (cheap) or Sonnet (quality)")
@click.option("--dry-run", is_flag=True, help="Show outline only, don't generate sections")
@click.option("--family", "family_name", default="", help="Family name for multi-draft generation")
def pipeline_generate(gap_topic: str, cheap: bool, dry_run: bool, family_name: str):
    """Generate a single draft from a gap topic.

    Builds the context for *gap_topic*, prints the generated outline and,
    unless ``--dry-run`` is given, generates the full draft and writes it to
    ``<data_dir>/reports/generated-drafts/<draft_name>.txt``.

    Args:
        gap_topic: Topic of the gap to write a draft for.
        cheap: Passed through to the generator as ``cheap=``.
        dry_run: Print the outline as JSON and stop.
        family_name: Accepted for CLI compatibility.
    """
    from ..analyzer import Analyzer
    from ..pipeline import PipelineGenerator, ContextBuilder

    # NOTE(review): family_name is not forwarded to the generator here —
    # confirm whether generate_full() should receive it.
    cfg = _get_config()
    db = Database(cfg)

    try:
        # Build the analyzer inside the try so the database is closed even
        # when its construction fails.
        analyzer = Analyzer(cfg, db)
        builder = ContextBuilder(cfg, db)
        generator = PipelineGenerator(cfg, db, analyzer)
        ctx = builder.build_context(gap_topic)

        console.print(f"[bold]Generating draft for gap: {gap_topic}[/]")

        outline = generator.generate_outline(ctx, cheap=cheap)
        console.print(f" Title: [cyan]{outline.get('title', '?')}[/]")
        console.print(f" Sections: {len(outline.get('sections', []))}")
        console.print(f" Target WG: {outline.get('target_wg', '?')}")

        if dry_run:
            import json
            console.print("\n[bold]Outline (dry run):[/]")
            console.print(json.dumps(outline, indent=2))
            return

        result = generator.generate_full(gap_topic, cheap=cheap)
        console.print(f"\n[bold green]Draft generated![/]")
        console.print(f" ID: {result.get('id', '?')}")
        console.print(f" Draft name: {result.get('draft_name', '?')}")

        # Export text file
        output_dir = Path(cfg.data_dir) / "reports" / "generated-drafts"
        output_dir.mkdir(parents=True, exist_ok=True)
        draft_name = result.get("draft_name", "draft-unknown")
        out_path = output_dir / f"{draft_name}.txt"
        if result.get("full_text"):
            # Explicit UTF-8: the locale default can fail on non-ASCII text.
            out_path.write_text(result["full_text"], encoding="utf-8")
            console.print(f" Saved: {out_path}")
    finally:
        db.close()
@pipeline.command("quality")
@click.argument("draft_id", type=int)
def pipeline_quality(draft_id: int):
    """Run quality gates on a generated draft.

    Prints one PASS/FAIL line per gate returned by ``QualityGates.run_all``,
    followed by the gate's score when the result carries one.

    Args:
        draft_id: ID of the generated draft to check.
    """
    from ..analyzer import Analyzer
    from ..pipeline import QualityGates

    cfg = _get_config()
    db = Database(cfg)

    try:
        # Build the analyzer inside the try so the database is closed even
        # when its construction fails.
        analyzer = Analyzer(cfg, db)
        gates = QualityGates(cfg, db, analyzer)
        console.print(f"[bold]Running quality gates on draft #{draft_id}[/]\n")
        results = gates.run_all(draft_id)

        for gate_name, result in results.items():
            status = "[green]PASS[/]" if result["passed"] else "[red]FAIL[/]"
            # "or ''" covers a gate whose details value is present but None.
            details = (result.get("details") or "")[:100]
            console.print(f" {status} {gate_name}: {details}")
            if "score" in result:
                console.print(f" Score: {result['score']:.2f}")
    finally:
        db.close()
health overview + total = db.count_drafts() + rated_count = len(db.drafts_with_ratings(limit=10000)) + unrated = len(db.unrated_drafts(limit=10000)) + unembedded = len(db.drafts_without_embeddings(limit=10000)) + embedded_count = total - unembedded + no_ideas = len(db.drafts_without_ideas(limit=10000)) + ideas_count = total - no_ideas + idea_total = db.idea_count() + gap_count = len(db.all_gaps()) + input_tok, output_tok = db.total_tokens_used() + est_cost = (input_tok * 3.0 / 1_000_000) + (output_tok * 15.0 / 1_000_000) + + # Last update + snapshots = db.get_snapshots(limit=1) + last_update = snapshots[0]["snapshot_at"][:19] if snapshots else "never" + + console.print("\n[bold]Pipeline Status[/]\n") + console.print(f" Total documents: [bold]{total}[/]") + console.print(f" Last update: {last_update}") + console.print() + + # Stage table + stage_table = Table(title="Processing Stages") + stage_table.add_column("Stage", width=20) + stage_table.add_column("Done", justify="right", width=8) + stage_table.add_column("Missing", justify="right", width=8) + stage_table.add_column("Progress", width=20) + + def bar(done, total_n): + pct = int(done / total_n * 100) if total_n > 0 else 0 + filled = pct // 5 + return f"[green]{'#' * filled}[/][dim]{'.' 
@pipeline.command("export")
@click.argument("draft_id", type=int)
@click.option("--output", "-o", help="Output file path")
def pipeline_export(draft_id: int, output: str | None):
    """Export a generated draft as I-D text.

    Args:
        draft_id: ID of the generated draft to export.
        output: Destination file; defaults to
            ``<data_dir>/reports/generated-drafts/<draft_name>.txt``.
    """
    cfg = _get_config()
    db = Database(cfg)
    try:
        draft = db.get_generated_draft(draft_id)
        if not draft:
            console.print(f"[red]Draft #{draft_id} not found[/]")
            return

        text = draft.get("full_text", "")
        if not text:
            console.print(f"[red]Draft #{draft_id} has no generated text[/]")
            return

        if output:
            out_path = Path(output)
        else:
            output_dir = Path(cfg.data_dir) / "reports" / "generated-drafts"
            output_dir.mkdir(parents=True, exist_ok=True)
            out_path = output_dir / f"{draft['draft_name']}.txt"

        # Explicit UTF-8, matching the data export command: the locale
        # default can fail on non-ASCII draft text.
        out_path.write_text(text, encoding="utf-8")
        console.print(f"Exported: [bold green]{out_path}[/]")
    finally:
        db.close()
+@observatory.command("dashboard") +def observatory_dashboard(): + """Regenerate the static dashboard site.""" + from ..dashboard import DashboardGenerator + + cfg = _get_config() + db = Database(cfg) + + try: + gen = DashboardGenerator(cfg, db) + path = gen.generate() + console.print(f"[bold green]Dashboard generated:[/] {path}") + console.print(f" Open: file://{path}/index.html") + finally: + db.close() + + +@observatory.command("status") +def observatory_status(): + """Show observatory status — doc counts, sources, last update.""" + from ..observatory import Observatory + + cfg = _get_config() + db = Database(cfg) + + try: + obs = Observatory(cfg, db) + status = obs.status() + + console.print(f"\n[bold]Observatory Status[/]\n") + console.print(f" Total documents: [bold]{status.get('total_docs', 0)}[/]") + console.print(f" Unrated: {status.get('unrated', 0)}") + console.print(f" Unembedded: {status.get('unembedded', 0)}") + console.print(f" Gaps: {status.get('gaps', 0)}") + + sources = status.get("sources", {}) + if sources: + console.print(f"\n [bold]Sources:[/]") + for name, count in sources.items(): + console.print(f" {name}: {count} docs") + + last_update = status.get("last_update") + if last_update: + console.print(f"\n Last update: {last_update[:10]}") + console.print(f" Snapshots: {status.get('snapshots', 0)}") + finally: + db.close() + + +@observatory.command("snapshot") +def observatory_snapshot(): + """Record current state as a snapshot.""" + cfg = _get_config() + db = Database(cfg) + try: + snap_id = db.create_snapshot() + gaps = db.all_gaps() + if gaps: + db.record_gap_history(snap_id, gaps) + console.print(f"[bold green]Snapshot #{snap_id} created[/] ({db.count_drafts()} docs, {len(gaps)} gaps)") + finally: + db.close() + + +@observatory.command("diff") +@click.option("--since", help="Show changes since this date (YYYY-MM-DD)") +def observatory_diff(since: str | None): + """Show what changed since a date.""" + from ..observatory import Observatory + 
+ cfg = _get_config() + db = Database(cfg) + + try: + obs = Observatory(cfg, db) + result = obs.diff(since=since) + + console.print(f"\n[bold]Observatory Diff[/]") + if since: + console.print(f" Since: {result.get('since', since)}") + + new_docs = result.get("new_docs", []) + console.print(f" New documents: {result.get('new_doc_count', len(new_docs))}") + gap_changes = result.get("gap_changes", []) + console.print(f" Gap history entries: {len(gap_changes)}") + + if new_docs: + console.print(f"\n [bold]New documents:[/]") + for doc in new_docs[:20]: + d = dict(doc) if not isinstance(doc, dict) else doc + console.print(f" [{d.get('source', '?')}] {d.get('name', '?')}: {d.get('title', '')[:60]}") + finally: + db.close() + + +# ── monitor ───────────────────────────────────────────────────────────── + + +@click.group() +def monitor(): + """Monitor IETF Datatracker for new AI/agent drafts.""" + pass + + +@monitor.command("run") +@click.option("--analyze/--no-analyze", default=True, help="Analyze new drafts") +@click.option("--embed/--no-embed", default=True, help="Generate embeddings") +@click.option("--ideas/--no-ideas", default=True, help="Extract ideas") +def monitor_run(analyze, embed, ideas): + """Run one monitoring cycle: fetch -> analyze -> embed -> ideas.""" + from ..analyzer import Analyzer + from ..embeddings import Embedder + from ..fetcher import Fetcher + + cfg = _get_config() + db = Database(cfg) + run_id = db.start_monitor_run() + stats = { + "new_drafts_found": 0, + "drafts_analyzed": 0, + "drafts_embedded": 0, + "ideas_extracted": 0, + } + + try: + console.print("[bold]Monitor run started[/]") + + # Determine since date from last successful run + last_run = db.get_last_successful_run() + since = last_run["completed_at"][:10] if last_run and last_run.get("completed_at") else cfg.fetch_since + console.print(f" Fetching drafts since: [cyan]{since}[/]") + + # Fetch new drafts + fetcher = Fetcher(cfg) + try: + existing_count = db.count_drafts() + drafts = 
fetcher.search_drafts(keywords=list(cfg.search_keywords), since=since) + for draft in drafts: + db.upsert_draft(draft) + + # Download text for any missing + missing_text = db.drafts_without_text() + if missing_text: + console.print(f" Downloading text for [bold]{len(missing_text)}[/] drafts...") + texts = fetcher.download_texts(missing_text) + for name, text in texts.items(): + draft = db.get_draft(name) + if draft: + draft.full_text = text + db.upsert_draft(draft) + finally: + fetcher.close() + + new_count = db.count_drafts() - existing_count + stats["new_drafts_found"] = max(new_count, 0) + console.print(f" New drafts found: [bold green]{stats['new_drafts_found']}[/]") + + # Analyze unrated drafts + if analyze: + unrated = db.unrated_drafts(limit=200) + if unrated: + console.print(f" Analyzing [bold]{len(unrated)}[/] unrated drafts...") + analyzer = Analyzer(cfg, db) + count = analyzer.rate_all_unrated(limit=200) + stats["drafts_analyzed"] = count + console.print(f" Analyzed: [bold green]{count}[/]") + + # Embed missing drafts + if embed: + missing_embed = db.drafts_without_embeddings(limit=500) + if missing_embed: + console.print(f" Embedding [bold]{len(missing_embed)}[/] drafts...") + embedder = Embedder(cfg, db) + count = embedder.embed_all_missing() + stats["drafts_embedded"] = count + console.print(f" Embedded: [bold green]{count}[/]") + + # Extract ideas + if ideas: + missing_ideas = db.drafts_without_ideas(limit=500) + if missing_ideas: + console.print(f" Extracting ideas from [bold]{len(missing_ideas)}[/] drafts...") + analyzer = Analyzer(cfg, db) + count = analyzer.extract_all_ideas(limit=500, batch_size=5, cheap=True) + stats["ideas_extracted"] = count + console.print(f" Ideas extracted from: [bold green]{count}[/] drafts") + + db.complete_monitor_run(run_id, stats) + console.print("\n[bold green]Monitor run completed successfully[/]") + + except Exception as e: + db.fail_monitor_run(run_id, str(e)) + console.print(f"\n[bold red]Monitor run failed:[/] 
{e}") + raise + finally: + db.close() + + +@monitor.command("status") +def monitor_status(): + """Show monitoring status and recent runs.""" + cfg = _get_config() + db = Database(cfg) + + try: + runs = db.get_monitor_runs(limit=20) + last = db.get_last_successful_run() + + # Unprocessed counts + unrated = len(db.unrated_drafts(limit=9999)) + unembedded = len(db.drafts_without_embeddings(limit=9999)) + no_ideas = len(db.drafts_without_ideas(limit=9999)) + + console.print("\n[bold]Monitor Status[/]\n") + + if last: + console.print(f" Last successful run: [green]{last['completed_at']}[/]") + console.print(f" Duration: {last['duration_seconds']:.1f}s") + console.print(f" New drafts: {last['new_drafts_found']}") + else: + console.print(" [yellow]No successful runs yet[/]") + + console.print(f"\n[bold]Unprocessed[/]") + console.print(f" Unrated: [{'yellow' if unrated > 0 else 'green'}]{unrated}[/]") + console.print(f" Unembedded: [{'yellow' if unembedded > 0 else 'green'}]{unembedded}[/]") + console.print(f" No ideas: [{'yellow' if no_ideas > 0 else 'green'}]{no_ideas}[/]") + + if runs: + console.print(f"\n[bold]Recent Runs[/] ({len(runs)} total)\n") + table = Table() + table.add_column("#", justify="right", width=4) + table.add_column("Started", width=20) + table.add_column("Duration", justify="right", width=8) + table.add_column("Status", width=10) + table.add_column("New", justify="right", width=5) + table.add_column("Analyzed", justify="right", width=8) + table.add_column("Embedded", justify="right", width=8) + table.add_column("Ideas", justify="right", width=6) + for r in runs: + status_style = {"completed": "green", "failed": "red", "running": "yellow"}.get(r["status"], "dim") + table.add_row( + str(r["id"]), + r["started_at"][:19] if r["started_at"] else "", + f"{r['duration_seconds']:.1f}s" if r["duration_seconds"] else "-", + f"[{status_style}]{r['status']}[/{status_style}]", + str(r["new_drafts_found"]), + str(r["drafts_analyzed"]), + str(r["drafts_embedded"]), 
+ str(r["ideas_extracted"]), + ) + console.print(table) + finally: + db.close() + + +# ── auto ───────────────────────────────────────────────────────────────────── + + +@click.command("auto") +@click.option("--cost-limit", default=2.0, help="Auto-approve operations under this USD amount (default: $2)") +@click.option("--yes", "-y", is_flag=True, help="Skip all confirmation prompts") +@click.option("--dry-run", is_flag=True, help="Show what would be done without doing it") +@click.option("--source", "-s", default=None, help="Limit to specific source (ietf,w3c,etsi,iso,itu)") +def auto(cost_limit: float, yes: bool, dry_run: bool, source: str | None): + """Auto-heal: fetch, analyze, embed, extract ideas, and update gaps. + + Automatically processes all unrated, unembedded, and idea-less drafts + across all sources. Uses cheap models (Haiku) for bulk operations. + Operations estimated above --cost-limit require confirmation. + + Examples: + + ietf auto # run full pipeline, auto-approve under $2 + + ietf auto --dry-run # show plan without executing + + ietf auto -s iso # only process ISO drafts + + ietf auto --cost-limit 5 # raise approval threshold to $5 + + ietf auto -y # skip all prompts (for cron) + """ + cfg = Config.load() + db = Database(cfg) + + try: + _auto_heal(cfg, db, cost_limit=cost_limit, yes=yes, dry_run=dry_run, source_filter=source) + finally: + db.close() + + +def _estimate_cost(n_drafts: int, operation: str) -> float: + """Estimate USD cost for an operation. 
Conservative estimates.""" + costs = { + "analyze_cheap": n_drafts * 0.0005, + "analyze_quality": n_drafts * 0.005, + "ideas_cheap": n_drafts * 0.001, + "ideas_quality": n_drafts * 0.008, + "gaps": 0.05, + "embed": 0.0, + "authors": 0.0, + "fetch": 0.0, + } + return costs.get(operation, 0.0) + + +def _auto_heal(cfg, db, cost_limit: float, yes: bool, dry_run: bool, source_filter: str | None): + """Run the full auto-heal pipeline.""" + import time as _time + + from rich.panel import Panel + + steps: list[dict] = [] + total_cost = 0.0 + + # ── Step 1: Fetch new drafts from all sources ── + sources = [source_filter] if source_filter else cfg.observatory_sources + steps.append({ + "name": f"Fetch new drafts from {', '.join(sources)}", + "sources": sources, + "cost": 0.0, + "action": "fetch", + }) + + # ── Step 2: Analyze unrated drafts ── + unrated = db.unrated_drafts(limit=10000) + if source_filter: + unrated = [d for d in unrated if (d.source or "ietf") == source_filter] + n_unrated = len(unrated) + analyze_cost = _estimate_cost(n_unrated, "analyze_cheap") + steps.append({ + "name": f"Analyze {n_unrated} unrated drafts (Haiku)", + "count": n_unrated, + "cost": analyze_cost, + "action": "analyze", + }) + total_cost += analyze_cost + + # ── Step 3: Fetch authors ── + missing_authors = db.conn.execute( + "SELECT COUNT(*) FROM drafts WHERE name NOT IN (SELECT DISTINCT draft_name FROM draft_authors)" + ).fetchone()[0] + steps.append({ + "name": f"Fetch authors for {missing_authors} drafts", + "count": missing_authors, + "cost": 0.0, + "action": "authors", + }) + + # ── Step 4: Embed missing drafts ── + missing_embed = db.drafts_without_embeddings(limit=10000) + if source_filter: + source_names = {row[0] for row in db.conn.execute( + "SELECT name FROM drafts WHERE source = ?", (source_filter,) + ).fetchall()} + missing_embed = [n for n in missing_embed if n in source_names] + n_embed = len(missing_embed) + steps.append({ + "name": f"Embed {n_embed} drafts (Ollama, free)", + 
"count": n_embed, + "cost": 0.0, + "action": "embed", + }) + + # ── Step 5: Extract ideas ── + missing_ideas = db.drafts_without_ideas(limit=10000) + if source_filter: + if not source_names: + source_names = {row[0] for row in db.conn.execute( + "SELECT name FROM drafts WHERE source = ?", (source_filter,) + ).fetchall()} + missing_ideas = [n for n in missing_ideas if n in source_names] + n_ideas = len(missing_ideas) + ideas_cost = _estimate_cost(n_ideas, "ideas_cheap") + steps.append({ + "name": f"Extract ideas from {n_ideas} drafts (Haiku)", + "count": n_ideas, + "cost": ideas_cost, + "action": "ideas", + }) + total_cost += ideas_cost + + # ── Step 6: Refresh gaps ── + gap_cost = _estimate_cost(0, "gaps") + steps.append({ + "name": "Refresh gap analysis", + "cost": gap_cost, + "action": "gaps", + }) + total_cost += gap_cost + + # ── Show plan ── + plan_lines = [] + for s in steps: + count = s.get("count", 1) + if count == 0: + plan_lines.append(f" [dim]SKIP[/] {s['name']}") + else: + cost_str = f" [yellow]~${s['cost']:.2f}[/]" if s["cost"] > 0 else "" + plan_lines.append(f" [green]RUN[/] {s['name']}{cost_str}") + + auto_approved = total_cost <= cost_limit + plan_lines.append(f"\n [bold]Estimated total cost: ${total_cost:.2f}[/]") + if auto_approved: + plan_lines.append(f" [green]Auto-approved (under ${cost_limit:.2f} limit)[/]") + else: + plan_lines.append(f" [yellow]Requires approval (over ${cost_limit:.2f} limit)[/]") + + console.print(Panel("\n".join(plan_lines), title="Auto-Heal Plan")) + + if dry_run: + console.print("[bold yellow]DRY RUN[/] — no changes made.") + return + + # ── Approval ── + if not auto_approved and not yes: + if not click.confirm(f"Estimated cost ${total_cost:.2f} exceeds ${cost_limit:.2f} limit. 
Proceed?"): + console.print("[yellow]Aborted.[/]") + return + + # ── Execute ── + start = _time.time() + + for step in steps: + action = step["action"] + count = step.get("count", 0) + + if action == "fetch": + console.print(f"\n[bold cyan]>>> Fetching from {step['sources']}...[/]") + from ..sources import get_fetcher + from ..observatory import _doc_to_draft + for src_name in step["sources"]: + try: + fetcher = get_fetcher(src_name, cfg) + before = db.count_drafts() + results = fetcher.search(keywords=cfg.search_keywords) + for doc in results: + db.upsert_draft(_doc_to_draft(doc)) + after = db.count_drafts() + new = after - before + console.print(f" [{src_name}] +{new} new drafts") + fetcher.close() + except Exception as e: + console.print(f" [{src_name}] [red]Error: {e}[/]") + + elif action == "analyze" and count > 0: + console.print(f"\n[bold cyan]>>> Analyzing {count} drafts (Haiku)...[/]") + from ..analyzer import Analyzer + analyzer = Analyzer(cfg, db) + orig_model = cfg.claude_model + cfg.claude_model = cfg.claude_model_cheap + try: + done = analyzer.rate_all_unrated(limit=count) + console.print(f" Analyzed [bold green]{done}[/] drafts") + finally: + cfg.claude_model = orig_model + + elif action == "authors" and count > 0: + console.print(f"\n[bold cyan]>>> Fetching authors for {count} drafts...[/]") + from ..authors import AuthorNetwork + author_net = AuthorNetwork(cfg, db) + done = author_net.fetch_all_authors() + console.print(f" Fetched authors for [bold green]{done}[/] drafts") + + elif action == "embed" and count > 0: + console.print(f"\n[bold cyan]>>> Embedding {count} drafts (Ollama)...[/]") + from ..embeddings import Embedder + with Embedder(cfg, db) as embedder: + done = embedder.embed_all_missing() + console.print(f" Embedded [bold green]{done}[/] drafts") + + elif action == "ideas" and count > 0: + console.print(f"\n[bold cyan]>>> Extracting ideas from {count} drafts (Haiku)...[/]") + from ..analyzer import Analyzer + analyzer = Analyzer(cfg, db) 
+ done = analyzer.extract_all_ideas(limit=count, batch_size=5, cheap=True) + console.print(f" Extracted ideas from [bold green]{done}[/] drafts") + + elif action == "gaps": + console.print(f"\n[bold cyan]>>> Refreshing gap analysis...[/]") + from ..analyzer import Analyzer + analyzer = Analyzer(cfg, db) + gaps = analyzer.gap_analysis() + if gaps: + console.print(f" Found [bold green]{len(gaps)}[/] gaps") + + elapsed = _time.time() - start + console.print(f"\n[bold green]Auto-heal complete![/] ({elapsed:.1f}s, ~${total_cost:.2f})") + + # Show final counts + total = db.count_drafts() + rated = db.conn.execute("SELECT COUNT(*) FROM ratings").fetchone()[0] + embedded = db.conn.execute("SELECT COUNT(*) FROM embeddings").fetchone()[0] + idea_count = db.conn.execute("SELECT COUNT(*) FROM ideas").fetchone()[0] + gap_count = db.conn.execute("SELECT COUNT(*) FROM gaps").fetchone()[0] + console.print(f" Drafts: {total} | Rated: {rated} | Embedded: {embedded} | Ideas: {idea_count} | Gaps: {gap_count}") + + by_source = db.conn.execute( + "SELECT source, COUNT(*) FROM drafts GROUP BY source ORDER BY COUNT(*) DESC" + ).fetchall() + source_str = " | ".join(f"{s}: {c}" for s, c in by_source) + console.print(f" Sources: {source_str}") diff --git a/src/ietf_analyzer/commands/analysis.py b/src/ietf_analyzer/commands/analysis.py new file mode 100644 index 0000000..8bd8139 --- /dev/null +++ b/src/ietf_analyzer/commands/analysis.py @@ -0,0 +1,1412 @@ +"""Analysis, embedding, ideas, gaps, refs, trends, and related commands.""" + +from __future__ import annotations + +from pathlib import Path + +import click +from rich.table import Table + +from .common import console, pass_cfg_db, _get_config +from ..config import Config +from ..db import Database + + +def register(main): + """Register all analysis commands with the main CLI group.""" + main.add_command(analyze) + main.add_command(ask) + main.add_command(compare) + main.add_command(embed) + main.add_command(embed_ideas) + 
main.add_command(similar) + main.add_command(clusters) + main.add_command(ideas) + main.add_command(dedup_ideas) + main.add_command(gaps) + main.add_command(refs) + main.add_command(trends) + main.add_command(status) + main.add_command(revisions) + main.add_command(idea_overlap) + main.add_command(co_occurrence) + main.add_command(centrality) + + +# ── analyze ────────────────────────────────────────────────────────────────── + + +@click.command() +@click.argument("name", required=False) +@click.option("--all", "analyze_all", is_flag=True, help="Analyze all unrated drafts") +@click.option("--limit", "-n", default=50, help="Max drafts to analyze (with --all)") +@click.option("--retry-failed", is_flag=True, help="Re-analyze drafts that previously failed (clears cache)") +@click.option("--dry-run", is_flag=True, help="Show what would be analyzed without making changes") +@click.option("--pre-classify/--no-pre-classify", "pre_classify", default=False, + help="Pre-filter unrated drafts with local Ollama classifier before Claude") +@pass_cfg_db +def analyze(cfg, db, name: str | None, analyze_all: bool, limit: int, retry_failed: bool, dry_run: bool, pre_classify: bool): + """Analyze and rate drafts using Claude. + + Use --pre-classify to run the local Ollama classifier first, removing + irrelevant drafts before spending Claude tokens. Saves ~40% of API costs. + """ + from ..analyzer import Analyzer + + if dry_run: + if retry_failed: + unrated = db.unrated_drafts(limit=limit) + retryable = [] + for draft in unrated: + row = db.conn.execute( + "SELECT COUNT(*) FROM llm_cache WHERE draft_name = ?", + (draft.name,), + ).fetchone() + if row[0] > 0: + retryable.append(draft) + console.print(f"[bold yellow]DRY RUN[/]: Would retry [bold]{len(retryable)}[/] previously failed drafts") + for d in retryable[:20]: + console.print(f" - {d.name}") + if len(retryable) > 20: + console.print(f" ... 
and {len(retryable) - 20} more") + elif analyze_all: + unrated = db.unrated_drafts(limit=limit) + console.print(f"[bold yellow]DRY RUN[/]: Would analyze [bold]{len(unrated)}[/] unrated drafts") + for d in unrated[:20]: + console.print(f" - {d.name}: {d.title[:60]}") + if len(unrated) > 20: + console.print(f" ... and {len(unrated) - 20} more") + elif name: + existing = db.get_rating(name) + status = "re-analyze (already rated)" if existing else "analyze (not yet rated)" + console.print(f"[bold yellow]DRY RUN[/]: Would {status}: {name}") + else: + console.print("Provide a draft name or use --all") + return + + analyzer = Analyzer(cfg, db) + + if retry_failed: + # Find drafts that have cache entries but no ratings (failed analyses) + unrated = db.unrated_drafts(limit=limit) + retryable = [] + for draft in unrated: + # Check if there's a cache entry for this draft (it was attempted) + row = db.conn.execute( + "SELECT COUNT(*) FROM llm_cache WHERE draft_name = ?", + (draft.name,), + ).fetchone() + if row[0] > 0: + retryable.append(draft) + if not retryable: + console.print("No previously failed drafts to retry.") + else: + console.print(f"Retrying [bold]{len(retryable)}[/] previously failed drafts...") + count = 0 + for draft in retryable: + rating = analyzer.rate_draft(draft.name, use_cache=False) + if rating: + count += 1 + console.print(f"Successfully re-analyzed [bold green]{count}[/] of {len(retryable)} drafts") + elif analyze_all: + if pre_classify: + # Pre-filter with local Ollama classifier + try: + from ..classifier import Classifier + unrated = db.unrated_drafts(limit=limit) + if unrated: + console.print(f"\n[bold]Pre-classifying {len(unrated)} unrated drafts with Ollama...[/]") + clf = Classifier(cfg) + draft_dicts = [{"name": d.name, "title": d.title, "abstract": d.abstract} for d in unrated] + relevant, irrelevant = clf.classify_batch(draft_dicts, verbose=True) + clf.close() + + if irrelevant: + console.print(f"\n Removing [red]{len(irrelevant)}[/] 
irrelevant drafts from DB...") + for d in irrelevant: + db.conn.execute("DELETE FROM drafts WHERE name = ?", (d["name"],)) + db.conn.commit() + console.print(f" Removed. {len(relevant)} drafts remain for Claude analysis.\n") + except Exception as e: + console.print(f"[yellow]Classifier unavailable ({e}), analyzing all[/yellow]") + + count = analyzer.rate_all_unrated(limit=limit) + console.print(f"Analyzed [bold green]{count}[/] drafts") + elif name: + rating = analyzer.rate_draft(name) + if rating: + console.print(f"\n[bold green]Rating for {name}:[/]") + console.print(f" Score: {rating.composite_score:.1f}") + console.print(f" Summary: {rating.summary}") + console.print(f" Novelty={rating.novelty} Maturity={rating.maturity} " + f"Overlap={rating.overlap} Momentum={rating.momentum} " + f"Relevance={rating.relevance}") + else: + console.print("[red]Analysis failed[/]") + else: + console.print("Provide a draft name or use --all") + + +# ── ask ────────────────────────────────────────────────────────────────────── + + +@click.command() +@click.argument("question") +@click.option("--top", "-n", default=5, help="Number of source drafts to use") +@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)") +@pass_cfg_db +def ask(cfg, db, question: str, top: int, cheap: bool): + """Ask a natural language question about the drafts. + + Examples: + ietf ask "Which drafts address agent authentication?" + ietf ask "What are the competing approaches to agent delegation?" --top 10 + ietf ask "How do safety mechanisms work?" 
--cheap + """ + from ..search import HybridSearch + + searcher = HybridSearch(cfg, db) + console.print(f"\n[dim]Searching for relevant drafts...[/]") + result = searcher.ask(question, top_k=top, cheap=cheap) + + # Display the answer + console.print() + console.print("[bold cyan]Answer[/]") + console.print("[dim]" + "-" * 60 + "[/]") + console.print(result["answer"]) + console.print() + + # Display source drafts table + if result["sources"]: + table = Table(title="Source Drafts") + table.add_column("#", style="dim", width=3) + table.add_column("Draft", style="cyan", max_width=50) + table.add_column("Title", max_width=45) + table.add_column("Match", width=10) + table.add_column("Score", justify="right", width=8) + + for i, src in enumerate(result["sources"], 1): + score_str = f"{src['similarity']:.3f}" if src.get("similarity") else "-" + table.add_row( + str(i), + src["name"], + src["title"][:45], + src.get("match_type", ""), + score_str, + ) + + console.print(table) + + +# ── compare ────────────────────────────────────────────────────────────────── + + +@click.command() +@click.argument("names", nargs=-1, required=True) +@pass_cfg_db +def compare(cfg, db, names: tuple[str, ...]): + """Compare multiple drafts for overlap and unique contributions.""" + from ..analyzer import Analyzer + + analyzer = Analyzer(cfg, db) + + result = analyzer.compare_drafts(list(names)) + if "error" in result: + console.print(f"[red]{result['error']}[/]") + else: + console.print(f"\n[bold cyan]Comparison of {len(result['drafts'])} drafts[/]") + console.print("[dim]" + "-" * 60 + "[/]") + console.print(result["text"]) + + +# ── embed ──────────────────────────────────────────────────────────────────── + + +@click.command() +@click.option("--dry-run", is_flag=True, help="Show what would be embedded without making changes") +@pass_cfg_db +def embed(cfg, db, dry_run: bool): + """Generate embeddings for all drafts (requires Ollama).""" + if dry_run: + missing = 
db.drafts_without_embeddings(limit=10000) + console.print(f"[bold yellow]DRY RUN[/]: Would embed [bold]{len(missing)}[/] drafts") + for name in missing[:20]: + console.print(f" - {name}") + if len(missing) > 20: + console.print(f" ... and {len(missing) - 20} more") + return + + from ..embeddings import Embedder + + embedder = Embedder(cfg, db) + count = embedder.embed_all_missing() + console.print(f"Embedded [bold green]{count}[/] drafts") + + +# ── embed-ideas ────────────────────────────────────────────────────────────── + + +@click.command("embed-ideas") +@click.option("--limit", default=0, help="Max ideas to embed (0=all)") +@click.option("--batch-size", default=50, help="Batch size for Ollama") +@click.option("--dry-run", is_flag=True, help="Show what would be embedded without making changes") +@pass_cfg_db +def embed_ideas(cfg, db, limit: int, batch_size: int, dry_run: bool): + """Generate embeddings for extracted ideas via Ollama.""" + missing = db.ideas_without_embeddings(limit=limit if limit > 0 else 10000) + if not missing: + console.print("All ideas already have embeddings.") + return + + if dry_run: + console.print(f"[bold yellow]DRY RUN[/]: Would embed [bold]{len(missing)}[/] ideas in batches of {batch_size}") + for idea in missing[:20]: + console.print(f" - [{idea.get('id', '?')}] {idea['title'][:60]}") + if len(missing) > 20: + console.print(f" ... 
and {len(missing) - 20} more") + return + + import numpy as np + import ollama as ollama_lib + from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn + + client = ollama_lib.Client(host=cfg.ollama_url) + total = len(missing) + console.print(f"Embedding [bold]{total}[/] ideas in batches of {batch_size}...") + + count = 0 + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + MofNCompleteColumn(), + console=console, + ) as progress: + task = progress.add_task("Embedding ideas...", total=total) + for start in range(0, total, batch_size): + batch = missing[start:start + batch_size] + texts = [f"{idea['title']}. {idea['description']}" for idea in batch] + try: + resp = client.embed(model=cfg.ollama_embed_model, input=texts) + for i, idea in enumerate(batch): + vec = np.array(resp["embeddings"][i], dtype=np.float32) + db.store_idea_embedding(idea["id"], cfg.ollama_embed_model, vec) + count += 1 + progress.advance(task) + except Exception as e: + console.print(f"[red]Batch failed: {e}[/]") + for _ in batch: + progress.advance(task) + + console.print(f"Embedded [bold green]{count}[/] ideas") + + +# ── similar ────────────────────────────────────────────────────────────────── + + +@click.command() +@click.argument("name") +@click.option("--top", "-n", default=10, help="Number of similar drafts to show") +@pass_cfg_db +def similar(cfg, db, name: str, top: int): + """Find drafts most similar to a given draft.""" + from ..embeddings import Embedder + + embedder = Embedder(cfg, db) + results = embedder.find_similar(name, top_n=top) + if not results: + console.print(f"[yellow]No similar drafts found (need embeddings — run `ietf embed` first)[/]") + return + + table = Table(title=f"Drafts similar to {name}") + table.add_column("Similarity", justify="right", width=10) + table.add_column("Draft", style="cyan") + table.add_column("Title") + + for sim_name, score in results: + draft = 
db.get_draft(sim_name) + title = draft.title[:60] if draft else "" + table.add_row(f"{score:.3f}", sim_name, title) + + console.print(table) + + +# ── clusters ───────────────────────────────────────────────────────────────── + + +@click.command() +@click.option("--threshold", "-t", default=0.85, help="Similarity threshold for clustering") +@pass_cfg_db +def clusters(cfg, db, threshold: float): + """Find clusters of highly similar (potentially overlapping) drafts.""" + from ..embeddings import Embedder + + embedder = Embedder(cfg, db) + cluster_list = embedder.find_clusters(threshold=threshold) + if not cluster_list: + console.print("No clusters found at this threshold.") + return + + console.print(f"\n[bold]Found {len(cluster_list)} clusters[/] (threshold={threshold})\n") + for i, cluster in enumerate(cluster_list, 1): + console.print(f"[bold cyan]Cluster {i}[/] ({len(cluster)} drafts):") + for name in cluster: + draft = db.get_draft(name) + title = draft.title[:60] if draft else "" + console.print(f" - {name} [dim]{title}[/]") + console.print() + + +# ── ideas ─────────────────────────────────────────────────────────────── + + +@click.group(invoke_without_command=True) +@click.option("--name", default=None, help="Extract ideas from a specific draft") +@click.option("--all", "extract_all", is_flag=True, help="Extract ideas from all drafts") +@click.option("--limit", "-n", default=50, help="Max drafts to extract (with --all)") +@click.option("--batch", "-b", default=5, help="Drafts per API call (default 5, set 1 for individual)") +@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)") +@click.option("--reextract", is_flag=True, help="Clear existing ideas and re-extract with current prompt") +@click.option("--draft", "reextract_draft", default=None, help="Specific draft to re-extract (with --reextract)") +@click.option("--dry-run", is_flag=True, help="Show what would be extracted without making changes") +@click.pass_context 
+def ideas(ctx, name: str | None, extract_all: bool, limit: int, batch: int, cheap: bool, + reextract: bool, reextract_draft: str | None, dry_run: bool): + """Extract, score, and filter technical ideas from drafts.""" + if ctx.invoked_subcommand is not None: + return + + cfg = ctx.obj["cfg"] + db = ctx.obj["db"] + + if dry_run: + if reextract: + existing = db.idea_count() + if reextract_draft: + ideas_for = db.get_ideas_for_draft(reextract_draft) + console.print(f"[bold yellow]DRY RUN[/]: Would clear [bold]{len(ideas_for)}[/] ideas for {reextract_draft} and re-extract") + else: + console.print(f"[bold yellow]DRY RUN[/]: Would clear all [bold]{existing}[/] ideas and re-extract from up to {limit} drafts") + elif extract_all: + missing = db.drafts_without_ideas(limit=limit) + console.print(f"[bold yellow]DRY RUN[/]: Would extract ideas from [bold]{len(missing)}[/] drafts (batch={batch}, {'cheap' if cheap else 'quality'})") + for d in missing[:20]: + console.print(f" - {d}") + if len(missing) > 20: + console.print(f" ... 
@ideas.command("score")
@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)")
@click.option("--batch", "-b", default=20, help="Ideas per API call (default 20)")
@pass_cfg_db
def ideas_score(cfg, db, cheap: bool, batch: int):
    """Score ideas for novelty (1=generic, 5=genuinely novel)."""
    from ..analyzer import Analyzer

    outcome = Analyzer(cfg, db).score_idea_novelty(batch_size=batch, cheap=cheap)
    if outcome["scored_count"] == 0:
        # Nothing was scored, so no distribution is printed.
        return

    # Mapping of novelty score -> number of ideas holding that score.
    histogram = db.idea_score_distribution()
    peak = max(histogram.values()) if histogram else 1

    table = Table(title="Novelty Score Distribution")
    table.add_column("Score", style="bold", justify="center")
    table.add_column("Label", style="dim")
    table.add_column("Count", justify="right")
    table.add_column("Bar", min_width=30)

    level_names = (
        "Generic building block",
        "Obvious extension",
        "Useful but expected",
        "Interesting contribution",
        "Genuinely novel",
    )
    for level, label in enumerate(level_names, 1):
        occurrences = histogram.get(level, 0)
        # Bars are scaled so the most frequent score spans 30 characters.
        filled = int(30 * occurrences / peak) if peak > 0 else 0
        table.add_row(
            str(level), label, str(occurrences),
            "[green]" + "#" * filled + "[/]",
        )

    scored_total = sum(histogram.values())
    still_unscored = db.idea_count() - scored_total
    console.print(table)
    console.print(
        f"\nTotal scored: [bold]{scored_total}[/] | Unscored: {still_unscored} | "
        f"Avg: [bold]{outcome['avg_score']:.1f}[/]"
    )
@ideas.command("convergence")
@click.option("--threshold", "-t", default=0.75, help="SequenceMatcher ratio threshold (0-1)")
@click.option("--limit", "-n", default=50, help="Max results to show")
@click.option("--list-all", is_flag=True, help="List all convergent idea pairs")
@pass_cfg_db
def ideas_convergence(cfg, db, threshold: float, limit: int, list_all: bool):
    """Find cross-org convergent ideas using SequenceMatcher fuzzy matching.

    Groups ideas by fuzzy title similarity, then filters to ideas where
    2+ distinct organizations independently propose similar concepts.
    """
    from collections import defaultdict
    from difflib import SequenceMatcher
    from ..orgs import normalize_org

    all_ideas = db.all_ideas()
    if not all_ideas:
        console.print("[yellow]No ideas extracted yet. Run `ietf ideas --all` first.[/]")
        return

    # draft name -> normalized organizations of its authors.
    # Empty normalizations and "Independent" are left out.
    orgs_by_draft: dict[str, set[str]] = defaultdict(set)
    affiliation_rows = db.conn.execute(
        """SELECT da.draft_name, a.affiliation
           FROM draft_authors da
           JOIN authors a ON da.person_id = a.person_id
           WHERE a.affiliation != ''"""
    ).fetchall()
    for row in affiliation_rows:
        org_name = normalize_org(row["affiliation"])
        if not org_name or org_name == "Independent":
            continue
        orgs_by_draft[row["draft_name"]].add(org_name)

    # Greedy single pass: an idea joins the first group whose canonical
    # (first-seen, lower-cased) title is similar enough; otherwise it
    # starts a new group of its own.
    groups: list[dict] = []
    for idea in all_ideas:
        normalized_title = idea["title"].lower().strip()
        source_draft = idea["draft_name"]
        for candidate in groups:
            similarity = SequenceMatcher(None, normalized_title, candidate["canonical"]).ratio()
            if similarity < threshold:
                continue
            candidate["ideas"].append(idea)
            candidate["drafts"].add(source_draft)
            candidate["orgs"].update(orgs_by_draft.get(source_draft, set()))
            break
        else:
            groups.append({
                "canonical": normalized_title,
                "title": idea["title"],
                "ideas": [idea],
                "drafts": {source_draft},
                "orgs": set(orgs_by_draft.get(source_draft, set())),
            })

    # Keep groups backed by at least two organizations; most orgs first,
    # then most drafts.
    convergent = sorted(
        (grp for grp in groups if len(grp["orgs"]) >= 2),
        key=lambda grp: (-len(grp["orgs"]), -len(grp["drafts"])),
    )

    console.print("\n[bold]Cross-Organization Idea Convergence[/]")
    console.print(f"Threshold: {threshold} | {len(all_ideas)} ideas | "
                  f"{len(groups)} unique clusters | "
                  f"[bold green]{len(convergent)}[/] cross-org convergent\n")

    if not convergent:
        console.print("[yellow]No cross-org convergent ideas at this threshold.[/]")
        return

    if list_all:
        show_n = len(convergent)
    else:
        show_n = min(limit, len(convergent))

    table = Table(title=f"Cross-Org Convergent Ideas (showing {show_n} of {len(convergent)})")
    table.add_column("#", justify="right", width=4)
    table.add_column("Idea", style="bold", max_width=40)
    table.add_column("Orgs", justify="right", width=5)
    table.add_column("Drafts", justify="right", width=6)
    table.add_column("Organizations", max_width=50)

    for position, grp in enumerate(convergent[:show_n], 1):
        org_total = len(grp["orgs"])
        # Show at most five organization names, then a "+N" overflow marker.
        org_summary = ", ".join(sorted(grp["orgs"])[:5])
        if org_total > 5:
            org_summary += f" +{org_total - 5}"
        table.add_row(
            str(position), grp["title"][:40], str(org_total),
            str(len(grp["drafts"])), org_summary,
        )

    console.print(table)
    console.print(f"\n[bold]Summary[/]: {len(convergent)} cross-org convergent ideas "
                  f"out of {len(groups)} unique clusters "
                  f"({100 * len(convergent) / len(groups):.0f}%)")
def _listable_gaps(gap_items):
    """Return *gap_items* when it is non-empty and its first entry is a dict, else ``[]``."""
    if gap_items and isinstance(gap_items[0], dict):
        return gap_items
    return []


def _gap_severity(gap) -> str:
    """Return the gap's severity in upper case, using "medium" when it is missing or empty."""
    return (gap.get("severity") or "medium").upper()


@click.command()
@click.option("--refresh", is_flag=True, help="Re-run gap analysis even if cached")
@click.option("--dry-run", is_flag=True, help="Show existing gaps without running analysis")
@pass_cfg_db
def gaps(cfg, db, refresh: bool, dry_run: bool):
    """Identify gaps in the current draft landscape using Claude."""
    # cfg / db are supplied by @pass_cfg_db from the Click context.
    # refresh: re-run the analysis even when gaps are already stored.
    # dry_run: only list what is stored; never calls the analyzer or the reporter.
    from ..reports import Reporter

    if dry_run:
        existing = db.all_gaps()
        console.print(f"[bold yellow]DRY RUN[/]: {len(existing)} gaps currently identified")
        if refresh:
            console.print(" Would re-run gap analysis via Claude API")
        for i, gap in enumerate(_listable_gaps(existing), 1):
            console.print(f" [bold]{i}. {gap['topic']}[/] [{_gap_severity(gap)}]")
        return

    from ..analyzer import Analyzer
    analyzer = Analyzer(cfg, db)
    reporter = Reporter(cfg, db)

    existing = db.all_gaps()
    if existing and not refresh:
        console.print(f"[bold]{len(existing)} gaps[/] already identified (use --refresh to re-run)\n")
    else:
        gap_list = analyzer.gap_analysis()
        console.print(f"\nIdentified [bold green]{len(gap_list)}[/] gaps\n")
        existing = gap_list

    # The analysis may return no gaps at all. The previous code indexed
    # existing[0] unconditionally here and raised IndexError in that case.
    for i, gap in enumerate(_listable_gaps(existing), 1):
        console.print(f" [bold]{i}. {gap['topic']}[/] [{_gap_severity(gap)}]")
        console.print(f" {gap['description'][:100]}\n")

    path = reporter.gaps_report()
    console.print(f"Report saved: [bold]{path}[/]")
Run `ietf refs --extract` first.[/]") + return + + console.print(f"\n[bold]Reference Stats[/]: {stats['drafts_with_refs']} drafts, " + f"{stats['total_refs']} total refs " + f"({stats['rfc_refs']} RFC, {stats['draft_refs']} draft, {stats['bcp_refs']} BCP)\n") + + top_items = db.top_referenced(ref_type=ref_type, limit=top) + table = Table(title=f"Top {len(top_items)} Most-Referenced {ref_type.upper()}s") + table.add_column("#", justify="right", width=4) + table.add_column("Reference", style="cyan", width=30) + table.add_column("Count", justify="right", width=6) + table.add_column("Referenced By", max_width=60) + for rank, (rid, cnt, drafts) in enumerate(top_items, 1): + label = f"RFC {rid}" if ref_type == "rfc" else rid + draft_list = ", ".join(d.replace("draft-", "")[:25] for d in drafts[:4]) + if len(drafts) > 4: + draft_list += f" +{len(drafts) - 4}" + table.add_row(str(rank), label, str(cnt), draft_list) + console.print(table) + + +def _extract_refs(text: str, self_name: str) -> list[tuple[str, str]]: + """Extract RFC, draft, and BCP references from draft full text.""" + import re + + refs_set: set[tuple[str, str]] = set() + + # RFC references: RFC 1234, RFC1234, [RFC1234], [RFC 1234] + for m in re.finditer(r'\[?RFC\s*(\d{4,})\]?', text, re.IGNORECASE): + refs_set.add(("rfc", m.group(1))) + + # BCP references: BCP 14, BCP14, [BCP14] + for m in re.finditer(r'\[?BCP\s*(\d+)\]?', text, re.IGNORECASE): + refs_set.add(("bcp", m.group(1))) + + # Draft references: draft-ietf-something-name + for m in re.finditer(r'(draft-[\w][\w-]+[\w])', text, re.IGNORECASE): + draft_ref = m.group(1).lower() + # Strip trailing version numbers (e.g., draft-foo-bar-03 -> draft-foo-bar) + draft_ref = re.sub(r'-\d{2,}$', '', draft_ref) + # Don't reference self + if draft_ref != self_name: + refs_set.add(("draft", draft_ref)) + + return list(refs_set) + + +# ── trends ───────────────────────────────────────────────────────────── + + +@click.command() +@click.option("--category", "-c", 
help="Filter to a specific category") +@click.option("--json-out", is_flag=True, help="Also output JSON for visualization") +@pass_cfg_db +def trends(cfg, db, category: str | None, json_out: bool): + """Show category trend analysis — monthly breakdown with growth rates.""" + import json as json_mod + from collections import defaultdict + + pairs = db.drafts_with_ratings(limit=500) + all_drafts = db.list_drafts(limit=500, order_by="time ASC") + + if not pairs: + console.print("[yellow]No rated drafts. Run `ietf analyze --all` first.[/]") + return + + # Build rating lookup + rating_map = {draft.name: rating for draft, rating in pairs} + + # Collect monthly counts per category + monthly: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int)) + all_cats: set[str] = set() + for d in all_drafts: + month = d.time[:7] if d.time else "unknown" + r = rating_map.get(d.name) + if r: + for c in r.categories: + if category and c.lower() != category.lower(): + continue + monthly[month][c] += 1 + all_cats.add(c) + + if not all_cats: + console.print(f"[yellow]No data for category '{category}'[/]" if category + else "[yellow]No category data found.[/]") + return + + months = sorted(m for m in monthly.keys() if m != "unknown") + cats = sorted(all_cats) + + # Compute cumulative and growth + rows_data = [] + cumulative: dict[str, int] = defaultdict(int) + prev_count: dict[str, int] = defaultdict(int) + + for month in months: + for cat in cats: + count = monthly[month].get(cat, 0) + cumulative[cat] += count + growth = 0.0 + if prev_count[cat] > 0: + growth = ((count - prev_count[cat]) / prev_count[cat]) * 100 + rows_data.append({ + "month": month, + "category": cat, + "count": count, + "cumulative": cumulative[cat], + "growth_rate": growth, + }) + prev_count[cat] = count + + # Display summary table + console.print(f"\n[bold]Category Trends[/] — {len(months)} months, {len(cats)} categories\n") + + # Show per-category totals and recent momentum + table = Table(title="Category 
Growth Summary") + table.add_column("Category", style="cyan") + table.add_column("Total", justify="right", width=6) + table.add_column("Last 3mo", justify="right", width=8) + table.add_column("Prev 3mo", justify="right", width=8) + table.add_column("Growth", justify="right", width=8) + + recent_months = months[-3:] if len(months) >= 3 else months + prev_months = months[-6:-3] if len(months) >= 6 else [] + + for cat in cats: + total = cumulative[cat] + recent = sum(monthly[m].get(cat, 0) for m in recent_months) + prev = sum(monthly[m].get(cat, 0) for m in prev_months) if prev_months else 0 + if prev > 0: + growth_str = f"{((recent - prev) / prev) * 100:+.0f}%" + elif recent > 0: + growth_str = "new" + else: + growth_str = "-" + table.add_row(cat, str(total), str(recent), str(prev) if prev_months else "-", growth_str) + + console.print(table) + + # Monthly detail + console.print(f"\n[bold]Monthly Breakdown[/]\n") + detail_table = Table() + detail_table.add_column("Month", style="dim", width=8) + for cat in cats: + detail_table.add_column(cat[:14], justify="right", width=max(6, len(cat[:14]))) + detail_table.add_column("Total", justify="right", width=6, style="bold") + + for month in months: + row = [month] + total = 0 + for cat in cats: + c = monthly[month].get(cat, 0) + total += c + row.append(str(c) if c else "") + row.append(str(total)) + detail_table.add_row(*row) + + console.print(detail_table) + + # Optional JSON output + if json_out: + out_path = Path(cfg.data_dir) / "reports" / "trends.json" + out_path.write_text(json_mod.dumps(rows_data, indent=2)) + console.print(f"\nJSON saved: [bold]{out_path}[/]") + + +# ── status ────────────────────────────────────────────────────────────── + + +@click.command() +@click.option("--wg", "-w", help="Filter to a specific WG") +def status(wg: str | None): + """Show WG adoption status — which drafts have institutional backing.""" + import json as json_mod + from collections import defaultdict + + cfg = _get_config() + db = 
Database(cfg) + + try: + all_status = db.draft_adoption_status() + total = len(all_status) + adopted = [s for s in all_status if s["wg_adopted"]] + individual = [s for s in all_status if not s["wg_adopted"]] + irtf = [s for s in all_status if s["stream"] == "irtf"] + + console.print(f"\n[bold]Draft Adoption Status[/]: {total} total drafts\n") + console.print(f" WG-adopted (draft-ietf-*): [bold green]{len(adopted)}[/] ({len(adopted)/total*100:.1f}%)") + console.print(f" IRTF (draft-irtf-*): [bold blue]{len(irtf)}[/]") + console.print(f" Individual: [bold]{len(individual)}[/] ({len(individual)/total*100:.1f}%)\n") + + # WG breakdown + wg_groups: dict[str, list[dict]] = defaultdict(list) + for s in adopted: + wg_groups[s["wg_name"]].append(s) + + if wg: + # Show drafts for a specific WG + wg_drafts = wg_groups.get(wg, []) + if not wg_drafts: + console.print(f"[yellow]No WG-adopted drafts for '{wg}'[/]") + return + table = Table(title=f"WG '{wg}' Drafts ({len(wg_drafts)})") + table.add_column("Date", style="dim", width=10) + table.add_column("Name", style="cyan") + table.add_column("Title", max_width=50) + for s in sorted(wg_drafts, key=lambda x: x["time"] or ""): + table.add_row(s["time"][:10] if s["time"] else "", s["name"], s["title"][:50]) + console.print(table) + else: + # Show WG summary + table = Table(title=f"Working Groups with AI/Agent Drafts ({len(wg_groups)} WGs)") + table.add_column("#", justify="right", width=4) + table.add_column("WG", style="cyan", width=12) + table.add_column("Drafts", justify="right", width=6) + table.add_column("Draft Names", max_width=60) + + for rank, (wg_name, drafts) in enumerate( + sorted(wg_groups.items(), key=lambda x: -len(x[1])), 1 + ): + draft_list = ", ".join(d["name"].replace("draft-ietf-", "")[:30] for d in drafts[:4]) + if len(drafts) > 4: + draft_list += f" +{len(drafts) - 4}" + table.add_row(str(rank), wg_name, str(len(drafts)), draft_list) + + console.print(table) + + # Score comparison + pairs = 
db.drafts_with_ratings(limit=500) + if pairs: + adopted_names = {s["name"] for s in adopted} + adopted_scores = [r.composite_score for d, r in pairs if d.name in adopted_names] + individual_scores = [r.composite_score for d, r in pairs if d.name not in adopted_names] + + if adopted_scores and individual_scores: + console.print(f"\n[bold]Score Comparison[/]:") + avg_adopted = sum(adopted_scores) / len(adopted_scores) + avg_individual = sum(individual_scores) / len(individual_scores) + console.print(f" WG-adopted avg score: [bold green]{avg_adopted:.2f}[/] ({len(adopted_scores)} rated)") + console.print(f" Individual avg score: [bold]{avg_individual:.2f}[/] ({len(individual_scores)} rated)") + + # Check gap coverage + gaps_list = db.all_gaps() + if gaps_list: + gap_cats = {g["category"].lower() for g in gaps_list} + adopted_cats: set[str] = set() + for d, r in pairs: + if d.name in adopted_names: + for c in r.categories: + adopted_cats.add(c.lower()) + + covered = gap_cats & adopted_cats + uncovered = gap_cats - adopted_cats + console.print(f"\n[bold]Gap Coverage by WG-Adopted Work[/]:") + console.print(f" Gap categories with WG backing: {len(covered)}") + if covered: + console.print(f" {', '.join(sorted(covered))}") + console.print(f" Gap categories without WG backing: {len(uncovered)}") + if uncovered: + console.print(f" [yellow]{', '.join(sorted(uncovered))}[/]") + + finally: + db.close() + + +# ── revisions ────────────────────────────────────────────────────────── + + +@click.command() +@click.option("--org", "-o", help="Filter to a specific organization") +@click.option("--top", "-n", default=20, help="Number of orgs to show") +def revisions(org: str | None, top: int): + """Analyze draft revision velocity — who iterates vs fire-and-forget.""" + from collections import defaultdict + from ..orgs import normalize_org + + cfg = _get_config() + db = Database(cfg) + + try: + all_revs = db.revision_velocity() + total = len(all_revs) + at_00 = sum(1 for r in all_revs 
if r["rev_int"] == 0) + avg_rev = sum(r["rev_int"] for r in all_revs) / total if total else 0 + + console.print(f"\n[bold]Draft Revision Velocity[/]: {total} drafts\n") + console.print(f" Average revision: [bold]{avg_rev:.2f}[/]") + console.print(f" At -00 (first draft): [bold]{at_00}[/] ({at_00/total*100:.1f}%)") + console.print(f" Iterated (rev >= 01): [bold]{total - at_00}[/] ({(total-at_00)/total*100:.1f}%)") + console.print(f" Highly iterated (rev >= 05): [bold]{sum(1 for r in all_revs if r['rev_int'] >= 5)}[/]\n") + + # Get per-org stats using normalized org names + aff_rows = db.conn.execute( + "SELECT da.draft_name, a.affiliation FROM draft_authors da " + "JOIN authors a ON da.person_id = a.person_id " + "WHERE a.affiliation != ''" + ).fetchall() + + # Map draft -> rev + draft_rev = {r["name"]: r["rev_int"] for r in all_revs} + + # Group drafts by normalized org (deduped) + org_drafts: dict[str, set[str]] = defaultdict(set) + for row in aff_rows: + norm = normalize_org(row["affiliation"]) + if norm: + org_drafts[norm].add(row["draft_name"]) + + if org: + # Show drafts for a specific org + drafts = org_drafts.get(org, set()) + if not drafts: + console.print(f"[yellow]No drafts for '{org}'[/]") + return + table = Table(title=f"'{org}' Drafts by Revision ({len(drafts)})") + table.add_column("Rev", justify="right", width=4) + table.add_column("Name", style="cyan", max_width=50) + table.add_column("Title", max_width=40) + draft_details = [(d, draft_rev.get(d, 0)) for d in drafts] + for name, rev in sorted(draft_details, key=lambda x: -x[1]): + title_row = next((r["title"] for r in all_revs if r["name"] == name), "") + table.add_row(f"-{rev:02d}", name, title_row[:40]) + console.print(table) + else: + # Show org summary + org_stats = [] + for org_name, drafts in org_drafts.items(): + if len(drafts) < 3: + continue + revs = [draft_rev.get(d, 0) for d in drafts] + n_00 = sum(1 for r in revs if r == 0) + org_stats.append({ + "org": org_name, + "drafts": len(drafts), 
+ "avg_rev": sum(revs) / len(revs), + "at_00": n_00, + "pct_00": n_00 / len(drafts) * 100, + "max_rev": max(revs), + }) + + org_stats.sort(key=lambda x: -x["drafts"]) + + table = Table(title=f"Revision Velocity by Organization (>= 3 drafts, top {top})") + table.add_column("#", justify="right", width=4) + table.add_column("Organization", style="cyan", width=28) + table.add_column("Drafts", justify="right", width=6) + table.add_column("Avg Rev", justify="right", width=8) + table.add_column("At -00", justify="right", width=6) + table.add_column("%-00", justify="right", width=6) + table.add_column("Max", justify="right", width=4) + + for rank, s in enumerate(org_stats[:top], 1): + table.add_row( + str(rank), s["org"][:28], str(s["drafts"]), + f"{s['avg_rev']:.2f}", str(s["at_00"]), + f"{s['pct_00']:.0f}%", str(s["max_rev"]), + ) + console.print(table) + + # Highlight the fire-and-forget vs iterators narrative + high_00 = [s for s in org_stats if s["pct_00"] >= 70 and s["drafts"] >= 5] + iterators = [s for s in org_stats if s["avg_rev"] >= 3.0 and s["drafts"] >= 3] + + if high_00: + console.print("\n[bold]Fire-and-Forget[/] (>= 70% at -00, >= 5 drafts):") + for s in high_00: + console.print(f" {s['org']}: {s['at_00']}/{s['drafts']} at -00 ({s['pct_00']:.0f}%)") + + if iterators: + console.print("\n[bold]Active Iterators[/] (avg revision >= 3.0):") + for s in iterators: + console.print(f" {s['org']}: avg rev {s['avg_rev']:.1f}, max -{s['max_rev']:02d}") + + # Generate report + from ..reports import Reporter + reporter = Reporter(cfg, db) + path = reporter.revisions_report() + console.print(f"\nReport saved: [bold]{path}[/]") + + finally: + db.close() + + +# ── idea-overlap ──────────────────────────────────────────────────────── + + +@click.command("idea-overlap") +@click.option("--threshold", "-t", default=0.75, help="Title similarity threshold (0-1)") +@click.option("--limit", "-n", default=50, help="Max results to show") +def idea_overlap(threshold: float, limit: 
int): + """Find ideas that appear across multiple organizations.""" + from collections import defaultdict + from difflib import SequenceMatcher + from ..orgs import normalize_org + + cfg = _get_config() + db = Database(cfg) + + try: + all_ideas = db.all_ideas() + if not all_ideas: + console.print("[yellow]No ideas extracted yet. Run `ietf ideas --all` first.[/]") + return + + # Build draft -> org mapping + draft_orgs: dict[str, set[str]] = defaultdict(set) + rows = db.conn.execute( + """SELECT da.draft_name, a.affiliation + FROM draft_authors da + JOIN authors a ON da.person_id = a.person_id + WHERE a.affiliation != ''""" + ).fetchall() + for r in rows: + org = normalize_org(r["affiliation"]) + if org and org != "Independent": + draft_orgs[r["draft_name"]].add(org) + + # Group similar ideas (same logic as ideas_report but tracking orgs) + idea_groups: list[dict] = [] + for idea in all_ideas: + title_lower = idea["title"].lower().strip() + matched = False + for group in idea_groups: + ratio = SequenceMatcher(None, title_lower, group["canonical"]).ratio() + if ratio >= threshold: + group["ideas"].append(idea) + group["drafts"].add(idea["draft_name"]) + group["orgs"].update(draft_orgs.get(idea["draft_name"], set())) + matched = True + break + if not matched: + idea_groups.append({ + "canonical": title_lower, + "title": idea["title"], + "ideas": [idea], + "drafts": {idea["draft_name"]}, + "orgs": set(draft_orgs.get(idea["draft_name"], set())), + }) + + # Filter to cross-org ideas (2+ orgs) + cross_org = [g for g in idea_groups if len(g["orgs"]) >= 2] + cross_org.sort(key=lambda g: (-len(g["orgs"]), -len(g["drafts"]))) + + console.print(f"\n[bold]Cross-Organization Idea Overlap[/]") + console.print(f"{len(all_ideas)} ideas, {len(idea_groups)} unique, " + f"[bold green]{len(cross_org)}[/] appear across 2+ orgs\n") + + if not cross_org: + console.print("[yellow]No cross-org idea overlap found at this threshold.[/]") + return + + table = Table(title=f"Ideas Shared Across 
@click.command("co-occurrence")
def co_occurrence():
    """Category co-occurrence matrix — which categories appear together."""
    # Opens its own Database handle and always closes it in the finally block.
    from collections import defaultdict
    from itertools import combinations

    cfg = _get_config()
    db = Database(cfg)

    try:
        # Each entry is a (draft, rating) pair; only rating.categories is used here.
        pairs = db.drafts_with_ratings(limit=500)
        total = len(pairs)
        if total == 0:
            # Without this guard the percentage below divides by zero.
            console.print("[yellow]No rated drafts. Run `ietf analyze --all` first.[/]")
            return

        multi_cat = sum(1 for d, r in pairs if len(r.categories) > 1)

        console.print(f"\n[bold]Category Co-occurrence Analysis[/]: {total} drafts\n")
        console.print(f" Multi-category drafts: [bold]{multi_cat}[/] ({multi_cat/total*100:.1f}%)\n")

        # Count every unordered category pair that appears on the same draft.
        # Keys are alphabetically sorted 2-tuples so (A, B) and (B, A) share a counter.
        cooccur: dict[tuple[str, str], int] = defaultdict(int)
        for d, r in pairs:
            for c1, c2 in combinations(r.categories, 2):
                cooccur[tuple(sorted((c1, c2)))] += 1

        # Top co-occurrences
        table = Table(title="Top 15 Category Co-occurrences")
        table.add_column("#", justify="right", width=4)
        table.add_column("Category A", style="cyan", width=22)
        table.add_column("Category B", style="cyan", width=22)
        table.add_column("Count", justify="right", width=6)

        top_pairs = sorted(cooccur.items(), key=lambda x: -x[1])[:15]
        for rank, ((c1, c2), n) in enumerate(top_pairs, 1):
            table.add_row(str(rank), c1, c2, str(n))
        console.print(table)

        # AI safety isolation check: list what the safety category is paired with.
        safety_cooccur = {k: v for k, v in cooccur.items() if "AI safety/alignment" in k}
        if safety_cooccur:
            console.print("\n[bold]AI Safety/Alignment Co-occurrences[/]:")
            for (c1, c2), n in sorted(safety_cooccur.items(), key=lambda x: -x[1]):
                other = c2 if c1 == "AI safety/alignment" else c1
                console.print(f" {n:>3d} + {other}")

        # Generate report
        from ..reports import Reporter
        reporter = Reporter(cfg, db)
        path = reporter.co_occurrence_report()
        console.print(f"\nReport saved: [bold]{path}[/]")

    finally:
        db.close()
Mobile", "China Telecom", "China Unicom", + "ZTE Corporation", "Tsinghua University", "BUPT", + "Pengcheng Laboratory", "CAICT", "AsiaInfo", + "Zhongguancun Laboratory", "CNIC, CAS", + "Tsinghua Shenzhen International Graduate School & Pengcheng Laboratory", + "Huazhong University of Science and Technology", + } + cross_org = intra_org = cross_divide = 0 + for u, v in G.edges(): + _, org_u = person_info.get(u, ("?", "")) + _, org_v = person_info.get(v, ("?", "")) + if org_u and org_v: + if org_u == org_v: + intra_org += 1 + else: + cross_org += 1 + if (org_u in chinese_orgs) != (org_v in chinese_orgs): + cross_divide += 1 + + total_edges = cross_org + intra_org + console.print(f" Intra-org edges: [bold]{intra_org}[/] ({intra_org/total_edges*100:.1f}%)") + console.print(f" Cross-org edges: [bold]{cross_org}[/] ({cross_org/total_edges*100:.1f}%)") + console.print(f" Cross Chinese-Western edges: [bold]{cross_divide}[/]") + + avg_clustering = nx.average_clustering(G) + components = list(nx.connected_components(G)) + console.print(f" Clustering coefficient: [bold]{avg_clustering:.3f}[/]") + console.print(f" Connected components: [bold]{len(components)}[/], largest: {len(max(components, key=len))}\n") + + # Betweenness centrality + bc = nx.betweenness_centrality(G) + + table = Table(title=f"Top {top} Authors by Betweenness Centrality") + table.add_column("#", justify="right", width=4) + table.add_column("Author", style="bold", width=28) + table.add_column("Organization", style="cyan", width=20) + table.add_column("BC Score", justify="right", width=8) + table.add_column("Degree", justify="right", width=6) + table.add_column("CN/West", justify="center", width=8) + + top_bc = sorted(bc.items(), key=lambda x: -x[1])[:top] + for rank, (pid, score) in enumerate(top_bc, 1): + name, org = person_info.get(pid, ("?", "?")) + degree = G.degree(pid) + cn = sum(1 for n in G.neighbors(pid) if person_info.get(n, ("", ""))[1] in chinese_orgs) + west = sum(1 for n in G.neighbors(pid) if 
person_info.get(n, ("", ""))[1] not in chinese_orgs and person_info.get(n, ("", ""))[1]) + table.add_row(str(rank), name[:28], org[:20], f"{score:.4f}", str(degree), f"{cn}/{west}") + + console.print(table) + + # Bridge-builders + bridges = [] + for pid in G.nodes(): + name, org = person_info.get(pid, ("?", "")) + cn = sum(1 for n in G.neighbors(pid) if person_info.get(n, ("", ""))[1] in chinese_orgs) + west = sum(1 for n in G.neighbors(pid) if person_info.get(n, ("", ""))[1] not in chinese_orgs and person_info.get(n, ("", ""))[1]) + if cn > 0 and west > 0: + bridges.append((pid, name, org, bc.get(pid, 0), cn, west)) + + bridges.sort(key=lambda x: -x[3]) + console.print(f"\n[bold]Cross-Divide Bridge-Builders[/] ({len(bridges)} people with neighbors in both blocs):\n") + for pid, name, org, bc_score, cn, west in bridges[:10]: + console.print(f" [bold]{name}[/] ({org}): BC={bc_score:.4f}, CN neighbors={cn}, Western={west}") + + # Generate report + from ..reports import Reporter + reporter = Reporter(cfg, db) + path = reporter.centrality_report() + console.print(f"\nReport saved: [bold]{path}[/]") + + finally: + db.close() diff --git a/src/ietf_analyzer/commands/common.py b/src/ietf_analyzer/commands/common.py new file mode 100644 index 0000000..a23c0b5 --- /dev/null +++ b/src/ietf_analyzer/commands/common.py @@ -0,0 +1,32 @@ +"""Shared utilities for CLI command modules.""" + +from __future__ import annotations + +import functools + +import click +from rich.console import Console + +from ..config import Config +from ..db import Database + +console = Console() + + +def _get_config() -> Config: + cfg = Config.load() + return cfg + + +def pass_cfg_db(f): + """Decorator that extracts cfg and db from Click context and passes them as arguments. + + Usage: place @pass_cfg_db after all @click decorators. The decorated function + should accept (cfg, db, ...) instead of manually calling _get_config()/Database(). 
+ """ + + @click.pass_context + @functools.wraps(f) + def wrapper(ctx, **kwargs): + return f(ctx.obj["cfg"], ctx.obj["db"], **kwargs) + return wrapper diff --git a/src/ietf_analyzer/commands/fetch.py b/src/ietf_analyzer/commands/fetch.py new file mode 100644 index 0000000..6427645 --- /dev/null +++ b/src/ietf_analyzer/commands/fetch.py @@ -0,0 +1,409 @@ +"""Fetch, search, and browse commands.""" + +from __future__ import annotations + +import click +from rich.table import Table + +from .common import console, pass_cfg_db, _get_config +from ..config import Config +from ..db import Database + + +def register(main): + """Register all fetch/browse commands with the main CLI group.""" + main.add_command(fetch) + main.add_command(classify) + main.add_command(list_drafts) + main.add_command(search) + main.add_command(show) + main.add_command(annotate) + main.add_command(authors) + main.add_command(network) + + +# ── fetch ──────────────────────────────────────────────────────────────────── + + +@click.command() +@click.option("--keywords", "-k", multiple=True, help="Extra keywords to search for") +@click.option("--since", "-s", help="Only fetch drafts newer than this date (YYYY-MM-DD)") +@click.option("--download-text/--no-download-text", default=True, help="Download full text of drafts") +@click.option("--classify/--no-classify", default=True, help="Pre-filter with local Ollama classifier (saves Claude tokens)") +@pass_cfg_db +def fetch(cfg, db, keywords: tuple[str, ...], since: str | None, download_text: bool, classify: bool): + """Fetch AI/agent drafts from IETF Datatracker.""" + from ..fetcher import Fetcher + + fetcher = Fetcher(cfg) + + kw_list = list(cfg.search_keywords) + if keywords: + kw_list.extend(keywords) + + try: + drafts = fetcher.search_drafts(keywords=kw_list, since=since) + console.print(f"Found [bold]{len(drafts)}[/] drafts from Datatracker") + + # Pre-filter with local classifier to avoid storing irrelevant drafts + if classify and drafts: + try: + 
# ── classify ─────────────────────────────────────────────────────────────────


def _classify_accuracy_check(clf, db) -> None:
    """Run the classifier over all rated drafts and print precision/recall."""
    console.print("[bold]Accuracy check: classifying all rated drafts...[/]\n")
    # Ratings with false_positive = 0 are the drafts the classifier should keep,
    # false_positive = 1 the ones it should filter out.
    tp_rows = db.conn.execute(
        "SELECT d.name, d.title, d.abstract FROM drafts d "
        "JOIN ratings r ON d.name = r.draft_name WHERE r.false_positive = 0"
    ).fetchall()
    fp_rows = db.conn.execute(
        "SELECT d.name, d.title, d.abstract FROM drafts d "
        "JOIN ratings r ON d.name = r.draft_name WHERE r.false_positive = 1"
    ).fetchall()

    def count_kept(rows) -> int:
        """Return how many *rows* the classifier marks as relevant."""
        kept = 0
        for row in rows:
            relevant, _sim, _method = clf.classify(row["title"], row["abstract"])
            if relevant:
                kept += 1
        return kept

    total_tp = len(tp_rows)
    total_fp = len(fp_rows)
    tp_ok = count_kept(tp_rows)
    tp_miss = total_tp - tp_ok
    fp_miss = count_kept(fp_rows)  # known false positives that slipped through
    fp_ok = total_fp - fp_miss

    precision = tp_ok / (tp_ok + fp_miss) if (tp_ok + fp_miss) else 0
    recall = tp_ok / total_tp if total_tp else 0
    console.print(f"True Positives: [green]{tp_ok}[/]/{total_tp} kept ({tp_miss} missed)")
    console.print(f"False Positives: [red]{fp_ok}[/]/{total_fp} filtered ({fp_miss} slipped)")
    console.print(f"Precision: [bold]{precision:.1%}[/] Recall: [bold]{recall:.1%}[/]")


def _classify_unrated(clf, db, remove: bool) -> None:
    """List unrated drafts classified as irrelevant; delete them when *remove* is set."""
    drafts = db.unrated_drafts(limit=5000)
    if not drafts:
        console.print("No unrated drafts to classify.")
        return

    console.print(f"[bold]Classifying {len(drafts)} unrated drafts...[/]\n")
    draft_dicts = [{"name": d.name, "title": d.title, "abstract": d.abstract} for d in drafts]
    _relevant, irrelevant = clf.classify_batch(draft_dicts, verbose=True)

    if not irrelevant:
        console.print("\nAll unrated drafts appear relevant.")
        return

    console.print(f"\n[bold red]Irrelevant drafts ({len(irrelevant)}):[/]")
    table = Table()
    table.add_column("Name", style="cyan", max_width=50)
    table.add_column("Title", max_width=50)
    for d in irrelevant:
        # `or ""` keeps the slice safe when a title is stored as None.
        table.add_row(d["name"], (d.get("title") or "")[:50])
    console.print(table)

    if not remove:
        console.print("\n[dim]Use --remove to delete these from the DB[/]")
        return

    # NOTE(review): only the `drafts` row is deleted here; whether dependent
    # rows in other tables are cleaned up depends on the schema — confirm.
    for d in irrelevant:
        db.conn.execute("DELETE FROM drafts WHERE name = ?", (d["name"],))
    db.conn.commit()
    console.print(f"\n[bold red]Removed {len(irrelevant)} irrelevant drafts from database[/]")


@click.command()
@click.option("--unrated", is_flag=True, help="Classify only unrated drafts")
@click.option("--all", "all_drafts", is_flag=True, help="Classify all drafts (checks accuracy against existing ratings)")
@click.option("--remove", is_flag=True, help="Actually remove drafts classified as irrelevant (use with --unrated)")
@pass_cfg_db
def classify(cfg, db, unrated: bool, all_drafts: bool, remove: bool):
    """Pre-classify drafts as AI-relevant using local Ollama model.

    Runs a two-stage filter (embedding similarity + chat model) to identify
    irrelevant drafts before spending Claude tokens on rating.

    Examples:

      ietf classify --unrated # preview irrelevant unrated drafts

      ietf classify --unrated --remove # remove them from DB

      ietf classify --all # accuracy check against existing ratings
    """
    from ..classifier import Classifier

    if not (all_drafts or unrated):
        # Nothing to do: print the usage hint without creating a classifier.
        console.print("Use --unrated or --all. See: ietf classify --help")
        return

    clf = Classifier(cfg)
    try:
        # --all takes precedence when both flags are given, as before.
        if all_drafts:
            _classify_accuracy_check(clf, db)
        else:
            _classify_unrated(clf, db, remove)
    finally:
        # Release the classifier even when classification or a DB call raises.
        clf.close()
# ── show ─────────────────────────────────────────────────────────────────────


@click.command()
@click.argument("name")
@pass_cfg_db
def show(cfg, db, name: str):
    """Show detailed info for a draft."""
    # The docstring above is the click help text, so maintainer notes live in
    # comments. Prints the draft header and abstract, the stored rating (or a
    # hint how to create one), the readiness breakdown when its score is
    # positive, and finally saves the per-draft detail report.
    from ..reports import Reporter
    from ..readiness import compute_readiness

    reporter = Reporter(cfg, db)
    draft = db.get_draft(name)
    if draft is None:
        console.print(f"[red]Draft not found: {name}[/]")
        return

    rating = db.get_rating(name)

    console.print(f"\n[bold]{draft.title}[/]")
    console.print(f"[dim]{draft.name}[/] rev {draft.rev} | {draft.date} | {draft.pages or '?'} pages")
    console.print(f"Group: {draft.group or 'individual'} | {draft.datatracker_url}")
    console.print(f"\n[italic]{draft.abstract}[/]\n")

    if rating:
        console.print("[bold]AI Assessment[/]")
        console.print(f" Score: [bold green]{rating.composite_score:.1f}[/]")
        console.print(f" Summary: {rating.summary}\n")

        table = Table(show_header=True)
        table.add_column("Dimension", width=12)
        table.add_column("Score", justify="center", width=7)
        table.add_column("Notes")
        dimensions = (
            ("Novelty", rating.novelty, rating.novelty_note),
            ("Maturity", rating.maturity, rating.maturity_note),
            ("Overlap", rating.overlap, rating.overlap_note),
            ("Momentum", rating.momentum, rating.momentum_note),
            ("Relevance", rating.relevance, rating.relevance_note),
        )
        for label, score, note in dimensions:
            table.add_row(label, f"{score}/5", note)
        console.print(table)

        if rating.categories:
            console.print(f"\nCategories: {', '.join(rating.categories)}")
    else:
        # Must be an f-string: without the prefix the user is shown the literal
        # text "{name}" instead of the draft to analyze.
        console.print(f"[dim]Not yet rated — run: ietf analyze {name}[/]")

    # Readiness score
    readiness = compute_readiness(db, name)
    if readiness["score"] > 0:
        console.print(f"\n[bold]Standards Readiness: [cyan]{readiness['score']}/100[/][/]")
        rtable = Table(show_header=True)
        rtable.add_column("Factor", width=20)
        rtable.add_column("Value", justify="center", width=10)
        rtable.add_column("Points", justify="right", width=8)
        rtable.add_column("Detail")
        for factor in readiness["factors"].values():
            rtable.add_row(
                factor["label"],
                f"{factor['value']:.2f}",
                f"+{factor['contribution']}",
                factor["detail"],
            )
        console.print(rtable)

    # Save detailed report too; a falsy path means there is nothing to announce.
    path = reporter.draft_detail(name)
    if path:
        console.print(f"\n[dim]Report saved: {path}[/]")
{draft_name}. Use --note or --tag to add one.[/]") + return + + # Fetch existing tags for add/remove operations + existing = db.get_annotation(draft_name) + current_tags = existing["tags"] if existing else [] + + for t in tag: + if t not in current_tags: + current_tags.append(t) + for t in remove_tag: + if t in current_tags: + current_tags.remove(t) + + db.upsert_annotation(draft_name, note=note, tags=current_tags) + ann = db.get_annotation(draft_name) + console.print(f"[green]Annotation updated for {draft_name}[/]") + console.print(f" Note: {ann['note'] or '(empty)'}") + console.print(f" Tags: {', '.join(ann['tags']) if ann['tags'] else '(none)'}") + + +# ── authors ───────────────────────────────────────────────────────────── + + +@click.command() +@click.argument("name", required=False) +@click.option("--fetch/--no-fetch", default=False, help="Fetch author data from Datatracker first") +@click.option("--limit", "-n", default=20, help="Number of top authors to show") +@pass_cfg_db +def authors(cfg, db, name: str | None, fetch: bool, limit: int): + """Show authors for a draft, or top authors overall.""" + from ..authors import AuthorNetwork + + author_network = AuthorNetwork(cfg, db) + + if fetch: + count = author_network.fetch_all_authors() + console.print(f"Fetched authors for [bold green]{count}[/] drafts") + + if name: + draft_authors = db.get_authors_for_draft(name) + if not draft_authors: + console.print(f"[yellow]No author data for {name}. Run `ietf authors --fetch` first.[/]") + return + console.print(f"\n[bold]Authors of {name}:[/]") + for a in draft_authors: + console.print(f" - {a.name} ({a.affiliation or 'no affiliation'})") + else: + top = db.top_authors(limit=limit) + if not top: + console.print("[yellow]No author data. 
Run `ietf authors --fetch` first.[/]") + return + table = Table(title=f"Top {limit} Authors") + table.add_column("#", justify="right", width=4) + table.add_column("Author", style="cyan") + table.add_column("Organization") + table.add_column("Drafts", justify="right", width=6) + for rank, (aname, aff, cnt, _, _pid) in enumerate(top, 1): + table.add_row(str(rank), aname, aff, str(cnt)) + console.print(table) + + +@click.command() +@click.option("--top", "-n", default=20, help="Top N to show") +@pass_cfg_db +def network(cfg, db, top: int): + """Show author collaboration network.""" + console.print("\n[bold]Top Organizations[/]") + orgs = db.top_orgs(limit=top) + if orgs: + table = Table() + table.add_column("#", justify="right", width=4) + table.add_column("Organization", style="cyan") + table.add_column("Authors", justify="right", width=8) + table.add_column("Drafts", justify="right", width=6) + for rank, (org, auth_cnt, draft_cnt) in enumerate(orgs, 1): + table.add_row(str(rank), org, str(auth_cnt), str(draft_cnt)) + console.print(table) + + console.print("\n[bold]Cross-Org Collaboration[/]") + cross = db.cross_org_collaborations(limit=top) + if cross: + table = Table() + table.add_column("Org A", style="cyan") + table.add_column("Org B", style="cyan") + table.add_column("Shared Drafts", justify="right", width=8) + for org_a, org_b, shared in cross: + table.add_row(org_a, org_b, str(shared)) + console.print(table) + else: + console.print("[yellow]No author data. 
# ── draft-gen ───────────────────────────────────────────────────────────


@click.command("draft-gen")
@click.argument("gap_topic")
@click.option("--output", "-o", help="Output file path")
def draft_gen(gap_topic: str, output: str | None):
    """Generate an Internet-Draft addressing a landscape gap."""
    # The docstring above is the click help text, so maintainer notes live in
    # comments. Without --output the draft is written to
    # <cfg.data_dir>/reports/generated-draft.txt.
    from ..draftgen import DraftGenerator
    from ..analyzer import Analyzer

    cfg = _get_config()
    db = Database(cfg)

    try:
        # Build the helpers inside the try so the database is closed even when
        # one of the constructors raises.
        analyzer = Analyzer(cfg, db)
        generator = DraftGenerator(cfg, db, analyzer)

        out_path = output or str(Path(cfg.data_dir) / "reports" / "generated-draft.txt")
        console.print(f"Generating Internet-Draft on: [bold]{gap_topic}[/]")
        path = generator.generate(gap_topic, output_path=out_path)
        console.print(f"\nDraft saved: [bold green]{path}[/]")
    finally:
        db.close()


# ── proposal intake ──────────────────────────────────────────────────────────


@click.command("intake")
@click.argument("input_text", required=False)
@click.option("--file", "-f", type=click.Path(exists=True), help="Read input from a file")
@click.option("--dry-run", is_flag=True, help="Parse and show proposals without storing")
def intake(input_text: str | None, file: str | None, dry_run: bool):
    """Generate draft proposals from text/URLs.

    Paste article text, URLs, or notes. Claude analyzes against all gaps
    and generates structured IETF draft proposals automatically.

    Examples:

      ietf intake "https://arxiv.org/abs/2503.18813"

      ietf intake -f notes.txt

      echo "interesting paper about agent security" | ietf intake -
    """
    import sys

    from ..proposal_intake import ProposalIntake

    # Input precedence: "-" reads stdin, then --file, then the positional
    # argument, and finally an interactive read from stdin.
    if input_text == "-":
        input_text = sys.stdin.read()
    elif file:
        # Decode explicitly as UTF-8 instead of the locale default, so the same
        # notes file is read identically on every platform.
        input_text = Path(file).read_text(encoding="utf-8")
    elif not input_text:
        # Interactive: read from stdin until EOF
        console.print("[dim]Paste text/URLs, then Ctrl+D to submit:[/]")
        input_text = sys.stdin.read()

    if not input_text or not input_text.strip():
        console.print("[red]No input provided.[/]")
        raise SystemExit(1)

    cfg = _get_config()
    db = Database(cfg)
    try:
        pipeline = ProposalIntake(cfg, db)
        # `usage` is returned by process() but is not reported by this command.
        proposals, _usage = pipeline.process(input_text, dry_run=dry_run)

        if not proposals:
            console.print("[yellow]No proposals generated from this input.[/]")
            return

        console.print(f"\n[bold green]{len(proposals)} proposal(s) generated[/]")
        for p in proposals:
            pid = p.get("id", "---")
            gaps = ", ".join(f"#{g}" for g in p.get("gap_ids", []))
            console.print(f" [blue]#{pid}[/] {p['title']} [dim]gaps: {gaps}[/]")
        if not dry_run:
            console.print("\nView in web UI: [bold]http://localhost:5000/proposals[/]")
    finally:
        db.close()
main.add_command(report) + main.add_command(viz) + main.add_command(wg) + main.add_command(export) + + +# ── report ─────────────────────────────────────────────────────────────────── + + +@click.group() +def report(): + """Generate markdown reports.""" + pass + + +@report.command() +@pass_cfg_db +def overview(cfg, db): + """Overview table of all rated drafts.""" + from ..reports import Reporter + path = Reporter(cfg, db).overview() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command() +@pass_cfg_db +def landscape(cfg, db): + """Category-grouped landscape view.""" + from ..reports import Reporter + path = Reporter(cfg, db).landscape() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command() +@click.option("--days", "-d", default=7, help="Look back N days") +@pass_cfg_db +def digest(cfg, db, days: int): + """What's new digest.""" + from ..reports import Reporter + path = Reporter(cfg, db).digest(since_days=days) + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command() +@pass_cfg_db +def timeline(cfg, db): + """Timeline of draft submissions by month and category.""" + from ..reports import Reporter + path = Reporter(cfg, db).timeline() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("overlap-matrix") +@pass_cfg_db +def overlap_matrix(cfg, db): + """Full pairwise overlap matrix report.""" + from ..embeddings import Embedder + from ..reports import Reporter + embedder = Embedder(cfg, db) + n_drafts = len(db.all_drafts()) + console.print(f"Computing {n_drafts}x{n_drafts} similarity matrix...") + path = Reporter(cfg, db).overlap_matrix(embedder) + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("authors") +@pass_cfg_db +def authors_report(cfg, db): + """Author and organization network report.""" + from ..reports import Reporter + path = Reporter(cfg, db).authors_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("ideas") +@pass_cfg_db +def 
ideas_report(cfg, db): + """Report on extracted technical ideas.""" + from ..reports import Reporter + path = Reporter(cfg, db).ideas_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("refs") +@pass_cfg_db +def refs_report(cfg, db): + """Cross-reference report — which standards the ecosystem builds on.""" + from ..reports import Reporter + path = Reporter(cfg, db).refs_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("trends") +@pass_cfg_db +def trends_report(cfg, db): + """Category trend analysis report (markdown).""" + from ..reports import Reporter + path = Reporter(cfg, db).trends_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("idea-overlap") +@pass_cfg_db +def idea_overlap_report(cfg, db): + """Cross-organization idea overlap report.""" + from ..reports import Reporter + path = Reporter(cfg, db).idea_overlap_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("status") +@pass_cfg_db +def status_report(cfg, db): + """WG adoption status report.""" + from ..reports import Reporter + path = Reporter(cfg, db).status_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("revisions") +@pass_cfg_db +def revisions_report(cfg, db): + """Draft revision velocity report.""" + from ..reports import Reporter + path = Reporter(cfg, db).revisions_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("centrality") +@pass_cfg_db +def centrality_report(cfg, db): + """Author network centrality report.""" + from ..reports import Reporter + path = Reporter(cfg, db).centrality_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("co-occurrence") +@pass_cfg_db +def co_occurrence_report(cfg, db): + """Category co-occurrence matrix report.""" + from ..reports import Reporter + path = Reporter(cfg, db).co_occurrence_report() + console.print(f"Report saved: [bold]{path}[/]") + + 
+@report.command("wg") +@pass_cfg_db +def wg_report(cfg, db): + """Working group analysis report — overlaps, alignment, submission targets.""" + from ..reports import Reporter + path = Reporter(cfg, db).wg_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("sources") +@pass_cfg_db +def sources_report(cfg, db): + """Cross-source comparison report — ratings and categories by standards body.""" + from ..reports import Reporter + path = Reporter(cfg, db).sources_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("false-positives") +@pass_cfg_db +def false_positives_report(cfg, db): + """False positive profiling report — what makes drafts look AI-related but not be.""" + from ..reports import Reporter + path = Reporter(cfg, db).false_positives_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("citations") +@pass_cfg_db +def citations_report(cfg, db): + """Citation influence and BCP dependency analysis.""" + from ..reports import Reporter + path = Reporter(cfg, db).citations_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("complexity") +@pass_cfg_db +def complexity_report(cfg, db): + """Draft complexity matrix: correlations between structural complexity and ratings.""" + from ..reports import Reporter + path = Reporter(cfg, db).complexity_report() + console.print(f"Report saved: [bold]{path}[/]") + + +@report.command("idea-analysis") +@pass_cfg_db +def idea_analysis_report(cfg, db): + """Idea novelty deep dive — distribution, types, top ideas, cross-draft patterns.""" + from ..reports import Reporter + path = Reporter(cfg, db).idea_analysis() + console.print(f"Report saved: [bold]{path}[/]") + + +# ── wg (working group analysis) ───────────────────────────────────────── + + +@click.group() +def wg(): + """Working group analysis — overlaps, alignment opportunities, submission targets.""" + pass + + +@wg.command("list") +@click.option("--min-drafts", 
default=1, help="Minimum drafts to show a WG") +@pass_cfg_db +def wg_list(cfg, db, min_drafts: int): + """List working groups with draft counts and average scores.""" + summaries = db.wg_summary() + if not summaries: + console.print("[yellow]No WG data. Run: python scripts/backfill-wg-names.py[/]") + return + + summaries = [s for s in summaries if s["draft_count"] >= min_drafts] + + table = Table(title=f"Working Groups ({len(summaries)} with >= {min_drafts} drafts)") + table.add_column("WG", style="cyan", width=12) + table.add_column("#", justify="right", width=4) + table.add_column("Ideas", justify="right", width=5) + table.add_column("Nov", justify="center", width=4) + table.add_column("Mat", justify="center", width=4) + table.add_column("Ovl", justify="center", width=4) + table.add_column("Mom", justify="center", width=4) + table.add_column("Rel", justify="center", width=4) + table.add_column("Top Categories") + + for s in summaries: + top_cats = sorted(s["categories"].items(), key=lambda x: x[1], reverse=True)[:3] + cats_str = ", ".join(f"{c}({n})" for c, n in top_cats) if top_cats else "-" + table.add_row( + s["wg"], str(s["draft_count"]), str(s["idea_count"]), + str(s["avg_novelty"]), str(s["avg_maturity"]), + str(s["avg_overlap"]), str(s["avg_momentum"]), + str(s["avg_relevance"]), cats_str, + ) + + console.print(table) + + # Also show individual submission count + indiv = db.conn.execute( + 'SELECT COUNT(*) FROM drafts WHERE "group" = \'none\' OR "group" IS NULL' + ).fetchone()[0] + console.print(f"\n[dim]Individual submissions (no WG): {indiv}[/]") + + +@wg.command("show") +@click.argument("name") +@pass_cfg_db +def wg_show(cfg, db, name: str): + """Show details for a specific working group.""" + drafts = db.wg_drafts(name) + if not drafts: + console.print(f"[red]No drafts found for WG: {name}[/]") + return + + console.print(f"\n[bold]Working Group: {name}[/] ({len(drafts)} drafts)\n") + + table = Table() + table.add_column("Date", style="dim", width=10) + 
table.add_column("Name", style="cyan") + table.add_column("Title", max_width=50) + table.add_column("Score", justify="right", width=6) + + for d in drafts: + rating = db.get_rating(d.name) + score = f"{rating.composite_score:.1f}" if rating else "-" + table.add_row(d.date, d.name, d.title[:50], score) + + console.print(table) + + # Show ideas for this WG + ideas = [] + for d in drafts: + ideas.extend(db.get_ideas_for_draft(d.name)) + if ideas: + console.print(f"\n[bold]Ideas ({len(ideas)}):[/]") + for idea in ideas[:15]: + console.print(f" - [cyan]{idea['title']}[/]: {idea['description'][:80]}") + if len(ideas) > 15: + console.print(f" [dim]... and {len(ideas) - 15} more[/]") + + +@wg.command("overlaps") +@click.option("--min-wgs", default=2, help="Minimum WGs sharing a category to show") +@pass_cfg_db +def wg_overlaps(cfg, db, min_wgs: int): + """Find categories and ideas that span multiple WGs — alignment opportunities.""" + # Category spread across WGs + spread = db.category_wg_spread() + multi = [s for s in spread if s["wg_count"] >= min_wgs + and not all(w["wg"] == "none" for w in s["wgs"])] + + if multi: + console.print(f"\n[bold]Categories spanning {min_wgs}+ WGs[/]\n") + for s in multi: + wg_strs = [f"{w['wg']}({w['count']})" for w in s["wgs"] if w["wg"] != "none"] + if wg_strs: + console.print(f" [cyan]{s['category']}[/] — {s['total_drafts']} drafts across {s['wg_count']} WGs") + console.print(f" WGs: {', '.join(wg_strs)}") + + # Idea overlap across WGs + idea_overlaps = db.wg_idea_overlap() + cross_wg = [o for o in idea_overlaps + if not all(w == "none" for w in o["wg_names"])] + + if cross_wg: + console.print(f"\n[bold]Ideas appearing in {min_wgs}+ WGs ({len(cross_wg)} found)[/]\n") + for o in cross_wg[:20]: + real_wgs = [w for w in o["wg_names"] if w != "none"] + console.print(f" [cyan]{o['idea_title']}[/] — WGs: {', '.join(real_wgs)}") + for entry in o["wgs"]: + if entry["wg"] != "none": + console.print(f" - [{entry['wg']}] {entry['draft_name']}") + if 
len(cross_wg) > 20: + console.print(f"\n [dim]... and {len(cross_wg) - 20} more[/]") + + if not multi and not cross_wg: + console.print("[yellow]No cross-WG overlaps found.[/]") + + +@wg.command("alignment") +@pass_cfg_db +def wg_alignment(cfg, db): + """Identify where individual drafts should be consolidated into WG standards.""" + # Compare individual vs WG category distribution + dist = db.individual_vs_wg_categories() + indiv = dist["individual"] + adopted = dist["wg_adopted"] + + console.print("\n[bold]Individual vs WG-Adopted Category Distribution[/]\n") + + table = Table() + table.add_column("Category", width=25) + table.add_column("Individual", justify="right", width=10) + table.add_column("WG-Adopted", justify="right", width=10) + table.add_column("Signal", width=40) + + all_cats = sorted(set(list(indiv.keys()) + list(adopted.keys()))) + for cat in all_cats: + i_count = indiv.get(cat, 0) + w_count = adopted.get(cat, 0) + signal = "" + if i_count >= 5 and w_count == 0: + signal = "[yellow]High individual activity, no WG — needs WG?[/]" + elif i_count >= 3 and w_count >= 1: + signal = "[green]WG exists, individual drafts could target it[/]" + elif w_count > i_count and i_count > 0: + signal = "[dim]WG leading, some individual work[/]" + table.add_row(cat, str(i_count), str(w_count), signal) + + console.print(table) + + # Find overlap clusters within individual submissions that might warrant a WG + console.print("\n[bold]Consolidation Candidates[/]") + console.print("[dim]Categories with many individual drafts but no WG adoption — " + "potential for new WG or BoF[/]\n") + + candidates = [] + for cat in all_cats: + i_count = indiv.get(cat, 0) + w_count = adopted.get(cat, 0) + if i_count >= 5 and w_count == 0: + candidates.append((cat, i_count)) + + if candidates: + for cat, count in sorted(candidates, key=lambda x: x[1], reverse=True): + console.print(f" [yellow]{cat}[/]: {count} individual drafts, no WG home") + # Show sample drafts + rows = 
db.conn.execute(""" + SELECT d.name, d.title FROM drafts d + JOIN ratings r ON d.name = r.draft_name + WHERE (d."group" = 'none' OR d."group" IS NULL) + AND r.categories LIKE ? + ORDER BY (r.novelty * 0.30 + r.relevance * 0.25 + r.maturity * 0.20 + + r.momentum * 0.15 + (6 - r.overlap) * 0.10) DESC + LIMIT 5 + """, (f"%{cat}%",)).fetchall() + for row in rows: + console.print(f" - {row['name']}: {row['title'][:60]}") + console.print() + else: + console.print(" [green]All active categories have WG representation.[/]") + + +@wg.command("targets") +@pass_cfg_db +def wg_targets(cfg, db): + """Suggest best WGs for submitting new work in each category.""" + spread = db.category_wg_spread() + summaries = {s["wg"]: s for s in db.wg_summary()} + + console.print("\n[bold]Recommended Submission Targets by Category[/]\n") + + for s in spread: + cat = s["category"] + # Filter to real WGs (not 'none') + real_wgs = [w for w in s["wgs"] if w["wg"] != "none"] + if not real_wgs: + console.print(f" [cyan]{cat}[/]: [yellow]No active WG — individual submission[/]") + continue + + best = real_wgs[0] + wg_info = summaries.get(best["wg"], {}) + console.print( + f" [cyan]{cat}[/]: [bold green]{best['wg']}[/] " + f"({best['count']} drafts" + f"{', avg relevance ' + str(wg_info.get('avg_relevance', '?')) if wg_info else ''})" + ) + if len(real_wgs) > 1: + alts = ", ".join(f"{w['wg']}({w['count']})" for w in real_wgs[1:3]) + console.print(f" Also: {alts}") + + console.print() + + +# ── visualize ──────────────────────────────────────────────────────────── + + +@click.group() +def viz(): + """Generate interactive visualizations (HTML/PNG).""" + pass + + +@viz.command("all") +@pass_cfg_db +def viz_all(cfg, db): + """Generate all available visualizations.""" + from ..visualize import Visualizer + v = Visualizer(cfg, db) + paths = v.generate_all() + console.print(f"\n[bold green]{len(paths)} visualizations[/] saved to {v.output_dir}/") + + +@viz.command("landscape") +@click.option("--method", 
"-m", default="tsne", type=click.Choice(["umap", "tsne"]), + help="Dimensionality reduction method") +@pass_cfg_db +def viz_landscape(cfg, db, method: str): + """2D scatter of draft embeddings colored by category.""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).landscape_scatter(method=method) + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("heatmap") +@pass_cfg_db +def viz_heatmap(cfg, db): + """Clustered similarity heatmap (PNG).""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).similarity_heatmap() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("distributions") +@pass_cfg_db +def viz_distributions(cfg, db): + """Rating dimension distributions by category (PNG).""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).score_distributions() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("timeline") +@pass_cfg_db +def viz_timeline(cfg, db): + """Stacked area chart of monthly submissions.""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).timeline_chart() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("bubble") +@pass_cfg_db +def viz_bubble(cfg, db): + """Interactive bubble chart: novelty vs maturity.""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).bubble_explorer() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("radar") +@pass_cfg_db +def viz_radar(cfg, db): + """Radar chart of average category rating profiles.""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).category_radar() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("network") +@click.option("--min-shared", "-n", default=2, help="Minimum shared drafts for an edge") +@pass_cfg_db +def viz_network(cfg, db, min_shared: int): + """Interactive author collaboration network graph.""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).author_network(min_shared=min_shared) + console.print(f"Saved: 
[bold]{path}[/]") + + +@viz.command("treemap") +@pass_cfg_db +def viz_treemap(cfg, db): + """Category treemap colored by average score.""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).category_treemap() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("quality") +@pass_cfg_db +def viz_quality(cfg, db): + """Score vs uniqueness scatter (quality vs redundancy).""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).score_vs_overlap() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("orgs") +@pass_cfg_db +def viz_orgs(cfg, db): + """Organization contribution bar chart.""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).org_contributions() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("ideas") +@pass_cfg_db +def viz_ideas(cfg, db): + """Ideas frequency chart by type.""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).ideas_chart() + console.print(f"Saved: [bold]{path}[/]") + + +@viz.command("browser") +@pass_cfg_db +def viz_browser(cfg, db): + """Interactive filterable draft browser (standalone HTML).""" + from ..visualize import Visualizer + path = Visualizer(cfg, db).draft_browser() + console.print(f"Saved: [bold]{path}[/]") + + +# ── export ────────────────────────────────────────────────────────────────── + + +@click.command() +@click.option("--type", "export_type", type=click.Choice(["drafts", "ideas", "gaps", "authors", "ratings"]), + required=True, help="Type of data to export") +@click.option("--format", "fmt", type=click.Choice(["json", "csv"]), default="json", help="Output format") +@click.option("--output", "-o", "output_file", type=click.Path(), default=None, + help="Output file (default: stdout)") +def export(export_type: str, fmt: str, output_file: str | None): + """Export data as JSON or CSV.""" + import csv as csv_mod + import io + import json + + cfg = _get_config() + db = Database(cfg) + + try: + rows: list[dict] = [] + + if export_type == 
"drafts": + drafts = db.list_drafts(limit=10000, order_by="name ASC") + for d in drafts: + rating = db.get_rating(d.name) + row = { + "name": d.name, + "title": d.title, + "rev": d.rev, + "date": d.date, + "pages": d.pages or 0, + "group": d.group or "", + } + if rating: + row["score"] = round(rating.composite_score, 2) + row["novelty"] = rating.novelty + row["maturity"] = rating.maturity + row["overlap"] = rating.overlap + row["momentum"] = rating.momentum + row["relevance"] = rating.relevance + row["categories"] = json.dumps(rating.categories) + row["summary"] = rating.summary + rows.append(row) + + elif export_type == "ideas": + ideas = db.all_ideas() + rows = ideas + + elif export_type == "gaps": + gaps = db.all_gaps() + rows = gaps + + elif export_type == "authors": + top = db.top_authors(limit=10000) + for name, aff, cnt, drafts_list, _pid in top: + rows.append({ + "name": name, + "affiliation": aff, + "draft_count": cnt, + "drafts": json.dumps(drafts_list), + }) + + elif export_type == "ratings": + pairs = db.drafts_with_ratings(limit=10000) + for draft, rating in pairs: + rows.append({ + "name": draft.name, + "title": draft.title, + "score": round(rating.composite_score, 2), + "novelty": rating.novelty, + "maturity": rating.maturity, + "overlap": rating.overlap, + "momentum": rating.momentum, + "relevance": rating.relevance, + "categories": json.dumps(rating.categories), + "summary": rating.summary, + }) + + if fmt == "json": + text = json.dumps(rows, indent=2, ensure_ascii=False) + else: + # CSV + if not rows: + text = "" + else: + si = io.StringIO() + writer = csv_mod.DictWriter(si, fieldnames=rows[0].keys()) + writer.writeheader() + for row in rows: + writer.writerow(row) + text = si.getvalue() + + if output_file: + Path(output_file).write_text(text, encoding="utf-8") + console.print(f"Exported [bold green]{len(rows)}[/] {export_type} to [cyan]{output_file}[/] ({fmt})") + else: + click.echo(text) + + finally: + db.close()