Idea quality pipeline, web UI features, academic paper

- Tighten idea extraction prompts (1-4 ideas, no sub-features) reducing 1,907 ideas to 468 across 434 drafts (78% reduction)
- Add embedding-based dedup (ietf dedup-ideas) for same-draft similarity
- Add novelty scoring (ietf ideas score) and filtering (ietf ideas filter) using Claude to rate ideas 1-5, removing 49 generic building blocks
- Final count: 419 high-quality ideas (avg 1.1/draft)
- Web UI: gap explorer with live draft generation and pre-generated demos
- Web UI: D3.js author collaboration network (498 nodes, 1142 edges, 68 clusters, org filtering, interactive zoom/pan)
- Academic paper: 15-page LaTeX workshop paper analyzing the 434-draft AI agent standards landscape
- Save improvement ideas backlog to data/reports/improvement-ideas.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 22:17:57 +01:00
parent 3c3d7e649f
commit 6e3a387778
29 changed files with 6575 additions and 240 deletions
--- a/src/ietf_analyzer/cli.py
+++ b/src/ietf_analyzer/cli.py
@@ -256,6 +256,60 @@ def embed():
        db.close()


+# ── embed-ideas ──────────────────────────────────────────────────────────────
+
+
+@main.command("embed-ideas")
+@click.option("--limit", default=0, help="Max ideas to embed (0=all)")
+@click.option("--batch-size", default=50, help="Batch size for Ollama")
+def embed_ideas(limit: int, batch_size: int):
+    """Generate embeddings for extracted ideas via Ollama."""
+    import ollama as ollama_lib
+    from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn
+
+    cfg = _get_config()
+    db = Database(cfg)
+    client = ollama_lib.Client(host=cfg.ollama_url)
+
+    try:
+        missing = db.ideas_without_embeddings(limit=limit if limit > 0 else 10000)
+        if not missing:
+            console.print("All ideas already have embeddings.")
+            return
+
+        total = len(missing)
+        console.print(f"Embedding [bold]{total}[/] ideas in batches of {batch_size}...")
+
+        count = 0
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            BarColumn(),
+            MofNCompleteColumn(),
+            console=console,
+        ) as progress:
+            task = progress.add_task("Embedding ideas...", total=total)
+            for start in range(0, total, batch_size):
+                batch = missing[start:start + batch_size]
+                texts = [f"{idea['title']}. {idea['description']}" for idea in batch]
+                try:
+                    resp = client.embed(model=cfg.ollama_embed_model, input=texts)
+                    for i, idea in enumerate(batch):
+                        import numpy as np
+                        vec = np.array(resp["embeddings"][i], dtype=np.float32)
+                        db.store_idea_embedding(idea["id"], cfg.ollama_embed_model, vec)
+                        count += 1
+                        progress.advance(task)
+                except Exception as e:
+                    console.print(f"[red]Batch failed: {e}[/]")
+                    for _ in batch:
+                        progress.advance(task)
+
+        console.print(f"Embedded [bold green]{count}[/] ideas")
+    finally:
+        db.close()
+
+
 # ── similar ──────────────────────────────────────────────────────────────────


@@ -531,6 +585,261 @@ def co_occurrence_report():
        db.close()


+@report.command("wg")
+def wg_report():
+    """Working group analysis report — overlaps, alignment, submission targets."""
+    from .reports import Reporter
+    cfg = _get_config()
+    db = Database(cfg)
+    reporter = Reporter(cfg, db)
+    try:
+        path = reporter.wg_report()
+        console.print(f"Report saved: [bold]{path}[/]")
+    finally:
+        db.close()
+
+
+# ── wg (working group analysis) ─────────────────────────────────────────
+
+
+@main.group()
+def wg():
+    """Working group analysis — overlaps, alignment opportunities, submission targets."""
+    pass
+
+
+@wg.command("list")
+@click.option("--min-drafts", default=1, help="Minimum drafts to show a WG")
+def wg_list(min_drafts: int):
+    """List working groups with draft counts and average scores."""
+    cfg = _get_config()
+    db = Database(cfg)
+    try:
+        summaries = db.wg_summary()
+        if not summaries:
+            console.print("[yellow]No WG data. Run: python scripts/backfill-wg-names.py[/]")
+            return
+
+        summaries = [s for s in summaries if s["draft_count"] >= min_drafts]
+
+        table = Table(title=f"Working Groups ({len(summaries)} with >= {min_drafts} drafts)")
+        table.add_column("WG", style="cyan", width=12)
+        table.add_column("#", justify="right", width=4)
+        table.add_column("Ideas", justify="right", width=5)
+        table.add_column("Nov", justify="center", width=4)
+        table.add_column("Mat", justify="center", width=4)
+        table.add_column("Ovl", justify="center", width=4)
+        table.add_column("Mom", justify="center", width=4)
+        table.add_column("Rel", justify="center", width=4)
+        table.add_column("Top Categories")
+
+        for s in summaries:
+            top_cats = sorted(s["categories"].items(), key=lambda x: x[1], reverse=True)[:3]
+            cats_str = ", ".join(f"{c}({n})" for c, n in top_cats) if top_cats else "-"
+            table.add_row(
+                s["wg"], str(s["draft_count"]), str(s["idea_count"]),
+                str(s["avg_novelty"]), str(s["avg_maturity"]),
+                str(s["avg_overlap"]), str(s["avg_momentum"]),
+                str(s["avg_relevance"]), cats_str,
+            )
+
+        console.print(table)
+
+        # Also show individual submission count
+        indiv = db.conn.execute(
+            'SELECT COUNT(*) FROM drafts WHERE "group" = \'none\' OR "group" IS NULL'
+        ).fetchone()[0]
+        console.print(f"\n[dim]Individual submissions (no WG): {indiv}[/]")
+    finally:
+        db.close()
+
+
+@wg.command("show")
+@click.argument("name")
+def wg_show(name: str):
+    """Show details for a specific working group."""
+    cfg = _get_config()
+    db = Database(cfg)
+    try:
+        drafts = db.wg_drafts(name)
+        if not drafts:
+            console.print(f"[red]No drafts found for WG: {name}[/]")
+            return
+
+        console.print(f"\n[bold]Working Group: {name}[/] ({len(drafts)} drafts)\n")
+
+        table = Table()
+        table.add_column("Date", style="dim", width=10)
+        table.add_column("Name", style="cyan")
+        table.add_column("Title", max_width=50)
+        table.add_column("Score", justify="right", width=6)
+
+        for d in drafts:
+            rating = db.get_rating(d.name)
+            score = f"{rating.composite_score:.1f}" if rating else "-"
+            table.add_row(d.date, d.name, d.title[:50], score)
+
+        console.print(table)
+
+        # Show ideas for this WG
+        ideas = []
+        for d in drafts:
+            ideas.extend(db.get_ideas_for_draft(d.name))
+        if ideas:
+            console.print(f"\n[bold]Ideas ({len(ideas)}):[/]")
+            for idea in ideas[:15]:
+                console.print(f"  - [cyan]{idea['title']}[/]: {idea['description'][:80]}")
+            if len(ideas) > 15:
+                console.print(f"  [dim]... and {len(ideas) - 15} more[/]")
+    finally:
+        db.close()
+
+
+@wg.command("overlaps")
+@click.option("--min-wgs", default=2, help="Minimum WGs sharing a category to show")
+def wg_overlaps(min_wgs: int):
+    """Find categories and ideas that span multiple WGs — alignment opportunities."""
+    cfg = _get_config()
+    db = Database(cfg)
+    try:
+        # Category spread across WGs
+        spread = db.category_wg_spread()
+        multi = [s for s in spread if s["wg_count"] >= min_wgs
+                 and not all(w["wg"] == "none" for w in s["wgs"])]
+
+        if multi:
+            console.print(f"\n[bold]Categories spanning {min_wgs}+ WGs[/]\n")
+            for s in multi:
+                wg_strs = [f"{w['wg']}({w['count']})" for w in s["wgs"] if w["wg"] != "none"]
+                if wg_strs:
+                    console.print(f"  [cyan]{s['category']}[/] — {s['total_drafts']} drafts across {s['wg_count']} WGs")
+                    console.print(f"    WGs: {', '.join(wg_strs)}")
+
+        # Idea overlap across WGs
+        idea_overlaps = db.wg_idea_overlap()
+        cross_wg = [o for o in idea_overlaps
+                    if not all(w == "none" for w in o["wg_names"])]
+
+        if cross_wg:
+            console.print(f"\n[bold]Ideas appearing in {min_wgs}+ WGs ({len(cross_wg)} found)[/]\n")
+            for o in cross_wg[:20]:
+                real_wgs = [w for w in o["wg_names"] if w != "none"]
+                console.print(f"  [cyan]{o['idea_title']}[/] — WGs: {', '.join(real_wgs)}")
+                for entry in o["wgs"]:
+                    if entry["wg"] != "none":
+                        console.print(f"    - [{entry['wg']}] {entry['draft_name']}")
+            if len(cross_wg) > 20:
+                console.print(f"\n  [dim]... and {len(cross_wg) - 20} more[/]")
+
+        if not multi and not cross_wg:
+            console.print("[yellow]No cross-WG overlaps found.[/]")
+    finally:
+        db.close()
+
+
+@wg.command("alignment")
+def wg_alignment():
+    """Identify where individual drafts should be consolidated into WG standards."""
+    cfg = _get_config()
+    db = Database(cfg)
+    try:
+        # Compare individual vs WG category distribution
+        dist = db.individual_vs_wg_categories()
+        indiv = dist["individual"]
+        adopted = dist["wg_adopted"]
+
+        console.print("\n[bold]Individual vs WG-Adopted Category Distribution[/]\n")
+
+        table = Table()
+        table.add_column("Category", width=25)
+        table.add_column("Individual", justify="right", width=10)
+        table.add_column("WG-Adopted", justify="right", width=10)
+        table.add_column("Signal", width=40)
+
+        all_cats = sorted(set(list(indiv.keys()) + list(adopted.keys())))
+        for cat in all_cats:
+            i_count = indiv.get(cat, 0)
+            w_count = adopted.get(cat, 0)
+            signal = ""
+            if i_count >= 5 and w_count == 0:
+                signal = "[yellow]High individual activity, no WG — needs WG?[/]"
+            elif i_count >= 3 and w_count >= 1:
+                signal = "[green]WG exists, individual drafts could target it[/]"
+            elif w_count > i_count and i_count > 0:
+                signal = "[dim]WG leading, some individual work[/]"
+            table.add_row(cat, str(i_count), str(w_count), signal)
+
+        console.print(table)
+
+        # Find overlap clusters within individual submissions that might warrant a WG
+        console.print("\n[bold]Consolidation Candidates[/]")
+        console.print("[dim]Categories with many individual drafts but no WG adoption — "
+                      "potential for new WG or BoF[/]\n")
+
+        candidates = []
+        for cat in all_cats:
+            i_count = indiv.get(cat, 0)
+            w_count = adopted.get(cat, 0)
+            if i_count >= 5 and w_count == 0:
+                candidates.append((cat, i_count))
+
+        if candidates:
+            for cat, count in sorted(candidates, key=lambda x: x[1], reverse=True):
+                console.print(f"  [yellow]{cat}[/]: {count} individual drafts, no WG home")
+                # Show sample drafts
+                rows = db.conn.execute("""
+                    SELECT d.name, d.title FROM drafts d
+                    JOIN ratings r ON d.name = r.draft_name
+                    WHERE (d."group" = 'none' OR d."group" IS NULL)
+                      AND r.categories LIKE ?
+                    ORDER BY (r.novelty * 0.30 + r.relevance * 0.25 + r.maturity * 0.20
+                              + r.momentum * 0.15 + (6 - r.overlap) * 0.10) DESC
+                    LIMIT 5
+                """, (f"%{cat}%",)).fetchall()
+                for row in rows:
+                    console.print(f"    - {row['name']}: {row['title'][:60]}")
+                console.print()
+        else:
+            console.print("  [green]All active categories have WG representation.[/]")
+    finally:
+        db.close()
+
+
+@wg.command("targets")
+def wg_targets():
+    """Suggest best WGs for submitting new work in each category."""
+    cfg = _get_config()
+    db = Database(cfg)
+    try:
+        spread = db.category_wg_spread()
+        summaries = {s["wg"]: s for s in db.wg_summary()}
+
+        console.print("\n[bold]Recommended Submission Targets by Category[/]\n")
+
+        for s in spread:
+            cat = s["category"]
+            # Filter to real WGs (not 'none')
+            real_wgs = [w for w in s["wgs"] if w["wg"] != "none"]
+            if not real_wgs:
+                console.print(f"  [cyan]{cat}[/]: [yellow]No active WG — individual submission[/]")
+                continue
+
+            best = real_wgs[0]
+            wg_info = summaries.get(best["wg"], {})
+            console.print(
+                f"  [cyan]{cat}[/]: [bold green]{best['wg']}[/] "
+                f"({best['count']} drafts"
+                f"{', avg relevance ' + str(wg_info.get('avg_relevance', '?')) if wg_info else ''})"
+            )
+            if len(real_wgs) > 1:
+                alts = ", ".join(f"{w['wg']}({w['count']})" for w in real_wgs[1:3])
+                console.print(f"    Also: {alts}")
+
+        console.print()
+    finally:
+        db.close()
+
+
 # ── visualize ────────────────────────────────────────────────────────────


@@ -808,14 +1117,21 @@ def network(top: int):
 # ── ideas ───────────────────────────────────────────────────────────────


-@main.command()
-@click.argument("name", required=False)
+@main.group(invoke_without_command=True)
+@click.option("--name", default=None, help="Extract ideas from a specific draft")
@click.option("--all", "extract_all", is_flag=True, help="Extract ideas from all drafts")
@click.option("--limit", "-n", default=50, help="Max drafts to extract (with --all)")
@click.option("--batch", "-b", default=5, help="Drafts per API call (default 5, set 1 for individual)")
@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)")
-def ideas(name: str | None, extract_all: bool, limit: int, batch: int, cheap: bool):
-    """Extract technical ideas from drafts using Claude."""
+@click.option("--reextract", is_flag=True, help="Clear existing ideas and re-extract with current prompt")
+@click.option("--draft", "reextract_draft", default=None, help="Specific draft to re-extract (with --reextract)")
+@click.pass_context
+def ideas(ctx, name: str | None, extract_all: bool, limit: int, batch: int, cheap: bool,
+          reextract: bool, reextract_draft: str | None):
+    """Extract, score, and filter technical ideas from drafts."""
+    if ctx.invoked_subcommand is not None:
+        return
+
    from .analyzer import Analyzer

    cfg = _get_config()
@@ -823,7 +1139,24 @@ def ideas(name: str | None, extract_all: bool, limit: int, batch: int, cheap: bo
    analyzer = Analyzer(cfg, db)

    try:
-        if extract_all:
+        if reextract:
+            # Clear existing ideas, then re-extract
+            deleted = db.delete_ideas(draft_name=reextract_draft)
+            if reextract_draft:
+                console.print(f"Cleared [bold]{deleted}[/] ideas for {reextract_draft}")
+                idea_list = analyzer.extract_ideas(reextract_draft, use_cache=True)
+                if idea_list:
+                    console.print(f"Re-extracted [bold green]{len(idea_list)}[/] ideas:")
+                    for idea in idea_list:
+                        console.print(f"  [{idea.get('type', '?')}] [bold]{idea['title']}[/]")
+                        console.print(f"    {idea['description']}\n")
+                else:
+                    console.print("[red]Re-extraction failed or no ideas found[/]")
+            else:
+                console.print(f"Cleared [bold]{deleted}[/] ideas from all drafts")
+                count = analyzer.extract_all_ideas(limit=limit, batch_size=batch, cheap=cheap)
+                console.print(f"Re-extracted ideas from [bold green]{count}[/] drafts")
+        elif extract_all:
            count = analyzer.extract_all_ideas(limit=limit, batch_size=batch, cheap=cheap)
            console.print(f"Extracted ideas from [bold green]{count}[/] drafts")
        elif name:
@@ -836,7 +1169,166 @@ def ideas(name: str | None, extract_all: bool, limit: int, batch: int, cheap: bo
            else:
                console.print("[red]Extraction failed or no ideas found[/]")
        else:
-            console.print("Provide a draft name or use --all")
+            console.print("Use --name DRAFT, --all, or a subcommand: ideas score / ideas filter")
+    finally:
+        db.close()
+
+
+@ideas.command("score")
+@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)")
+@click.option("--batch", "-b", default=20, help="Ideas per API call (default 20)")
+def ideas_score(cheap: bool, batch: int):
+    """Score ideas for novelty (1=generic, 5=genuinely novel)."""
+    from .analyzer import Analyzer
+
+    cfg = _get_config()
+    db = Database(cfg)
+    analyzer = Analyzer(cfg, db)
+
+    try:
+        stats = analyzer.score_idea_novelty(batch_size=batch, cheap=cheap)
+
+        if stats["scored_count"] == 0:
+            return
+
+        # Show distribution table
+        dist = db.idea_score_distribution()
+        table = Table(title="Novelty Score Distribution")
+        table.add_column("Score", style="bold", justify="center")
+        table.add_column("Label", style="dim")
+        table.add_column("Count", justify="right")
+        table.add_column("Bar", min_width=30)
+
+        labels = {
+            1: "Generic building block",
+            2: "Obvious extension",
+            3: "Useful but expected",
+            4: "Interesting contribution",
+            5: "Genuinely novel",
+        }
+        max_count = max(dist.values()) if dist else 1
+        for score in range(1, 6):
+            count = dist.get(score, 0)
+            bar_len = int(30 * count / max_count) if max_count > 0 else 0
+            table.add_row(
+                str(score), labels[score], str(count),
+                "[green]" + "#" * bar_len + "[/]"
+            )
+
+        total = sum(dist.values())
+        unscored = db.idea_count() - total
+        console.print(table)
+        console.print(f"\nTotal scored: [bold]{total}[/] | Unscored: {unscored} | Avg: [bold]{stats['avg_score']:.1f}[/]")
+    finally:
+        db.close()
+
+
+@ideas.command("filter")
+@click.option("--min-score", "-m", default=2, help="Remove ideas below this score (default 2)")
+@click.option("--dry-run/--execute", default=True, help="Preview (default) or actually delete")
+def ideas_filter(min_score: int, dry_run: bool):
+    """Filter out low-novelty ideas by score threshold."""
+    cfg = _get_config()
+    db = Database(cfg)
+
+    try:
+        candidates = db.ideas_below_score(min_score)
+        if not candidates:
+            console.print(f"No ideas with novelty_score < {min_score}.")
+            return
+
+        # Show what would be removed
+        table = Table(
+            title=f"Ideas with novelty_score < {min_score} "
+                  f"({'DRY RUN' if dry_run else 'WILL DELETE'})"
+        )
+        table.add_column("Score", style="bold", justify="center")
+        table.add_column("Idea", style="cyan", max_width=40)
+        table.add_column("Draft", max_width=50)
+        table.add_column("Description", max_width=60)
+
+        for idea in candidates[:50]:  # Show first 50
+            table.add_row(
+                str(idea["novelty_score"]),
+                idea["title"],
+                idea["draft_title"],
+                idea["description"][:60] + ("..." if len(idea["description"]) > 60 else ""),
+            )
+
+        console.print(table)
+
+        if len(candidates) > 50:
+            console.print(f"  ... and {len(candidates) - 50} more")
+
+        console.print(f"\nTotal to remove: [bold red]{len(candidates)}[/] / {db.idea_count()} ideas")
+
+        if not dry_run:
+            deleted = db.delete_low_score_ideas(min_score)
+            console.print(f"[bold red]Deleted {deleted} low-novelty ideas.[/]")
+            console.print(f"Remaining ideas: [bold green]{db.idea_count()}[/]")
+        else:
+            console.print("[dim]Use --execute to actually delete.[/]")
+    finally:
+        db.close()
+
+
+# ── dedup-ideas ─────────────────────────────────────────────────────────
+
+
+@main.command("dedup-ideas")
+@click.option("--threshold", "-t", default=0.85, type=float,
+              help="Cosine similarity threshold for merging (default 0.85)")
+@click.option("--dry-run/--execute", default=True,
+              help="Preview merges (default) vs actually delete duplicates")
+@click.option("--draft", "draft_name", default=None,
+              help="Limit to a single draft name")
+def dedup_ideas(threshold: float, dry_run: bool, draft_name: str | None):
+    """Deduplicate similar ideas within each draft using embedding similarity."""
+    from .analyzer import Analyzer
+
+    cfg = _get_config()
+    db = Database(cfg)
+    analyzer = Analyzer(cfg, db)
+
+    try:
+        mode = "[bold yellow]DRY RUN[/]" if dry_run else "[bold red]EXECUTE[/]"
+        console.print(f"\n{mode} — Deduplicating ideas (threshold={threshold})")
+        if draft_name:
+            console.print(f"Limiting to draft: [bold]{draft_name}[/]")
+        console.print()
+
+        result = analyzer.dedup_ideas(
+            threshold=threshold, dry_run=dry_run, draft_name=draft_name
+        )
+
+        if result["examples"]:
+            table = Table(title="Merge Candidates" if dry_run else "Merged Ideas")
+            table.add_column("Draft", style="dim", max_width=40)
+            table.add_column("Keep", style="green")
+            table.add_column("Drop", style="red")
+            table.add_column("Similarity", justify="right")
+
+            for ex in result["examples"]:
+                table.add_row(
+                    ex["draft"].split("/")[-1][:40],
+                    ex["keep"],
+                    ex["drop"],
+                    f"{ex['similarity']:.3f}",
+                )
+            console.print(table)
+            console.print()
+
+        action = "Would remove" if dry_run else "Removed"
+        console.print(
+            f"Ideas before: [bold]{result['total_before']}[/]  |  "
+            f"{action}: [bold]{result['merged_count']}[/]  |  "
+            f"After: [bold]{result['total_after']}[/]"
+        )
+
+        if dry_run and result["merged_count"] > 0:
+            console.print(
+                "\n[dim]Run with --execute to apply these merges.[/]"
+            )
    finally:
        db.close()

@@ -2024,3 +2516,163 @@ def observatory_diff(since: str | None):
                console.print(f"    [{d.get('source', '?')}] {d.get('name', '?')}: {d.get('title', '')[:60]}")
    finally:
        db.close()
+
+
+# ── monitor ─────────────────────────────────────────────────────────────
+
+
+@main.group()
+def monitor():
+    """Monitor IETF Datatracker for new AI/agent drafts."""
+    pass
+
+
+@monitor.command("run")
+@click.option("--analyze/--no-analyze", default=True, help="Analyze new drafts")
+@click.option("--embed/--no-embed", default=True, help="Generate embeddings")
+@click.option("--ideas/--no-ideas", default=True, help="Extract ideas")
+def monitor_run(analyze, embed, ideas):
+    """Run one monitoring cycle: fetch -> analyze -> embed -> ideas."""
+    from .analyzer import Analyzer
+    from .embeddings import Embedder
+    from .fetcher import Fetcher
+
+    cfg = _get_config()
+    db = Database(cfg)
+    run_id = db.start_monitor_run()
+    stats = {
+        "new_drafts_found": 0,
+        "drafts_analyzed": 0,
+        "drafts_embedded": 0,
+        "ideas_extracted": 0,
+    }
+
+    try:
+        console.print("[bold]Monitor run started[/]")
+
+        # Determine since date from last successful run
+        last_run = db.get_last_successful_run()
+        since = last_run["completed_at"][:10] if last_run and last_run.get("completed_at") else cfg.fetch_since
+        console.print(f"  Fetching drafts since: [cyan]{since}[/]")
+
+        # Fetch new drafts
+        fetcher = Fetcher(cfg)
+        try:
+            existing_count = db.count_drafts()
+            drafts = fetcher.search_drafts(keywords=list(cfg.search_keywords), since=since)
+            for draft in drafts:
+                db.upsert_draft(draft)
+
+            # Download text for any missing
+            missing_text = db.drafts_without_text()
+            if missing_text:
+                console.print(f"  Downloading text for [bold]{len(missing_text)}[/] drafts...")
+                texts = fetcher.download_texts(missing_text)
+                for name, text in texts.items():
+                    draft = db.get_draft(name)
+                    if draft:
+                        draft.full_text = text
+                        db.upsert_draft(draft)
+        finally:
+            fetcher.close()
+
+        new_count = db.count_drafts() - existing_count
+        stats["new_drafts_found"] = max(new_count, 0)
+        console.print(f"  New drafts found: [bold green]{stats['new_drafts_found']}[/]")
+
+        # Analyze unrated drafts
+        if analyze:
+            unrated = db.unrated_drafts(limit=200)
+            if unrated:
+                console.print(f"  Analyzing [bold]{len(unrated)}[/] unrated drafts...")
+                analyzer = Analyzer(cfg, db)
+                count = analyzer.rate_all_unrated(limit=200)
+                stats["drafts_analyzed"] = count
+                console.print(f"  Analyzed: [bold green]{count}[/]")
+
+        # Embed missing drafts
+        if embed:
+            missing_embed = db.drafts_without_embeddings(limit=500)
+            if missing_embed:
+                console.print(f"  Embedding [bold]{len(missing_embed)}[/] drafts...")
+                embedder = Embedder(cfg, db)
+                count = embedder.embed_all_missing()
+                stats["drafts_embedded"] = count
+                console.print(f"  Embedded: [bold green]{count}[/]")
+
+        # Extract ideas
+        if ideas:
+            missing_ideas = db.drafts_without_ideas(limit=500)
+            if missing_ideas:
+                console.print(f"  Extracting ideas from [bold]{len(missing_ideas)}[/] drafts...")
+                analyzer = Analyzer(cfg, db)
+                count = analyzer.extract_all_ideas(limit=500, batch_size=5, cheap=True)
+                stats["ideas_extracted"] = count
+                console.print(f"  Ideas extracted from: [bold green]{count}[/] drafts")
+
+        db.complete_monitor_run(run_id, stats)
+        console.print("\n[bold green]Monitor run completed successfully[/]")
+
+    except Exception as e:
+        db.fail_monitor_run(run_id, str(e))
+        console.print(f"\n[bold red]Monitor run failed:[/] {e}")
+        raise
+    finally:
+        db.close()
+
+
+@monitor.command("status")
+def monitor_status():
+    """Show monitoring status and recent runs."""
+    cfg = _get_config()
+    db = Database(cfg)
+
+    try:
+        runs = db.get_monitor_runs(limit=20)
+        last = db.get_last_successful_run()
+
+        # Unprocessed counts
+        unrated = len(db.unrated_drafts(limit=9999))
+        unembedded = len(db.drafts_without_embeddings(limit=9999))
+        no_ideas = len(db.drafts_without_ideas(limit=9999))
+
+        console.print("\n[bold]Monitor Status[/]\n")
+
+        if last:
+            console.print(f"  Last successful run: [green]{last['completed_at']}[/]")
+            console.print(f"  Duration: {last['duration_seconds']:.1f}s")
+            console.print(f"  New drafts: {last['new_drafts_found']}")
+        else:
+            console.print("  [yellow]No successful runs yet[/]")
+
+        console.print(f"\n[bold]Unprocessed[/]")
+        console.print(f"  Unrated:    [{'yellow' if unrated > 0 else 'green'}]{unrated}[/]")
+        console.print(f"  Unembedded: [{'yellow' if unembedded > 0 else 'green'}]{unembedded}[/]")
+        console.print(f"  No ideas:   [{'yellow' if no_ideas > 0 else 'green'}]{no_ideas}[/]")
+
+        if runs:
+            console.print(f"\n[bold]Recent Runs[/] ({len(runs)} total)\n")
+            table = Table()
+            table.add_column("#", justify="right", width=4)
+            table.add_column("Started", width=20)
+            table.add_column("Duration", justify="right", width=8)
+            table.add_column("Status", width=10)
+            table.add_column("New", justify="right", width=5)
+            table.add_column("Analyzed", justify="right", width=8)
+            table.add_column("Embedded", justify="right", width=8)
+            table.add_column("Ideas", justify="right", width=6)
+            for r in runs:
+                status_style = {"completed": "green", "failed": "red", "running": "yellow"}.get(r["status"], "dim")
+                table.add_row(
+                    str(r["id"]),
+                    r["started_at"][:19] if r["started_at"] else "",
+                    f"{r['duration_seconds']:.1f}s" if r["duration_seconds"] else "-",
+                    f"[{status_style}]{r['status']}[/{status_style}]",
+                    str(r["new_drafts_found"]),
+                    str(r["drafts_analyzed"]),
+                    str(r["drafts_embedded"]),
+                    str(r["ideas_extracted"]),
+                )
+            console.print(table)
+    finally:
+        db.close()