Idea quality pipeline, web UI features, academic paper
- Tighten idea extraction prompts (1-4 ideas, no sub-features) reducing 1,907 ideas to 468 across 434 drafts (78% reduction)
- Add embedding-based dedup (ietf dedup-ideas) for same-draft similarity
- Add novelty scoring (ietf ideas score) and filtering (ietf ideas filter) using Claude to rate ideas 1-5, removing 49 generic building blocks
- Final count: 419 high-quality ideas (avg 1.1/draft)
- Web UI: gap explorer with live draft generation and pre-generated demos
- Web UI: D3.js author collaboration network (498 nodes, 1142 edges, 68 clusters, org filtering, interactive zoom/pan)
- Academic paper: 15-page LaTeX workshop paper analyzing the 434-draft AI agent standards landscape
- Save improvement ideas backlog to data/reports/improvement-ideas.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -256,6 +256,60 @@ def embed():
|
||||
db.close()
|
||||
|
||||
|
||||
# ── embed-ideas ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command("embed-ideas")
@click.option("--limit", default=0, help="Max ideas to embed (0=all)")
@click.option("--batch-size", default=50, help="Batch size for Ollama")
def embed_ideas(limit: int, batch_size: int):
    """Generate embeddings for extracted ideas via Ollama."""
    # NOTE: the docstring above is what Click prints for `--help`, so the
    # implementation notes are kept in comments to leave that output unchanged.
    #
    # Flow: fetch ideas that have no embedding yet, send their
    # "<title>. <description>" text to Ollama in batches, and store each
    # returned vector as float32. A failing batch is reported and skipped;
    # the remaining batches still run.
    #
    # Parameters:
    #   limit      -- maximum number of ideas to embed; 0 means "all" (see the
    #                 NOTE on the 10000 cap below).
    #   batch_size -- number of idea texts sent per embed request; must be >= 1.
    #
    # Raises:
    #   click.BadParameter -- when batch_size is smaller than 1.

    # Imported lazily (once, not per idea) so unrelated commands do not pay
    # for these dependencies at CLI start-up.
    import numpy as np
    import ollama as ollama_lib
    from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, MofNCompleteColumn

    # range(0, total, batch_size) raises ValueError for a zero step and yields
    # nothing for a negative one; reject both up front with a usable message.
    if batch_size < 1:
        raise click.BadParameter("must be at least 1", param_hint="--batch-size")

    cfg = _get_config()
    db = Database(cfg)

    try:
        # Created inside the try so the database is closed even if the client
        # cannot be constructed.
        client = ollama_lib.Client(host=cfg.ollama_url)

        # NOTE(review): "0 = all" is implemented as a hard cap of 10000 rows;
        # a larger backlog needs several invocations.
        missing = db.ideas_without_embeddings(limit=limit if limit > 0 else 10000)
        if not missing:
            console.print("All ideas already have embeddings.")
            return

        total = len(missing)
        console.print(f"Embedding [bold]{total}[/] ideas in batches of {batch_size}...")

        count = 0
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            MofNCompleteColumn(),
            console=console,
        ) as progress:
            task = progress.add_task("Embedding ideas...", total=total)
            for start in range(0, total, batch_size):
                batch = missing[start:start + batch_size]
                texts = [f"{idea['title']}. {idea['description']}" for idea in batch]
                # Ideas of this batch already reflected in the progress bar.
                advanced = 0
                try:
                    resp = client.embed(model=cfg.ollama_embed_model, input=texts)
                    vectors = resp["embeddings"]
                    for i, idea in enumerate(batch):
                        # Indexing (rather than zip) keeps a short response an
                        # error instead of silently skipping ideas.
                        vec = np.array(vectors[i], dtype=np.float32)
                        db.store_idea_embedding(idea["id"], cfg.ollama_embed_model, vec)
                        count += 1
                        progress.advance(task)
                        advanced += 1
                except Exception as e:
                    # Best-effort: report the failure and move on to the next batch.
                    console.print(f"[red]Batch failed: {e}[/]")
                    # Advance only by the ideas not yet counted; advancing by
                    # the whole batch would overshoot the total when the
                    # failure happened part-way through storing.
                    progress.advance(task, len(batch) - advanced)

        console.print(f"Embedded [bold green]{count}[/] ideas")
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── similar ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -531,6 +585,261 @@ def co_occurrence_report():
|
||||
db.close()
|
||||
|
||||
|
||||
@report.command("wg")
def wg_report():
    """Working group analysis report — overlaps, alignment, submission targets."""
    # The docstring is the Click help text; notes are kept in comments.
    # Builds a Reporter over the configured database, asks it for the WG
    # report, and prints the path that the reporter returns.
    from .reports import Reporter

    config = _get_config()
    database = Database(config)
    report_builder = Reporter(config, database)

    try:
        saved_to = report_builder.wg_report()
        console.print(f"Report saved: [bold]{saved_to}[/]")
    finally:
        # Always release the database handle, even if report generation fails.
        database.close()
|
||||
|
||||
|
||||
# ── wg (working group analysis) ─────────────────────────────────────────
|
||||
|
||||
|
||||
@main.group()
def wg():
    """Working group analysis — overlaps, alignment opportunities, submission targets."""
    # Pure command group: the body is intentionally empty (the docstring is a
    # sufficient function body); subcommands attach via @wg.command(...).
|
||||
|
||||
|
||||
@wg.command("list")
@click.option("--min-drafts", default=1, help="Minimum drafts to show a WG")
def wg_list(min_drafts: int):
    """List working groups with draft counts and average scores."""
    # Renders one table row per working group that has at least `min_drafts`
    # drafts, followed by a count of drafts that belong to no working group.
    config = _get_config()
    database = Database(config)

    try:
        all_groups = database.wg_summary()
        if not all_groups:
            console.print("[yellow]No WG data. Run: python scripts/backfill-wg-names.py[/]")
            return

        shown = [group for group in all_groups if group["draft_count"] >= min_drafts]

        table = Table(title=f"Working Groups ({len(shown)} with >= {min_drafts} drafts)")
        table.add_column("WG", style="cyan", width=12)
        table.add_column("#", justify="right", width=4)
        table.add_column("Ideas", justify="right", width=5)
        # The five average-score columns share the same layout.
        for heading in ("Nov", "Mat", "Ovl", "Mom", "Rel"):
            table.add_column(heading, justify="center", width=4)
        table.add_column("Top Categories")

        # Summary keys rendered (stringified) between the WG name and the
        # category cell, in column order.
        value_keys = (
            "draft_count",
            "idea_count",
            "avg_novelty",
            "avg_maturity",
            "avg_overlap",
            "avg_momentum",
            "avg_relevance",
        )

        for group in shown:
            # Three most frequent categories, highest count first.
            ranked = sorted(group["categories"].items(), key=lambda kv: kv[1], reverse=True)
            leading = ranked[:3]
            if leading:
                category_cell = ", ".join(f"{label}({hits})" for label, hits in leading)
            else:
                category_cell = "-"
            table.add_row(
                group["wg"],
                *(str(group[key]) for key in value_keys),
                category_cell,
            )

        console.print(table)

        # Drafts without a working group are stored with group 'none' or NULL.
        cursor = database.conn.execute(
            'SELECT COUNT(*) FROM drafts WHERE "group" = \'none\' OR "group" IS NULL'
        )
        individual_total = cursor.fetchone()[0]
        console.print(f"\n[dim]Individual submissions (no WG): {individual_total}[/]")
    finally:
        database.close()
|
||||
|
||||
|
||||
@wg.command("show")
@click.argument("name")
def wg_show(name: str):
    """Show details for a specific working group."""
    # Prints a table of the group's drafts (with composite score when a rating
    # exists) and then a preview of up to 15 ideas extracted from those drafts.
    config = _get_config()
    database = Database(config)

    try:
        members = database.wg_drafts(name)
        if not members:
            console.print(f"[red]No drafts found for WG: {name}[/]")
            return

        console.print(f"\n[bold]Working Group: {name}[/] ({len(members)} drafts)\n")

        listing = Table()
        listing.add_column("Date", style="dim", width=10)
        listing.add_column("Name", style="cyan")
        listing.add_column("Title", max_width=50)
        listing.add_column("Score", justify="right", width=6)

        for draft in members:
            rating = database.get_rating(draft.name)
            if rating:
                score_cell = f"{rating.composite_score:.1f}"
            else:
                score_cell = "-"  # draft has not been rated
            listing.add_row(draft.date, draft.name, draft.title[:50], score_cell)

        console.print(listing)

        # Gather the ideas of every draft in this group, in draft order.
        collected = [
            idea
            for draft in members
            for idea in database.get_ideas_for_draft(draft.name)
        ]
        if not collected:
            return

        preview_cap = 15
        console.print(f"\n[bold]Ideas ({len(collected)}):[/]")
        for idea in collected[:preview_cap]:
            console.print(f" - [cyan]{idea['title']}[/]: {idea['description'][:80]}")

        hidden = len(collected) - preview_cap
        if hidden > 0:
            console.print(f" [dim]... and {hidden} more[/]")
    finally:
        database.close()
|
||||
|
||||
|
||||
@wg.command("overlaps")
@click.option("--min-wgs", default=2, help="Minimum WGs sharing a category to show")
def wg_overlaps(min_wgs: int):
    """Find categories and ideas that span multiple WGs — alignment opportunities."""
    # The docstring is the Click help text; notes are kept in comments.
    #
    # Two sections are printed:
    #   1. categories whose drafts are spread over at least `min_wgs` groups;
    #   2. ideas that show up in at least `min_wgs` groups (first 20 listed).
    # In both, entries that involve only the pseudo-group "none" (individual
    # submissions) are dropped, and "none" is hidden from the printed lists.
    cfg = _get_config()
    db = Database(cfg)

    try:
        # Category spread across WGs
        spread = db.category_wg_spread()
        multi = [
            s for s in spread
            if s["wg_count"] >= min_wgs
            and not all(w["wg"] == "none" for w in s["wgs"])
        ]

        if multi:
            console.print(f"\n[bold]Categories spanning {min_wgs}+ WGs[/]\n")
            for s in multi:
                wg_strs = [f"{w['wg']}({w['count']})" for w in s["wgs"] if w["wg"] != "none"]
                if wg_strs:
                    console.print(f" [cyan]{s['category']}[/] — {s['total_drafts']} drafts across {s['wg_count']} WGs")
                    console.print(f" WGs: {', '.join(wg_strs)}")

        # Idea overlap across WGs
        idea_overlaps = db.wg_idea_overlap()
        # Apply --min-wgs here as well: the heading below promises
        # "{min_wgs}+ WGs", which was previously printed without the threshold
        # ever being checked. Distinct group names are counted the same way as
        # the category filter above (i.e. "none" still counts as a group).
        # NOTE(review): assumes o["wg_names"] is an iterable of group-name
        # strings — confirm against Database.wg_idea_overlap.
        cross_wg = [
            o for o in idea_overlaps
            if len(set(o["wg_names"])) >= min_wgs
            and not all(w == "none" for w in o["wg_names"])
        ]

        if cross_wg:
            console.print(f"\n[bold]Ideas appearing in {min_wgs}+ WGs ({len(cross_wg)} found)[/]\n")
            for o in cross_wg[:20]:
                real_wgs = [w for w in o["wg_names"] if w != "none"]
                console.print(f" [cyan]{o['idea_title']}[/] — WGs: {', '.join(real_wgs)}")
                for entry in o["wgs"]:
                    if entry["wg"] != "none":
                        console.print(f" - [{entry['wg']}] {entry['draft_name']}")
            if len(cross_wg) > 20:
                console.print(f"\n [dim]... and {len(cross_wg) - 20} more[/]")

        if not multi and not cross_wg:
            console.print("[yellow]No cross-WG overlaps found.[/]")
    finally:
        db.close()
|
||||
|
||||
|
||||
@wg.command("alignment")
def wg_alignment():
    """Identify where individual drafts should be consolidated into WG standards."""
    # Part 1 prints, per category, how many drafts are individual submissions
    # versus WG-adopted, with a short signal. Part 2 lists the categories that
    # have at least 5 individual drafts and no WG-adopted draft, each with up
    # to five sample drafts ordered by a weighted rating score.
    config = _get_config()
    database = Database(config)

    try:
        distribution = database.individual_vs_wg_categories()
        solo_counts = distribution["individual"]
        wg_counts = distribution["wg_adopted"]

        console.print("\n[bold]Individual vs WG-Adopted Category Distribution[/]\n")

        comparison = Table()
        comparison.add_column("Category", width=25)
        comparison.add_column("Individual", justify="right", width=10)
        comparison.add_column("WG-Adopted", justify="right", width=10)
        comparison.add_column("Signal", width=40)

        # Categories that qualify as consolidation candidates are collected in
        # the same pass that fills the table (same predicate, same order).
        homeless = []
        for category in sorted({*solo_counts.keys(), *wg_counts.keys()}):
            n_solo = solo_counts.get(category, 0)
            n_wg = wg_counts.get(category, 0)

            if n_solo >= 5 and n_wg == 0:
                note = "[yellow]High individual activity, no WG — needs WG?[/]"
                homeless.append((category, n_solo))
            elif n_solo >= 3 and n_wg >= 1:
                note = "[green]WG exists, individual drafts could target it[/]"
            elif n_wg > n_solo and n_solo > 0:
                note = "[dim]WG leading, some individual work[/]"
            else:
                note = ""
            comparison.add_row(category, str(n_solo), str(n_wg), note)

        console.print(comparison)

        console.print("\n[bold]Consolidation Candidates[/]")
        console.print("[dim]Categories with many individual drafts but no WG adoption — "
                      "potential for new WG or BoF[/]\n")

        if not homeless:
            console.print(" [green]All active categories have WG representation.[/]")
            return

        # Largest pile of individual drafts first; ties keep alphabetical order.
        homeless.sort(key=lambda pair: pair[1], reverse=True)

        for category, n_solo in homeless:
            console.print(f" [yellow]{category}[/]: {n_solo} individual drafts, no WG home")
            # Sample drafts: individual submissions whose rating categories
            # mention this category, best weighted score first.
            samples = database.conn.execute("""
                SELECT d.name, d.title FROM drafts d
                JOIN ratings r ON d.name = r.draft_name
                WHERE (d."group" = 'none' OR d."group" IS NULL)
                AND r.categories LIKE ?
                ORDER BY (r.novelty * 0.30 + r.relevance * 0.25 + r.maturity * 0.20
                + r.momentum * 0.15 + (6 - r.overlap) * 0.10) DESC
                LIMIT 5
                """, (f"%{category}%",)).fetchall()
            for sample in samples:
                console.print(f" - {sample['name']}: {sample['title'][:60]}")
            console.print()
    finally:
        database.close()
|
||||
|
||||
|
||||
@wg.command("targets")
def wg_targets():
    """Suggest best WGs for submitting new work in each category."""
    # For every category, prints the first non-"none" working group from the
    # category spread as the recommended target, plus up to two alternatives.
    # NOTE(review): the first entry is treated as the best one, which presumes
    # category_wg_spread() returns each category's WGs already ranked — confirm.
    config = _get_config()
    database = Database(config)

    try:
        spread = database.category_wg_spread()

        # Index the per-WG summaries by group name for quick lookup.
        summary_by_wg = {}
        for summary in database.wg_summary():
            summary_by_wg[summary["wg"]] = summary

        console.print("\n[bold]Recommended Submission Targets by Category[/]\n")

        for row in spread:
            category = row["category"]
            # "none" marks individual submissions and is never a target.
            candidates = [w for w in row["wgs"] if w["wg"] != "none"]
            if not candidates:
                console.print(f" [cyan]{category}[/]: [yellow]No active WG — individual submission[/]")
                continue

            top = candidates[0]
            runners_up = candidates[1:3]

            details = summary_by_wg.get(top["wg"], {})
            # The relevance suffix is only shown when a summary exists for the WG.
            relevance_note = ""
            if details:
                relevance_note = ", avg relevance " + str(details.get("avg_relevance", "?"))

            console.print(
                f" [cyan]{category}[/]: [bold green]{top['wg']}[/] "
                f"({top['count']} drafts{relevance_note})"
            )
            if runners_up:
                alternatives = ", ".join(f"{w['wg']}({w['count']})" for w in runners_up)
                console.print(f" Also: {alternatives}")

        console.print()
    finally:
        database.close()
|
||||
|
||||
|
||||
# ── visualize ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -808,14 +1117,21 @@ def network(top: int):
|
||||
# ── ideas ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command()
|
||||
@click.argument("name", required=False)
|
||||
@main.group(invoke_without_command=True)
|
||||
@click.option("--name", default=None, help="Extract ideas from a specific draft")
|
||||
@click.option("--all", "extract_all", is_flag=True, help="Extract ideas from all drafts")
|
||||
@click.option("--limit", "-n", default=50, help="Max drafts to extract (with --all)")
|
||||
@click.option("--batch", "-b", default=5, help="Drafts per API call (default 5, set 1 for individual)")
|
||||
@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)")
|
||||
def ideas(name: str | None, extract_all: bool, limit: int, batch: int, cheap: bool):
|
||||
"""Extract technical ideas from drafts using Claude."""
|
||||
@click.option("--reextract", is_flag=True, help="Clear existing ideas and re-extract with current prompt")
|
||||
@click.option("--draft", "reextract_draft", default=None, help="Specific draft to re-extract (with --reextract)")
|
||||
@click.pass_context
|
||||
def ideas(ctx, name: str | None, extract_all: bool, limit: int, batch: int, cheap: bool,
|
||||
reextract: bool, reextract_draft: str | None):
|
||||
"""Extract, score, and filter technical ideas from drafts."""
|
||||
if ctx.invoked_subcommand is not None:
|
||||
return
|
||||
|
||||
from .analyzer import Analyzer
|
||||
|
||||
cfg = _get_config()
|
||||
@@ -823,7 +1139,24 @@ def ideas(name: str | None, extract_all: bool, limit: int, batch: int, cheap: bo
|
||||
analyzer = Analyzer(cfg, db)
|
||||
|
||||
try:
|
||||
if extract_all:
|
||||
if reextract:
|
||||
# Clear existing ideas, then re-extract
|
||||
deleted = db.delete_ideas(draft_name=reextract_draft)
|
||||
if reextract_draft:
|
||||
console.print(f"Cleared [bold]{deleted}[/] ideas for {reextract_draft}")
|
||||
idea_list = analyzer.extract_ideas(reextract_draft, use_cache=True)
|
||||
if idea_list:
|
||||
console.print(f"Re-extracted [bold green]{len(idea_list)}[/] ideas:")
|
||||
for idea in idea_list:
|
||||
console.print(f" [{idea.get('type', '?')}] [bold]{idea['title']}[/]")
|
||||
console.print(f" {idea['description']}\n")
|
||||
else:
|
||||
console.print("[red]Re-extraction failed or no ideas found[/]")
|
||||
else:
|
||||
console.print(f"Cleared [bold]{deleted}[/] ideas from all drafts")
|
||||
count = analyzer.extract_all_ideas(limit=limit, batch_size=batch, cheap=cheap)
|
||||
console.print(f"Re-extracted ideas from [bold green]{count}[/] drafts")
|
||||
elif extract_all:
|
||||
count = analyzer.extract_all_ideas(limit=limit, batch_size=batch, cheap=cheap)
|
||||
console.print(f"Extracted ideas from [bold green]{count}[/] drafts")
|
||||
elif name:
|
||||
@@ -836,7 +1169,166 @@ def ideas(name: str | None, extract_all: bool, limit: int, batch: int, cheap: bo
|
||||
else:
|
||||
console.print("[red]Extraction failed or no ideas found[/]")
|
||||
else:
|
||||
console.print("Provide a draft name or use --all")
|
||||
console.print("Use --name DRAFT, --all, or a subcommand: ideas score / ideas filter")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
@ideas.command("score")
@click.option("--cheap/--quality", default=True, help="Use Haiku (cheap) vs Sonnet (quality)")
@click.option("--batch", "-b", default=20, help="Ideas per API call (default 20)")
def ideas_score(cheap: bool, batch: int):
    """Score ideas for novelty (1=generic, 5=genuinely novel)."""
    # Runs the analyzer's novelty scoring and, if anything was scored, prints
    # a histogram of scores 1-5 plus totals and the average from the run.
    from .analyzer import Analyzer

    config = _get_config()
    database = Database(config)
    scorer = Analyzer(config, database)

    try:
        outcome = scorer.score_idea_novelty(batch_size=batch, cheap=cheap)
        if outcome["scored_count"] == 0:
            # Nothing was scored in this run: no table is printed here.
            return

        histogram = database.idea_score_distribution()

        chart = Table(title="Novelty Score Distribution")
        chart.add_column("Score", style="bold", justify="center")
        chart.add_column("Label", style="dim")
        chart.add_column("Count", justify="right")
        chart.add_column("Bar", min_width=30)

        score_labels = (
            (1, "Generic building block"),
            (2, "Obvious extension"),
            (3, "Useful but expected"),
            (4, "Interesting contribution"),
            (5, "Genuinely novel"),
        )

        # Bars are scaled so the most common score spans 30 characters.
        peak = max(histogram.values()) if histogram else 1
        for level, label in score_labels:
            occurrences = histogram.get(level, 0)
            if peak > 0:
                bar_width = int(30 * occurrences / peak)
            else:
                bar_width = 0
            bar = "[green]" + "#" * bar_width + "[/]"
            chart.add_row(str(level), label, str(occurrences), bar)

        scored_total = sum(histogram.values())
        unscored = database.idea_count() - scored_total
        console.print(chart)
        console.print(f"\nTotal scored: [bold]{scored_total}[/] | Unscored: {unscored} | Avg: [bold]{outcome['avg_score']:.1f}[/]")
    finally:
        database.close()
|
||||
|
||||
|
||||
@ideas.command("filter")
@click.option("--min-score", "-m", default=2, help="Remove ideas below this score (default 2)")
@click.option("--dry-run/--execute", default=True, help="Preview (default) or actually delete")
def ideas_filter(min_score: int, dry_run: bool):
    """Filter out low-novelty ideas by score threshold."""
    # Shows (at most 50 of) the ideas scoring below `min_score`; deletion only
    # happens when the command is run with --execute.
    config = _get_config()
    database = Database(config)

    try:
        doomed = database.ideas_below_score(min_score)
        if not doomed:
            console.print(f"No ideas with novelty_score < {min_score}.")
            return

        banner = "DRY RUN" if dry_run else "WILL DELETE"
        preview = Table(title=f"Ideas with novelty_score < {min_score} ({banner})")
        preview.add_column("Score", style="bold", justify="center")
        preview.add_column("Idea", style="cyan", max_width=40)
        preview.add_column("Draft", max_width=50)
        preview.add_column("Description", max_width=60)

        preview_cap = 50
        for entry in doomed[:preview_cap]:
            cells = [str(entry["novelty_score"]), entry["title"], entry["draft_title"]]
            blurb = entry["description"]
            # Descriptions are clipped to 60 characters, with an ellipsis when cut.
            cells.append(blurb[:60] + ("..." if len(blurb) > 60 else ""))
            preview.add_row(*cells)

        console.print(preview)

        overflow = len(doomed) - preview_cap
        if overflow > 0:
            console.print(f" ... and {overflow} more")

        console.print(f"\nTotal to remove: [bold red]{len(doomed)}[/] / {database.idea_count()} ideas")

        if dry_run:
            console.print("[dim]Use --execute to actually delete.[/]")
            return

        removed = database.delete_low_score_ideas(min_score)
        console.print(f"[bold red]Deleted {removed} low-novelty ideas.[/]")
        console.print(f"Remaining ideas: [bold green]{database.idea_count()}[/]")
    finally:
        database.close()
|
||||
|
||||
|
||||
# ── dedup-ideas ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.command("dedup-ideas")
@click.option("--threshold", "-t", default=0.85, type=float,
              help="Cosine similarity threshold for merging (default 0.85)")
@click.option("--dry-run/--execute", default=True,
              help="Preview merges (default) vs actually delete duplicates")
@click.option("--draft", "draft_name", default=None,
              help="Limit to a single draft name")
def dedup_ideas(threshold: float, dry_run: bool, draft_name: str | None):
    """Deduplicate similar ideas within each draft using embedding similarity."""
    # Delegates the actual work to Analyzer.dedup_ideas and renders its result:
    # a table of example keep/drop pairs and before/removed/after counts.
    from .analyzer import Analyzer

    config = _get_config()
    database = Database(config)
    deduper = Analyzer(config, database)

    try:
        if dry_run:
            mode_banner = "[bold yellow]DRY RUN[/]"
        else:
            mode_banner = "[bold red]EXECUTE[/]"
        console.print(f"\n{mode_banner} — Deduplicating ideas (threshold={threshold})")
        if draft_name:
            console.print(f"Limiting to draft: [bold]{draft_name}[/]")
        console.print()

        outcome = deduper.dedup_ideas(
            threshold=threshold, dry_run=dry_run, draft_name=draft_name
        )

        samples = outcome["examples"]
        if samples:
            pairs = Table(title="Merge Candidates" if dry_run else "Merged Ideas")
            pairs.add_column("Draft", style="dim", max_width=40)
            pairs.add_column("Keep", style="green")
            pairs.add_column("Drop", style="red")
            pairs.add_column("Similarity", justify="right")

            for sample in samples:
                # Only the last path segment of the draft identifier is shown.
                short_draft = sample["draft"].split("/")[-1][:40]
                pairs.add_row(
                    short_draft,
                    sample["keep"],
                    sample["drop"],
                    f"{sample['similarity']:.3f}",
                )
            console.print(pairs)
            console.print()

        verb = "Would remove" if dry_run else "Removed"
        console.print(
            f"Ideas before: [bold]{outcome['total_before']}[/] | "
            f"{verb}: [bold]{outcome['merged_count']}[/] | "
            f"After: [bold]{outcome['total_after']}[/]"
        )

        if dry_run and outcome["merged_count"] > 0:
            console.print(
                "\n[dim]Run with --execute to apply these merges.[/]"
            )
    finally:
        database.close()
|
||||
|
||||
@@ -2024,3 +2516,163 @@ def observatory_diff(since: str | None):
|
||||
console.print(f" [{d.get('source', '?')}] {d.get('name', '?')}: {d.get('title', '')[:60]}")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
# ── monitor ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@main.group()
def monitor():
    """Monitor IETF Datatracker for new AI/agent drafts."""
    # Pure command group: the body is intentionally empty (the docstring is a
    # sufficient function body); subcommands attach via @monitor.command(...).
|
||||
|
||||
|
||||
@monitor.command("run")
@click.option("--analyze/--no-analyze", default=True, help="Analyze new drafts")
@click.option("--embed/--no-embed", default=True, help="Generate embeddings")
@click.option("--ideas/--no-ideas", default=True, help="Extract ideas")
def monitor_run(analyze, embed, ideas):
    """Run one monitoring cycle: fetch -> analyze -> embed -> ideas."""
    # A run is registered in the database first, then the stages execute in
    # order. On success the collected counters are stored with the run; on any
    # Exception the run is marked failed and the exception is re-raised.
    from .analyzer import Analyzer
    from .embeddings import Embedder
    from .fetcher import Fetcher

    config = _get_config()
    database = Database(config)
    run_id = database.start_monitor_run()
    counters = dict(
        new_drafts_found=0,
        drafts_analyzed=0,
        drafts_embedded=0,
        ideas_extracted=0,
    )

    try:
        console.print("[bold]Monitor run started[/]")

        # Resume from the date of the last successful run when there is one;
        # otherwise fall back to the configured start date.
        previous = database.get_last_successful_run()
        if previous and previous.get("completed_at"):
            since = previous["completed_at"][:10]
        else:
            since = config.fetch_since
        console.print(f" Fetching drafts since: [cyan]{since}[/]")

        # Stage 1: fetch drafts and fill in any missing full text.
        fetcher = Fetcher(config)
        try:
            drafts_before = database.count_drafts()
            found = fetcher.search_drafts(keywords=list(config.search_keywords), since=since)
            for item in found:
                database.upsert_draft(item)

            textless = database.drafts_without_text()
            if textless:
                console.print(f" Downloading text for [bold]{len(textless)}[/] drafts...")
                downloaded = fetcher.download_texts(textless)
                for draft_name, body in downloaded.items():
                    record = database.get_draft(draft_name)
                    if record:
                        record.full_text = body
                        database.upsert_draft(record)
        finally:
            fetcher.close()

        # "New" is the growth of the drafts table, clamped at zero.
        growth = database.count_drafts() - drafts_before
        counters["new_drafts_found"] = max(growth, 0)
        console.print(f" New drafts found: [bold green]{counters['new_drafts_found']}[/]")

        # Stage 2: rate drafts that have no rating yet.
        if analyze:
            pending_rating = database.unrated_drafts(limit=200)
            if pending_rating:
                console.print(f" Analyzing [bold]{len(pending_rating)}[/] unrated drafts...")
                rated = Analyzer(config, database).rate_all_unrated(limit=200)
                counters["drafts_analyzed"] = rated
                console.print(f" Analyzed: [bold green]{rated}[/]")

        # Stage 3: embed drafts that have no embedding yet.
        if embed:
            pending_embedding = database.drafts_without_embeddings(limit=500)
            if pending_embedding:
                console.print(f" Embedding [bold]{len(pending_embedding)}[/] drafts...")
                embedded = Embedder(config, database).embed_all_missing()
                counters["drafts_embedded"] = embedded
                console.print(f" Embedded: [bold green]{embedded}[/]")

        # Stage 4: extract ideas from drafts that have none yet.
        if ideas:
            pending_ideas = database.drafts_without_ideas(limit=500)
            if pending_ideas:
                console.print(f" Extracting ideas from [bold]{len(pending_ideas)}[/] drafts...")
                extracted = Analyzer(config, database).extract_all_ideas(limit=500, batch_size=5, cheap=True)
                counters["ideas_extracted"] = extracted
                console.print(f" Ideas extracted from: [bold green]{extracted}[/] drafts")

        database.complete_monitor_run(run_id, counters)
        console.print("\n[bold green]Monitor run completed successfully[/]")

    except Exception as e:
        # Record the failure against the run before propagating it.
        database.fail_monitor_run(run_id, str(e))
        console.print(f"\n[bold red]Monitor run failed:[/] {e}")
        raise
    finally:
        database.close()
|
||||
|
||||
|
||||
@monitor.command("status")
def monitor_status():
    """Show monitoring status and recent runs."""
    # The docstring is the Click help text; notes are kept in comments.
    #
    # Prints three sections: the last successful run, the number of drafts
    # still waiting for rating / embedding / idea extraction, and a table of
    # the most recent runs (at most 20 are requested from the database).

    def _fmt_duration(seconds):
        # Shared by the summary and the table so both treat a missing duration
        # the same way. `is not None` (not truthiness) keeps a 0.0-second run
        # rendered as "0.0s" instead of "-".
        return f"{seconds:.1f}s" if seconds is not None else "-"

    cfg = _get_config()
    db = Database(cfg)

    try:
        runs = db.get_monitor_runs(limit=20)
        last = db.get_last_successful_run()

        # Unprocessed counts
        # NOTE(review): each count is capped by limit=9999.
        unrated = len(db.unrated_drafts(limit=9999))
        unembedded = len(db.drafts_without_embeddings(limit=9999))
        no_ideas = len(db.drafts_without_ideas(limit=9999))

        console.print("\n[bold]Monitor Status[/]\n")

        if last:
            console.print(f" Last successful run: [green]{last['completed_at']}[/]")
            # Previously formatted with ':.1f' directly, which raises TypeError
            # when the stored duration is None.
            console.print(f" Duration: {_fmt_duration(last['duration_seconds'])}")
            console.print(f" New drafts: {last['new_drafts_found']}")
        else:
            console.print(" [yellow]No successful runs yet[/]")

        console.print("\n[bold]Unprocessed[/]")
        # Yellow flags outstanding work, green means nothing is pending.
        console.print(f" Unrated: [{'yellow' if unrated > 0 else 'green'}]{unrated}[/]")
        console.print(f" Unembedded: [{'yellow' if unembedded > 0 else 'green'}]{unembedded}[/]")
        console.print(f" No ideas: [{'yellow' if no_ideas > 0 else 'green'}]{no_ideas}[/]")

        if runs:
            console.print(f"\n[bold]Recent Runs[/] ({len(runs)} total)\n")
            table = Table()
            table.add_column("#", justify="right", width=4)
            table.add_column("Started", width=20)
            table.add_column("Duration", justify="right", width=8)
            table.add_column("Status", width=10)
            table.add_column("New", justify="right", width=5)
            table.add_column("Analyzed", justify="right", width=8)
            table.add_column("Embedded", justify="right", width=8)
            table.add_column("Ideas", justify="right", width=6)

            # Loop-invariant status -> colour mapping; unknown statuses are dimmed.
            status_styles = {"completed": "green", "failed": "red", "running": "yellow"}
            for r in runs:
                status_style = status_styles.get(r["status"], "dim")
                table.add_row(
                    str(r["id"]),
                    r["started_at"][:19] if r["started_at"] else "",
                    _fmt_duration(r["duration_seconds"]),
                    f"[{status_style}]{r['status']}[/{status_style}]",
                    str(r["new_drafts_found"]),
                    str(r["drafts_analyzed"]),
                    str(r["drafts_embedded"]),
                    str(r["ideas_extracted"]),
                )
            console.print(table)
    finally:
        db.close()
|
||||
|
||||
Reference in New Issue
Block a user