Split cli.py (3,438 LOC) into modular command packages
Move 98 CLI commands from monolithic cli.py into organized modules: - commands/common.py: shared utilities (console, pass_cfg_db, _get_config) - commands/fetch.py: fetch, search, list, show, annotate, classify, authors, network - commands/analysis.py: analyze, ask, compare, embed, ideas, gaps, refs, trends, etc. - commands/reports.py: report group, viz group, wg group, export - commands/admin.py: config, pipeline, observatory, monitor, auto-heal - commands/proposals.py: draft-gen, intake cli.py is now a slim 30-line entry point that registers all modules. All command names, options, and behavior preserved exactly. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because it is too large
Load Diff
12
src/ietf_analyzer/commands/__init__.py
Normal file
12
src/ietf_analyzer/commands/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
"""CLI command modules — registers all commands with the main Click group."""
|
||||
|
||||
from . import fetch, analysis, reports, admin, proposals
|
||||
|
||||
|
||||
def register_commands(main):
    """Register all command modules with the main CLI group."""
    # Registration order matters only for `--help` listing; keep the
    # original fetch -> analysis -> reports -> admin -> proposals order.
    for module in (fetch, analysis, reports, admin, proposals):
        module.register(main)
|
||||
913
src/ietf_analyzer/commands/admin.py
Normal file
913
src/ietf_analyzer/commands/admin.py
Normal file
@@ -0,0 +1,913 @@
|
||||
"""Config, pipeline, observatory, monitor, and auto-heal commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
from rich.table import Table
|
||||
|
||||
from .common import console, pass_cfg_db, _get_config
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
|
||||
|
||||
def register(main):
    """Register all admin commands with the main CLI group."""
    # Same registration order as before: config, pipeline, observatory,
    # monitor, auto-heal.
    for command in (config_cmd, pipeline, observatory, monitor, auto):
        main.add_command(command)
|
||||
|
||||
|
||||
# ── config ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command("config")
@click.option("--set", "set_key", nargs=2, help="Set a config key (e.g. --set claude_model claude-opus-4-20250514)")
@click.option("--show", is_flag=True, help="Show effective config with env var sources noted")
def config_cmd(set_key: tuple[str, str] | None, show: bool):
    """Show or modify configuration."""
    from dataclasses import asdict

    cfg = _get_config()

    if set_key:
        key, value = set_key
        if hasattr(cfg, key):
            # Coerce the CLI string to the type of the existing field.
            # bool MUST be checked before int: bool is a subclass of int,
            # and int("true") would raise ValueError.
            current = getattr(cfg, key)
            if isinstance(current, bool):
                value = value.strip().lower() in ("1", "true", "yes", "on")
            elif isinstance(current, float):
                value = float(value)
            elif isinstance(current, int):
                value = int(value)
            elif isinstance(current, list):
                import json
                value = json.loads(value)
            setattr(cfg, key, value)
            cfg.save()
            console.print(f"Set [bold]{key}[/] = {value}")
        else:
            console.print(f"[red]Unknown config key: {key}[/]")
    else:
        # Default (and --show) path: dump every field, flagging values
        # that were overridden by environment variables.
        env_sources = cfg.env_sources()
        for key, val in asdict(cfg).items():
            source_note = ""
            if key in env_sources:
                source_note = f" [yellow](from ${env_sources[key]})[/]"
            console.print(f" [bold]{key}:[/] {val}{source_note}")
        if env_sources:
            console.print(f"\n [dim]({len(env_sources)} value(s) overridden by environment variables)[/]")
        # The API key lives only in the environment, never in the config
        # file — report its presence so users can debug auth issues.
        import os
        if os.environ.get("ANTHROPIC_API_KEY"):
            console.print(" [dim]ANTHROPIC_API_KEY is set in environment[/]")
        else:
            console.print(" [dim]ANTHROPIC_API_KEY is NOT set in environment[/]")
|
||||
|
||||
|
||||
# ── pipeline ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.group()
def pipeline():
    """Gap-to-Draft generation pipeline."""
    # Pure container group; subcommands attach themselves below via
    # @pipeline.command(...). The docstring doubles as click help text.
    pass
|
||||
|
||||
|
||||
@pipeline.command("context")
@click.argument("gap_topic")
def pipeline_context(gap_topic: str):
    """Preview assembled context for a gap topic (dry run)."""
    # Imported lazily so unrelated CLI invocations start fast.
    from ..pipeline import ContextBuilder

    cfg = _get_config()
    db = Database(cfg)
    try:
        builder = ContextBuilder(cfg, db)
        # ctx is a dict; every key below is optional, so .get() defaults
        # keep the preview robust against partial context builds.
        ctx = builder.build_context(gap_topic)
        console.print(f"\n[bold]Context for gap: {gap_topic}[/]\n")

        gap = ctx.get("gap")
        if gap:
            console.print(f"[cyan]Gap:[/] {gap.get('topic', '?')}")
            console.print(f" {gap.get('description', '')[:200]}")
            console.print(f" Severity: {gap.get('severity', '?')}")

        # Ideas that converge on this gap (capped at 10 for readability).
        ideas = ctx.get("ideas", [])
        console.print(f"\n[cyan]Convergent ideas:[/] {len(ideas)}")
        for idea in ideas[:10]:
            console.print(f" - {idea.get('title', '?')}: {idea.get('description', '')[:80]}")

        # (rfc_number, citation_count) pairs of foundational RFCs.
        rfcs = ctx.get("rfc_foundations", [])
        console.print(f"\n[cyan]RFC foundations:[/] {len(rfcs)}")
        for ref_id, count in rfcs[:10]:
            console.print(f" - RFC {ref_id} (cited by {count} drafts)")

        # (draft_name, similarity_score) pairs from the embedding index.
        similar = ctx.get("similar_drafts", [])
        console.print(f"\n[cyan]Similar existing drafts:[/] {len(similar)}")
        for name, score in similar[:8]:
            console.print(f" - {name} (similarity: {score:.3f})")

        top_rated = ctx.get("top_rated", [])
        console.print(f"\n[cyan]Top-rated in category:[/] {len(top_rated)}")

        # Only WG-adopted drafts are surfaced from the WG context.
        wg_ctx = ctx.get("wg_context", [])
        adopted = [w for w in wg_ctx if w.get("wg_adopted")]
        console.print(f"\n[cyan]WG context:[/] {len(adopted)} WG-adopted drafts")

        vision = ctx.get("ecosystem_vision", "")
        if vision:
            console.print(f"\n[cyan]Ecosystem vision:[/] {len(vision)} chars loaded")
    finally:
        db.close()
|
||||
|
||||
|
||||
@pipeline.command("generate")
@click.argument("gap_topic")
@click.option("--cheap/--quality", default=False, help="Use Haiku (cheap) or Sonnet (quality)")
@click.option("--dry-run", is_flag=True, help="Show outline only, don't generate sections")
@click.option("--family", "family_name", default="", help="Family name for multi-draft generation")
def pipeline_generate(gap_topic: str, cheap: bool, dry_run: bool, family_name: str):
    """Generate a single draft from a gap topic."""
    from ..analyzer import Analyzer
    from ..pipeline import PipelineGenerator, ContextBuilder

    # NOTE(review): family_name is accepted via --family but never used in
    # this body — confirm whether generate_full() should receive it.
    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)

    try:
        builder = ContextBuilder(cfg, db)
        generator = PipelineGenerator(cfg, db, analyzer)
        ctx = builder.build_context(gap_topic)

        console.print(f"[bold]Generating draft for gap: {gap_topic}[/]")

        # An outline is always produced first so --dry-run can preview it
        # without paying for full section generation.
        outline = generator.generate_outline(ctx, cheap=cheap)
        console.print(f" Title: [cyan]{outline.get('title', '?')}[/]")
        console.print(f" Sections: {len(outline.get('sections', []))}")
        console.print(f" Target WG: {outline.get('target_wg', '?')}")

        if dry_run:
            import json
            console.print("\n[bold]Outline (dry run):[/]")
            console.print(json.dumps(outline, indent=2))
            return

        result = generator.generate_full(gap_topic, cheap=cheap)
        console.print(f"\n[bold green]Draft generated![/]")
        console.print(f" ID: {result.get('id', '?')}")
        console.print(f" Draft name: {result.get('draft_name', '?')}")

        # Export text file alongside the DB record, if text was produced.
        output_dir = Path(cfg.data_dir) / "reports" / "generated-drafts"
        output_dir.mkdir(parents=True, exist_ok=True)
        draft_name = result.get("draft_name", "draft-unknown")
        out_path = output_dir / f"{draft_name}.txt"
        if result.get("full_text"):
            out_path.write_text(result["full_text"])
            console.print(f" Saved: {out_path}")
    finally:
        db.close()
|
||||
|
||||
|
||||
@pipeline.command("family")
@click.option("--name", "family_name", default="agent-ecosystem", help="Family name")
@click.option("--cheap/--quality", default=False, help="Use Haiku (cheap) or Sonnet (quality)")
def pipeline_family(family_name: str, cheap: bool):
    """Generate the full 5-draft ecosystem family."""
    from ..analyzer import Analyzer
    from ..pipeline import FamilyCoordinator

    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)

    try:
        coordinator = FamilyCoordinator(cfg, db, analyzer)
        console.print(f"[bold]Generating draft family: {family_name}[/]\n")
        # Each result dict describes one generated draft of the family.
        results = coordinator.generate_family(family_name=family_name, cheap=cheap)
        console.print(f"\n[bold green]Generated {len(results)} drafts![/]")

        # Export each draft that produced text as a plain .txt I-D file.
        output_dir = Path(cfg.data_dir) / "reports" / "generated-drafts"
        output_dir.mkdir(parents=True, exist_ok=True)

        for r in results:
            draft_name = r.get("draft_name", "draft-unknown")
            if r.get("full_text"):
                out_path = output_dir / f"{draft_name}.txt"
                out_path.write_text(r["full_text"])
                console.print(f" [green]{r.get('family_role', '?')}[/] -> {out_path}")

        # Write a markdown summary of the whole family next to the drafts.
        summary_path = output_dir / "family-summary.md"
        lines = [f"# Draft Family: {family_name}\n"]
        for r in results:
            lines.append(f"## {r.get('family_role', '?')}: {r.get('title', '?')}")
            lines.append(f"- Draft: `{r.get('draft_name', '?')}`")
            lines.append(f"- Gap: {r.get('gap_topic', '?')}")
            lines.append(f"- Sections: {len(r.get('sections', []))}")
            lines.append("")
        summary_path.write_text("\n".join(lines))
        console.print(f"\n Summary: {summary_path}")

        # Cross-draft consistency check (terminology, references, etc. —
        # whatever the coordinator implements).
        consistency = coordinator.check_consistency(family_name)
        if consistency.get("issues"):
            console.print(f"\n[yellow]Consistency issues:[/]")
            for issue in consistency["issues"]:
                console.print(f" - {issue}")
        else:
            console.print(f"\n[green]No consistency issues found[/]")
    finally:
        db.close()
|
||||
|
||||
|
||||
@pipeline.command("quality")
@click.argument("draft_id", type=int)
def pipeline_quality(draft_id: int):
    """Run quality gates on a generated draft."""
    from ..analyzer import Analyzer
    from ..pipeline import QualityGates

    cfg = _get_config()
    db = Database(cfg)
    analyzer = Analyzer(cfg, db)

    try:
        gates = QualityGates(cfg, db, analyzer)
        console.print(f"[bold]Running quality gates on draft #{draft_id}[/]\n")
        # results maps gate name -> dict with at least "passed"; "details"
        # and "score" are optional per gate.
        results = gates.run_all(draft_id)

        for gate_name, result in results.items():
            status = "[green]PASS[/]" if result["passed"] else "[red]FAIL[/]"
            console.print(f" {status} {gate_name}: {result.get('details', '')[:100]}")
            if "score" in result:
                console.print(f" Score: {result['score']:.2f}")
    finally:
        db.close()
|
||||
|
||||
|
||||
@pipeline.command("status")
def pipeline_status():
    """Show pipeline health: processing stages, generated drafts, and API cost."""
    cfg = _get_config()
    db = Database(cfg)
    try:
        # Pipeline health overview. "Done" counts for embed/ideas are
        # derived by subtracting the missing counts from the total.
        total = db.count_drafts()
        rated_count = len(db.drafts_with_ratings(limit=10000))
        unrated = len(db.unrated_drafts(limit=10000))
        unembedded = len(db.drafts_without_embeddings(limit=10000))
        embedded_count = total - unembedded
        no_ideas = len(db.drafts_without_ideas(limit=10000))
        ideas_count = total - no_ideas
        idea_total = db.idea_count()
        gap_count = len(db.all_gaps())
        input_tok, output_tok = db.total_tokens_used()
        # $3/M input + $15/M output tokens — presumably the quality-model
        # price card; verify against current API pricing.
        est_cost = (input_tok * 3.0 / 1_000_000) + (output_tok * 15.0 / 1_000_000)

        # Most recent snapshot timestamp (trimmed to seconds precision).
        snapshots = db.get_snapshots(limit=1)
        last_update = snapshots[0]["snapshot_at"][:19] if snapshots else "never"

        console.print("\n[bold]Pipeline Status[/]\n")
        console.print(f" Total documents: [bold]{total}[/]")
        console.print(f" Last update: {last_update}")
        console.print()

        # Per-stage progress table with a 20-char ASCII progress bar.
        stage_table = Table(title="Processing Stages")
        stage_table.add_column("Stage", width=20)
        stage_table.add_column("Done", justify="right", width=8)
        stage_table.add_column("Missing", justify="right", width=8)
        stage_table.add_column("Progress", width=20)

        def bar(done, total_n):
            # Render `done/total_n` as a 20-slot bar (each slot = 5%).
            pct = int(done / total_n * 100) if total_n > 0 else 0
            filled = pct // 5
            return f"[green]{'#' * filled}[/][dim]{'.' * (20 - filled)}[/] {pct}%"

        stage_table.add_row("Rated", str(rated_count), str(unrated), bar(rated_count, total))
        stage_table.add_row("Embedded", str(embedded_count), str(unembedded), bar(embedded_count, total))
        stage_table.add_row("Ideas extracted", str(ideas_count), str(no_ideas), bar(ideas_count, total))

        console.print(stage_table)

        console.print(f"\n Total ideas: [bold]{idea_total}[/]")
        console.print(f" Gaps identified: [bold]{gap_count}[/]")
        console.print(f"\n API tokens: {input_tok:,} in + {output_tok:,} out")
        console.print(f" Estimated cost: [bold]${est_cost:.2f}[/]")

        # Table of pipeline-generated drafts, if any exist.
        gen_drafts = db.get_generated_drafts()
        if gen_drafts:
            console.print()
            table = Table(title=f"Generated Drafts ({len(gen_drafts)})")
            table.add_column("ID", justify="right", width=4)
            table.add_column("Draft Name", style="cyan")
            table.add_column("Gap Topic")
            table.add_column("Family", width=15)
            table.add_column("Status", width=10)
            table.add_column("Quality", justify="right", width=7)
            table.add_column("Created", width=10)

            for d in gen_drafts:
                table.add_row(
                    str(d["id"]),
                    d["draft_name"],
                    d["gap_topic"][:30],
                    d.get("family_name", ""),
                    d.get("status", "?"),
                    f"{d.get('quality_score', 0):.1f}" if d.get("quality_score") else "-",
                    (d.get("created_at") or "")[:10],
                )
            console.print(table)
    finally:
        db.close()
|
||||
|
||||
|
||||
@pipeline.command("export")
@click.argument("draft_id", type=int)
@click.option("--output", "-o", help="Output file path")
def pipeline_export(draft_id: int, output: str | None):
    """Export a generated draft as I-D text."""
    cfg = _get_config()
    db = Database(cfg)
    try:
        # Guard clauses: the draft must exist and must have generated text.
        record = db.get_generated_draft(draft_id)
        if not record:
            console.print(f"[red]Draft #{draft_id} not found[/]")
            return

        body = record.get("full_text", "")
        if not body:
            console.print(f"[red]Draft #{draft_id} has no generated text[/]")
            return

        # Destination: explicit --output path, or the default export dir
        # named after the draft.
        if output:
            dest = Path(output)
        else:
            export_dir = Path(cfg.data_dir) / "reports" / "generated-drafts"
            export_dir.mkdir(parents=True, exist_ok=True)
            dest = export_dir / f"{record['draft_name']}.txt"

        dest.write_text(body)
        console.print(f"Exported: [bold green]{dest}[/]")
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── observatory ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.group()
def observatory():
    """Living Standards Observatory — monitor AI standards across bodies."""
    # Container group; subcommands attach via @observatory.command(...).
    pass
|
||||
|
||||
|
||||
@observatory.command("update")
@click.option("--source", "-s", default=None, help="Comma-separated sources (e.g. ietf,w3c)")
@click.option("--full/--delta", default=False, help="Full refresh or delta only")
@click.option("--dry-run", is_flag=True, default=False, help="Show what would happen without making changes")
def observatory_update(source: str | None, full: bool, dry_run: bool):
    """Fetch, analyze, and update the observatory."""
    from ..observatory import Observatory

    cfg = _get_config()
    db = Database(cfg)

    try:
        # A dry run skips constructing the Analyzer (no API client needed);
        # a real run passes one so fetched docs can be analyzed.
        if dry_run:
            obs = Observatory(cfg, db)
        else:
            from ..analyzer import Analyzer
            analyzer = Analyzer(cfg, db)
            obs = Observatory(cfg, db, analyzer)
        # None means "all configured sources" downstream.
        sources = source.split(",") if source else None
        mode = "full" if full else "delta"
        console.print(f"[bold]Observatory update[/] ({mode}{' [DRY RUN]' if dry_run else ''})")
        result = obs.update(sources=sources, full=full, dry_run=dry_run)

        if not dry_run:
            console.print(f"\n[bold green]Update complete![/]")
            console.print(f" New docs: {result.get('new_docs', 0)}")
            console.print(f" Analyzed: {result.get('analyzed', 0)}")
            console.print(f" Embedded: {result.get('embedded', 0)}")
            console.print(f" Ideas extracted: {result.get('ideas', 0)}")
            if result.get("gaps_changed"):
                console.print(f" Gaps re-analyzed: yes")
            # Per-source fetch/analyze errors are collected, not raised.
            if result.get("errors"):
                console.print(f"\n [yellow]Errors ({len(result['errors'])}):[/]")
                for err in result["errors"]:
                    console.print(f" - {err}")
    finally:
        db.close()
|
||||
|
||||
|
||||
@observatory.command("dashboard")
def observatory_dashboard():
    """Regenerate the static dashboard site."""
    from ..dashboard import DashboardGenerator

    cfg = _get_config()
    db = Database(cfg)

    try:
        # generate() returns the output directory of the static site.
        path = DashboardGenerator(cfg, db).generate()
        console.print(f"[bold green]Dashboard generated:[/] {path}")
        console.print(f" Open: file://{path}/index.html")
    finally:
        db.close()
|
||||
|
||||
|
||||
@observatory.command("status")
def observatory_status():
    """Show observatory status — doc counts, sources, last update."""
    from ..observatory import Observatory

    cfg = _get_config()
    db = Database(cfg)

    try:
        obs = Observatory(cfg, db)
        # status() returns a plain dict; all keys below are optional.
        status = obs.status()

        console.print(f"\n[bold]Observatory Status[/]\n")
        console.print(f" Total documents: [bold]{status.get('total_docs', 0)}[/]")
        console.print(f" Unrated: {status.get('unrated', 0)}")
        console.print(f" Unembedded: {status.get('unembedded', 0)}")
        console.print(f" Gaps: {status.get('gaps', 0)}")

        # Per-source document counts (e.g. ietf, w3c, ...).
        sources = status.get("sources", {})
        if sources:
            console.print(f"\n [bold]Sources:[/]")
            for name, count in sources.items():
                console.print(f" {name}: {count} docs")

        last_update = status.get("last_update")
        if last_update:
            console.print(f"\n Last update: {last_update[:10]}")
            console.print(f" Snapshots: {status.get('snapshots', 0)}")
    finally:
        db.close()
|
||||
|
||||
|
||||
@observatory.command("snapshot")
def observatory_snapshot():
    """Record current state as a snapshot."""
    cfg = _get_config()
    db = Database(cfg)
    try:
        snapshot_id = db.create_snapshot()
        # Attach the current gap list to the snapshot so gap evolution
        # can be diffed later.
        current_gaps = db.all_gaps()
        if current_gaps:
            db.record_gap_history(snapshot_id, current_gaps)
        console.print(f"[bold green]Snapshot #{snapshot_id} created[/] ({db.count_drafts()} docs, {len(current_gaps)} gaps)")
    finally:
        db.close()
|
||||
|
||||
|
||||
@observatory.command("diff")
@click.option("--since", help="Show changes since this date (YYYY-MM-DD)")
def observatory_diff(since: str | None):
    """Show what changed since a date."""
    from ..observatory import Observatory

    cfg = _get_config()
    db = Database(cfg)

    try:
        obs = Observatory(cfg, db)
        # since=None presumably means "since the last snapshot" — confirm
        # against Observatory.diff().
        result = obs.diff(since=since)

        console.print(f"\n[bold]Observatory Diff[/]")
        if since:
            console.print(f" Since: {result.get('since', since)}")

        new_docs = result.get("new_docs", [])
        console.print(f" New documents: {result.get('new_doc_count', len(new_docs))}")
        gap_changes = result.get("gap_changes", [])
        console.print(f" Gap history entries: {len(gap_changes)}")

        if new_docs:
            console.print(f"\n [bold]New documents:[/]")
            for doc in new_docs[:20]:
                # Rows may be sqlite3.Row-like; normalize to dict for .get().
                d = dict(doc) if not isinstance(doc, dict) else doc
                console.print(f" [{d.get('source', '?')}] {d.get('name', '?')}: {d.get('title', '')[:60]}")
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── monitor ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.group()
def monitor():
    """Monitor IETF Datatracker for new AI/agent drafts."""
    # Container group; subcommands attach via @monitor.command(...).
    pass
|
||||
|
||||
|
||||
@monitor.command("run")
@click.option("--analyze/--no-analyze", default=True, help="Analyze new drafts")
@click.option("--embed/--no-embed", default=True, help="Generate embeddings")
@click.option("--ideas/--no-ideas", default=True, help="Extract ideas")
def monitor_run(analyze, embed, ideas):
    """Run one monitoring cycle: fetch -> analyze -> embed -> ideas."""
    from ..analyzer import Analyzer
    from ..embeddings import Embedder
    from ..fetcher import Fetcher

    cfg = _get_config()
    db = Database(cfg)
    # Every cycle is recorded in the DB: start_monitor_run() opens a run
    # row; on success complete_monitor_run() stores `stats`, on any error
    # fail_monitor_run() records the message before re-raising.
    run_id = db.start_monitor_run()
    stats = {
        "new_drafts_found": 0,
        "drafts_analyzed": 0,
        "drafts_embedded": 0,
        "ideas_extracted": 0,
    }

    try:
        console.print("[bold]Monitor run started[/]")

        # Fetch window starts at the last successful run's completion date
        # (YYYY-MM-DD), falling back to the configured fetch_since.
        last_run = db.get_last_successful_run()
        since = last_run["completed_at"][:10] if last_run and last_run.get("completed_at") else cfg.fetch_since
        console.print(f" Fetching drafts since: [cyan]{since}[/]")

        # Fetch new drafts; count_drafts() before/after gives the delta.
        fetcher = Fetcher(cfg)
        try:
            existing_count = db.count_drafts()
            drafts = fetcher.search_drafts(keywords=list(cfg.search_keywords), since=since)
            for draft in drafts:
                db.upsert_draft(draft)

            # Download full text for any draft rows still lacking it.
            missing_text = db.drafts_without_text()
            if missing_text:
                console.print(f" Downloading text for [bold]{len(missing_text)}[/] drafts...")
                texts = fetcher.download_texts(missing_text)
                for name, text in texts.items():
                    draft = db.get_draft(name)
                    if draft:
                        draft.full_text = text
                        db.upsert_draft(draft)
        finally:
            # Always release the fetcher's HTTP resources.
            fetcher.close()

        new_count = db.count_drafts() - existing_count
        stats["new_drafts_found"] = max(new_count, 0)
        console.print(f" New drafts found: [bold green]{stats['new_drafts_found']}[/]")

        # Analyze unrated drafts (capped at 200 per cycle).
        if analyze:
            unrated = db.unrated_drafts(limit=200)
            if unrated:
                console.print(f" Analyzing [bold]{len(unrated)}[/] unrated drafts...")
                analyzer = Analyzer(cfg, db)
                count = analyzer.rate_all_unrated(limit=200)
                stats["drafts_analyzed"] = count
                console.print(f" Analyzed: [bold green]{count}[/]")

        # Embed drafts missing vectors (capped at 500 per cycle).
        if embed:
            missing_embed = db.drafts_without_embeddings(limit=500)
            if missing_embed:
                console.print(f" Embedding [bold]{len(missing_embed)}[/] drafts...")
                embedder = Embedder(cfg, db)
                count = embedder.embed_all_missing()
                stats["drafts_embedded"] = count
                console.print(f" Embedded: [bold green]{count}[/]")

        # Extract ideas with the cheap model, in batches of 5.
        if ideas:
            missing_ideas = db.drafts_without_ideas(limit=500)
            if missing_ideas:
                console.print(f" Extracting ideas from [bold]{len(missing_ideas)}[/] drafts...")
                analyzer = Analyzer(cfg, db)
                count = analyzer.extract_all_ideas(limit=500, batch_size=5, cheap=True)
                stats["ideas_extracted"] = count
                console.print(f" Ideas extracted from: [bold green]{count}[/] drafts")

        db.complete_monitor_run(run_id, stats)
        console.print("\n[bold green]Monitor run completed successfully[/]")

    except Exception as e:
        # Record the failure on the run row, then re-raise so the CLI
        # exits non-zero (important for cron monitoring).
        db.fail_monitor_run(run_id, str(e))
        console.print(f"\n[bold red]Monitor run failed:[/] {e}")
        raise
    finally:
        db.close()
|
||||
|
||||
|
||||
@monitor.command("status")
def monitor_status():
    """Show monitoring status and recent runs."""
    cfg = _get_config()
    db = Database(cfg)

    try:
        runs = db.get_monitor_runs(limit=20)
        last = db.get_last_successful_run()

        # Backlog counts for each processing stage.
        unrated = len(db.unrated_drafts(limit=9999))
        unembedded = len(db.drafts_without_embeddings(limit=9999))
        no_ideas = len(db.drafts_without_ideas(limit=9999))

        console.print("\n[bold]Monitor Status[/]\n")

        if last:
            console.print(f" Last successful run: [green]{last['completed_at']}[/]")
            console.print(f" Duration: {last['duration_seconds']:.1f}s")
            console.print(f" New drafts: {last['new_drafts_found']}")
        else:
            console.print(" [yellow]No successful runs yet[/]")

        # Yellow highlights any non-zero backlog.
        console.print(f"\n[bold]Unprocessed[/]")
        console.print(f" Unrated: [{'yellow' if unrated > 0 else 'green'}]{unrated}[/]")
        console.print(f" Unembedded: [{'yellow' if unembedded > 0 else 'green'}]{unembedded}[/]")
        console.print(f" No ideas: [{'yellow' if no_ideas > 0 else 'green'}]{no_ideas}[/]")

        if runs:
            console.print(f"\n[bold]Recent Runs[/] ({len(runs)} total)\n")
            table = Table()
            table.add_column("#", justify="right", width=4)
            table.add_column("Started", width=20)
            table.add_column("Duration", justify="right", width=8)
            table.add_column("Status", width=10)
            table.add_column("New", justify="right", width=5)
            table.add_column("Analyzed", justify="right", width=8)
            table.add_column("Embedded", justify="right", width=8)
            table.add_column("Ideas", justify="right", width=6)
            for r in runs:
                # Unknown statuses fall back to a dim style.
                status_style = {"completed": "green", "failed": "red", "running": "yellow"}.get(r["status"], "dim")
                table.add_row(
                    str(r["id"]),
                    r["started_at"][:19] if r["started_at"] else "",
                    f"{r['duration_seconds']:.1f}s" if r["duration_seconds"] else "-",
                    f"[{status_style}]{r['status']}[/{status_style}]",
                    str(r["new_drafts_found"]),
                    str(r["drafts_analyzed"]),
                    str(r["drafts_embedded"]),
                    str(r["ideas_extracted"]),
                )
            console.print(table)
    finally:
        db.close()
|
||||
|
||||
|
||||
# ── auto ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command("auto")
@click.option("--cost-limit", default=2.0, help="Auto-approve operations under this USD amount (default: $2)")
@click.option("--yes", "-y", is_flag=True, help="Skip all confirmation prompts")
@click.option("--dry-run", is_flag=True, help="Show what would be done without doing it")
@click.option("--source", "-s", default=None, help="Limit to specific source (ietf,w3c,etsi,iso,itu)")
def auto(cost_limit: float, yes: bool, dry_run: bool, source: str | None):
    """Auto-heal: fetch, analyze, embed, extract ideas, and update gaps.

    Automatically processes all unrated, unembedded, and idea-less drafts
    across all sources. Uses cheap models (Haiku) for bulk operations.
    Operations estimated above --cost-limit require confirmation.

    Examples:

        ietf auto # run full pipeline, auto-approve under $2

        ietf auto --dry-run # show plan without executing

        ietf auto -s iso # only process ISO drafts

        ietf auto --cost-limit 5 # raise approval threshold to $5

        ietf auto -y # skip all prompts (for cron)
    """
    # Consistency fix: every other command in this module obtains its
    # config via _get_config() (which presumably resolves CLI-level
    # overrides — confirm in commands/common.py); this one previously
    # bypassed it with a bare Config.load().
    cfg = _get_config()
    db = Database(cfg)

    try:
        _auto_heal(cfg, db, cost_limit=cost_limit, yes=yes, dry_run=dry_run, source_filter=source)
    finally:
        db.close()
|
||||
|
||||
|
||||
def _estimate_cost(n_drafts: int, operation: str) -> float:
|
||||
"""Estimate USD cost for an operation. Conservative estimates."""
|
||||
costs = {
|
||||
"analyze_cheap": n_drafts * 0.0005,
|
||||
"analyze_quality": n_drafts * 0.005,
|
||||
"ideas_cheap": n_drafts * 0.001,
|
||||
"ideas_quality": n_drafts * 0.008,
|
||||
"gaps": 0.05,
|
||||
"embed": 0.0,
|
||||
"authors": 0.0,
|
||||
"fetch": 0.0,
|
||||
}
|
||||
return costs.get(operation, 0.0)
|
||||
|
||||
|
||||
def _auto_heal(cfg, db, cost_limit: float, yes: bool, dry_run: bool, source_filter: str | None):
|
||||
"""Run the full auto-heal pipeline."""
|
||||
import time as _time
|
||||
|
||||
from rich.panel import Panel
|
||||
|
||||
steps: list[dict] = []
|
||||
total_cost = 0.0
|
||||
|
||||
# ── Step 1: Fetch new drafts from all sources ──
|
||||
sources = [source_filter] if source_filter else cfg.observatory_sources
|
||||
steps.append({
|
||||
"name": f"Fetch new drafts from {', '.join(sources)}",
|
||||
"sources": sources,
|
||||
"cost": 0.0,
|
||||
"action": "fetch",
|
||||
})
|
||||
|
||||
# ── Step 2: Analyze unrated drafts ──
|
||||
unrated = db.unrated_drafts(limit=10000)
|
||||
if source_filter:
|
||||
unrated = [d for d in unrated if (d.source or "ietf") == source_filter]
|
||||
n_unrated = len(unrated)
|
||||
analyze_cost = _estimate_cost(n_unrated, "analyze_cheap")
|
||||
steps.append({
|
||||
"name": f"Analyze {n_unrated} unrated drafts (Haiku)",
|
||||
"count": n_unrated,
|
||||
"cost": analyze_cost,
|
||||
"action": "analyze",
|
||||
})
|
||||
total_cost += analyze_cost
|
||||
|
||||
# ── Step 3: Fetch authors ──
|
||||
missing_authors = db.conn.execute(
|
||||
"SELECT COUNT(*) FROM drafts WHERE name NOT IN (SELECT DISTINCT draft_name FROM draft_authors)"
|
||||
).fetchone()[0]
|
||||
steps.append({
|
||||
"name": f"Fetch authors for {missing_authors} drafts",
|
||||
"count": missing_authors,
|
||||
"cost": 0.0,
|
||||
"action": "authors",
|
||||
})
|
||||
|
||||
# ── Step 4: Embed missing drafts ──
|
||||
missing_embed = db.drafts_without_embeddings(limit=10000)
|
||||
if source_filter:
|
||||
source_names = {row[0] for row in db.conn.execute(
|
||||
"SELECT name FROM drafts WHERE source = ?", (source_filter,)
|
||||
).fetchall()}
|
||||
missing_embed = [n for n in missing_embed if n in source_names]
|
||||
n_embed = len(missing_embed)
|
||||
steps.append({
|
||||
"name": f"Embed {n_embed} drafts (Ollama, free)",
|
||||
"count": n_embed,
|
||||
"cost": 0.0,
|
||||
"action": "embed",
|
||||
})
|
||||
|
||||
# ── Step 5: Extract ideas ──
|
||||
missing_ideas = db.drafts_without_ideas(limit=10000)
|
||||
if source_filter:
|
||||
if not source_names:
|
||||
source_names = {row[0] for row in db.conn.execute(
|
||||
"SELECT name FROM drafts WHERE source = ?", (source_filter,)
|
||||
).fetchall()}
|
||||
missing_ideas = [n for n in missing_ideas if n in source_names]
|
||||
n_ideas = len(missing_ideas)
|
||||
ideas_cost = _estimate_cost(n_ideas, "ideas_cheap")
|
||||
steps.append({
|
||||
"name": f"Extract ideas from {n_ideas} drafts (Haiku)",
|
||||
"count": n_ideas,
|
||||
"cost": ideas_cost,
|
||||
"action": "ideas",
|
||||
})
|
||||
total_cost += ideas_cost
|
||||
|
||||
# ── Step 6: Refresh gaps ──
|
||||
gap_cost = _estimate_cost(0, "gaps")
|
||||
steps.append({
|
||||
"name": "Refresh gap analysis",
|
||||
"cost": gap_cost,
|
||||
"action": "gaps",
|
||||
})
|
||||
total_cost += gap_cost
|
||||
|
||||
# ── Show plan ──
|
||||
plan_lines = []
|
||||
for s in steps:
|
||||
count = s.get("count", 1)
|
||||
if count == 0:
|
||||
plan_lines.append(f" [dim]SKIP[/] {s['name']}")
|
||||
else:
|
||||
cost_str = f" [yellow]~${s['cost']:.2f}[/]" if s["cost"] > 0 else ""
|
||||
plan_lines.append(f" [green]RUN[/] {s['name']}{cost_str}")
|
||||
|
||||
auto_approved = total_cost <= cost_limit
|
||||
plan_lines.append(f"\n [bold]Estimated total cost: ${total_cost:.2f}[/]")
|
||||
if auto_approved:
|
||||
plan_lines.append(f" [green]Auto-approved (under ${cost_limit:.2f} limit)[/]")
|
||||
else:
|
||||
plan_lines.append(f" [yellow]Requires approval (over ${cost_limit:.2f} limit)[/]")
|
||||
|
||||
console.print(Panel("\n".join(plan_lines), title="Auto-Heal Plan"))
|
||||
|
||||
if dry_run:
|
||||
console.print("[bold yellow]DRY RUN[/] — no changes made.")
|
||||
return
|
||||
|
||||
# ── Approval ──
|
||||
if not auto_approved and not yes:
|
||||
if not click.confirm(f"Estimated cost ${total_cost:.2f} exceeds ${cost_limit:.2f} limit. Proceed?"):
|
||||
console.print("[yellow]Aborted.[/]")
|
||||
return
|
||||
|
||||
# ── Execute ──
|
||||
start = _time.time()
|
||||
|
||||
for step in steps:
|
||||
action = step["action"]
|
||||
count = step.get("count", 0)
|
||||
|
||||
if action == "fetch":
|
||||
console.print(f"\n[bold cyan]>>> Fetching from {step['sources']}...[/]")
|
||||
from ..sources import get_fetcher
|
||||
from ..observatory import _doc_to_draft
|
||||
for src_name in step["sources"]:
|
||||
try:
|
||||
fetcher = get_fetcher(src_name, cfg)
|
||||
before = db.count_drafts()
|
||||
results = fetcher.search(keywords=cfg.search_keywords)
|
||||
for doc in results:
|
||||
db.upsert_draft(_doc_to_draft(doc))
|
||||
after = db.count_drafts()
|
||||
new = after - before
|
||||
console.print(f" [{src_name}] +{new} new drafts")
|
||||
fetcher.close()
|
||||
except Exception as e:
|
||||
console.print(f" [{src_name}] [red]Error: {e}[/]")
|
||||
|
||||
elif action == "analyze" and count > 0:
|
||||
console.print(f"\n[bold cyan]>>> Analyzing {count} drafts (Haiku)...[/]")
|
||||
from ..analyzer import Analyzer
|
||||
analyzer = Analyzer(cfg, db)
|
||||
orig_model = cfg.claude_model
|
||||
cfg.claude_model = cfg.claude_model_cheap
|
||||
try:
|
||||
done = analyzer.rate_all_unrated(limit=count)
|
||||
console.print(f" Analyzed [bold green]{done}[/] drafts")
|
||||
finally:
|
||||
cfg.claude_model = orig_model
|
||||
|
||||
elif action == "authors" and count > 0:
|
||||
console.print(f"\n[bold cyan]>>> Fetching authors for {count} drafts...[/]")
|
||||
from ..authors import AuthorNetwork
|
||||
author_net = AuthorNetwork(cfg, db)
|
||||
done = author_net.fetch_all_authors()
|
||||
console.print(f" Fetched authors for [bold green]{done}[/] drafts")
|
||||
|
||||
elif action == "embed" and count > 0:
|
||||
console.print(f"\n[bold cyan]>>> Embedding {count} drafts (Ollama)...[/]")
|
||||
from ..embeddings import Embedder
|
||||
with Embedder(cfg, db) as embedder:
|
||||
done = embedder.embed_all_missing()
|
||||
console.print(f" Embedded [bold green]{done}[/] drafts")
|
||||
|
||||
elif action == "ideas" and count > 0:
|
||||
console.print(f"\n[bold cyan]>>> Extracting ideas from {count} drafts (Haiku)...[/]")
|
||||
from ..analyzer import Analyzer
|
||||
analyzer = Analyzer(cfg, db)
|
||||
done = analyzer.extract_all_ideas(limit=count, batch_size=5, cheap=True)
|
||||
console.print(f" Extracted ideas from [bold green]{done}[/] drafts")
|
||||
|
||||
elif action == "gaps":
|
||||
console.print(f"\n[bold cyan]>>> Refreshing gap analysis...[/]")
|
||||
from ..analyzer import Analyzer
|
||||
analyzer = Analyzer(cfg, db)
|
||||
gaps = analyzer.gap_analysis()
|
||||
if gaps:
|
||||
console.print(f" Found [bold green]{len(gaps)}[/] gaps")
|
||||
|
||||
elapsed = _time.time() - start
|
||||
console.print(f"\n[bold green]Auto-heal complete![/] ({elapsed:.1f}s, ~${total_cost:.2f})")
|
||||
|
||||
# Show final counts
|
||||
total = db.count_drafts()
|
||||
rated = db.conn.execute("SELECT COUNT(*) FROM ratings").fetchone()[0]
|
||||
embedded = db.conn.execute("SELECT COUNT(*) FROM embeddings").fetchone()[0]
|
||||
idea_count = db.conn.execute("SELECT COUNT(*) FROM ideas").fetchone()[0]
|
||||
gap_count = db.conn.execute("SELECT COUNT(*) FROM gaps").fetchone()[0]
|
||||
console.print(f" Drafts: {total} | Rated: {rated} | Embedded: {embedded} | Ideas: {idea_count} | Gaps: {gap_count}")
|
||||
|
||||
by_source = db.conn.execute(
|
||||
"SELECT source, COUNT(*) FROM drafts GROUP BY source ORDER BY COUNT(*) DESC"
|
||||
).fetchall()
|
||||
source_str = " | ".join(f"{s}: {c}" for s, c in by_source)
|
||||
console.print(f" Sources: {source_str}")
|
||||
1412
src/ietf_analyzer/commands/analysis.py
Normal file
1412
src/ietf_analyzer/commands/analysis.py
Normal file
File diff suppressed because it is too large
Load Diff
32
src/ietf_analyzer/commands/common.py
Normal file
32
src/ietf_analyzer/commands/common.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""Shared utilities for CLI command modules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
|
||||
import click
|
||||
from rich.console import Console
|
||||
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
|
||||
# Single shared Rich console — all command modules import this so output goes
# through one stream (consistent styling, easy to redirect in tests).
console = Console()
|
||||
|
||||
|
||||
def _get_config() -> Config:
    """Load and return the application configuration."""
    return Config.load()
|
||||
|
||||
|
||||
def pass_cfg_db(f):
    """Decorator injecting the shared Config and Database from the Click context.

    Place @pass_cfg_db below all other @click decorators; the wrapped function
    then receives (cfg, db, ...) instead of calling _get_config()/Database()
    itself. Expects ctx.obj to be a dict with "cfg" and "db" keys.
    """

    @click.pass_context
    @functools.wraps(f)
    def invoke(ctx, **options):
        shared = ctx.obj
        return f(shared["cfg"], shared["db"], **options)

    return invoke
|
||||
409
src/ietf_analyzer/commands/fetch.py
Normal file
409
src/ietf_analyzer/commands/fetch.py
Normal file
@@ -0,0 +1,409 @@
|
||||
"""Fetch, search, and browse commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import click
|
||||
from rich.table import Table
|
||||
|
||||
from .common import console, pass_cfg_db, _get_config
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
|
||||
|
||||
def register(main):
    """Register all fetch/browse commands with the main CLI group."""
    # Registration order matches the original explicit add_command sequence.
    for cmd in (fetch, classify, list_drafts, search, show, annotate, authors, network):
        main.add_command(cmd)
|
||||
|
||||
|
||||
# ── fetch ────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command()
@click.option("--keywords", "-k", multiple=True, help="Extra keywords to search for")
@click.option("--since", "-s", help="Only fetch drafts newer than this date (YYYY-MM-DD)")
@click.option("--download-text/--no-download-text", default=True, help="Download full text of drafts")
@click.option("--classify/--no-classify", default=True, help="Pre-filter with local Ollama classifier (saves Claude tokens)")
@pass_cfg_db
def fetch(cfg, db, keywords: tuple[str, ...], since: str | None, download_text: bool, classify: bool):
    """Fetch AI/agent drafts from IETF Datatracker."""
    from ..fetcher import Fetcher

    fetcher = Fetcher(cfg)

    # Configured keywords plus any extras from the command line.
    kw_list = list(cfg.search_keywords)
    if keywords:
        kw_list.extend(keywords)

    try:
        drafts = fetcher.search_drafts(keywords=kw_list, since=since)
        console.print(f"Found [bold]{len(drafts)}[/] drafts from Datatracker")

        # Pre-filter with local classifier to avoid storing irrelevant drafts.
        # Classifier failures are non-fatal: we fall back to storing everything.
        if classify and drafts:
            try:
                from ..classifier import Classifier
                console.print("\n[bold]Running local AI-relevance classifier (Ollama)...[/]")
                clf = Classifier(cfg)
                try:
                    draft_dicts = [{"name": d.name, "title": d.title, "abstract": d.abstract} for d in drafts]
                    relevant, irrelevant = clf.classify_batch(draft_dicts, verbose=True)
                    relevant_names = {d["name"] for d in relevant}
                    before = len(drafts)
                    drafts = [d for d in drafts if d.name in relevant_names]
                    console.print(f"\n Kept [green]{len(drafts)}[/green] / {before} drafts after classification")
                finally:
                    # BUG FIX: previously clf was only closed on success, so an
                    # exception in classify_batch leaked the Ollama connection.
                    clf.close()
            except Exception as e:
                console.print(f"[yellow]Classifier unavailable ({e}), storing all drafts[/yellow]")

        for draft in drafts:
            db.upsert_draft(draft)
        console.print(f"Stored [bold green]{len(drafts)}[/] drafts in database")

        # Backfill full text for any stored draft that still lacks it.
        if download_text:
            missing = db.drafts_without_text()
            if missing:
                console.print(f"Downloading text for [bold]{len(missing)}[/] drafts...")
                texts = fetcher.download_texts(missing)
                for name, text in texts.items():
                    draft = db.get_draft(name)
                    if draft:
                        draft.full_text = text
                        db.upsert_draft(draft)
    finally:
        fetcher.close()
|
||||
|
||||
|
||||
# ── classify ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command()
@click.option("--unrated", is_flag=True, help="Classify only unrated drafts")
@click.option("--all", "all_drafts", is_flag=True, help="Classify all drafts (checks accuracy against existing ratings)")
@click.option("--remove", is_flag=True, help="Actually remove drafts classified as irrelevant (use with --unrated)")
@pass_cfg_db
def classify(cfg, db, unrated: bool, all_drafts: bool, remove: bool):
    """Pre-classify drafts as AI-relevant using local Ollama model.

    Runs a two-stage filter (embedding similarity + chat model) to identify
    irrelevant drafts before spending Claude tokens on rating.

    Examples:

        ietf classify --unrated          # preview irrelevant unrated drafts

        ietf classify --unrated --remove # remove them from DB

        ietf classify --all              # accuracy check against existing ratings
    """
    from ..classifier import Classifier

    clf = Classifier(cfg)
    # BUG FIX: clf.close() previously only ran on clean exits; any exception
    # (or the early return path) could leak the classifier connection.
    try:
        if all_drafts:
            # Accuracy check mode: compare against existing FP flags
            console.print("[bold]Accuracy check: classifying all rated drafts...[/]\n")
            tp_rows = db.conn.execute(
                "SELECT d.name, d.title, d.abstract FROM drafts d "
                "JOIN ratings r ON d.name = r.draft_name WHERE r.false_positive = 0"
            ).fetchall()
            fp_rows = db.conn.execute(
                "SELECT d.name, d.title, d.abstract FROM drafts d "
                "JOIN ratings r ON d.name = r.draft_name WHERE r.false_positive = 1"
            ).fetchall()

            tp_ok, tp_miss, fp_ok, fp_miss = 0, 0, 0, 0
            for row in tp_rows:
                is_relevant, _sim, _method = clf.classify(row["title"], row["abstract"])
                if is_relevant:
                    tp_ok += 1
                else:
                    tp_miss += 1
            for row in fp_rows:
                is_relevant, _sim, _method = clf.classify(row["title"], row["abstract"])
                if not is_relevant:
                    fp_ok += 1
                else:
                    fp_miss += 1

            total_tp = len(tp_rows)
            total_fp = len(fp_rows)
            # Precision: of everything the classifier keeps, how much was truly relevant.
            precision = tp_ok / (tp_ok + fp_miss) if (tp_ok + fp_miss) else 0
            recall = tp_ok / total_tp if total_tp else 0
            console.print(f"True Positives: [green]{tp_ok}[/]/{total_tp} kept ({tp_miss} missed)")
            console.print(f"False Positives: [red]{fp_ok}[/]/{total_fp} filtered ({fp_miss} slipped)")
            console.print(f"Precision: [bold]{precision:.1%}[/] Recall: [bold]{recall:.1%}[/]")

        elif unrated:
            drafts = db.unrated_drafts(limit=5000)
            if not drafts:
                console.print("No unrated drafts to classify.")
                return

            console.print(f"[bold]Classifying {len(drafts)} unrated drafts...[/]\n")
            draft_dicts = [{"name": d.name, "title": d.title, "abstract": d.abstract} for d in drafts]
            relevant, irrelevant = clf.classify_batch(draft_dicts, verbose=True)

            if irrelevant:
                console.print(f"\n[bold red]Irrelevant drafts ({len(irrelevant)}):[/]")
                table = Table()
                table.add_column("Name", style="cyan", max_width=50)
                table.add_column("Title", max_width=50)
                for d in irrelevant:
                    table.add_row(d["name"], d.get("title", "")[:50])
                console.print(table)

                if remove:
                    for d in irrelevant:
                        db.conn.execute("DELETE FROM drafts WHERE name = ?", (d["name"],))
                    db.conn.commit()
                    console.print(f"\n[bold red]Removed {len(irrelevant)} irrelevant drafts from database[/]")
                else:
                    # Plain string: original f-string had no placeholders.
                    console.print("\n[dim]Use --remove to delete these from the DB[/]")
            else:
                console.print("\nAll unrated drafts appear relevant.")
        else:
            console.print("Use --unrated or --all. See: ietf classify --help")
    finally:
        clf.close()
|
||||
|
||||
|
||||
# ── list ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command("list")
|
||||
@click.option("--limit", "-n", default=30, help="Number of drafts to show")
|
||||
@click.option("--sort", "-s", default="time DESC", help="Sort order (e.g. 'time DESC', 'name ASC')")
|
||||
@pass_cfg_db
|
||||
def list_drafts(cfg, db, limit: int, sort: str):
|
||||
"""List tracked drafts."""
|
||||
drafts = db.list_drafts(limit=limit, order_by=sort)
|
||||
total = db.count_drafts()
|
||||
|
||||
table = Table(title=f"Tracked Drafts ({total} total, showing {len(drafts)})")
|
||||
table.add_column("Date", style="dim", width=10)
|
||||
table.add_column("Name", style="cyan", max_width=55)
|
||||
table.add_column("Title", max_width=50)
|
||||
table.add_column("Pg", justify="right", width=4)
|
||||
table.add_column("Text", justify="center", width=4)
|
||||
table.add_column("Rated", justify="center", width=5)
|
||||
|
||||
for d in drafts:
|
||||
has_text = "\u2713" if d.full_text else ""
|
||||
rated = "\u2713" if db.get_rating(d.name) else ""
|
||||
table.add_row(d.date, d.name, d.title[:50], str(d.pages or ""), has_text, rated)
|
||||
|
||||
console.print(table)
|
||||
|
||||
|
||||
# ── search ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command()
@click.argument("query")
@click.option("--limit", "-n", default=20, help="Max results")
@pass_cfg_db
def search(cfg, db, query: str, limit: int):
    """Full-text search across stored drafts."""
    hits = db.search_drafts(query, limit=limit)
    if not hits:
        console.print(f"No results for [bold]{query}[/]")
        return

    tbl = Table(title=f"Search: {query} ({len(hits)} results)")
    tbl.add_column("Date", style="dim", width=10)
    tbl.add_column("Name", style="cyan")
    tbl.add_column("Title")
    for hit in hits:
        tbl.add_row(hit.date, hit.name, hit.title[:60])

    console.print(tbl)
|
||||
|
||||
|
||||
# ── show ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command()
@click.argument("name")
@pass_cfg_db
def show(cfg, db, name: str):
    """Show detailed info for a draft."""
    from ..reports import Reporter
    from ..readiness import compute_readiness

    reporter = Reporter(cfg, db)
    draft = db.get_draft(name)
    if draft is None:
        console.print(f"[red]Draft not found: {name}[/]")
        return

    rating = db.get_rating(name)

    # Header: title, identity line, group/link, abstract.
    console.print(f"\n[bold]{draft.title}[/]")
    console.print(f"[dim]{draft.name}[/] rev {draft.rev} | {draft.date} | {draft.pages or '?'} pages")
    console.print(f"Group: {draft.group or 'individual'} | {draft.datatracker_url}")
    console.print(f"\n[italic]{draft.abstract}[/]\n")

    if rating:
        console.print("[bold]AI Assessment[/]")
        console.print(f" Score: [bold green]{rating.composite_score:.1f}[/]")
        console.print(f" Summary: {rating.summary}\n")

        table = Table(show_header=True)
        table.add_column("Dimension", width=12)
        table.add_column("Score", justify="center", width=7)
        table.add_column("Notes")
        table.add_row("Novelty", f"{rating.novelty}/5", rating.novelty_note)
        table.add_row("Maturity", f"{rating.maturity}/5", rating.maturity_note)
        table.add_row("Overlap", f"{rating.overlap}/5", rating.overlap_note)
        table.add_row("Momentum", f"{rating.momentum}/5", rating.momentum_note)
        table.add_row("Relevance", f"{rating.relevance}/5", rating.relevance_note)
        console.print(table)

        if rating.categories:
            console.print(f"\nCategories: {', '.join(rating.categories)}")
    else:
        # BUG FIX: original lacked the f prefix, so the literal text "{name}"
        # was printed instead of the draft name.
        console.print(f"[dim]Not yet rated — run: ietf analyze {name}[/]")

    # Readiness score
    readiness = compute_readiness(db, name)
    if readiness["score"] > 0:
        console.print(f"\n[bold]Standards Readiness: [cyan]{readiness['score']}/100[/][/]")
        rtable = Table(show_header=True)
        rtable.add_column("Factor", width=20)
        rtable.add_column("Value", justify="center", width=10)
        rtable.add_column("Points", justify="right", width=8)
        rtable.add_column("Detail")
        for key, f in readiness["factors"].items():
            rtable.add_row(f["label"], f"{f['value']:.2f}", f"+{f['contribution']}", f["detail"])
        console.print(rtable)

    # Save detailed report too
    path = reporter.draft_detail(name)
    if path:
        console.print(f"\n[dim]Report saved: {path}[/]")
|
||||
|
||||
|
||||
# ── annotate ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command()
@click.argument("draft_name")
@click.option("--note", "-n", default=None, help="Set/update the note text")
@click.option("--tag", "-t", multiple=True, help="Add a tag (can be used multiple times)")
@click.option("--remove-tag", "-r", multiple=True, help="Remove a tag (can be used multiple times)")
@pass_cfg_db
def annotate(cfg, db, draft_name: str, note: str | None, tag: tuple[str, ...], remove_tag: tuple[str, ...]):
    """Add or view annotations (notes & tags) for a draft."""
    if db.get_draft(draft_name) is None:
        console.print(f"[red]Draft not found: {draft_name}[/]")
        return

    # View mode: no options given, so just display the current annotation.
    if note is None and not tag and not remove_tag:
        ann = db.get_annotation(draft_name)
        if not ann:
            console.print(f"[dim]No annotation for {draft_name}. Use --note or --tag to add one.[/]")
        else:
            console.print(f"\n[bold]Annotation for {draft_name}[/]")
            console.print(f" Note: {ann['note'] or '(empty)'}")
            console.print(f" Tags: {', '.join(ann['tags']) if ann['tags'] else '(none)'}")
            console.print(f" Updated: {ann['updated_at']}")
        return

    # Edit mode: apply tag additions then removals against the stored set.
    existing = db.get_annotation(draft_name)
    tags_now = existing["tags"] if existing else []
    for t in tag:
        if t not in tags_now:
            tags_now.append(t)
    for t in remove_tag:
        if t in tags_now:
            tags_now.remove(t)

    db.upsert_annotation(draft_name, note=note, tags=tags_now)
    saved = db.get_annotation(draft_name)
    console.print(f"[green]Annotation updated for {draft_name}[/]")
    console.print(f" Note: {saved['note'] or '(empty)'}")
    console.print(f" Tags: {', '.join(saved['tags']) if saved['tags'] else '(none)'}")
|
||||
|
||||
|
||||
# ── authors ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command()
@click.argument("name", required=False)
@click.option("--fetch/--no-fetch", default=False, help="Fetch author data from Datatracker first")
@click.option("--limit", "-n", default=20, help="Number of top authors to show")
@pass_cfg_db
def authors(cfg, db, name: str | None, fetch: bool, limit: int):
    """Show authors for a draft, or top authors overall."""
    from ..authors import AuthorNetwork

    net = AuthorNetwork(cfg, db)

    if fetch:
        fetched = net.fetch_all_authors()
        console.print(f"Fetched authors for [bold green]{fetched}[/] drafts")

    if name:
        # Per-draft view.
        people = db.get_authors_for_draft(name)
        if not people:
            console.print(f"[yellow]No author data for {name}. Run `ietf authors --fetch` first.[/]")
            return
        console.print(f"\n[bold]Authors of {name}:[/]")
        for person in people:
            console.print(f" - {person.name} ({person.affiliation or 'no affiliation'})")
    else:
        # Leaderboard view.
        ranked = db.top_authors(limit=limit)
        if not ranked:
            console.print("[yellow]No author data. Run `ietf authors --fetch` first.[/]")
            return
        tbl = Table(title=f"Top {limit} Authors")
        tbl.add_column("#", justify="right", width=4)
        tbl.add_column("Author", style="cyan")
        tbl.add_column("Organization")
        tbl.add_column("Drafts", justify="right", width=6)
        for pos, (author_name, org, n_drafts, _, _person_id) in enumerate(ranked, 1):
            tbl.add_row(str(pos), author_name, org, str(n_drafts))
        console.print(tbl)
|
||||
|
||||
|
||||
@click.command()
@click.option("--top", "-n", default=20, help="Top N to show")
@pass_cfg_db
def network(cfg, db, top: int):
    """Show author collaboration network."""
    console.print("\n[bold]Top Organizations[/]")
    org_rows = db.top_orgs(limit=top)
    if org_rows:
        org_table = Table()
        org_table.add_column("#", justify="right", width=4)
        org_table.add_column("Organization", style="cyan")
        org_table.add_column("Authors", justify="right", width=8)
        org_table.add_column("Drafts", justify="right", width=6)
        for pos, (org, n_authors, n_drafts) in enumerate(org_rows, 1):
            org_table.add_row(str(pos), org, str(n_authors), str(n_drafts))
        console.print(org_table)

    console.print("\n[bold]Cross-Org Collaboration[/]")
    pairs = db.cross_org_collaborations(limit=top)
    if pairs:
        pair_table = Table()
        pair_table.add_column("Org A", style="cyan")
        pair_table.add_column("Org B", style="cyan")
        pair_table.add_column("Shared Drafts", justify="right", width=8)
        for org_a, org_b, shared in pairs:
            pair_table.add_row(org_a, org_b, str(shared))
        console.print(pair_table)
    else:
        console.print("[yellow]No author data. Run `ietf authors --fetch` first.[/]")
|
||||
100
src/ietf_analyzer/commands/proposals.py
Normal file
100
src/ietf_analyzer/commands/proposals.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""Draft generation and proposal intake commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
from .common import console, _get_config
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
|
||||
|
||||
def register(main):
    """Register all proposal commands with the main CLI group."""
    for cmd in (draft_gen, intake):
        main.add_command(cmd)
|
||||
|
||||
|
||||
# ── draft-gen ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command("draft-gen")
|
||||
@click.argument("gap_topic")
|
||||
@click.option("--output", "-o", help="Output file path")
|
||||
def draft_gen(gap_topic: str, output: str | None):
|
||||
"""Generate an Internet-Draft addressing a landscape gap."""
|
||||
from ..draftgen import DraftGenerator
|
||||
from ..analyzer import Analyzer
|
||||
|
||||
cfg = _get_config()
|
||||
db = Database(cfg)
|
||||
analyzer = Analyzer(cfg, db)
|
||||
generator = DraftGenerator(cfg, db, analyzer)
|
||||
|
||||
try:
|
||||
out_path = output or str(Path(cfg.data_dir) / "reports" / "generated-draft.txt")
|
||||
console.print(f"Generating Internet-Draft on: [bold]{gap_topic}[/]")
|
||||
path = generator.generate(gap_topic, output_path=out_path)
|
||||
console.print(f"\nDraft saved: [bold green]{path}[/]")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
# ── proposal intake ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command("intake")
|
||||
@click.argument("input_text", required=False)
|
||||
@click.option("--file", "-f", type=click.Path(exists=True), help="Read input from a file")
|
||||
@click.option("--dry-run", is_flag=True, help="Parse and show proposals without storing")
|
||||
def intake(input_text: str | None, file: str | None, dry_run: bool):
|
||||
"""Generate draft proposals from text/URLs.
|
||||
|
||||
Paste article text, URLs, or notes. Claude analyzes against all gaps
|
||||
and generates structured IETF draft proposals automatically.
|
||||
|
||||
Examples:
|
||||
|
||||
ietf intake "https://arxiv.org/abs/2503.18813"
|
||||
|
||||
ietf intake -f notes.txt
|
||||
|
||||
echo "interesting paper about agent security" | ietf intake -
|
||||
"""
|
||||
from ..proposal_intake import ProposalIntake
|
||||
|
||||
if input_text == "-":
|
||||
import sys
|
||||
input_text = sys.stdin.read()
|
||||
elif file:
|
||||
input_text = Path(file).read_text()
|
||||
elif not input_text:
|
||||
# Interactive: read from stdin until EOF
|
||||
console.print("[dim]Paste text/URLs, then Ctrl+D to submit:[/]")
|
||||
import sys
|
||||
input_text = sys.stdin.read()
|
||||
|
||||
if not input_text or not input_text.strip():
|
||||
console.print("[red]No input provided.[/]")
|
||||
raise SystemExit(1)
|
||||
|
||||
cfg = _get_config()
|
||||
db = Database(cfg)
|
||||
try:
|
||||
pipeline = ProposalIntake(cfg, db)
|
||||
proposals, usage = pipeline.process(input_text, dry_run=dry_run)
|
||||
|
||||
if proposals:
|
||||
console.print(f"\n[bold green]{len(proposals)} proposal(s) generated[/]")
|
||||
for p in proposals:
|
||||
pid = p.get("id", "---")
|
||||
gaps = ", ".join(f"#{g}" for g in p.get("gap_ids", []))
|
||||
console.print(f" [blue]#{pid}[/] {p['title']} [dim]gaps: {gaps}[/]")
|
||||
if not dry_run:
|
||||
console.print(f"\nView in web UI: [bold]http://localhost:5000/proposals[/]")
|
||||
else:
|
||||
console.print("[yellow]No proposals generated from this input.[/]")
|
||||
finally:
|
||||
db.close()
|
||||
667
src/ietf_analyzer/commands/reports.py
Normal file
667
src/ietf_analyzer/commands/reports.py
Normal file
@@ -0,0 +1,667 @@
|
||||
"""Report generation, visualization, working group analysis, and export commands."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
from rich.table import Table
|
||||
|
||||
from .common import console, pass_cfg_db, _get_config
|
||||
from ..config import Config
|
||||
from ..db import Database
|
||||
|
||||
|
||||
def register(main):
    """Register all report/viz/wg/export commands with the main CLI group."""
    for cmd in (report, viz, wg, export):
        main.add_command(cmd)
|
||||
|
||||
|
||||
# ── report ───────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.group()
def report():
    """Generate markdown reports."""
    # Container group only; subcommands carry all behavior.
|
||||
|
||||
|
||||
@report.command()
@pass_cfg_db
def overview(cfg, db):
    """Overview table of all rated drafts."""
    from ..reports import Reporter

    out_path = Reporter(cfg, db).overview()
    console.print(f"Report saved: [bold]{out_path}[/]")
|
||||
|
||||
|
||||
@report.command()
@pass_cfg_db
def landscape(cfg, db):
    """Category-grouped landscape view."""
    from ..reports import Reporter

    out_path = Reporter(cfg, db).landscape()
    console.print(f"Report saved: [bold]{out_path}[/]")
|
||||
|
||||
|
||||
@report.command()
@click.option("--days", "-d", default=7, help="Look back N days")
@pass_cfg_db
def digest(cfg, db, days: int):
    """What's new digest."""
    from ..reports import Reporter

    out_path = Reporter(cfg, db).digest(since_days=days)
    console.print(f"Report saved: [bold]{out_path}[/]")
|
||||
|
||||
|
||||
@report.command()
@pass_cfg_db
def timeline(cfg, db):
    """Timeline of draft submissions by month and category."""
    from ..reports import Reporter

    out_path = Reporter(cfg, db).timeline()
    console.print(f"Report saved: [bold]{out_path}[/]")
|
||||
|
||||
|
||||
@report.command("overlap-matrix")
|
||||
@pass_cfg_db
|
||||
def overlap_matrix(cfg, db):
|
||||
"""Full pairwise overlap matrix report."""
|
||||
from ..embeddings import Embedder
|
||||
from ..reports import Reporter
|
||||
embedder = Embedder(cfg, db)
|
||||
n_drafts = len(db.all_drafts())
|
||||
console.print(f"Computing {n_drafts}x{n_drafts} similarity matrix...")
|
||||
path = Reporter(cfg, db).overlap_matrix(embedder)
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("authors")
|
||||
@pass_cfg_db
|
||||
def authors_report(cfg, db):
|
||||
"""Author and organization network report."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).authors_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("ideas")
|
||||
@pass_cfg_db
|
||||
def ideas_report(cfg, db):
|
||||
"""Report on extracted technical ideas."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).ideas_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("refs")
|
||||
@pass_cfg_db
|
||||
def refs_report(cfg, db):
|
||||
"""Cross-reference report — which standards the ecosystem builds on."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).refs_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("trends")
|
||||
@pass_cfg_db
|
||||
def trends_report(cfg, db):
|
||||
"""Category trend analysis report (markdown)."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).trends_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("idea-overlap")
|
||||
@pass_cfg_db
|
||||
def idea_overlap_report(cfg, db):
|
||||
"""Cross-organization idea overlap report."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).idea_overlap_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("status")
|
||||
@pass_cfg_db
|
||||
def status_report(cfg, db):
|
||||
"""WG adoption status report."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).status_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("revisions")
|
||||
@pass_cfg_db
|
||||
def revisions_report(cfg, db):
|
||||
"""Draft revision velocity report."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).revisions_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("centrality")
|
||||
@pass_cfg_db
|
||||
def centrality_report(cfg, db):
|
||||
"""Author network centrality report."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).centrality_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("co-occurrence")
|
||||
@pass_cfg_db
|
||||
def co_occurrence_report(cfg, db):
|
||||
"""Category co-occurrence matrix report."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).co_occurrence_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("wg")
|
||||
@pass_cfg_db
|
||||
def wg_report(cfg, db):
|
||||
"""Working group analysis report — overlaps, alignment, submission targets."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).wg_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("sources")
|
||||
@pass_cfg_db
|
||||
def sources_report(cfg, db):
|
||||
"""Cross-source comparison report — ratings and categories by standards body."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).sources_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("false-positives")
|
||||
@pass_cfg_db
|
||||
def false_positives_report(cfg, db):
|
||||
"""False positive profiling report — what makes drafts look AI-related but not be."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).false_positives_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("citations")
|
||||
@pass_cfg_db
|
||||
def citations_report(cfg, db):
|
||||
"""Citation influence and BCP dependency analysis."""
|
||||
from ..reports import Reporter
|
||||
path = Reporter(cfg, db).citations_report()
|
||||
console.print(f"Report saved: [bold]{path}[/]")
|
||||
|
||||
|
||||
@report.command("complexity")
@pass_cfg_db
def complexity_report(cfg, db):
    """Draft complexity matrix: correlations between structural complexity and ratings."""
    from ..reports import Reporter

    reporter = Reporter(cfg, db)
    console.print(f"Report saved: [bold]{reporter.complexity_report()}[/]")
|
||||
|
||||
|
||||
@report.command("idea-analysis")
@pass_cfg_db
def idea_analysis_report(cfg, db):
    """Idea novelty deep dive — distribution, types, top ideas, cross-draft patterns."""
    from ..reports import Reporter

    reporter = Reporter(cfg, db)
    console.print(f"Report saved: [bold]{reporter.idea_analysis()}[/]")
|
||||
|
||||
|
||||
# ── wg (working group analysis) ─────────────────────────────────────────
|
||||
|
||||
|
||||
@click.group()
def wg():
    """Working group analysis — overlaps, alignment opportunities, submission targets."""
|
||||
|
||||
|
||||
@wg.command("list")
@click.option("--min-drafts", default=1, help="Minimum drafts to show a WG")
@pass_cfg_db
def wg_list(cfg, db, min_drafts: int):
    """List working groups with draft counts and average scores."""
    all_wgs = db.wg_summary()
    if not all_wgs:
        console.print("[yellow]No WG data. Run: python scripts/backfill-wg-names.py[/]")
        return

    visible = [w for w in all_wgs if w["draft_count"] >= min_drafts]

    table = Table(title=f"Working Groups ({len(visible)} with >= {min_drafts} drafts)")
    for header, opts in (
        ("WG", {"style": "cyan", "width": 12}),
        ("#", {"justify": "right", "width": 4}),
        ("Ideas", {"justify": "right", "width": 5}),
        ("Nov", {"justify": "center", "width": 4}),
        ("Mat", {"justify": "center", "width": 4}),
        ("Ovl", {"justify": "center", "width": 4}),
        ("Mom", {"justify": "center", "width": 4}),
        ("Rel", {"justify": "center", "width": 4}),
        ("Top Categories", {}),
    ):
        table.add_column(header, **opts)

    for w in visible:
        # Top three categories by draft count, rendered as "name(count)".
        ranked = sorted(w["categories"].items(), key=lambda kv: kv[1], reverse=True)[:3]
        cats_cell = ", ".join(f"{c}({n})" for c, n in ranked) if ranked else "-"
        table.add_row(
            w["wg"], str(w["draft_count"]), str(w["idea_count"]),
            str(w["avg_novelty"]), str(w["avg_maturity"]),
            str(w["avg_overlap"]), str(w["avg_momentum"]),
            str(w["avg_relevance"]), cats_cell,
        )

    console.print(table)

    # Drafts with no WG affiliation are reported as a separate footer count.
    indiv = db.conn.execute(
        'SELECT COUNT(*) FROM drafts WHERE "group" = \'none\' OR "group" IS NULL'
    ).fetchone()[0]
    console.print(f"\n[dim]Individual submissions (no WG): {indiv}[/]")
|
||||
|
||||
|
||||
@wg.command("show")
@click.argument("name")
@pass_cfg_db
def wg_show(cfg, db, name: str):
    """Show details for a specific working group."""
    drafts = db.wg_drafts(name)
    if not drafts:
        console.print(f"[red]No drafts found for WG: {name}[/]")
        return

    console.print(f"\n[bold]Working Group: {name}[/] ({len(drafts)} drafts)\n")

    table = Table()
    for header, opts in (
        ("Date", {"style": "dim", "width": 10}),
        ("Name", {"style": "cyan"}),
        ("Title", {"max_width": 50}),
        ("Score", {"justify": "right", "width": 6}),
    ):
        table.add_column(header, **opts)

    for draft in drafts:
        rating = db.get_rating(draft.name)
        score_cell = "-"
        if rating:
            score_cell = f"{rating.composite_score:.1f}"
        table.add_row(draft.date, draft.name, draft.title[:50], score_cell)

    console.print(table)

    # Collect the extracted ideas across every draft in this WG.
    collected: list = []
    for draft in drafts:
        collected.extend(db.get_ideas_for_draft(draft.name))
    if collected:
        console.print(f"\n[bold]Ideas ({len(collected)}):[/]")
        for idea in collected[:15]:
            console.print(f" - [cyan]{idea['title']}[/]: {idea['description'][:80]}")
        if len(collected) > 15:
            console.print(f" [dim]... and {len(collected) - 15} more[/]")
|
||||
|
||||
|
||||
@wg.command("overlaps")
@click.option("--min-wgs", default=2, help="Minimum WGs sharing a category to show")
@pass_cfg_db
def wg_overlaps(cfg, db, min_wgs: int):
    """Find categories and ideas that span multiple WGs — alignment opportunities."""
    # Categories whose drafts are distributed across several working groups.
    spread = db.category_wg_spread()
    multi = [
        entry for entry in spread
        if entry["wg_count"] >= min_wgs
        and any(w["wg"] != "none" for w in entry["wgs"])
    ]

    if multi:
        console.print(f"\n[bold]Categories spanning {min_wgs}+ WGs[/]\n")
        for entry in multi:
            named = [f"{w['wg']}({w['count']})" for w in entry["wgs"] if w["wg"] != "none"]
            if named:
                console.print(f" [cyan]{entry['category']}[/] — {entry['total_drafts']} drafts across {entry['wg_count']} WGs")
                console.print(f" WGs: {', '.join(named)}")

    # Ideas that surface in more than one WG.
    # NOTE(review): only all-'none' entries are filtered out here; min_wgs is
    # not applied to this list even though the heading mentions it — confirm
    # whether db.wg_idea_overlap() already guarantees the threshold.
    idea_overlaps = db.wg_idea_overlap()
    cross_wg = [o for o in idea_overlaps if any(w != "none" for w in o["wg_names"])]

    if cross_wg:
        console.print(f"\n[bold]Ideas appearing in {min_wgs}+ WGs ({len(cross_wg)} found)[/]\n")
        for overlap in cross_wg[:20]:
            real_wgs = [w for w in overlap["wg_names"] if w != "none"]
            console.print(f" [cyan]{overlap['idea_title']}[/] — WGs: {', '.join(real_wgs)}")
            for entry in overlap["wgs"]:
                if entry["wg"] != "none":
                    console.print(f" - [{entry['wg']}] {entry['draft_name']}")
        if len(cross_wg) > 20:
            console.print(f"\n [dim]... and {len(cross_wg) - 20} more[/]")

    if not multi and not cross_wg:
        console.print("[yellow]No cross-WG overlaps found.[/]")
|
||||
|
||||
|
||||
@wg.command("alignment")
@pass_cfg_db
def wg_alignment(cfg, db):
    """Identify where individual drafts should be consolidated into WG standards."""
    # Side-by-side category counts: individual submissions vs WG-adopted drafts.
    dist = db.individual_vs_wg_categories()
    indiv_counts = dist["individual"]
    wg_counts = dist["wg_adopted"]

    console.print("\n[bold]Individual vs WG-Adopted Category Distribution[/]\n")

    table = Table()
    table.add_column("Category", width=25)
    table.add_column("Individual", justify="right", width=10)
    table.add_column("WG-Adopted", justify="right", width=10)
    table.add_column("Signal", width=40)

    all_cats = sorted(set(indiv_counts) | set(wg_counts))
    for cat in all_cats:
        n_indiv = indiv_counts.get(cat, 0)
        n_wg = wg_counts.get(cat, 0)
        # Ordering of these tests matters: the strongest signal wins.
        if n_indiv >= 5 and n_wg == 0:
            signal = "[yellow]High individual activity, no WG — needs WG?[/]"
        elif n_indiv >= 3 and n_wg >= 1:
            signal = "[green]WG exists, individual drafts could target it[/]"
        elif n_wg > n_indiv and n_indiv > 0:
            signal = "[dim]WG leading, some individual work[/]"
        else:
            signal = ""
        table.add_row(cat, str(n_indiv), str(n_wg), signal)

    console.print(table)

    # Categories with heavy individual activity but zero WG adoption may
    # justify a new WG or a BoF session.
    console.print("\n[bold]Consolidation Candidates[/]")
    console.print("[dim]Categories with many individual drafts but no WG adoption — "
                  "potential for new WG or BoF[/]\n")

    candidates = [
        (cat, indiv_counts.get(cat, 0))
        for cat in all_cats
        if indiv_counts.get(cat, 0) >= 5 and wg_counts.get(cat, 0) == 0
    ]

    if not candidates:
        console.print(" [green]All active categories have WG representation.[/]")
        return

    for cat, count in sorted(candidates, key=lambda pair: pair[1], reverse=True):
        console.print(f" [yellow]{cat}[/]: {count} individual drafts, no WG home")
        # Show the top-scoring sample drafts in this category (composite
        # score recomputed inline since ratings store raw dimensions).
        rows = db.conn.execute("""
            SELECT d.name, d.title FROM drafts d
            JOIN ratings r ON d.name = r.draft_name
            WHERE (d."group" = 'none' OR d."group" IS NULL)
            AND r.categories LIKE ?
            ORDER BY (r.novelty * 0.30 + r.relevance * 0.25 + r.maturity * 0.20
            + r.momentum * 0.15 + (6 - r.overlap) * 0.10) DESC
            LIMIT 5
        """, (f"%{cat}%",)).fetchall()
        for row in rows:
            console.print(f" - {row['name']}: {row['title'][:60]}")
        console.print()
|
||||
|
||||
|
||||
@wg.command("targets")
@pass_cfg_db
def wg_targets(cfg, db):
    """Suggest best WGs for submitting new work in each category."""
    spread = db.category_wg_spread()
    by_wg = {row["wg"]: row for row in db.wg_summary()}

    console.print("\n[bold]Recommended Submission Targets by Category[/]\n")

    for entry in spread:
        cat = entry["category"]
        # Drop the pseudo-WG 'none' (individual submissions).
        real_wgs = [w for w in entry["wgs"] if w["wg"] != "none"]
        if not real_wgs:
            console.print(f" [cyan]{cat}[/]: [yellow]No active WG — individual submission[/]")
            continue

        best = real_wgs[0]
        wg_info = by_wg.get(best["wg"], {})
        console.print(
            f" [cyan]{cat}[/]: [bold green]{best['wg']}[/] "
            f"({best['count']} drafts"
            f"{', avg relevance ' + str(wg_info.get('avg_relevance', '?')) if wg_info else ''})"
        )
        if len(real_wgs) > 1:
            alternates = ", ".join(f"{w['wg']}({w['count']})" for w in real_wgs[1:3])
            console.print(f" Also: {alternates}")

    console.print()
|
||||
|
||||
|
||||
# ── visualize ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.group()
def viz():
    """Generate interactive visualizations (HTML/PNG)."""
|
||||
|
||||
|
||||
@viz.command("all")
@pass_cfg_db
def viz_all(cfg, db):
    """Generate all available visualizations."""
    from ..visualize import Visualizer

    visualizer = Visualizer(cfg, db)
    generated = visualizer.generate_all()
    console.print(f"\n[bold green]{len(generated)} visualizations[/] saved to {visualizer.output_dir}/")
|
||||
|
||||
|
||||
@viz.command("landscape")
@click.option("--method", "-m", default="tsne", type=click.Choice(["umap", "tsne"]),
              help="Dimensionality reduction method")
@pass_cfg_db
def viz_landscape(cfg, db, method: str):
    """2D scatter of draft embeddings colored by category."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).landscape_scatter(method=method)
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("heatmap")
@pass_cfg_db
def viz_heatmap(cfg, db):
    """Clustered similarity heatmap (PNG)."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).similarity_heatmap()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("distributions")
@pass_cfg_db
def viz_distributions(cfg, db):
    """Rating dimension distributions by category (PNG)."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).score_distributions()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("timeline")
@pass_cfg_db
def viz_timeline(cfg, db):
    """Stacked area chart of monthly submissions."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).timeline_chart()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("bubble")
@pass_cfg_db
def viz_bubble(cfg, db):
    """Interactive bubble chart: novelty vs maturity."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).bubble_explorer()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("radar")
@pass_cfg_db
def viz_radar(cfg, db):
    """Radar chart of average category rating profiles."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).category_radar()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("network")
@click.option("--min-shared", "-n", default=2, help="Minimum shared drafts for an edge")
@pass_cfg_db
def viz_network(cfg, db, min_shared: int):
    """Interactive author collaboration network graph."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).author_network(min_shared=min_shared)
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("treemap")
@pass_cfg_db
def viz_treemap(cfg, db):
    """Category treemap colored by average score."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).category_treemap()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("quality")
@pass_cfg_db
def viz_quality(cfg, db):
    """Score vs uniqueness scatter (quality vs redundancy)."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).score_vs_overlap()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("orgs")
@pass_cfg_db
def viz_orgs(cfg, db):
    """Organization contribution bar chart."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).org_contributions()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("ideas")
@pass_cfg_db
def viz_ideas(cfg, db):
    """Ideas frequency chart by type."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).ideas_chart()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
@viz.command("browser")
@pass_cfg_db
def viz_browser(cfg, db):
    """Interactive filterable draft browser (standalone HTML)."""
    from ..visualize import Visualizer

    out = Visualizer(cfg, db).draft_browser()
    console.print(f"Saved: [bold]{out}[/]")
|
||||
|
||||
|
||||
# ── export ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@click.command()
@click.option("--type", "export_type", type=click.Choice(["drafts", "ideas", "gaps", "authors", "ratings"]),
              required=True, help="Type of data to export")
@click.option("--format", "fmt", type=click.Choice(["json", "csv"]), default="json", help="Output format")
@click.option("--output", "-o", "output_file", type=click.Path(), default=None,
              help="Output file (default: stdout)")
def export(export_type: str, fmt: str, output_file: str | None):
    """Export data as JSON or CSV."""
    # Serializes the requested dataset to stdout or --output. JSON is a
    # pretty-printed array of objects; CSV headers use the union of keys
    # across all rows so heterogeneous rows serialize without errors.
    import csv as csv_mod
    import io
    import json

    cfg = _get_config()
    db = Database(cfg)

    try:
        rows: list[dict] = []

        if export_type == "drafts":
            # One row per draft; rating columns are added only when a
            # rating exists, so draft rows may have differing key sets.
            drafts = db.list_drafts(limit=10000, order_by="name ASC")
            for d in drafts:
                rating = db.get_rating(d.name)
                row = {
                    "name": d.name,
                    "title": d.title,
                    "rev": d.rev,
                    "date": d.date,
                    "pages": d.pages or 0,
                    "group": d.group or "",
                }
                if rating:
                    row["score"] = round(rating.composite_score, 2)
                    row["novelty"] = rating.novelty
                    row["maturity"] = rating.maturity
                    row["overlap"] = rating.overlap
                    row["momentum"] = rating.momentum
                    row["relevance"] = rating.relevance
                    row["categories"] = json.dumps(rating.categories)
                    row["summary"] = rating.summary
                rows.append(row)

        elif export_type == "ideas":
            rows = db.all_ideas()

        elif export_type == "gaps":
            rows = db.all_gaps()

        elif export_type == "authors":
            for name, aff, cnt, drafts_list, _pid in db.top_authors(limit=10000):
                rows.append({
                    "name": name,
                    "affiliation": aff,
                    "draft_count": cnt,
                    "drafts": json.dumps(drafts_list),
                })

        elif export_type == "ratings":
            for draft, rating in db.drafts_with_ratings(limit=10000):
                rows.append({
                    "name": draft.name,
                    "title": draft.title,
                    "score": round(rating.composite_score, 2),
                    "novelty": rating.novelty,
                    "maturity": rating.maturity,
                    "overlap": rating.overlap,
                    "momentum": rating.momentum,
                    "relevance": rating.relevance,
                    "categories": json.dumps(rating.categories),
                    "summary": rating.summary,
                })

        if fmt == "json":
            text = json.dumps(rows, indent=2, ensure_ascii=False)
        elif not rows:
            text = ""
        else:
            # CSV. BUG FIX: fieldnames used to come from rows[0].keys() only,
            # but "drafts" rows are heterogeneous (rating columns present only
            # when a rating exists). DictWriter raises ValueError for keys not
            # in fieldnames, so build the header from the union of keys across
            # all rows (first-seen order); fields a row lacks serialize as "".
            fieldnames: list[str] = []
            seen: set[str] = set()
            for row in rows:
                for key in row:
                    if key not in seen:
                        seen.add(key)
                        fieldnames.append(key)
            si = io.StringIO()
            writer = csv_mod.DictWriter(si, fieldnames=fieldnames, restval="")
            writer.writeheader()
            writer.writerows(rows)
            text = si.getvalue()

        if output_file:
            Path(output_file).write_text(text, encoding="utf-8")
            console.print(f"Exported [bold green]{len(rows)}[/] {export_type} to [cyan]{output_file}[/] ({fmt})")
        else:
            click.echo(text)

    finally:
        db.close()
|
||||
Reference in New Issue
Block a user