Run pipeline, write Post 08, commit untracked files
Pipeline: - Extract ideas for 38 new drafts → 462 ideas total - Convergence analysis: 132 cross-org convergent ideas (33% rate) - Fetch authors for 102 drafts → 709 authors (up from 403) - Refresh gap analysis: 12 gaps across full 474-draft corpus - Update verified counts with new totals Post 08: - Complete rewrite of "Agents Building the Agent Analysis" (2,953 words) - Covers 3 phases: writing team → review cycle → fix cycle - Meta-irony table mapping team coordination to IETF gap names - Specific examples from dev journal (SQL injection, consent conflation, ideas mismatch) Untracked files committed: - scripts/: backfill-wg-names, classify-unrated, compare-classifiers, download-relevant-text, run-webui - src/ietf_analyzer/classifier.py: two-stage Ollama classifier - src/webui/: analytics (GDPR-compliant), auth, obsidian_export - tests/test_obsidian_export.py (10 tests) - data/reports/: wg-analysis, generated draft for gap #37 Housekeeping: - .gitignore: exclude LaTeX artifacts, stale DBs, analytics.db Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
39
scripts/classify-unrated.py
Normal file
39
scripts/classify-unrated.py
Normal file
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python3
"""Classify unrated drafts using Ollama two-stage filter."""

import sqlite3
import sys
from contextlib import closing

# Make the in-repo package importable when run from the project root.
sys.path.insert(0, "src")

from ietf_analyzer.classifier import Classifier
from ietf_analyzer.config import Config


def _print_group(label: str, drafts: list[dict]) -> None:
    """Print one classification bucket: a header line, then source/name/title per draft."""
    print(f"\n--- {label} ({len(drafts)}) ---")
    for d in drafts:
        print(f" [{d['source']}] {d['name']}")
        print(f" {d['title'][:100]}")


def main() -> None:
    """Fetch drafts with no rating, classify them via Ollama, and print a report."""
    cfg = Config.load()

    # closing() guarantees the DB connection is released even if the
    # query raises; rows are fully materialized before we leave the block.
    with closing(sqlite3.connect(cfg.db_path)) as conn:
        conn.row_factory = sqlite3.Row
        # Unrated = drafts with no corresponding row in `ratings`.
        rows = conn.execute("""
            SELECT name, title, abstract, source FROM drafts
            WHERE name NOT IN (SELECT draft_name FROM ratings)
            ORDER BY source, name
        """).fetchall()

    drafts = [dict(r) for r in rows]
    print(f"Classifying {len(drafts)} unrated drafts...\n")

    # Classifier is a context manager (presumably owns the Ollama client
    # session — TODO confirm against ietf_analyzer.classifier).
    with Classifier(cfg) as clf:
        relevant, irrelevant = clf.classify_batch(drafts, verbose=True)

    _print_group("RELEVANT", relevant)
    _print_group("IRRELEVANT", irrelevant)

    print(f"\nSummary: {len(relevant)} relevant, {len(irrelevant)} irrelevant out of {len(drafts)}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user