Idea quality pipeline, web UI features, academic paper

- Tighten idea extraction prompts (1-4 ideas, no sub-features) reducing
  1,907 ideas to 468 across 434 drafts (78% reduction)
- Add embedding-based dedup (ietf dedup-ideas) for same-draft similarity
- Add novelty scoring (ietf ideas score) and filtering (ietf ideas filter)
  using Claude to rate ideas 1-5, removing 49 generic building blocks
- Final count: 419 high-quality ideas (avg 1.1/draft)
- Web UI: gap explorer with live draft generation and pre-generated demos
- Web UI: D3.js author collaboration network (498 nodes, 1142 edges,
  68 clusters, org filtering, interactive zoom/pan)
- Academic paper: 15-page LaTeX workshop paper analyzing the 434-draft
  AI agent standards landscape
- Save improvement ideas backlog to data/reports/improvement-ideas.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 22:17:57 +01:00
parent 3c3d7e649f
commit 6e3a387778
29 changed files with 6575 additions and 240 deletions

View File

@@ -0,0 +1,200 @@
{% extends "base.html" %}
{# Idea Clusters page.
   The inline script in the extra_scripts block reads the `clusters` template
   variable and un-hides exactly one of #emptyState / #clusterContent; both
   containers start with the `hidden` class. #}
{# NOTE(review): active_page is presumably read by base.html to highlight the
   current navigation entry; confirm against base.html. #}
{% set active_page = "idea_clusters" %}
{% block title %}Idea Clusters — IETF Draft Analyzer{% endblock %}
{% block content %}
<!-- Page heading -->
<div class="mb-6">
<h1 class="text-2xl font-bold text-white">Idea Clusters</h1>
<p class="text-slate-400 text-sm mt-1">Extracted ideas grouped by semantic similarity using embedding-based clustering</p>
</div>
<!-- Empty state: revealed by the page script when data.empty is truthy -->
<div id="emptyState" class="hidden">
<div class="bg-slate-900 rounded-xl border border-slate-800 p-12 text-center">
<svg class="w-16 h-16 mx-auto text-slate-600 mb-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M4 6a2 2 0 012-2h2a2 2 0 012 2v2a2 2 0 01-2 2H6a2 2 0 01-2-2V6zm10 0a2 2 0 012-2h2a2 2 0 012 2v2a2 2 0 01-2 2h-2a2 2 0 01-2-2V6zM4 16a2 2 0 012-2h2a2 2 0 012 2v2a2 2 0 01-2 2H6a2 2 0 01-2-2v-2z"/>
</svg>
<h2 class="text-lg font-semibold text-slate-300 mb-2">No idea embeddings found</h2>
<p class="text-slate-500">Run <code class="bg-slate-800 px-2 py-1 rounded text-sm font-mono text-blue-400">ietf embed-ideas</code> to generate embeddings first.</p>
</div>
</div>
<!-- Main content: revealed by the page script when data.empty is falsy -->
<div id="clusterContent" class="hidden">
<!-- Stat cards -->
<!-- The "0" placeholders below are overwritten by the page script from data.stats -->
<div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
<div class="stat-card rounded-xl border border-slate-800 p-5">
<p class="text-xs text-slate-500 uppercase tracking-wide">Total Ideas Embedded</p>
<p class="text-2xl font-bold text-white mt-1" id="statTotal">0</p>
</div>
<div class="stat-card rounded-xl border border-slate-800 p-5">
<p class="text-xs text-slate-500 uppercase tracking-wide">Clusters Found</p>
<p class="text-2xl font-bold text-white mt-1" id="statClusters">0</p>
</div>
<div class="stat-card rounded-xl border border-slate-800 p-5">
<p class="text-xs text-slate-500 uppercase tracking-wide">Avg Cluster Size</p>
<p class="text-2xl font-bold text-white mt-1" id="statAvgSize">0</p>
</div>
</div>
<!-- t-SNE Scatter -->
<div class="bg-slate-900 rounded-xl border border-slate-800 p-5 mb-6">
<h2 class="text-sm font-semibold text-slate-300 mb-1">Idea Embedding Space (t-SNE)</h2>
<p class="text-xs text-slate-500 mb-3">Each dot is an extracted idea, colored by cluster. Hover for details, click to view the source draft.</p>
<!-- Mount point for Plotly.newPlot('scatterPlot', ...); the fixed height sizes the chart -->
<div id="scatterPlot" style="height: 560px;"></div>
</div>
<!-- Treemap -->
<div class="bg-slate-900 rounded-xl border border-slate-800 p-5 mb-6">
<h2 class="text-sm font-semibold text-slate-300 mb-1">Cluster Sizes</h2>
<p class="text-xs text-slate-500 mb-3">Treemap showing relative sizes of each idea cluster.</p>
<!-- Mount point for Plotly.newPlot('treemapPlot', ...) -->
<div id="treemapPlot" style="height: 450px;"></div>
</div>
<!-- Cluster cards grid -->
<h2 class="text-lg font-semibold text-white mb-4">Cluster Details</h2>
<!-- Left empty on purpose: the page script appends one card per entry of data.clusters -->
<div id="clusterGrid" class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-4 mb-6">
</div>
</div>
{% endblock %}
{% block extra_scripts %}
<script>
// Base Plotly layout for the dark theme; individual charts spread this object
// and override the keys they need.
const PLOTLY_LAYOUT = (() => {
  // Built per axis so xaxis and yaxis never share one object instance.
  const themedAxis = () => ({
    gridcolor: '#1e293b',
    zerolinecolor: '#334155',
  });
  return {
    paper_bgcolor: 'transparent',
    plot_bgcolor: 'rgba(15,23,42,0.5)',
    font: {
      color: '#94a3b8',
      family: 'Inter, system-ui, sans-serif',
      size: 12,
    },
    margin: { t: 20, r: 20, b: 50, l: 50 },
    xaxis: themedAxis(),
    yaxis: themedAxis(),
  };
})();

// Plotly config passed to every chart on this page: responsive, no mode bar.
const CFG = {
  responsive: true,
  displayModeBar: false,
};

// Categorical colors; consumers index with `i % PALETTE.length`, so the
// sequence repeats after 15 entries.
const PALETTE = [
  '#3b82f6', // blue
  '#ef4444', // red
  '#22c55e', // green
  '#a855f7', // purple
  '#f59e0b', // amber
  '#06b6d4', // cyan
  '#ec4899', // pink
  '#84cc16', // lime
  '#f97316', // orange
  '#8b5cf6', // violet
  '#14b8a6', // teal
  '#e11d48', // rose
  '#64748b', // slate
  '#eab308', // yellow
  '#6366f1', // indigo
];
const data = {{ clusters | tojson }};
if (data.empty) {
document.getElementById('emptyState').classList.remove('hidden');
} else {
document.getElementById('clusterContent').classList.remove('hidden');
// Stats
const stats = data.stats;
document.getElementById('statTotal').textContent = stats.total.toLocaleString();
document.getElementById('statClusters').textContent = stats.num_clusters.toLocaleString();
document.getElementById('statAvgSize').textContent = stats.num_clusters > 0
? (stats.clustered / stats.num_clusters).toFixed(1) : '0';
// --- t-SNE Scatter ---
if (data.scatter.length > 0) {
// Group by cluster_id
const groups = {};
data.scatter.forEach(pt => {
if (!groups[pt.cluster_id]) groups[pt.cluster_id] = { x: [], y: [], text: [], names: [] };
groups[pt.cluster_id].x.push(pt.x);
groups[pt.cluster_id].y.push(pt.y);
groups[pt.cluster_id].text.push(pt.title);
groups[pt.cluster_id].names.push(pt.draft_name);
});
// Map cluster_id to cluster theme
const clusterThemes = {};
data.clusters.forEach((c, i) => {
// Find the original cluster_id by matching scatter points
});
const clusterIds = Object.keys(groups).sort((a, b) => (groups[b].x.length - groups[a].x.length));
const traces = clusterIds.map((cid, i) => {
const g = groups[cid];
const theme = data.clusters[i] ? data.clusters[i].theme : `Cluster ${cid}`;
return {
x: g.x, y: g.y, text: g.text, name: theme,
customdata: g.names,
mode: 'markers', type: 'scatter',
marker: {
size: 6,
color: PALETTE[i % PALETTE.length],
opacity: 0.8,
line: { width: 0.5, color: 'rgba(255,255,255,0.15)' },
},
hovertemplate: '<b>%{text}</b><extra>%{customdata}</extra>',
};
});
Plotly.newPlot('scatterPlot', traces, {
...PLOTLY_LAYOUT,
xaxis: { visible: false, showgrid: false, zeroline: false },
yaxis: { visible: false, showgrid: false, zeroline: false },
legend: { font: { size: 10, color: '#94a3b8' }, bgcolor: 'transparent' },
hovermode: 'closest',
margin: { t: 10, r: 20, b: 10, l: 20 },
}, CFG);
document.getElementById('scatterPlot').on('plotly_click', function(ev) {
const pt = ev.points[0];
if (pt.customdata) {
window.location.href = '/drafts/' + pt.customdata;
}
});
}
// --- Treemap ---
if (data.clusters.length > 0) {
const labels = data.clusters.map(c => c.theme);
const values = data.clusters.map(c => c.size);
const colors = data.clusters.map((_, i) => PALETTE[i % PALETTE.length]);
Plotly.newPlot('treemapPlot', [{
type: 'treemap',
labels: labels,
parents: labels.map(() => ''),
values: values,
textinfo: 'label+value',
marker: { colors: colors },
hovertemplate: '<b>%{label}</b><br>%{value} ideas<extra></extra>',
}], {
...PLOTLY_LAYOUT,
margin: { t: 10, r: 10, b: 10, l: 10 },
}, CFG);
}
// --- Cluster Cards ---
const grid = document.getElementById('clusterGrid');
data.clusters.forEach((cluster, i) => {
const color = PALETTE[i % PALETTE.length];
const topIdeas = cluster.ideas.slice(0, 3);
const ideaListHtml = topIdeas.map(idea =>
`<li class="text-xs text-slate-400 truncate" title="${idea.title}">${idea.title}</li>`
).join('');
const extraCount = cluster.size - topIdeas.length;
const extraHtml = extraCount > 0
? `<li class="text-xs text-slate-600">+${extraCount} more</li>` : '';
const draftBadges = cluster.drafts.slice(0, 4).map(d =>
`<a href="/drafts/${d}" class="inline-block bg-slate-800 text-slate-400 text-xs px-2 py-0.5 rounded hover:text-blue-400 truncate max-w-[140px]" title="${d}">${d.replace('draft-', '').substring(0, 20)}</a>`
).join(' ');
const extraDrafts = cluster.drafts.length > 4
? `<span class="text-xs text-slate-600">+${cluster.drafts.length - 4}</span>` : '';
const card = document.createElement('div');
card.className = 'bg-slate-900 rounded-xl border border-slate-800 p-5';
card.innerHTML = `
<div class="flex items-center gap-2 mb-3">
<div class="w-3 h-3 rounded-full" style="background: ${color}"></div>
<h3 class="text-sm font-semibold text-white">${cluster.theme}</h3>
<span class="ml-auto text-xs text-slate-500">${cluster.size} ideas</span>
</div>
<ul class="space-y-1 mb-3">${ideaListHtml}${extraHtml}</ul>
<div class="border-t border-slate-800 pt-3">
<p class="text-xs text-slate-500 mb-1">${cluster.drafts.length} source draft${cluster.drafts.length !== 1 ? 's' : ''}</p>
<div class="flex flex-wrap gap-1">${draftBadges}${extraDrafts}</div>
</div>
`;
grid.appendChild(card);
});
}
</script>
{% endblock %}