Idea quality pipeline, web UI features, academic paper

- Tighten idea extraction prompts (1-4 ideas, no sub-features) reducing
  1,907 ideas to 468 across 434 drafts (75% reduction)
- Add embedding-based dedup (ietf dedup-ideas) for same-draft similarity
- Add novelty scoring (ietf ideas score) and filtering (ietf ideas filter)
  using Claude to rate ideas 1-5, removing 49 generic building blocks
- Final count: 419 high-quality ideas (avg 1.1/draft)
- Web UI: gap explorer with live draft generation and pre-generated demos
- Web UI: D3.js author collaboration network (498 nodes, 1142 edges,
  68 clusters, org filtering, interactive zoom/pan)
- Academic paper: 15-page LaTeX workshop paper analyzing the 434-draft
  AI agent standards landscape
- Save improvement ideas backlog to data/reports/improvement-ideas.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-06 22:17:57 +01:00
parent 3c3d7e649f
commit 6e3a387778
29 changed files with 6575 additions and 240 deletions

View File

@@ -0,0 +1,249 @@
{% extends "base.html" %}
{% set active_page = "similarity" %}
{% block title %}Similarity — IETF Draft Analyzer{% endblock %}
{% block content %}
<!--
  Similarity page body. The inline script on this page looks up the following
  ids: statNodes, statEdges, statAvgSim, thresholdSlider, thresholdLabel,
  visibleEdges and simGraph. Renaming any of them requires updating the script.
-->
<div class="mb-6">
  <h1 class="text-2xl font-bold text-white">Draft Similarity Graph</h1>
  <p class="text-slate-400 text-sm mt-1">Force-directed graph of draft-to-draft semantic similarity based on embeddings</p>
</div>
<!-- Summary stats: the "0" placeholders are overwritten by the page script on load -->
<div class="grid grid-cols-2 md:grid-cols-3 gap-4 mb-6">
  <div class="stat-card rounded-xl border border-slate-800 p-4 relative overflow-hidden">
    <div class="absolute top-0 left-0 w-full h-1 bg-gradient-to-r from-blue-500 to-blue-400"></div>
    <div class="text-xs text-slate-500 uppercase tracking-wider">Connected Drafts</div>
    <div class="text-2xl font-bold text-white mt-1" id="statNodes">0</div>
  </div>
  <div class="stat-card rounded-xl border border-slate-800 p-4 relative overflow-hidden">
    <div class="absolute top-0 left-0 w-full h-1 bg-gradient-to-r from-purple-500 to-purple-400"></div>
    <div class="text-xs text-slate-500 uppercase tracking-wider">Similarity Links</div>
    <div class="text-2xl font-bold text-white mt-1" id="statEdges">0</div>
  </div>
  <div class="stat-card rounded-xl border border-slate-800 p-4 relative overflow-hidden">
    <div class="absolute top-0 left-0 w-full h-1 bg-gradient-to-r from-emerald-500 to-emerald-400"></div>
    <div class="text-xs text-slate-500 uppercase tracking-wider">Avg Similarity</div>
    <div class="text-2xl font-bold text-white mt-1" id="statAvgSim">0</div>
  </div>
</div>
<!-- Threshold slider: the label is bound to the input via for/id so it is announced by assistive tech and focuses the slider when clicked -->
<div class="bg-slate-900 rounded-xl border border-slate-800 p-4 mb-6">
  <div class="flex items-center gap-4 flex-wrap">
    <label for="thresholdSlider" class="text-sm text-slate-300 font-medium">Similarity Threshold:</label>
    <input type="range" id="thresholdSlider" min="0.50" max="0.99" step="0.01" value="0.75"
           class="w-48 h-2 bg-slate-700 rounded-lg appearance-none cursor-pointer accent-blue-500">
    <span class="text-sm font-mono text-blue-400" id="thresholdLabel">0.75</span>
    <span class="text-xs text-slate-500 ml-2">(<span id="visibleEdges">0</span> edges visible)</span>
  </div>
</div>
<!-- Force-directed graph: #simGraph is the Plotly mount point; its fixed height sizes the plot -->
<div class="bg-slate-900 rounded-xl border border-slate-800 p-5 mb-6">
  <h2 class="text-sm font-semibold text-slate-300 mb-1">Similarity Network</h2>
  <p class="text-xs text-slate-500 mb-3">Node size = composite score, color = category. Edge opacity = similarity strength. Click a node to view draft detail.</p>
  <div id="simGraph" style="height: 640px;"></div>
</div>
{% endblock %}
{% block extra_scripts %}
<script>
const PLOTLY_LAYOUT = {
paper_bgcolor: 'transparent',
plot_bgcolor: 'transparent',
font: { color: '#94a3b8', family: 'Inter, system-ui, sans-serif', size: 12 },
margin: { t: 20, r: 20, b: 40, l: 50 },
xaxis: { gridcolor: '#1e293b', zerolinecolor: '#334155' },
yaxis: { gridcolor: '#1e293b', zerolinecolor: '#334155' },
};
const CFG = { responsive: true, displayModeBar: false };
const PALETTE = [
'#3b82f6', '#ef4444', '#22c55e', '#a855f7', '#f59e0b',
'#06b6d4', '#ec4899', '#84cc16', '#f97316', '#8b5cf6',
'#14b8a6', '#e11d48', '#64748b', '#eab308', '#6366f1',
];
const fullNetwork = {{ network | tojson }};
// Assign color per category
const catSet = [...new Set(fullNetwork.nodes.map(n => n.category))];
const catColor = {};
catSet.forEach((c, i) => { catColor[c] = PALETTE[i % PALETTE.length]; });
// Update stat cards
document.getElementById('statNodes').textContent = fullNetwork.stats.node_count;
document.getElementById('statEdges').textContent = fullNetwork.stats.edge_count;
document.getElementById('statAvgSim').textContent = fullNetwork.stats.avg_similarity.toFixed(3);
/**
 * Escape text for Plotly hover labels. Plotly interprets a small HTML-like
 * subset in hover text, so a raw "<" or "&" in a draft title would be parsed
 * as markup instead of being displayed.
 *
 * @param {*} value - Value to display; coerced with String().
 * @returns {string} Text with &, < and > replaced by entities.
 */
function escapeHoverText(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Small deterministic PRNG (mulberry32). Used for the layout jitter so that
 * re-rendering the same threshold produces the same picture instead of a
 * freshly shuffled one.
 *
 * @param {number} seed - Integer seed.
 * @returns {function(): number} Generator returning values in [0, 1).
 */
function createSeededRandom(seed) {
  let state = seed >>> 0;
  return function next() {
    state = (state + 0x6d2b79f5) | 0;
    let t = Math.imul(state ^ (state >>> 15), 1 | state);
    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t;
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
}

/**
 * Force-directed spring layout: all node pairs repel, linked nodes attract in
 * proportion to their similarity, and per-iteration movement is capped by a
 * linearly cooling temperature.
 *
 * @param {number} nodeCount - Number of nodes to place.
 * @param {Array<{si: number, ti: number, similarity: number}>} links - Edges
 *   with endpoints already resolved to node indices.
 * @returns {Array<{x: number, y: number}>} One position per node index.
 */
function computeSpringLayout(nodeCount, links) {
  const ITERATIONS = 150;
  const random = createSeededRandom(nodeCount);

  // Start on a circle of radius 3; the jitter breaks the symmetry.
  const pos = Array.from({ length: nodeCount }, (_, i) => {
    const angle = (i * 2 * Math.PI) / nodeCount;
    return {
      x: Math.cos(angle) * 3 + (random() - 0.5),
      y: Math.sin(angle) * 3 + (random() - 0.5),
    };
  });

  const k = Math.sqrt(80.0 / Math.max(nodeCount, 1));
  for (let iter = 0; iter < ITERATIONS; iter++) {
    const disp = pos.map(() => ({ x: 0, y: 0 }));
    const temp = 3.0 * (1 - iter / ITERATIONS);

    // Repulsion between all pairs.
    for (let i = 0; i < nodeCount; i++) {
      for (let j = i + 1; j < nodeCount; j++) {
        const dx = pos[i].x - pos[j].x;
        const dy = pos[i].y - pos[j].y;
        const dist = Math.sqrt(dx * dx + dy * dy) || 0.01;
        const force = (k * k) / dist;
        disp[i].x += (dx / dist) * force;
        disp[i].y += (dy / dist) * force;
        disp[j].x -= (dx / dist) * force;
        disp[j].y -= (dy / dist) * force;
      }
    }

    // Attraction along edges, weighted by similarity.
    for (const { si, ti, similarity } of links) {
      const dx = pos[si].x - pos[ti].x;
      const dy = pos[si].y - pos[ti].y;
      const dist = Math.sqrt(dx * dx + dy * dy) || 0.01;
      const force = ((dist * dist) / k) * similarity;
      disp[si].x -= (dx / dist) * force;
      disp[si].y -= (dy / dist) * force;
      disp[ti].x += (dx / dist) * force;
      disp[ti].y += (dy / dist) * force;
    }

    // Move each node along its displacement, capped by the temperature.
    for (let i = 0; i < nodeCount; i++) {
      const len = Math.sqrt(disp[i].x * disp[i].x + disp[i].y * disp[i].y) || 0.01;
      const step = Math.min(len, temp);
      pos[i].x += (disp[i].x / len) * step;
      pos[i].y += (disp[i].y / len) * step;
    }
  }
  return pos;
}

/**
 * Build the edge line traces. A Plotly line trace has a single color, so edges
 * are bucketed into a few similarity bands and each band becomes one trace
 * with its own opacity; stronger links are drawn more opaque.
 *
 * @param {Array<{si: number, ti: number, similarity: number}>} links
 * @param {Array<{x: number, y: number}>} pos - Node positions by index.
 * @param {number} threshold - Current similarity cutoff (lower end of the bands).
 * @returns {Array<Object>} Plotly scatter traces, weakest band first; empty
 *   bands are omitted.
 */
function buildEdgeBandTraces(links, pos, threshold) {
  const BAND_COUNT = 4;
  const span = Math.max(1 - threshold, 0.01);
  const bands = Array.from({ length: BAND_COUNT }, () => ({ x: [], y: [] }));

  for (const { si, ti, similarity } of links) {
    const strength = Math.min(Math.max((similarity - threshold) / span, 0), 1);
    const band = bands[Math.min(Math.floor(strength * BAND_COUNT), BAND_COUNT - 1)];
    // A null entry separates individual segments within one trace.
    band.x.push(pos[si].x, pos[ti].x, null);
    band.y.push(pos[si].y, pos[ti].y, null);
  }

  const traces = [];
  bands.forEach((band, b) => {
    if (band.x.length === 0) return;
    const opacity = 0.1 + 0.5 * ((b + 0.5) / BAND_COUNT);
    traces.push({
      x: band.x,
      y: band.y,
      mode: 'lines',
      type: 'scatter',
      line: { color: `rgba(100, 116, 139, ${opacity.toFixed(2)})`, width: 0.8 },
      hoverinfo: 'skip',
      showlegend: false,
    });
  });
  return traces;
}

/**
 * Build one marker trace per category (so each category gets a legend entry),
 * ordered by descending node count.
 *
 * @param {Array<Object>} nodes - Visible nodes; reads name, title, category, score.
 * @param {Array<{x: number, y: number}>} pos - Node positions by index.
 * @param {Map<string, number>} connCount - Visible edge count per node name.
 * @returns {Array<Object>} Plotly scatter traces; customdata holds node names.
 */
function buildCategoryNodeTraces(nodes, pos, connCount) {
  const groups = new Map();
  nodes.forEach((n, i) => {
    const category = String(n.category);
    if (!groups.has(category)) {
      groups.set(category, { x: [], y: [], size: [], text: [], names: [] });
    }
    const g = groups.get(category);
    const score = Number(n.score);
    g.x.push(pos[i].x);
    g.y.push(pos[i].y);
    // Marker size scales with score, with a floor so low scores stay clickable.
    g.size.push(Number.isFinite(score) ? Math.max(score * 4, 6) : 6);
    g.text.push(
      `<b>${escapeHoverText(n.title)}</b><br>Category: ${escapeHoverText(category)}` +
        `<br>Score: ${n.score}<br>Connections: ${connCount.get(n.name) || 0}`
    );
    g.names.push(n.name);
  });

  return [...groups.entries()]
    .sort(([, a], [, b]) => b.x.length - a.x.length)
    .map(([category, g]) => ({
      x: g.x,
      y: g.y,
      customdata: g.names,
      mode: 'markers',
      type: 'scatter',
      name: category,
      marker: {
        size: g.size,
        color: catColor[category] || '#64748b',
        opacity: 0.85,
        line: { color: 'rgba(255,255,255,0.15)', width: 1 },
      },
      hovertext: g.text,
      hoverinfo: 'text',
    }));
}

/**
 * Render the similarity network into #simGraph for the given threshold.
 *
 * Keeps only edges with similarity >= threshold and the nodes they touch,
 * updates the #visibleEdges counter, lays the nodes out with a spring layout
 * and draws them with Plotly. Clicking a node navigates to its draft page.
 * When nothing is connected at this threshold an explanatory message is
 * shown instead of a plot.
 *
 * Reads the page-level fullNetwork, catColor, PLOTLY_LAYOUT and CFG.
 *
 * @param {number} threshold - Minimum similarity for an edge to be shown.
 */
function renderGraph(threshold) {
  const container = document.getElementById('simGraph');
  const edges = fullNetwork.edges.filter((e) => e.similarity >= threshold);

  // Only nodes that still have at least one edge at this threshold are shown.
  const connectedNames = new Set();
  const connCount = new Map();
  for (const e of edges) {
    for (const name of [e.source, e.target]) {
      connectedNames.add(name);
      connCount.set(name, (connCount.get(name) || 0) + 1);
    }
  }
  const nodes = fullNetwork.nodes.filter((n) => connectedNames.has(n.name));
  document.getElementById('visibleEdges').textContent = edges.length;

  // Start from a clean container: release any previous plot (listeners, resize
  // handler) and drop a previously shown empty-state message, so plot and
  // message can never be left in the container together.
  Plotly.purge(container);
  container.innerHTML = '';

  if (nodes.length === 0) {
    container.innerHTML = '<p class="text-slate-500 text-sm text-center mt-20">No connections at this threshold. Try lowering it.</p>';
    return;
  }

  // Resolve edge endpoints to node indices once, skipping edges whose
  // endpoints are not present in the node list.
  const nodeIndex = new Map(nodes.map((n, i) => [n.name, i]));
  const links = [];
  for (const e of edges) {
    const si = nodeIndex.get(e.source);
    const ti = nodeIndex.get(e.target);
    if (si === undefined || ti === undefined) continue;
    links.push({ si, ti, similarity: e.similarity });
  }

  const pos = computeSpringLayout(nodes.length, links);
  const edgeTraces = buildEdgeBandTraces(links, pos, threshold);
  const nodeTraces = buildCategoryNodeTraces(nodes, pos, connCount);

  Plotly.newPlot(container, [...edgeTraces, ...nodeTraces], {
    ...PLOTLY_LAYOUT,
    xaxis: { visible: false, showgrid: false, zeroline: false },
    yaxis: { visible: false, showgrid: false, zeroline: false },
    legend: { font: { size: 10, color: '#94a3b8' }, bgcolor: 'transparent', x: 1.02, y: 0.5 },
    margin: { t: 10, r: 140, b: 10, l: 10 },
    hovermode: 'closest',
  }, CFG);

  // Click to navigate to draft detail. Only node points carry customdata.
  container.on('plotly_click', (data) => {
    const pt = data && data.points && data.points[0];
    if (pt && pt.customdata) {
      window.location.href = `/drafts/${encodeURIComponent(pt.customdata)}`;
    }
  });
}
// Threshold slider wiring.
const slider = document.getElementById('thresholdSlider');
const label = document.getElementById('thresholdLabel');

/**
 * Current slider value as a number, falling back to the markup default of
 * 0.75 if the value cannot be parsed.
 */
function readThreshold() {
  const value = parseFloat(slider.value);
  return Number.isFinite(value) ? value : 0.75;
}

// Initial render. Browsers may restore a range input's previous value on
// reload, so start from the slider's live value (and sync the label to it)
// instead of assuming 0.75; otherwise slider, label and graph can disagree.
label.textContent = readThreshold().toFixed(2);
renderGraph(readThreshold());

// Re-render on slider movement. The label updates immediately; the graph
// render (a full layout pass) is coalesced to at most one per animation frame
// so a burst of input events while dragging does not queue redundant layouts.
let pendingFrame = null;
slider.addEventListener('input', () => {
  const val = readThreshold();
  label.textContent = val.toFixed(2);
  if (pendingFrame !== null) {
    cancelAnimationFrame(pendingFrame);
  }
  pendingFrame = requestAnimationFrame(() => {
    pendingFrame = null;
    renderGraph(val);
  });
});
</script>
{% endblock %}