Files
ietf-draft-analyzer/src/webui/templates/idea_clusters.html
Christian Nennemann f8ed2b83e9 fix: security hardening — self-hosted JS, XSS protection, SSRF blocking
- Replace all CDN script tags (marked, plotly) with self-hosted static files
- Add DOMPurify for sanitizing markdown-rendered HTML
- Add escapeHtml() helper to base.html for all innerHTML operations
- Sanitize dynamic data in innerHTML across 13 templates
- Add security headers (X-Content-Type-Options, X-Frame-Options, Referrer-Policy)
- Add SSRF protection to proposal intake URL fetcher (block private/loopback IPs)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 04:47:32 +01:00

459 lines
24 KiB
HTML

{% extends "base.html" %}
{% set active_page = "idea_clusters" %}
{% block title %}Idea Clusters — IETF Draft Analyzer{% endblock %}
{% block extra_head %}<script src="/static/js/plotly.min.js"></script>{% endblock %}
{% block content %}
<!-- Page heading and short explanation of how the clusters are produced -->
<div class="mb-6">
  <h1 class="text-2xl font-bold text-white">Idea Clusters</h1>
  <p class="text-slate-400 text-sm mt-1">Extracted ideas grouped by semantic similarity — enriched with WG and category data. Ideas are embedded using Ollama (nomic-embed-text), then clustered via DBSCAN so that semantically related ideas from different drafts are grouped together. "Cross-WG" clusters indicate ideas that span multiple IETF working groups — potential coordination points.</p>
</div>
<!-- Empty state: revealed by the page script when the payload is flagged as empty -->
<div id="emptyState" class="hidden">
  <div class="bg-slate-900 rounded-xl border border-slate-800 p-12 text-center">
    <!-- Purely decorative icon, so it is hidden from assistive technology -->
    <svg class="w-16 h-16 mx-auto text-slate-600 mb-4" fill="none" stroke="currentColor" viewBox="0 0 24 24" aria-hidden="true">
      <path stroke-linecap="round" stroke-linejoin="round" stroke-width="1.5" d="M4 6a2 2 0 012-2h2a2 2 0 012 2v2a2 2 0 01-2 2H6a2 2 0 01-2-2V6zm10 0a2 2 0 012-2h2a2 2 0 012 2v2a2 2 0 01-2 2h-2a2 2 0 01-2-2V6zM4 16a2 2 0 012-2h2a2 2 0 012 2v2a2 2 0 01-2 2H6a2 2 0 01-2-2v-2z"/>
    </svg>
    <h2 class="text-lg font-semibold text-slate-300 mb-2">No idea embeddings found</h2>
    <p class="text-slate-500">Run <code class="bg-slate-800 px-2 py-1 rounded text-sm font-mono text-blue-400">ietf embed-ideas</code> to generate embeddings first.</p>
  </div>
</div>
<div id="clusterContent" class="hidden">
<!-- Stat cards: the four counters start at 0 and are filled in by the page script -->
<div class="grid grid-cols-1 md:grid-cols-4 gap-4 mb-6">
  <div class="stat-card rounded-xl border border-slate-800 p-5">
    <p class="text-xs text-slate-500 uppercase tracking-wide">Total Ideas</p>
    <p class="text-2xl font-bold text-white mt-1" id="statTotal">0</p>
  </div>
  <div class="stat-card rounded-xl border border-slate-800 p-5">
    <p class="text-xs text-slate-500 uppercase tracking-wide">Clusters</p>
    <p class="text-2xl font-bold text-white mt-1" id="statClusters">0</p>
  </div>
  <div class="stat-card rounded-xl border border-slate-800 p-5">
    <p class="text-xs text-slate-500 uppercase tracking-wide">Avg Size</p>
    <p class="text-2xl font-bold text-white mt-1" id="statAvgSize">0</p>
  </div>
  <div class="stat-card rounded-xl border border-slate-800 p-5">
    <p class="text-xs text-slate-500 uppercase tracking-wide">Cross-WG Clusters</p>
    <p class="text-2xl font-bold text-amber-400 mt-1" id="statCrossWg">0</p>
  </div>
</div>
<!-- Filter bar: each button calls window.filterClusters(), defined by the page script -->
<div class="flex flex-wrap gap-3 mb-6" role="group" aria-label="Filter clusters">
  <!-- type="button" is explicit so these can never act as submit buttons -->
  <button type="button" id="filterAll" onclick="filterClusters('all')" class="px-3 py-1.5 text-xs rounded-lg bg-blue-600 text-white">All</button>
  <button type="button" id="filterCrossWg" onclick="filterClusters('cross_wg')" class="px-3 py-1.5 text-xs rounded-lg bg-slate-800 text-slate-400 hover:text-white">Cross-WG only</button>
  <button type="button" id="filterLarge" onclick="filterClusters('large')" class="px-3 py-1.5 text-xs rounded-lg bg-slate-800 text-slate-400 hover:text-white">Large (10+)</button>
</div>
<!-- t-SNE scatter panel: #scatterPlot is the Plotly mount point -->
<div class="bg-slate-900 rounded-xl border border-slate-800 p-5 mb-6">
  <h2 class="text-sm font-semibold text-slate-300 mb-1">Idea Embedding Space (t-SNE)</h2>
  <p class="text-xs text-slate-500 mb-3">Each dot is an extracted idea, colored by cluster. Hover for details, click to view the source draft.</p>
  <div id="scatterPlot" style="height: 560px;"></div>
</div>
<!-- Treemap panel: #treemapPlot is the Plotly mount point -->
<div class="bg-slate-900 rounded-xl border border-slate-800 p-5 mb-6">
  <h2 class="text-sm font-semibold text-slate-300 mb-1">Cluster Sizes</h2>
  <p class="text-xs text-slate-500 mb-3">Treemap showing relative sizes of each idea cluster. Amber borders = cross-WG clusters.</p>
  <div id="treemapPlot" style="height: 450px;"></div>
</div>
<!-- Cluster relationship network: hidden until the page script finds at least one link -->
<div id="networkSection" class="bg-slate-900 rounded-xl border border-slate-800 p-5 mb-6 hidden">
  <h2 class="text-sm font-semibold text-slate-300 mb-1">Cluster Relationships</h2>
  <p class="text-xs text-slate-500 mb-3">Network showing how idea clusters relate to each other. Thicker lines = stronger semantic similarity. Click a link to see the connecting ideas.</p>
  <div id="networkPlot" style="height: 560px;"></div>
  <!-- Detail panel filled in by the edge click handler -->
  <div id="linkDetail" class="hidden mt-4 bg-slate-800/50 rounded-lg p-4 border border-slate-700/50">
    <div class="flex items-center justify-between mb-2">
      <h3 class="text-sm font-semibold text-white" id="linkTitle"></h3>
      <!-- The close button needs visible content and an accessible name to be usable -->
      <button type="button" onclick="document.getElementById('linkDetail').classList.add('hidden')" class="text-slate-500 hover:text-white text-xs" aria-label="Close link details">×</button>
    </div>
    <div id="linkContent" class="text-xs text-slate-400 space-y-2"></div>
  </div>
</div>
<!-- Cluster cards grid: cards are created and appended by renderCards() in the page script -->
<h2 class="text-lg font-semibold text-white mb-4">Cluster Details</h2>
<div id="clusterGrid" class="grid grid-cols-1 md:grid-cols-2 xl:grid-cols-3 gap-4 mb-6">
</div>
</div>
{% endblock %}
{% block extra_scripts %}
<script>
const PLOTLY_LAYOUT = {
paper_bgcolor: 'transparent', plot_bgcolor: 'rgba(15,23,42,0.5)',
font: { color: '#94a3b8', family: 'Inter, system-ui, sans-serif', size: 12 },
margin: { t: 20, r: 20, b: 50, l: 50 },
xaxis: { gridcolor: '#1e293b', zerolinecolor: '#334155' },
yaxis: { gridcolor: '#1e293b', zerolinecolor: '#334155' },
};
const CFG = { responsive: true, displayModeBar: false };
const PALETTE = [
'#3b82f6', '#ef4444', '#22c55e', '#a855f7', '#f59e0b',
'#06b6d4', '#ec4899', '#84cc16', '#f97316', '#8b5cf6',
'#14b8a6', '#e11d48', '#64748b', '#eab308', '#6366f1',
'#fb923c', '#2dd4bf', '#c084fc', '#facc15', '#4ade80',
'#f472b6', '#38bdf8', '#a3e635', '#fb7185', '#818cf8',
'#34d399', '#fbbf24', '#e879f9', '#22d3ee', '#a78bfa',
];
const data = {{ clusters | tojson }};
if (data.empty) {
document.getElementById('emptyState').classList.remove('hidden');
} else {
// Reveal the populated view and fill in the four summary stat cards.
document.getElementById('clusterContent').classList.remove('hidden');
const summary = data.stats;
const crossWgTotal = data.clusters.filter(c => c.cross_wg).length;
const setStat = (elementId, value) => {
  document.getElementById(elementId).textContent = value;
};
setStat('statTotal', summary.total.toLocaleString());
setStat('statClusters', summary.num_clusters.toLocaleString());
// Average cluster size; the guard avoids dividing by zero when nothing clustered.
let avgSizeText = '0';
if (summary.num_clusters > 0) {
  avgSizeText = (summary.clustered / summary.num_clusters).toFixed(1);
}
setStat('statAvgSize', avgSizeText);
setStat('statCrossWg', crossWgTotal);
// --- t-SNE Scatter ---
// Draws one Plotly scatter trace per cluster_id found in data.scatter.
// Hover shows the idea title and working group; clicking a point navigates
// to the page of the draft the idea came from.
if (data.scatter.length > 0) {
  // cluster_id -> parallel arrays describing the points of that cluster
  const groups = {};
  data.scatter.forEach(pt => {
    if (!groups[pt.cluster_id]) groups[pt.cluster_id] = { x: [], y: [], text: [], names: [], wgs: [] };
    const g = groups[pt.cluster_id];
    g.x.push(pt.x);
    g.y.push(pt.y);
    g.text.push(pt.title);
    g.names.push(pt.draft_name);
    g.wgs.push(pt.wg || 'none');
  });
  // Largest groups first, so they get the first palette colours and legend slots.
  const clusterIds = Object.keys(groups).sort((a, b) => (groups[b].x.length - groups[a].x.length));
  const traces = clusterIds.map((cid, i) => {
    const g = groups[cid];
    // NOTE(review): the theme is taken by position, which assumes data.clusters
    // is ordered the same way as clusterIds (by size, same set of clusters).
    // Confirm against the backend; otherwise legend names can be mismatched.
    const theme = data.clusters[i] ? data.clusters[i].theme : `Cluster ${cid}`;
    const hoverTexts = g.text.map((t, j) => `${t}<br><span style="color:#64748b">${g.wgs[j]}</span>`);
    return {
      x: g.x, y: g.y, text: hoverTexts, name: theme,
      customdata: g.names,  // draft name per point, read by the click handler
      mode: 'markers', type: 'scatter',
      marker: {
        size: 7,
        color: PALETTE[i % PALETTE.length],
        opacity: 0.8,
        line: { width: 0.5, color: 'rgba(255,255,255,0.15)' },
      },
      hovertemplate: '%{text}<extra>%{customdata}</extra>',
    };
  });
  Plotly.newPlot('scatterPlot', traces, {
    ...PLOTLY_LAYOUT,
    xaxis: { visible: false, showgrid: false, zeroline: false },
    yaxis: { visible: false, showgrid: false, zeroline: false },
    legend: { font: { size: 10, color: '#94a3b8' }, bgcolor: 'transparent' },
    hovermode: 'closest',
    margin: { t: 10, r: 20, b: 10, l: 20 },
  }, CFG);
  document.getElementById('scatterPlot').on('plotly_click', function(ev) {
    const pt = ev.points[0];
    // Encode the draft name as a single path segment, as the link detail panel
    // does, so unusual characters cannot alter the target URL.
    if (pt.customdata) window.location.href = '/drafts/' + encodeURIComponent(pt.customdata);
  });
}
// --- Treemap ---
// One flat treemap tile per cluster, sized by idea count. Cross-WG clusters get
// an amber outline, as the panel caption promises.
if (data.clusters.length > 0) {
  // Explicit per-tile ids keep the hierarchy unambiguous when two clusters
  // share the same theme text (labels alone would then collide).
  const ids = data.clusters.map((_, i) => `cluster-${i}`);
  const labels = data.clusters.map(c => c.theme);
  const values = data.clusters.map(c => c.size);
  const colors = data.clusters.map((_, i) => PALETTE[i % PALETTE.length]);
  const borderColors = data.clusters.map(c => c.cross_wg ? '#f59e0b' : '#0f172a');
  const borderWidths = data.clusters.map(c => c.cross_wg ? 3 : 1);
  const hoverTexts = data.clusters.map(c => {
    // The 'none' bucket is omitted from the WG list.
    const wgs = (c.wgs || []).filter(w => w.wg !== 'none').map(w => `${w.wg}(${w.count})`).join(', ');
    const cats = (c.categories || []).map(cat => cat.cat).join(', ');
    return `<b>${c.theme}</b><br>${c.size} ideas, ${c.drafts.length} drafts` +
      (wgs ? `<br>WGs: ${wgs}` : '') +
      (cats ? `<br>Categories: ${cats}` : '');
  });
  Plotly.newPlot('treemapPlot', [{
    type: 'treemap',
    ids: ids,
    labels: labels,
    parents: ids.map(() => ''),  // every tile sits directly under the root
    values: values,
    text: hoverTexts,
    textinfo: 'label+value',
    marker: {
      colors: colors,
      line: { color: borderColors, width: borderWidths },
    },
    hovertemplate: '%{text}<extra></extra>',
  }], {
    ...PLOTLY_LAYOUT,
    margin: { t: 10, r: 10, b: 10, l: 10 },
  }, CFG);
}
// --- Cluster Relationship Network ---
// Draws the clusters that take part in at least one link as a node-link diagram
// and, when an edge endpoint is clicked, shows the idea pair behind that link.
const links = data.links || [];
if (links.length > 0) {
  document.getElementById('networkSection').classList.remove('hidden');

  // Build node set from clusters that have links
  const linkedIds = new Set();
  links.forEach(l => { linkedIds.add(l.source); linkedIds.add(l.target); });
  const nodes = data.clusters.filter(c => linkedIds.has(c.id));
  const nodeMap = {};  // cluster id -> index into nodes / nodeX / nodeY
  nodes.forEach((node, i) => { nodeMap[node.id] = i; });

  // Start with the nodes evenly spaced on a circle of radius 4 ...
  const n = nodes.length;
  const nodeX = nodes.map((_, i) => Math.cos(2 * Math.PI * i / n) * 4);
  const nodeY = nodes.map((_, i) => Math.sin(2 * Math.PI * i / n) * 4);

  // ... then relax the layout for a fixed 80 iterations.
  for (let iter = 0; iter < 80; iter++) {
    // Attraction: move both endpoints of each link towards each other,
    // scaled by how far the similarity is above 0.5.
    for (const link of links) {
      const si = nodeMap[link.source];
      const ti = nodeMap[link.target];
      if (si === undefined || ti === undefined) continue;
      const dx = nodeX[ti] - nodeX[si];
      const dy = nodeY[ti] - nodeY[si];
      const dist = Math.sqrt(dx*dx + dy*dy) || 1;  // avoid dividing by zero
      const force = (link.best_pair_sim - 0.5) * 0.15;
      nodeX[si] += dx/dist * force;
      nodeY[si] += dy/dist * force;
      nodeX[ti] -= dx/dist * force;
      nodeY[ti] -= dy/dist * force;
    }
    // Repulsion: push apart any pair of nodes closer than 1.5 units.
    for (let i = 0; i < n; i++) {
      for (let j = i+1; j < n; j++) {
        const dx = nodeX[j] - nodeX[i];
        const dy = nodeY[j] - nodeY[i];
        const dist = Math.sqrt(dx*dx + dy*dy) || 0.1;
        if (dist < 1.5) {
          const repel = 0.3 / (dist * dist);
          nodeX[i] -= dx/dist * repel;
          nodeY[i] -= dy/dist * repel;
          nodeX[j] += dx/dist * repel;
          nodeY[j] += dy/dist * repel;
        }
      }
    }
  }

  const clamp = (value, lo, hi) => Math.min(hi, Math.max(lo, value));

  // Edge traces (one per link for click handling)
  const edgeTraces = links.map((link, li) => {
    const si = nodeMap[link.source];
    const ti = nodeMap[link.target];
    if (si === undefined || ti === undefined) return null;
    // Width and opacity grow with similarity. Clamping keeps both valid if a
    // similarity well below 0.5 ever arrives; values in [0.5, 1] are unaffected.
    const strength = link.best_pair_sim - 0.5;
    const width = clamp(1 + strength * 8, 0.5, 8);
    const opacity = clamp(0.3 + strength * 1.2, 0.1, 1);
    return {
      x: [nodeX[si], nodeX[ti], null],
      y: [nodeY[si], nodeY[ti], null],
      mode: 'lines',
      line: { width: width, color: `rgba(100,116,139,${opacity})` },
      hoverinfo: 'text',
      // Separate the two theme names so the hover label is readable.
      text: `${link.source_theme} ↔ ${link.target_theme}<br>Similarity: ${(link.best_pair_sim * 100).toFixed(0)}%`,
      customdata: [li, li, null],  // index into links, read by the click handler
      showlegend: false,
    };
  }).filter(Boolean);

  // Node trace
  const nodeTrace = {
    x: nodeX, y: nodeY,
    mode: 'markers+text',
    type: 'scatter',
    marker: {
      size: nodes.map(node => 12 + Math.sqrt(node.size) * 3),
      color: nodes.map(node => PALETTE[node.id % PALETTE.length]),
      line: { width: 2, color: 'rgba(15,23,42,0.8)' },
    },
    // Labels longer than 25 characters are shortened to 22 plus an ellipsis.
    text: nodes.map(node => node.theme.length > 25 ? node.theme.substring(0, 22) + '...' : node.theme),
    textposition: 'top center',
    textfont: { size: 10, color: '#cbd5e1' },
    hovertext: nodes.map(node =>
      `<b>${node.theme}</b><br>${node.size} ideas, ${node.drafts.length} drafts`
    ),
    hoverinfo: 'text',
    showlegend: false,
  };

  Plotly.newPlot('networkPlot', [...edgeTraces, nodeTrace], {
    ...PLOTLY_LAYOUT,
    xaxis: { visible: false, showgrid: false, zeroline: false },
    yaxis: { visible: false, showgrid: false, zeroline: false },
    hovermode: 'closest',
    margin: { t: 10, r: 20, b: 10, l: 20 },
  }, CFG);

  // Click handler for edges — show link detail.
  // The node trace has no customdata, so clicks on nodes are ignored here.
  document.getElementById('networkPlot').on('plotly_click', function(ev) {
    const pt = ev.points[0];
    if (pt.data.customdata && pt.data.customdata[pt.pointNumber] !== null) {
      const link = links[pt.data.customdata[pt.pointNumber]];
      if (!link) return;
      const detail = document.getElementById('linkDetail');
      const simPct = (link.best_pair_sim * 100).toFixed(0);
      // All payload strings are escaped before being placed into innerHTML.
      document.getElementById('linkTitle').innerHTML =
        `<span style="color:${PALETTE[link.source % PALETTE.length]}">${escapeHtml(link.source_theme)}</span>` +
        ` <span class="text-slate-500">↔</span> ` +
        `<span style="color:${PALETTE[link.target % PALETTE.length]}">${escapeHtml(link.target_theme)}</span>` +
        ` <span class="text-slate-500 text-xs font-normal ml-2">${simPct}% similar</span>`;
      document.getElementById('linkContent').innerHTML = `
<div class="grid grid-cols-2 gap-4">
<div class="bg-slate-900/50 rounded p-3 border border-slate-700/30">
<div class="text-slate-300 font-medium mb-1">${escapeHtml(link.idea_a)}</div>
<a href="/drafts/${encodeURIComponent(link.idea_a_draft)}" class="text-blue-400/70 hover:text-blue-300 text-[10px] font-mono">${escapeHtml(link.idea_a_draft)}</a>
</div>
<div class="bg-slate-900/50 rounded p-3 border border-slate-700/30">
<div class="text-slate-300 font-medium mb-1">${escapeHtml(link.idea_b)}</div>
<a href="/drafts/${encodeURIComponent(link.idea_b_draft)}" class="text-blue-400/70 hover:text-blue-300 text-[10px] font-mono">${escapeHtml(link.idea_b_draft)}</a>
</div>
</div>
<p class="text-slate-500 text-[10px] mt-1">These two ideas from different clusters have the strongest cross-cluster similarity.</p>
`;
      detail.classList.remove('hidden');
    }
  });
}
// --- Cluster Cards ---
const grid = document.getElementById('clusterGrid');

/**
 * Rebuild the cluster card grid from data.clusters.
 *
 * Each card shows the theme, a preview of up to three unique ideas, working
 * group and category badges, and up to four source drafts. Clicking the card
 * toggles a panel listing every unique idea.
 *
 * All payload strings are escaped before being placed into innerHTML, and
 * draft names are URL-encoded before being used in an href.
 * NOTE(review): escapeHtml() is also used inside title="…" attributes, which
 * assumes it escapes double quotes — confirm against its definition.
 *
 * @param {string} filter 'cross_wg' keeps only cross-WG clusters, 'large' keeps
 *   only clusters with at least 10 ideas; any other value shows every cluster.
 */
function renderCards(filter) {
  // Link target for a draft, with the name encoded as one path segment.
  const draftHref = name => `/drafts/${encodeURIComponent(name)}`;
  // Display form of a draft name: first "draft-" removed, truncated, escaped.
  const shortDraft = (name, maxLen) => escapeHtml(name.replace('draft-', '').substring(0, maxLen));

  grid.innerHTML = '';
  data.clusters.forEach((cluster, i) => {
    if (filter === 'cross_wg' && !cluster.cross_wg) return;
    if (filter === 'large' && cluster.size < 10) return;
    const color = PALETTE[i % PALETTE.length];
    const cardId = `cluster-${i}`;

    // Deduplicate ideas by title, track which drafts have each.
    // A Map is used so titles such as "constructor" cannot collide with
    // properties inherited by a plain object.
    const ideaByTitle = new Map();
    cluster.ideas.forEach(idea => {
      let entry = ideaByTitle.get(idea.title);
      if (!entry) {
        entry = { ...idea, drafts: [] };
        ideaByTitle.set(idea.title, entry);
      }
      entry.drafts.push(idea.draft_name);
    });
    const uniqueIdeas = Array.from(ideaByTitle.values());

    // Preview: first 3 unique ideas
    const previewHtml = uniqueIdeas.slice(0, 3).map(idea => {
      const draftTag = idea.drafts.length > 1
        ? `<span class="text-slate-600">(${idea.drafts.length} drafts)</span>`
        : `<span class="text-slate-600">${shortDraft(idea.drafts[0], 20)}</span>`;
      return `<li class="text-xs text-slate-400 truncate" title="${escapeHtml(idea.description || idea.title)}">
<span class="text-slate-300">${escapeHtml(idea.title)}</span> ${draftTag}
</li>`;
    }).join('');
    const previewExtra = uniqueIdeas.length > 3
      ? `<li class="text-xs text-slate-600">+${uniqueIdeas.length - 3} more unique ideas</li>` : '';

    // Full idea list (shown on expand)
    const fullIdeasHtml = uniqueIdeas.map(idea => {
      const draftLinks = idea.drafts.map(d =>
        `<a href="${draftHref(d)}" class="text-blue-400/70 hover:text-blue-300 transition">${shortDraft(d, 28)}</a>`
      ).join(', ');
      return `<div class="py-2 border-b border-slate-800/50 last:border-0">
<div class="text-xs text-slate-200 font-medium">${escapeHtml(idea.title)}</div>
${idea.description ? `<div class="text-xs text-slate-500 mt-0.5 leading-relaxed">${escapeHtml(idea.description.substring(0, 200))}</div>` : ''}
<div class="text-[10px] text-slate-600 mt-1 font-mono">${draftLinks}</div>
</div>`;
    }).join('');

    // WG badges; the 'none' bucket is shown separately as "N individual"
    const wgBadges = (cluster.wgs || []).filter(w => w.wg !== 'none').map(w =>
      `<span class="inline-block bg-amber-900/30 text-amber-400 text-xs px-2 py-0.5 rounded border border-amber-800/30">${escapeHtml(String(w.wg))} (${w.count})</span>`
    ).join(' ');
    const noneCount = (cluster.wgs || []).find(w => w.wg === 'none');
    const noneHtml = noneCount
      ? `<span class="text-xs text-slate-600">${noneCount.count} individual</span>` : '';

    // Category badges
    const catBadges = (cluster.categories || []).map(c =>
      `<span class="inline-block bg-slate-800 text-slate-400 text-xs px-2 py-0.5 rounded">${escapeHtml(String(c.cat))}</span>`
    ).join(' ');

    // Draft badges: first four drafts, then a "+N" counter for the rest
    const draftBadges = cluster.drafts.slice(0, 4).map(d =>
      `<a href="${draftHref(d)}" class="inline-block bg-slate-800 text-slate-400 text-xs px-2 py-0.5 rounded hover:text-blue-400 truncate max-w-[160px]" title="${escapeHtml(d)}">${shortDraft(d, 22)}</a>`
    ).join(' ');
    const extraDrafts = cluster.drafts.length > 4
      ? `<span class="text-xs text-slate-600">+${cluster.drafts.length - 4}</span>` : '';
    const crossBadge = cluster.cross_wg
      ? `<span class="text-xs bg-amber-900/30 text-amber-400 px-1.5 py-0.5 rounded">cross-WG</span>` : '';

    const card = document.createElement('div');
    card.className = 'bg-slate-900 rounded-xl border p-5 cursor-pointer hover:border-slate-600 transition ' +
      (cluster.cross_wg ? 'border-amber-800/40' : 'border-slate-800');
    card.onclick = (ev) => {
      // Let draft links navigate without also toggling the card.
      if (ev.target.closest('a')) return;
      const detail = document.getElementById(cardId);
      const chevron = document.getElementById(`chevron-${i}`);
      if (detail.classList.contains('hidden')) {
        detail.classList.remove('hidden');
        chevron.style.transform = 'rotate(180deg)';
      } else {
        detail.classList.add('hidden');
        chevron.style.transform = '';
      }
    };
    card.innerHTML = `
<div class="flex items-center gap-2 mb-3">
<div class="w-3 h-3 rounded-full flex-shrink-0" style="background: ${color}"></div>
<h3 class="text-sm font-semibold text-white truncate">${escapeHtml(cluster.theme)}</h3>
${crossBadge}
<span class="ml-auto text-xs text-slate-500 flex-shrink-0">${cluster.size} ideas</span>
<svg id="chevron-${i}" class="w-4 h-4 text-slate-500 flex-shrink-0 transition-transform" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 9l-7 7-7-7"/></svg>
</div>
<ul class="space-y-1 mb-3">${previewHtml}${previewExtra}</ul>
${(wgBadges || noneHtml) ? `<div class="mb-2"><p class="text-xs text-slate-500 mb-1">Working Groups</p><div class="flex flex-wrap gap-1">${wgBadges} ${noneHtml}</div></div>` : ''}
${catBadges ? `<div class="mb-2"><p class="text-xs text-slate-500 mb-1">Categories</p><div class="flex flex-wrap gap-1">${catBadges}</div></div>` : ''}
<div class="border-t border-slate-800 pt-3">
<p class="text-xs text-slate-500 mb-1">${cluster.drafts.length} source draft${cluster.drafts.length !== 1 ? 's' : ''}</p>
<div class="flex flex-wrap gap-1">${draftBadges}${extraDrafts}</div>
</div>
<!-- Expanded detail (hidden by default) -->
<div id="${cardId}" class="hidden mt-4 border-t border-slate-700 pt-4">
<h4 class="text-xs font-semibold text-slate-300 mb-2 uppercase tracking-wide">All ${uniqueIdeas.length} unique ideas</h4>
<div class="max-h-80 overflow-y-auto pr-1">${fullIdeasHtml}</div>
</div>
`;
    grid.appendChild(card);
  });
}
renderCards('all');
// Filter buttons
// Maps each filter key passed by the buttons' onclick handlers to its button id.
const FILTER_BUTTON_IDS = {
  all: 'filterAll',
  cross_wg: 'filterCrossWg',
  large: 'filterLarge',
};
const FILTER_ACTIVE_CLASS = 'px-3 py-1.5 text-xs rounded-lg bg-blue-600 text-white';
const FILTER_INACTIVE_CLASS = 'px-3 py-1.5 text-xs rounded-lg bg-slate-800 text-slate-400 hover:text-white';

/**
 * Highlight the button for the chosen filter and re-render the cluster cards.
 * Exposed on window because the filter buttons call it from inline onclick.
 *
 * Only the three known buttons are restyled. The earlier prefix-selector pass
 * was redundant and could also overwrite the classes of any unrelated element
 * whose id starts with "filter".
 *
 * @param {string} filter 'all', 'cross_wg' or 'large'.
 */
window.filterClusters = function(filter) {
  Object.keys(FILTER_BUTTON_IDS).forEach(key => {
    const btn = document.getElementById(FILTER_BUTTON_IDS[key]);
    btn.className = key === filter ? FILTER_ACTIVE_CLASS : FILTER_INACTIVE_CLASS;
  });
  renderCards(filter);
};
}
</script>
{% endblock %}