Platform upgrade: semantic search, citations, readiness, tests, Docker

Major features added by 5 parallel agent teams:
- Semantic "Ask" (NL queries via FTS5 + embeddings + Claude synthesis)
- Global search across drafts, ideas, authors, gaps
- REST API expansion (14 endpoints, up from 3) with CSV/JSON export
- Citation graph visualization (D3.js, 440 nodes, 2422 edges)
- Standards readiness scoring (0-100 composite from 6 factors)
- Side-by-side draft comparison view with shared/unique analysis
- Annotation system (notes + tags per draft, DB-persisted)
- Docker deployment (Dockerfile + docker-compose with Ollama)
- Scheduled updates (cron script with log rotation)
- Pipeline health dashboard (stage progress bars, cost tracking)
- Test suite foundation (54 pytest tests covering DB, models, web data)

Fixes: compare_drafts() implemented (was a stub), get_authors_for_draft() bug fix,
source-aware analysis prompts, config env var overrides + validation,
resilient batch error handling with --retry-failed, observatory --dry-run

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 20:52:56 +01:00
parent da2a989744
commit 757b781c67
33 changed files with 4253 additions and 170 deletions

View File

@@ -6,7 +6,7 @@
{% block content %}
<div class="mb-6">
<h1 class="text-2xl font-bold text-white">Idea Clusters</h1>
<p class="text-slate-400 text-sm mt-1">Extracted ideas grouped by semantic similarity using embedding-based clustering</p>
<p class="text-slate-400 text-sm mt-1">Extracted ideas grouped by semantic similarity — enriched with WG and category data</p>
</div>
<div id="emptyState" class="hidden">
@@ -21,19 +21,30 @@
<div id="clusterContent" class="hidden">
<!-- Stat cards -->
<div class="grid grid-cols-1 md:grid-cols-3 gap-4 mb-6">
<div class="grid grid-cols-1 md:grid-cols-4 gap-4 mb-6">
<div class="stat-card rounded-xl border border-slate-800 p-5">
<p class="text-xs text-slate-500 uppercase tracking-wide">Total Ideas Embedded</p>
<p class="text-xs text-slate-500 uppercase tracking-wide">Total Ideas</p>
<p class="text-2xl font-bold text-white mt-1" id="statTotal">0</p>
</div>
<div class="stat-card rounded-xl border border-slate-800 p-5">
<p class="text-xs text-slate-500 uppercase tracking-wide">Clusters Found</p>
<p class="text-xs text-slate-500 uppercase tracking-wide">Clusters</p>
<p class="text-2xl font-bold text-white mt-1" id="statClusters">0</p>
</div>
<div class="stat-card rounded-xl border border-slate-800 p-5">
<p class="text-xs text-slate-500 uppercase tracking-wide">Avg Cluster Size</p>
<p class="text-xs text-slate-500 uppercase tracking-wide">Avg Size</p>
<p class="text-2xl font-bold text-white mt-1" id="statAvgSize">0</p>
</div>
<div class="stat-card rounded-xl border border-slate-800 p-5">
<p class="text-xs text-slate-500 uppercase tracking-wide">Cross-WG Clusters</p>
<p class="text-2xl font-bold text-amber-400 mt-1" id="statCrossWg">0</p>
</div>
</div>
<!-- Filter bar -->
<div class="flex flex-wrap gap-3 mb-6">
<button id="filterAll" onclick="filterClusters('all')" class="px-3 py-1.5 text-xs rounded-lg bg-blue-600 text-white">All</button>
<button id="filterCrossWg" onclick="filterClusters('cross_wg')" class="px-3 py-1.5 text-xs rounded-lg bg-slate-800 text-slate-400 hover:text-white">Cross-WG only</button>
<button id="filterLarge" onclick="filterClusters('large')" class="px-3 py-1.5 text-xs rounded-lg bg-slate-800 text-slate-400 hover:text-white">Large (10+)</button>
</div>
<!-- t-SNE Scatter -->
@@ -46,7 +57,7 @@
<!-- Treemap -->
<div class="bg-slate-900 rounded-xl border border-slate-800 p-5 mb-6">
<h2 class="text-sm font-semibold text-slate-300 mb-1">Cluster Sizes</h2>
<p class="text-xs text-slate-500 mb-3">Treemap showing relative sizes of each idea cluster.</p>
<p class="text-xs text-slate-500 mb-3">Treemap showing relative sizes of each idea cluster. Amber borders = cross-WG clusters.</p>
<div id="treemapPlot" style="height: 450px;"></div>
</div>
@@ -72,6 +83,9 @@ const PALETTE = [
'#3b82f6', '#ef4444', '#22c55e', '#a855f7', '#f59e0b',
'#06b6d4', '#ec4899', '#84cc16', '#f97316', '#8b5cf6',
'#14b8a6', '#e11d48', '#64748b', '#eab308', '#6366f1',
'#fb923c', '#2dd4bf', '#c084fc', '#facc15', '#4ade80',
'#f472b6', '#38bdf8', '#a3e635', '#fb7185', '#818cf8',
'#34d399', '#fbbf24', '#e879f9', '#22d3ee', '#a78bfa',
];
const data = {{ clusters | tojson }};
@@ -81,46 +95,42 @@ if (data.empty) {
} else {
document.getElementById('clusterContent').classList.remove('hidden');
// Stats
const stats = data.stats;
const crossWgCount = data.clusters.filter(c => c.cross_wg).length;
document.getElementById('statTotal').textContent = stats.total.toLocaleString();
document.getElementById('statClusters').textContent = stats.num_clusters.toLocaleString();
document.getElementById('statAvgSize').textContent = stats.num_clusters > 0
? (stats.clustered / stats.num_clusters).toFixed(1) : '0';
document.getElementById('statCrossWg').textContent = crossWgCount;
// --- t-SNE Scatter ---
if (data.scatter.length > 0) {
// Group by cluster_id
const groups = {};
data.scatter.forEach(pt => {
if (!groups[pt.cluster_id]) groups[pt.cluster_id] = { x: [], y: [], text: [], names: [] };
if (!groups[pt.cluster_id]) groups[pt.cluster_id] = { x: [], y: [], text: [], names: [], wgs: [] };
groups[pt.cluster_id].x.push(pt.x);
groups[pt.cluster_id].y.push(pt.y);
groups[pt.cluster_id].text.push(pt.title);
groups[pt.cluster_id].names.push(pt.draft_name);
});
// Map cluster_id to cluster theme
const clusterThemes = {};
data.clusters.forEach((c, i) => {
// Find the original cluster_id by matching scatter points
groups[pt.cluster_id].wgs.push(pt.wg || 'none');
});
const clusterIds = Object.keys(groups).sort((a, b) => (groups[b].x.length - groups[a].x.length));
const traces = clusterIds.map((cid, i) => {
const g = groups[cid];
const theme = data.clusters[i] ? data.clusters[i].theme : `Cluster ${cid}`;
const hoverTexts = g.text.map((t, j) => `${t}<br><span style="color:#64748b">${g.wgs[j]}</span>`);
return {
x: g.x, y: g.y, text: g.text, name: theme,
x: g.x, y: g.y, text: hoverTexts, name: theme,
customdata: g.names,
mode: 'markers', type: 'scatter',
marker: {
size: 6,
size: 7,
color: PALETTE[i % PALETTE.length],
opacity: 0.8,
line: { width: 0.5, color: 'rgba(255,255,255,0.15)' },
},
hovertemplate: '<b>%{text}</b><extra>%{customdata}</extra>',
hovertemplate: '%{text}<extra>%{customdata}</extra>',
};
});
@@ -135,26 +145,33 @@ if (data.empty) {
document.getElementById('scatterPlot').on('plotly_click', function(ev) {
const pt = ev.points[0];
if (pt.customdata) {
window.location.href = '/drafts/' + pt.customdata;
}
if (pt.customdata) window.location.href = '/drafts/' + pt.customdata;
});
}
// --- Treemap ---
if (data.clusters.length > 0) {
const labels = data.clusters.map(c => c.theme);
const labels = data.clusters.map(c => c.cross_wg ? `${c.theme}` : c.theme);
const values = data.clusters.map(c => c.size);
const colors = data.clusters.map((_, i) => PALETTE[i % PALETTE.length]);
const colors = data.clusters.map((c, i) => c.cross_wg
? PALETTE[i % PALETTE.length] : PALETTE[i % PALETTE.length]);
const hoverTexts = data.clusters.map(c => {
const wgs = (c.wgs || []).filter(w => w.wg !== 'none').map(w => `${w.wg}(${w.count})`).join(', ');
const cats = (c.categories || []).map(cat => cat.cat).join(', ');
return `<b>${c.theme}</b><br>${c.size} ideas, ${c.drafts.length} drafts` +
(wgs ? `<br>WGs: ${wgs}` : '') +
(cats ? `<br>Categories: ${cats}` : '');
});
Plotly.newPlot('treemapPlot', [{
type: 'treemap',
labels: labels,
parents: labels.map(() => ''),
values: values,
text: hoverTexts,
textinfo: 'label+value',
marker: { colors: colors },
hovertemplate: '<b>%{label}</b><br>%{value} ideas<extra></extra>',
hovertemplate: '%{text}<extra></extra>',
}], {
...PLOTLY_LAYOUT,
margin: { t: 10, r: 10, b: 10, l: 10 },
@@ -163,38 +180,90 @@ if (data.empty) {
// --- Cluster Cards ---
const grid = document.getElementById('clusterGrid');
data.clusters.forEach((cluster, i) => {
const color = PALETTE[i % PALETTE.length];
const topIdeas = cluster.ideas.slice(0, 3);
const ideaListHtml = topIdeas.map(idea =>
`<li class="text-xs text-slate-400 truncate" title="${idea.title}">${idea.title}</li>`
).join('');
const extraCount = cluster.size - topIdeas.length;
const extraHtml = extraCount > 0
? `<li class="text-xs text-slate-600">+${extraCount} more</li>` : '';
const draftBadges = cluster.drafts.slice(0, 4).map(d =>
`<a href="/drafts/${d}" class="inline-block bg-slate-800 text-slate-400 text-xs px-2 py-0.5 rounded hover:text-blue-400 truncate max-w-[140px]" title="${d}">${d.replace('draft-', '').substring(0, 20)}</a>`
).join(' ');
const extraDrafts = cluster.drafts.length > 4
? `<span class="text-xs text-slate-600">+${cluster.drafts.length - 4}</span>` : '';
function renderCards(filter) {
grid.innerHTML = '';
data.clusters.forEach((cluster, i) => {
if (filter === 'cross_wg' && !cluster.cross_wg) return;
if (filter === 'large' && cluster.size < 10) return;
const card = document.createElement('div');
card.className = 'bg-slate-900 rounded-xl border border-slate-800 p-5';
card.innerHTML = `
<div class="flex items-center gap-2 mb-3">
<div class="w-3 h-3 rounded-full" style="background: ${color}"></div>
<h3 class="text-sm font-semibold text-white">${cluster.theme}</h3>
<span class="ml-auto text-xs text-slate-500">${cluster.size} ideas</span>
</div>
<ul class="space-y-1 mb-3">${ideaListHtml}${extraHtml}</ul>
<div class="border-t border-slate-800 pt-3">
<p class="text-xs text-slate-500 mb-1">${cluster.drafts.length} source draft${cluster.drafts.length !== 1 ? 's' : ''}</p>
<div class="flex flex-wrap gap-1">${draftBadges}${extraDrafts}</div>
</div>
`;
grid.appendChild(card);
});
const color = PALETTE[i % PALETTE.length];
const topIdeas = cluster.ideas.slice(0, 5);
const ideaListHtml = topIdeas.map(idea =>
`<li class="text-xs text-slate-400 truncate" title="${idea.description || idea.title}">
<span class="text-slate-300">${idea.title}</span>
</li>`
).join('');
const extraCount = cluster.size - topIdeas.length;
const extraHtml = extraCount > 0
? `<li class="text-xs text-slate-600">+${extraCount} more</li>` : '';
// WG badges
const wgBadges = (cluster.wgs || []).filter(w => w.wg !== 'none').map(w =>
`<span class="inline-block bg-amber-900/30 text-amber-400 text-xs px-2 py-0.5 rounded border border-amber-800/30">${w.wg} (${w.count})</span>`
).join(' ');
const noneCount = (cluster.wgs || []).find(w => w.wg === 'none');
const noneHtml = noneCount
? `<span class="text-xs text-slate-600">${noneCount.count} individual</span>` : '';
// Category badges
const catBadges = (cluster.categories || []).map(c =>
`<span class="inline-block bg-slate-800 text-slate-400 text-xs px-2 py-0.5 rounded">${c.cat}</span>`
).join(' ');
// Draft badges
const draftBadges = cluster.drafts.slice(0, 4).map(d =>
`<a href="/drafts/${d}" class="inline-block bg-slate-800 text-slate-400 text-xs px-2 py-0.5 rounded hover:text-blue-400 truncate max-w-[160px]" title="${d}">${d.replace('draft-', '').substring(0, 22)}</a>`
).join(' ');
const extraDrafts = cluster.drafts.length > 4
? `<span class="text-xs text-slate-600">+${cluster.drafts.length - 4}</span>` : '';
const crossBadge = cluster.cross_wg
? `<span class="text-xs bg-amber-900/30 text-amber-400 px-1.5 py-0.5 rounded">cross-WG</span>` : '';
const card = document.createElement('div');
card.className = 'bg-slate-900 rounded-xl border p-5 ' +
(cluster.cross_wg ? 'border-amber-800/40' : 'border-slate-800');
card.innerHTML = `
<div class="flex items-center gap-2 mb-3">
<div class="w-3 h-3 rounded-full flex-shrink-0" style="background: ${color}"></div>
<h3 class="text-sm font-semibold text-white truncate">${cluster.theme}</h3>
${crossBadge}
<span class="ml-auto text-xs text-slate-500 flex-shrink-0">${cluster.size} ideas</span>
</div>
<ul class="space-y-1 mb-3">${ideaListHtml}${extraHtml}</ul>
${(wgBadges || noneHtml) ? `<div class="mb-2"><p class="text-xs text-slate-500 mb-1">Working Groups</p><div class="flex flex-wrap gap-1">${wgBadges} ${noneHtml}</div></div>` : ''}
${catBadges ? `<div class="mb-2"><p class="text-xs text-slate-500 mb-1">Categories</p><div class="flex flex-wrap gap-1">${catBadges}</div></div>` : ''}
<div class="border-t border-slate-800 pt-3">
<p class="text-xs text-slate-500 mb-1">${cluster.drafts.length} source draft${cluster.drafts.length !== 1 ? 's' : ''}</p>
<div class="flex flex-wrap gap-1">${draftBadges}${extraDrafts}</div>
</div>
`;
grid.appendChild(card);
});
}
renderCards('all');
// Filter buttons
/**
 * Activate one of the cluster filters: highlight its button, reset the other
 * filter buttons, and re-render the cluster cards for that filter.
 *
 * Assigned to `window` because the filter buttons invoke it from inline
 * `onclick` attributes.
 *
 * @param {string} filter - One of 'all', 'cross_wg', or 'large'. Any other
 *   value leaves every button in the inactive style; the value is still
 *   passed through to renderCards().
 */
window.filterClusters = function(filter) {
  const ACTIVE_CLASS = 'px-3 py-1.5 text-xs rounded-lg bg-blue-600 text-white';
  const INACTIVE_CLASS = 'px-3 py-1.5 text-xs rounded-lg bg-slate-800 text-slate-400 hover:text-white';

  // Explicit filter -> button id mapping. This replaces the earlier
  // '[id^="filter"]' prefix selector plus string munging, which did the same
  // job a second time and would also have restyled any unrelated element
  // whose id happens to start with "filter".
  const buttonIdByFilter = {
    all: 'filterAll',
    cross_wg: 'filterCrossWg',
    large: 'filterLarge',
  };
  const activeId = buttonIdByFilter[filter];

  Object.values(buttonIdByFilter).forEach(id => {
    const btn = document.getElementById(id);
    // A missing button should not prevent the cards from being re-rendered.
    if (btn) btn.className = id === activeId ? ACTIVE_CLASS : INACTIVE_CLASS;
  });

  renderCards(filter);
};
}
</script>
{% endblock %}