fix: deduplicate draft revisions in search results

Different revisions of the same draft (e.g. draft-foo-bar-00, -01, -02)
were showing up as separate results. Now keeps only the highest-scoring
revision per base draft name.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 04:07:38 +01:00
parent cdb2e505c6
commit 2229e70c73

View File

@@ -93,21 +93,40 @@ class HybridSearch:
self._embeddings_cache_time = now
return self._embeddings_cache
@staticmethod
def _base_draft_name(name: str) -> str:
    """Return *name* without its trailing revision number.

    A revision suffix is a hyphen followed by two or three digits at the
    end of the name, e.g. 'draft-wang-cats-odsi-02' becomes
    'draft-wang-cats-odsi'. Names without such a suffix are returned
    unchanged, so every revision of one draft maps to the same key.
    """
    suffix = re.search(r'-\d{2,3}$', name)
    if suffix is None:
        return name
    # Splice around the match instead of truncating: ``$`` also matches
    # just before a final newline, and that newline must be preserved.
    return name[:suffix.start()] + name[suffix.end():]
def search(self, query: str, top_k: int = 10) -> list[dict]:
    """Combine FTS5 keyword search + embedding similarity search.

    Returns a ranked list of {name, title, score, excerpt, match_type}
    dicts containing at most ``top_k`` entries.
    Falls back to the FTS5-only ranking when the embedding search yields
    nothing (e.g. Ollama is unavailable).
    Deduplicates draft revisions: for each base draft name only the first
    entry in the merged ranking is kept, which is the highest-scoring
    revision provided the ranking is ordered best-first.

    Args:
        query: The user's search string, passed to both search backends.
        top_k: Maximum number of results to return. Values <= 0 yield an
            empty list.

    Returns:
        Up to ``top_k`` result dicts, one per base draft name, in the
        order produced by the merged ranking.
    """
    if top_k <= 0:
        # A negative slice bound would silently drop results from the
        # wrong end; there is nothing meaningful to return here.
        return []

    # Over-fetch so that collapsing several revisions of one draft still
    # leaves enough distinct drafts to fill ``top_k`` slots.
    fetch_limit = top_k * 3
    fts_results = self._fts_search(query, limit=fetch_limit)
    embed_results = self._embedding_search(query, limit=fetch_limit)

    if embed_results:
        merged = self._reciprocal_rank_fusion(fts_results, embed_results)
    else:
        merged = fts_results

    # Walk the ranking in order and keep the first hit per base name;
    # later revisions of an already-seen draft are skipped.
    seen_bases: set[str] = set()
    deduped: list[dict] = []
    for result in merged:
        base = self._base_draft_name(result["name"])
        if base in seen_bases:
            continue
        seen_bases.add(base)
        deduped.append(result)
        if len(deduped) == top_k:
            break  # enough distinct drafts collected
    return deduped
@staticmethod
def sanitize_fts_query(query: str) -> str: