fix: deduplicate draft revisions in search results
Different revisions of the same draft (e.g. draft-foo-bar-00, -01, -02) were showing up as separate results. Search now keeps only the highest-scoring revision per base draft name.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -93,21 +93,40 @@ class HybridSearch:
|
|||||||
self._embeddings_cache_time = now
|
self._embeddings_cache_time = now
|
||||||
return self._embeddings_cache
|
return self._embeddings_cache
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _base_draft_name(name: str) -> str:
|
||||||
|
"""Strip revision suffix to get the base draft name for dedup.
|
||||||
|
|
||||||
|
e.g. 'draft-wang-cats-odsi-02' → 'draft-wang-cats-odsi'
|
||||||
|
"""
|
||||||
|
return re.sub(r'-\d{2,3}$', '', name)
|
||||||
|
|
||||||
def search(self, query: str, top_k: int = 10) -> list[dict]:
|
def search(self, query: str, top_k: int = 10) -> list[dict]:
|
||||||
"""Combine FTS5 keyword search + embedding similarity search.
|
"""Combine FTS5 keyword search + embedding similarity search.
|
||||||
|
|
||||||
Returns ranked list of {name, title, score, excerpt, match_type}.
|
Returns ranked list of {name, title, score, excerpt, match_type}.
|
||||||
Falls back to FTS5-only if Ollama is unavailable.
|
Falls back to FTS5-only if Ollama is unavailable.
|
||||||
|
Deduplicates draft revisions, keeping the highest-scoring version.
|
||||||
"""
|
"""
|
||||||
fts_results = self._fts_search(query, limit=top_k * 2)
|
fts_results = self._fts_search(query, limit=top_k * 3)
|
||||||
embed_results = self._embedding_search(query, limit=top_k * 2)
|
embed_results = self._embedding_search(query, limit=top_k * 3)
|
||||||
|
|
||||||
if embed_results:
|
if embed_results:
|
||||||
merged = self._reciprocal_rank_fusion(fts_results, embed_results)
|
merged = self._reciprocal_rank_fusion(fts_results, embed_results)
|
||||||
else:
|
else:
|
||||||
merged = fts_results
|
merged = fts_results
|
||||||
|
|
||||||
return merged[:top_k]
|
# Deduplicate revisions of the same draft, keep best score
|
||||||
|
seen_bases: dict[str, int] = {}
|
||||||
|
deduped = []
|
||||||
|
for r in merged:
|
||||||
|
base = self._base_draft_name(r["name"])
|
||||||
|
if base not in seen_bases:
|
||||||
|
seen_bases[base] = len(deduped)
|
||||||
|
deduped.append(r)
|
||||||
|
# else: skip lower-ranked revision
|
||||||
|
|
||||||
|
return deduped[:top_k]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sanitize_fts_query(query: str) -> str:
|
def sanitize_fts_query(query: str) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user