Make /ask free by default, Claude synthesis is opt-in
Search results (FTS5 + Ollama embeddings) are shown immediately at no cost. AI synthesis via Claude is behind a "Synthesize" button that the user must explicitly click. Synthesized answers are cached permanently, so repeat visitors never trigger API calls.

- Split ask into search_only() (free) and ask() (paid, cached)
- GET /ask now uses search_only — no Claude tokens spent
- POST /api/ask/synthesize triggers Claude (Haiku, ~$0.001)
- Cached answers shown with "cached" badge, no re-generation
- Template shows sources immediately + optional synthesize button

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -200,11 +200,55 @@ class HybridSearch:
|
||||
|
||||
return results
|
||||
|
||||
def _build_sources(self, search_results: list[dict]) -> tuple[list[dict], str]:
    """Build the source list and the prompt context block from search results.

    Every result's ``"name"`` is resolved through ``self.db.get_draft``.
    Results whose draft cannot be found (``None``) are skipped, so the
    returned list may be shorter than ``search_results``.

    Args:
        search_results: Result dicts. Only ``"name"`` is required.
            ``"similarity"`` (falling back to ``"score"``, then ``0``),
            ``"excerpt"`` and ``"match_type"`` are copied into the source
            entry, defaulting to ``""`` for the last two.

    Returns:
        ``(sources, sources_block)``. ``sources`` is a list of dicts with the
        keys name / title / similarity / excerpt / match_type.
        ``sources_block`` is the concatenated context text: one section per
        resolved draft with its name, title, abstract and, when available, a
        content excerpt (abstract and excerpt are cut to 500 characters).
    """
    preview_chars = 500
    sources = []
    # Collect fragments and join once. The joined text must stay byte-identical
    # to the previous ``+=`` output because search_only() formats it into the
    # prompt whose hash is the cache key.
    parts = []
    for r in search_results:
        draft = self.db.get_draft(r["name"])
        if draft is None:
            continue

        # Guard the abstract the same way full_text is guarded: slicing None
        # would raise TypeError for a draft stored without an abstract.
        abstract = draft.abstract or ""
        text_preview = draft.full_text[:preview_chars] if draft.full_text else ""

        parts.append(f"\n---\n**{draft.name}** — {draft.title}\n")
        parts.append(f"Abstract: {abstract[:preview_chars]}\n")
        if text_preview:
            parts.append(f"Content excerpt: {text_preview}\n")

        sources.append({
            "name": draft.name,
            "title": draft.title,
            "similarity": r.get("similarity", r.get("score", 0)),
            "excerpt": r.get("excerpt", ""),
            "match_type": r.get("match_type", ""),
        })
    return sources, "".join(parts)
|
||||
|
||||
def search_only(self, question: str, top_k: int = 5) -> dict:
    """Search without requesting a Claude synthesis (free — only FTS5 + Ollama).

    Runs ``self.search`` and, when there are results, looks up whether an
    answer for this exact question/context is already in the LLM cache. This
    method only reads the cache; it does not generate a new answer.

    Args:
        question: Natural-language question to search for.
        top_k: Maximum number of search results to request.

    Returns:
        ``{sources: [...], has_cached_answer: bool, answer: str|None}``.
        ``answer`` is the cached text when one exists, otherwise ``None``.
    """
    search_results = self.search(question, top_k=top_k)
    if not search_results:
        return {"sources": [], "has_cached_answer": False, "answer": None}

    sources, sources_block = self._build_sources(search_results)

    # The cache key is the hash of the fully formatted prompt, so the prompt is
    # built here only to derive that key (same "_ask_" namespace as ask()).
    prompt = ASK_PROMPT.format(question=question, sources_block=sources_block)
    cached = self.db.get_cached_response("_ask_", _prompt_hash(prompt))

    # Use truthiness, matching ask()'s ``if cached:`` and the template's
    # ``if result.answer``: an empty cached string is not a usable answer and
    # must not be reported as one.
    answer = cached or None
    return {
        "sources": sources,
        "has_cached_answer": answer is not None,
        "answer": answer,
    }
|
||||
|
||||
def ask(self, question: str, top_k: int = 5, cheap: bool = True) -> dict:
|
||||
"""Answer a natural language question using search + Claude synthesis.
|
||||
|
||||
Returns {answer: str, sources: [{name, title, similarity, excerpt}]}.
|
||||
Caches Claude responses via llm_cache.
|
||||
Caches Claude responses via llm_cache permanently.
|
||||
"""
|
||||
search_results = self.search(question, top_k=top_k)
|
||||
|
||||
@@ -214,31 +258,7 @@ class HybridSearch:
|
||||
"sources": [],
|
||||
}
|
||||
|
||||
# Build context from top results
|
||||
sources_block = ""
|
||||
sources = []
|
||||
for r in search_results:
|
||||
draft = self.db.get_draft(r["name"])
|
||||
if draft is None:
|
||||
continue
|
||||
|
||||
# Title + abstract + first 500 chars of full text
|
||||
text_preview = ""
|
||||
if draft.full_text:
|
||||
text_preview = draft.full_text[:500]
|
||||
|
||||
sources_block += f"\n---\n**{draft.name}** — {draft.title}\n"
|
||||
sources_block += f"Abstract: {draft.abstract[:500]}\n"
|
||||
if text_preview:
|
||||
sources_block += f"Content excerpt: {text_preview}\n"
|
||||
|
||||
sources.append({
|
||||
"name": draft.name,
|
||||
"title": draft.title,
|
||||
"similarity": r.get("similarity", r.get("score", 0)),
|
||||
"excerpt": r.get("excerpt", ""),
|
||||
"match_type": r.get("match_type", ""),
|
||||
})
|
||||
sources, sources_block = self._build_sources(search_results)
|
||||
|
||||
prompt = ASK_PROMPT.format(
|
||||
question=question,
|
||||
@@ -246,7 +266,7 @@ class HybridSearch:
|
||||
)
|
||||
phash = _prompt_hash(prompt)
|
||||
|
||||
# Check cache
|
||||
# Check cache — cached answers are served forever (no TTL)
|
||||
cached = self.db.get_cached_response("_ask_", phash)
|
||||
if cached:
|
||||
return {
|
||||
@@ -262,7 +282,7 @@ class HybridSearch:
|
||||
prompt, max_tokens=1024, cheap=cheap
|
||||
)
|
||||
|
||||
# Cache the response
|
||||
# Cache permanently
|
||||
self.db.cache_response(
|
||||
"_ask_", phash,
|
||||
self.config.claude_model_cheap if cheap else self.config.claude_model,
|
||||
|
||||
@@ -45,7 +45,8 @@ from webui.data import (
|
||||
get_author_network_full,
|
||||
get_citation_graph,
|
||||
get_comparison_data,
|
||||
get_ask_data,
|
||||
get_ask_search,
|
||||
get_ask_synthesize,
|
||||
global_search,
|
||||
)
|
||||
|
||||
@@ -325,20 +326,32 @@ def ask_page():
|
||||
result = None
|
||||
if question:
|
||||
top_k = request.args.get("top", 5, type=int)
|
||||
result = get_ask_data(db(), question, top_k=top_k)
|
||||
# Search only (free) — returns sources + cached answer if available
|
||||
result = get_ask_search(db(), question, top_k=top_k)
|
||||
return render_template("ask.html", question=question, result=result)
|
||||
|
||||
|
||||
@app.route("/api/ask", methods=["POST"])
|
||||
def api_ask():
|
||||
"""Answer a question via hybrid search + Claude. Returns JSON."""
|
||||
@app.route("/api/ask/synthesize", methods=["POST"])
def api_ask_synthesize():
    """Synthesize an answer via Claude (costs tokens, cached permanently). Returns JSON.

    Expects a JSON object with a required ``question`` and an optional integer
    ``top_k`` (default 5). Responds with 400 and an ``error`` message when the
    body is not a JSON object, ``question`` is missing, or ``top_k`` is not an
    integer. Synthesis always runs with ``cheap=True``; a ``cheap`` value in the
    request body is ignored.
    """
    data = request.get_json(force=True, silent=True)
    # get_json may return any JSON value (list, string, ...); only an object
    # can carry the expected keys.
    if not isinstance(data, dict) or "question" not in data:
        return jsonify({"error": "Missing 'question' in request body"}), 400
    question = data["question"]
    # Coerce like the GET /ask route does (type=int) so a malformed value is
    # rejected here instead of failing somewhere inside the search.
    try:
        top_k = int(data.get("top_k", 5))
    except (TypeError, ValueError):
        return jsonify({"error": "'top_k' must be an integer"}), 400
    result = get_ask_synthesize(db(), question, top_k=top_k, cheap=True)
    return jsonify(result)
|
||||
|
||||
|
||||
@app.route("/api/ask", methods=["POST"])
def api_ask():
    """Search only (free). Returns JSON with sources + cached answer if available.

    Expects a JSON object with a required ``question`` and an optional integer
    ``top_k`` (default 5). Responds with 400 and an ``error`` message when the
    body is not a JSON object, ``question`` is missing, or ``top_k`` is not an
    integer.
    """
    data = request.get_json(force=True, silent=True)
    # get_json may return any JSON value (list, string, ...); only an object
    # can carry the expected keys.
    if not isinstance(data, dict) or "question" not in data:
        return jsonify({"error": "Missing 'question' in request body"}), 400
    question = data["question"]
    # Coerce like the GET /ask route does (type=int) so a malformed value is
    # rejected here instead of failing somewhere inside the search.
    try:
        top_k = int(data.get("top_k", 5))
    except (TypeError, ValueError):
        return jsonify({"error": "'top_k' must be an integer"}), 400
    result = get_ask_search(db(), question, top_k=top_k)
    return jsonify(result)
|
||||
|
||||
|
||||
|
||||
@@ -1204,11 +1204,18 @@ def get_comparison_data(db: Database, names: list[str]) -> dict | None:
|
||||
}
|
||||
|
||||
|
||||
def get_ask_data(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
|
||||
"""Run hybrid search + Claude synthesis for a question.
|
||||
def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict:
    """Search-only (free) — returns sources + cached answer if available.

    Loads the configuration, builds a ``HybridSearch`` over ``db`` and returns
    the result of its ``search_only`` call unchanged.

    Args:
        db: Database handle passed to ``HybridSearch``.
        question: Natural-language question to search for.
        top_k: Maximum number of search results to request.
    """
    # Imported inside the function, as in the original; presumably to keep the
    # analyzer package off this module's import path — confirm before moving.
    from ietf_analyzer.config import Config
    from ietf_analyzer.search import HybridSearch

    config = Config.load()
    searcher = HybridSearch(config, db)
    return searcher.search_only(question, top_k=top_k)
|
||||
|
||||
|
||||
def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
|
||||
"""Run Claude synthesis (costs tokens, result is cached permanently)."""
|
||||
from ietf_analyzer.config import Config
|
||||
from ietf_analyzer.search import HybridSearch
|
||||
|
||||
|
||||
@@ -13,24 +13,17 @@
|
||||
background: linear-gradient(135deg, rgba(30, 41, 59, 0.8), rgba(30, 41, 59, 0.4));
|
||||
backdrop-filter: blur(10px);
|
||||
}
|
||||
.source-row {
|
||||
transition: all 0.15s ease;
|
||||
}
|
||||
.source-row:hover {
|
||||
background: rgba(59, 130, 246, 0.05);
|
||||
}
|
||||
.source-row { transition: all 0.15s ease; }
|
||||
.source-row:hover { background: rgba(59, 130, 246, 0.05); }
|
||||
.loading-spinner {
|
||||
border: 3px solid rgba(59, 130, 246, 0.2);
|
||||
border-top-color: #3b82f6;
|
||||
border-radius: 50%;
|
||||
width: 24px;
|
||||
height: 24px;
|
||||
width: 20px; height: 20px;
|
||||
animation: spin 0.8s linear infinite;
|
||||
display: inline-block;
|
||||
}
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
@keyframes spin { to { transform: rotate(360deg); } }
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
@@ -38,7 +31,7 @@
|
||||
<!-- Header -->
|
||||
<div class="mb-8 text-center">
|
||||
<h1 class="text-3xl font-bold text-white">Ask the Draft Corpus</h1>
|
||||
<p class="text-slate-400 text-sm mt-2">Ask natural language questions about IETF AI/agent drafts. Answers are synthesized from the most relevant documents.</p>
|
||||
<p class="text-slate-400 text-sm mt-2">Search across {{ "{:,}".format(stats.total if stats is defined and stats else 434) }} drafts using keyword + semantic similarity. AI synthesis is optional.</p>
|
||||
</div>
|
||||
|
||||
<!-- Search Bar -->
|
||||
@@ -52,7 +45,7 @@
|
||||
class="flex-1 bg-transparent border-0 px-3 py-3 text-base text-slate-200 placeholder-slate-500 focus:outline-none"
|
||||
autofocus>
|
||||
<button type="submit" class="px-6 py-3 bg-blue-600 text-white rounded-lg text-sm font-medium hover:bg-blue-500 transition-colors flex-shrink-0">
|
||||
Ask
|
||||
Search
|
||||
</button>
|
||||
</div>
|
||||
<div class="flex items-center gap-4 mt-3 px-2">
|
||||
@@ -64,7 +57,7 @@
|
||||
<option value="10" {% if request.args.get('top', '5') == '10' %}selected{% endif %}>10</option>
|
||||
</select>
|
||||
</label>
|
||||
<div class="text-xs text-slate-600">Combines keyword search + semantic similarity</div>
|
||||
<div class="text-xs text-slate-600">Combines keyword search + semantic similarity (free, no API calls)</div>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
@@ -91,25 +84,48 @@
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Answer -->
|
||||
<!-- Results -->
|
||||
{% if result %}
|
||||
<div class="max-w-3xl mx-auto">
|
||||
<!-- Synthesized answer -->
|
||||
<div class="answer-card rounded-xl border border-slate-800 p-6 mb-6">
|
||||
<div class="flex items-center gap-2 mb-4">
|
||||
<svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
|
||||
</svg>
|
||||
<h2 class="text-lg font-semibold text-white">Answer</h2>
|
||||
|
||||
<!-- AI Synthesized Answer (shown if cached or after user clicks synthesize) -->
|
||||
<div id="answerSection">
|
||||
{% if result.answer %}
|
||||
<div class="answer-card rounded-xl border border-slate-800 p-6 mb-6">
|
||||
<div class="flex items-center gap-2 mb-4">
|
||||
<svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
|
||||
</svg>
|
||||
<h2 class="text-lg font-semibold text-white">AI Answer</h2>
|
||||
<span class="text-xs px-2 py-0.5 rounded-full bg-green-900/30 text-green-400 border border-green-800/30">cached</span>
|
||||
</div>
|
||||
<div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">{{ result.answer }}</div>
|
||||
</div>
|
||||
<div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">{{ result.answer }}</div>
|
||||
{% else %}
|
||||
<!-- Synthesize button (costs tokens, result is cached permanently) -->
|
||||
<div class="answer-card rounded-xl border border-slate-800 p-5 mb-6">
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<div class="text-sm text-slate-300 font-medium">Want an AI-synthesized answer?</div>
|
||||
<div class="text-xs text-slate-500 mt-0.5">Uses Claude API (Haiku, ~$0.001). Result is cached permanently for all future visitors.</div>
|
||||
</div>
|
||||
<button id="synthesizeBtn" onclick="synthesizeAnswer()"
|
||||
class="px-4 py-2 bg-purple-600 text-white rounded-lg text-sm font-medium hover:bg-purple-500 transition-colors flex items-center gap-2 flex-shrink-0">
|
||||
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
|
||||
</svg>
|
||||
Synthesize
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- Source drafts -->
|
||||
<!-- Source drafts (always shown — free) -->
|
||||
{% if result.sources %}
|
||||
<div class="answer-card rounded-xl border border-slate-800 overflow-hidden">
|
||||
<div class="px-6 py-4 border-b border-slate-800">
|
||||
<h3 class="text-sm font-semibold text-slate-300">Source Drafts ({{ result.sources|length }})</h3>
|
||||
<h3 class="text-sm font-semibold text-slate-300">Matching Drafts ({{ result.sources|length }})</h3>
|
||||
</div>
|
||||
<table class="w-full text-sm">
|
||||
<thead>
|
||||
@@ -149,5 +165,46 @@
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function synthesizeAnswer() {
|
||||
const btn = document.getElementById('synthesizeBtn');
|
||||
const section = document.getElementById('answerSection');
|
||||
|
||||
// Show loading state
|
||||
btn.disabled = true;
|
||||
btn.innerHTML = '<span class="loading-spinner"></span> Synthesizing...';
|
||||
|
||||
fetch('/api/ask/synthesize', {
|
||||
method: 'POST',
|
||||
headers: {'Content-Type': 'application/json'},
|
||||
body: JSON.stringify({
|
||||
question: {{ question | tojson }},
|
||||
top_k: {{ request.args.get('top', '5') | int }}
|
||||
})
|
||||
})
|
||||
.then(r => r.json())
|
||||
.then(data => {
|
||||
if (data.answer) {
|
||||
section.innerHTML = `
|
||||
<div class="answer-card rounded-xl border border-slate-800 p-6 mb-6">
|
||||
<div class="flex items-center gap-2 mb-4">
|
||||
<svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
|
||||
</svg>
|
||||
<h2 class="text-lg font-semibold text-white">AI Answer</h2>
|
||||
<span class="text-xs px-2 py-0.5 rounded-full bg-blue-900/30 text-blue-400 border border-blue-800/30">just generated</span>
|
||||
</div>
|
||||
<div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">${data.answer}</div>
|
||||
</div>`;
|
||||
}
|
||||
})
|
||||
.catch(err => {
|
||||
btn.disabled = false;
|
||||
btn.innerHTML = 'Synthesize (retry)';
|
||||
section.querySelector('.text-xs.text-slate-500').textContent = 'Error: ' + err.message;
|
||||
});
|
||||
}
|
||||
</script>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
|
||||
Reference in New Issue
Block a user