Make /ask free by default, Claude synthesis is opt-in
Search results (FTS5 + Ollama embeddings) are shown immediately at no cost. AI synthesis via Claude is behind a "Synthesize" button that the user must explicitly click. Synthesized answers are cached permanently, so repeat visitors never trigger API calls.

- Split ask into search_only() (free) and ask() (paid, cached)
- GET /ask now uses search_only — no Claude tokens spent
- POST /api/ask/synthesize triggers Claude (Haiku, ~$0.001)
- Cached answers shown with "cached" badge, no re-generation
- Template shows sources immediately + optional synthesize button

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -200,11 +200,55 @@ class HybridSearch:
|
|||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
def _build_sources(self, search_results: list[dict]) -> tuple[list[dict], str]:
    """Build the source list and the prompt context block from search results.

    Each result is resolved with ``self.db.get_draft(r["name"])``; results
    whose draft lookup returns ``None`` are skipped.

    Args:
        search_results: Result dicts. Each must carry a ``"name"`` key;
            ``"similarity"`` (falling back to ``"score"``, then 0),
            ``"excerpt"`` and ``"match_type"`` are copied when present.

    Returns:
        ``(sources, sources_block)`` where ``sources`` is a list of dicts
        with keys ``name``, ``title``, ``similarity``, ``excerpt`` and
        ``match_type``, and ``sources_block`` is the concatenated text
        section (title, abstract, optional content excerpt) per draft.
    """
    sources: list[dict] = []
    block_parts: list[str] = []

    for r in search_results:
        draft = self.db.get_draft(r["name"])
        if draft is None:
            continue

        # Abstract and full text may both be missing; slicing None would
        # raise, so fall back to an empty string for either one.
        abstract = draft.abstract[:500] if draft.abstract else ""
        text_preview = draft.full_text[:500] if draft.full_text else ""

        block_parts.append(f"\n---\n**{draft.name}** — {draft.title}\n")
        block_parts.append(f"Abstract: {abstract}\n")
        if text_preview:
            block_parts.append(f"Content excerpt: {text_preview}\n")

        sources.append({
            "name": draft.name,
            "title": draft.title,
            "similarity": r.get("similarity", r.get("score", 0)),
            "excerpt": r.get("excerpt", ""),
            "match_type": r.get("match_type", ""),
        })

    # Joined once at the end; the resulting text is identical to what
    # incremental concatenation produced, so prompt hashes do not change.
    return sources, "".join(block_parts)
|
||||||
|
|
||||||
|
def search_only(self, question: str, top_k: int = 5) -> dict:
    """Search without Claude synthesis (free — only FTS5 + Ollama).

    Runs ``self.search`` and, when there are results, looks up a previously
    cached synthesized answer. Claude is never called from here.

    Args:
        question: The natural language question to search for.
        top_k: Number of results requested from ``self.search``.

    Returns:
        ``{sources: [...], has_cached_answer: bool, answer: str|None}``.
        ``answer`` is ``None`` whenever ``has_cached_answer`` is ``False``.
    """
    search_results = self.search(question, top_k=top_k)
    if not search_results:
        return {"sources": [], "has_cached_answer": False, "answer": None}

    sources, sources_block = self._build_sources(search_results)

    # Look up the cache under the "_ask_" namespace using the hash of the
    # formatted prompt, the same key derivation ask() uses for its cache.
    prompt = ASK_PROMPT.format(question=question, sources_block=sources_block)
    cached = self.db.get_cached_response("_ask_", _prompt_hash(prompt))

    # Treat an empty cached value as a miss, matching ask()'s `if cached:`
    # check, so the flag is never True while there is nothing to display.
    answer = cached or None

    return {
        "sources": sources,
        "has_cached_answer": answer is not None,
        "answer": answer,
    }
|
||||||
|
|
||||||
def ask(self, question: str, top_k: int = 5, cheap: bool = True) -> dict:
|
def ask(self, question: str, top_k: int = 5, cheap: bool = True) -> dict:
|
||||||
"""Answer a natural language question using search + Claude synthesis.
|
"""Answer a natural language question using search + Claude synthesis.
|
||||||
|
|
||||||
Returns {answer: str, sources: [{name, title, similarity, excerpt}]}.
|
Returns {answer: str, sources: [{name, title, similarity, excerpt}]}.
|
||||||
Caches Claude responses via llm_cache.
|
Caches Claude responses via llm_cache permanently.
|
||||||
"""
|
"""
|
||||||
search_results = self.search(question, top_k=top_k)
|
search_results = self.search(question, top_k=top_k)
|
||||||
|
|
||||||
@@ -214,31 +258,7 @@ class HybridSearch:
|
|||||||
"sources": [],
|
"sources": [],
|
||||||
}
|
}
|
||||||
|
|
||||||
# Build context from top results
|
sources, sources_block = self._build_sources(search_results)
|
||||||
sources_block = ""
|
|
||||||
sources = []
|
|
||||||
for r in search_results:
|
|
||||||
draft = self.db.get_draft(r["name"])
|
|
||||||
if draft is None:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Title + abstract + first 500 chars of full text
|
|
||||||
text_preview = ""
|
|
||||||
if draft.full_text:
|
|
||||||
text_preview = draft.full_text[:500]
|
|
||||||
|
|
||||||
sources_block += f"\n---\n**{draft.name}** — {draft.title}\n"
|
|
||||||
sources_block += f"Abstract: {draft.abstract[:500]}\n"
|
|
||||||
if text_preview:
|
|
||||||
sources_block += f"Content excerpt: {text_preview}\n"
|
|
||||||
|
|
||||||
sources.append({
|
|
||||||
"name": draft.name,
|
|
||||||
"title": draft.title,
|
|
||||||
"similarity": r.get("similarity", r.get("score", 0)),
|
|
||||||
"excerpt": r.get("excerpt", ""),
|
|
||||||
"match_type": r.get("match_type", ""),
|
|
||||||
})
|
|
||||||
|
|
||||||
prompt = ASK_PROMPT.format(
|
prompt = ASK_PROMPT.format(
|
||||||
question=question,
|
question=question,
|
||||||
@@ -246,7 +266,7 @@ class HybridSearch:
|
|||||||
)
|
)
|
||||||
phash = _prompt_hash(prompt)
|
phash = _prompt_hash(prompt)
|
||||||
|
|
||||||
# Check cache
|
# Check cache — cached answers are served forever (no TTL)
|
||||||
cached = self.db.get_cached_response("_ask_", phash)
|
cached = self.db.get_cached_response("_ask_", phash)
|
||||||
if cached:
|
if cached:
|
||||||
return {
|
return {
|
||||||
@@ -262,7 +282,7 @@ class HybridSearch:
|
|||||||
prompt, max_tokens=1024, cheap=cheap
|
prompt, max_tokens=1024, cheap=cheap
|
||||||
)
|
)
|
||||||
|
|
||||||
# Cache the response
|
# Cache permanently
|
||||||
self.db.cache_response(
|
self.db.cache_response(
|
||||||
"_ask_", phash,
|
"_ask_", phash,
|
||||||
self.config.claude_model_cheap if cheap else self.config.claude_model,
|
self.config.claude_model_cheap if cheap else self.config.claude_model,
|
||||||
|
|||||||
@@ -45,7 +45,8 @@ from webui.data import (
|
|||||||
get_author_network_full,
|
get_author_network_full,
|
||||||
get_citation_graph,
|
get_citation_graph,
|
||||||
get_comparison_data,
|
get_comparison_data,
|
||||||
get_ask_data,
|
get_ask_search,
|
||||||
|
get_ask_synthesize,
|
||||||
global_search,
|
global_search,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -325,20 +326,32 @@ def ask_page():
|
|||||||
result = None
|
result = None
|
||||||
if question:
|
if question:
|
||||||
top_k = request.args.get("top", 5, type=int)
|
top_k = request.args.get("top", 5, type=int)
|
||||||
result = get_ask_data(db(), question, top_k=top_k)
|
# Search only (free) — returns sources + cached answer if available
|
||||||
|
result = get_ask_search(db(), question, top_k=top_k)
|
||||||
return render_template("ask.html", question=question, result=result)
|
return render_template("ask.html", question=question, result=result)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/ask", methods=["POST"])
|
@app.route("/api/ask/synthesize", methods=["POST"])
|
||||||
def api_ask():
|
def api_ask_synthesize():
|
||||||
"""Answer a question via hybrid search + Claude. Returns JSON."""
|
"""Synthesize an answer via Claude (costs tokens, cached permanently). Returns JSON."""
|
||||||
data = request.get_json(force=True, silent=True)
|
data = request.get_json(force=True, silent=True)
|
||||||
if not data or "question" not in data:
|
if not data or "question" not in data:
|
||||||
return jsonify({"error": "Missing 'question' in request body"}), 400
|
return jsonify({"error": "Missing 'question' in request body"}), 400
|
||||||
question = data["question"]
|
question = data["question"]
|
||||||
top_k = data.get("top_k", 5)
|
top_k = data.get("top_k", 5)
|
||||||
cheap = data.get("cheap", True)
|
result = get_ask_synthesize(db(), question, top_k=top_k, cheap=True)
|
||||||
result = get_ask_data(db(), question, top_k=top_k, cheap=cheap)
|
return jsonify(result)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/ask", methods=["POST"])
|
||||||
|
def api_ask():
|
||||||
|
"""Search only (free). Returns JSON with sources + cached answer if available."""
|
||||||
|
data = request.get_json(force=True, silent=True)
|
||||||
|
if not data or "question" not in data:
|
||||||
|
return jsonify({"error": "Missing 'question' in request body"}), 400
|
||||||
|
question = data["question"]
|
||||||
|
top_k = data.get("top_k", 5)
|
||||||
|
result = get_ask_search(db(), question, top_k=top_k)
|
||||||
return jsonify(result)
|
return jsonify(result)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1204,11 +1204,18 @@ def get_comparison_data(db: Database, names: list[str]) -> dict | None:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_ask_data(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
|
def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict:
|
||||||
"""Run hybrid search + Claude synthesis for a question.
|
"""Search-only (free) — returns sources + cached answer if available."""
|
||||||
|
from ietf_analyzer.config import Config
|
||||||
|
from ietf_analyzer.search import HybridSearch
|
||||||
|
|
||||||
Returns {answer: str, sources: [{name, title, similarity, excerpt}]}.
|
config = Config.load()
|
||||||
"""
|
searcher = HybridSearch(config, db)
|
||||||
|
return searcher.search_only(question, top_k=top_k)
|
||||||
|
|
||||||
|
|
||||||
|
def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
|
||||||
|
"""Run Claude synthesis (costs tokens, result is cached permanently)."""
|
||||||
from ietf_analyzer.config import Config
|
from ietf_analyzer.config import Config
|
||||||
from ietf_analyzer.search import HybridSearch
|
from ietf_analyzer.search import HybridSearch
|
||||||
|
|
||||||
|
|||||||
@@ -13,24 +13,17 @@
|
|||||||
background: linear-gradient(135deg, rgba(30, 41, 59, 0.8), rgba(30, 41, 59, 0.4));
|
background: linear-gradient(135deg, rgba(30, 41, 59, 0.8), rgba(30, 41, 59, 0.4));
|
||||||
backdrop-filter: blur(10px);
|
backdrop-filter: blur(10px);
|
||||||
}
|
}
|
||||||
.source-row {
|
.source-row { transition: all 0.15s ease; }
|
||||||
transition: all 0.15s ease;
|
.source-row:hover { background: rgba(59, 130, 246, 0.05); }
|
||||||
}
|
|
||||||
.source-row:hover {
|
|
||||||
background: rgba(59, 130, 246, 0.05);
|
|
||||||
}
|
|
||||||
.loading-spinner {
|
.loading-spinner {
|
||||||
border: 3px solid rgba(59, 130, 246, 0.2);
|
border: 3px solid rgba(59, 130, 246, 0.2);
|
||||||
border-top-color: #3b82f6;
|
border-top-color: #3b82f6;
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
width: 24px;
|
width: 20px; height: 20px;
|
||||||
height: 24px;
|
|
||||||
animation: spin 0.8s linear infinite;
|
animation: spin 0.8s linear infinite;
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
}
|
}
|
||||||
@keyframes spin {
|
@keyframes spin { to { transform: rotate(360deg); } }
|
||||||
to { transform: rotate(360deg); }
|
|
||||||
}
|
|
||||||
</style>
|
</style>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
@@ -38,7 +31,7 @@
|
|||||||
<!-- Header -->
|
<!-- Header -->
|
||||||
<div class="mb-8 text-center">
|
<div class="mb-8 text-center">
|
||||||
<h1 class="text-3xl font-bold text-white">Ask the Draft Corpus</h1>
|
<h1 class="text-3xl font-bold text-white">Ask the Draft Corpus</h1>
|
||||||
<p class="text-slate-400 text-sm mt-2">Ask natural language questions about IETF AI/agent drafts. Answers are synthesized from the most relevant documents.</p>
|
<p class="text-slate-400 text-sm mt-2">Search across {{ "{:,}".format(stats.total if stats is defined and stats else 434) }} drafts using keyword + semantic similarity. AI synthesis is optional.</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Search Bar -->
|
<!-- Search Bar -->
|
||||||
@@ -52,7 +45,7 @@
|
|||||||
class="flex-1 bg-transparent border-0 px-3 py-3 text-base text-slate-200 placeholder-slate-500 focus:outline-none"
|
class="flex-1 bg-transparent border-0 px-3 py-3 text-base text-slate-200 placeholder-slate-500 focus:outline-none"
|
||||||
autofocus>
|
autofocus>
|
||||||
<button type="submit" class="px-6 py-3 bg-blue-600 text-white rounded-lg text-sm font-medium hover:bg-blue-500 transition-colors flex-shrink-0">
|
<button type="submit" class="px-6 py-3 bg-blue-600 text-white rounded-lg text-sm font-medium hover:bg-blue-500 transition-colors flex-shrink-0">
|
||||||
Ask
|
Search
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex items-center gap-4 mt-3 px-2">
|
<div class="flex items-center gap-4 mt-3 px-2">
|
||||||
@@ -64,7 +57,7 @@
|
|||||||
<option value="10" {% if request.args.get('top', '5') == '10' %}selected{% endif %}>10</option>
|
<option value="10" {% if request.args.get('top', '5') == '10' %}selected{% endif %}>10</option>
|
||||||
</select>
|
</select>
|
||||||
</label>
|
</label>
|
||||||
<div class="text-xs text-slate-600">Combines keyword search + semantic similarity</div>
|
<div class="text-xs text-slate-600">Combines keyword search + semantic similarity (free, no API calls)</div>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
@@ -91,25 +84,48 @@
|
|||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<!-- Answer -->
|
<!-- Results -->
|
||||||
{% if result %}
|
{% if result %}
|
||||||
<div class="max-w-3xl mx-auto">
|
<div class="max-w-3xl mx-auto">
|
||||||
<!-- Synthesized answer -->
|
|
||||||
<div class="answer-card rounded-xl border border-slate-800 p-6 mb-6">
|
<!-- AI Synthesized Answer (shown if cached or after user clicks synthesize) -->
|
||||||
<div class="flex items-center gap-2 mb-4">
|
<div id="answerSection">
|
||||||
<svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
{% if result.answer %}
|
||||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
|
<div class="answer-card rounded-xl border border-slate-800 p-6 mb-6">
|
||||||
</svg>
|
<div class="flex items-center gap-2 mb-4">
|
||||||
<h2 class="text-lg font-semibold text-white">Answer</h2>
|
<svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
|
||||||
|
</svg>
|
||||||
|
<h2 class="text-lg font-semibold text-white">AI Answer</h2>
|
||||||
|
<span class="text-xs px-2 py-0.5 rounded-full bg-green-900/30 text-green-400 border border-green-800/30">cached</span>
|
||||||
|
</div>
|
||||||
|
<div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">{{ result.answer }}</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">{{ result.answer }}</div>
|
{% else %}
|
||||||
|
<!-- Synthesize button (costs tokens, result is cached permanently) -->
|
||||||
|
<div class="answer-card rounded-xl border border-slate-800 p-5 mb-6">
|
||||||
|
<div class="flex items-center justify-between">
|
||||||
|
<div>
|
||||||
|
<div class="text-sm text-slate-300 font-medium">Want an AI-synthesized answer?</div>
|
||||||
|
<div class="text-xs text-slate-500 mt-0.5">Uses Claude API (Haiku, ~$0.001). Result is cached permanently for all future visitors.</div>
|
||||||
|
</div>
|
||||||
|
<button id="synthesizeBtn" onclick="synthesizeAnswer()"
|
||||||
|
class="px-4 py-2 bg-purple-600 text-white rounded-lg text-sm font-medium hover:bg-purple-500 transition-colors flex items-center gap-2 flex-shrink-0">
|
||||||
|
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
|
||||||
|
</svg>
|
||||||
|
Synthesize
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Source drafts -->
|
<!-- Source drafts (always shown — free) -->
|
||||||
{% if result.sources %}
|
{% if result.sources %}
|
||||||
<div class="answer-card rounded-xl border border-slate-800 overflow-hidden">
|
<div class="answer-card rounded-xl border border-slate-800 overflow-hidden">
|
||||||
<div class="px-6 py-4 border-b border-slate-800">
|
<div class="px-6 py-4 border-b border-slate-800">
|
||||||
<h3 class="text-sm font-semibold text-slate-300">Source Drafts ({{ result.sources|length }})</h3>
|
<h3 class="text-sm font-semibold text-slate-300">Matching Drafts ({{ result.sources|length }})</h3>
|
||||||
</div>
|
</div>
|
||||||
<table class="w-full text-sm">
|
<table class="w-full text-sm">
|
||||||
<thead>
|
<thead>
|
||||||
@@ -149,5 +165,46 @@
|
|||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
function synthesizeAnswer() {
|
||||||
|
const btn = document.getElementById('synthesizeBtn');
|
||||||
|
const section = document.getElementById('answerSection');
|
||||||
|
|
||||||
|
// Show loading state
|
||||||
|
btn.disabled = true;
|
||||||
|
btn.innerHTML = '<span class="loading-spinner"></span> Synthesizing...';
|
||||||
|
|
||||||
|
fetch('/api/ask/synthesize', {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {'Content-Type': 'application/json'},
|
||||||
|
body: JSON.stringify({
|
||||||
|
question: {{ question | tojson }},
|
||||||
|
top_k: {{ request.args.get('top', '5') | int }}
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.then(r => r.json())
|
||||||
|
.then(data => {
|
||||||
|
if (data.answer) {
|
||||||
|
section.innerHTML = `
|
||||||
|
<div class="answer-card rounded-xl border border-slate-800 p-6 mb-6">
|
||||||
|
<div class="flex items-center gap-2 mb-4">
|
||||||
|
<svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
|
||||||
|
</svg>
|
||||||
|
<h2 class="text-lg font-semibold text-white">AI Answer</h2>
|
||||||
|
<span class="text-xs px-2 py-0.5 rounded-full bg-blue-900/30 text-blue-400 border border-blue-800/30">just generated</span>
|
||||||
|
</div>
|
||||||
|
<div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">${data.answer}</div>
|
||||||
|
</div>`;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.catch(err => {
|
||||||
|
btn.disabled = false;
|
||||||
|
btn.innerHTML = 'Synthesize (retry)';
|
||||||
|
section.querySelector('.text-xs.text-slate-500').textContent = 'Error: ' + err.message;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
</script>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
Reference in New Issue
Block a user