Make /ask free by default, Claude synthesis is opt-in

Search results (FTS5 + Ollama embeddings) are shown immediately at no
cost. AI synthesis via Claude is behind a "Synthesize" button that the
user must explicitly click. Synthesized answers are cached permanently,
so repeat visitors asking the same question never trigger API calls.

- Split ask into search_only() (free) and ask() (paid, cached)
- GET /ask now uses search_only — no Claude tokens spent
- POST /api/ask/synthesize triggers Claude (Haiku, ~$0.001)
- Cached answers shown with "cached" badge, no re-generation
- Template shows sources immediately + optional synthesize button

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 21:15:07 +01:00
parent e65d1cfacd
commit 34c36f81f1
4 changed files with 161 additions and 64 deletions

View File

@@ -200,11 +200,55 @@ class HybridSearch:
return results return results
def _build_sources(self, search_results: list[dict]) -> tuple[list[dict], str]:
    """Build source list and context block from search results.

    Each hit is resolved with ``self.db.get_draft(hit["name"])``; hits whose
    draft cannot be found are skipped.

    Args:
        search_results: Search hits. Each must carry a ``"name"`` key and may
            carry ``"similarity"`` (or ``"score"``), ``"excerpt"`` and
            ``"match_type"``.

    Returns:
        ``(sources, sources_block)`` where ``sources`` is a list of dicts with
        the keys ``name``, ``title``, ``similarity``, ``excerpt`` and
        ``match_type``, and ``sources_block`` is the concatenated text context
        (title, abstract and an optional content excerpt per draft).
    """
    sources = []
    block_parts = []
    for r in search_results:
        draft = self.db.get_draft(r["name"])
        if draft is None:
            continue

        # Guard the abstract the same way as full_text: a draft without an
        # abstract must not abort the whole request with a TypeError.
        abstract_preview = draft.abstract[:500] if draft.abstract else ""
        text_preview = draft.full_text[:500] if draft.full_text else ""

        # The emitted text is unchanged for drafts that have an abstract, so
        # anything derived from this block (e.g. a prompt hash) stays stable.
        block_parts.append(f"\n---\n**{draft.name}** — {draft.title}\n")
        block_parts.append(f"Abstract: {abstract_preview}\n")
        if text_preview:
            block_parts.append(f"Content excerpt: {text_preview}\n")

        sources.append({
            "name": draft.name,
            "title": draft.title,
            # Fall back to "score" when the hit carries no "similarity".
            "similarity": r.get("similarity", r.get("score", 0)),
            "excerpt": r.get("excerpt", ""),
            "match_type": r.get("match_type", ""),
        })
    return sources, "".join(block_parts)
def search_only(self, question: str, top_k: int = 5) -> dict:
    """Search without Claude synthesis (free — only FTS5 + Ollama).

    Runs ``self.search`` and, if there are hits, looks up a previously cached
    synthesized answer for the same prompt. No answer is ever generated here.

    Args:
        question: The user's natural-language question.
        top_k: Maximum number of search hits to request.

    Returns:
        ``{sources: [...], has_cached_answer: bool, answer: str|None}``.
        ``answer`` is ``None`` whenever ``has_cached_answer`` is ``False``.
    """
    search_results = self.search(question, top_k=top_k)
    if not search_results:
        return {"sources": [], "has_cached_answer": False, "answer": None}

    sources, sources_block = self._build_sources(search_results)

    # Check if we already have a cached answer for this question. The prompt
    # is formatted from the same template and inputs used for synthesis so the
    # hash can match an entry stored under the "_ask_" cache namespace.
    prompt = ASK_PROMPT.format(question=question, sources_block=sources_block)
    phash = _prompt_hash(prompt)
    cached = self.db.get_cached_response("_ask_", phash)

    # Treat an empty cache entry as a miss: ask() regenerates on a falsy cache
    # value and the template only renders a truthy answer, so reporting
    # has_cached_answer=True for "" would advertise an answer nobody shows.
    answer = cached if cached else None
    return {
        "sources": sources,
        "has_cached_answer": answer is not None,
        "answer": answer,
    }
def ask(self, question: str, top_k: int = 5, cheap: bool = True) -> dict: def ask(self, question: str, top_k: int = 5, cheap: bool = True) -> dict:
"""Answer a natural language question using search + Claude synthesis. """Answer a natural language question using search + Claude synthesis.
Returns {answer: str, sources: [{name, title, similarity, excerpt}]}. Returns {answer: str, sources: [{name, title, similarity, excerpt}]}.
Caches Claude responses via llm_cache. Caches Claude responses via llm_cache permanently.
""" """
search_results = self.search(question, top_k=top_k) search_results = self.search(question, top_k=top_k)
@@ -214,31 +258,7 @@ class HybridSearch:
"sources": [], "sources": [],
} }
# Build context from top results sources, sources_block = self._build_sources(search_results)
sources_block = ""
sources = []
for r in search_results:
draft = self.db.get_draft(r["name"])
if draft is None:
continue
# Title + abstract + first 500 chars of full text
text_preview = ""
if draft.full_text:
text_preview = draft.full_text[:500]
sources_block += f"\n---\n**{draft.name}** — {draft.title}\n"
sources_block += f"Abstract: {draft.abstract[:500]}\n"
if text_preview:
sources_block += f"Content excerpt: {text_preview}\n"
sources.append({
"name": draft.name,
"title": draft.title,
"similarity": r.get("similarity", r.get("score", 0)),
"excerpt": r.get("excerpt", ""),
"match_type": r.get("match_type", ""),
})
prompt = ASK_PROMPT.format( prompt = ASK_PROMPT.format(
question=question, question=question,
@@ -246,7 +266,7 @@ class HybridSearch:
) )
phash = _prompt_hash(prompt) phash = _prompt_hash(prompt)
# Check cache # Check cache — cached answers are served forever (no TTL)
cached = self.db.get_cached_response("_ask_", phash) cached = self.db.get_cached_response("_ask_", phash)
if cached: if cached:
return { return {
@@ -262,7 +282,7 @@ class HybridSearch:
prompt, max_tokens=1024, cheap=cheap prompt, max_tokens=1024, cheap=cheap
) )
# Cache the response # Cache permanently
self.db.cache_response( self.db.cache_response(
"_ask_", phash, "_ask_", phash,
self.config.claude_model_cheap if cheap else self.config.claude_model, self.config.claude_model_cheap if cheap else self.config.claude_model,

View File

@@ -45,7 +45,8 @@ from webui.data import (
get_author_network_full, get_author_network_full,
get_citation_graph, get_citation_graph,
get_comparison_data, get_comparison_data,
get_ask_data, get_ask_search,
get_ask_synthesize,
global_search, global_search,
) )
@@ -325,20 +326,32 @@ def ask_page():
result = None result = None
if question: if question:
top_k = request.args.get("top", 5, type=int) top_k = request.args.get("top", 5, type=int)
result = get_ask_data(db(), question, top_k=top_k) # Search only (free) — returns sources + cached answer if available
result = get_ask_search(db(), question, top_k=top_k)
return render_template("ask.html", question=question, result=result) return render_template("ask.html", question=question, result=result)
@app.route("/api/ask", methods=["POST"]) @app.route("/api/ask/synthesize", methods=["POST"])
def api_ask(): def api_ask_synthesize():
"""Answer a question via hybrid search + Claude. Returns JSON.""" """Synthesize an answer via Claude (costs tokens, cached permanently). Returns JSON."""
data = request.get_json(force=True, silent=True) data = request.get_json(force=True, silent=True)
if not data or "question" not in data: if not data or "question" not in data:
return jsonify({"error": "Missing 'question' in request body"}), 400 return jsonify({"error": "Missing 'question' in request body"}), 400
question = data["question"] question = data["question"]
top_k = data.get("top_k", 5) top_k = data.get("top_k", 5)
cheap = data.get("cheap", True) result = get_ask_synthesize(db(), question, top_k=top_k, cheap=True)
result = get_ask_data(db(), question, top_k=top_k, cheap=cheap) return jsonify(result)
@app.route("/api/ask", methods=["POST"])
def api_ask():
"""Search only (free). Returns JSON with sources + cached answer if available."""
data = request.get_json(force=True, silent=True)
if not data or "question" not in data:
return jsonify({"error": "Missing 'question' in request body"}), 400
question = data["question"]
top_k = data.get("top_k", 5)
result = get_ask_search(db(), question, top_k=top_k)
return jsonify(result) return jsonify(result)

View File

@@ -1204,11 +1204,18 @@ def get_comparison_data(db: Database, names: list[str]) -> dict | None:
} }
def get_ask_data(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict: def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict:
"""Run hybrid search + Claude synthesis for a question. """Search-only (free) — returns sources + cached answer if available."""
from ietf_analyzer.config import Config
from ietf_analyzer.search import HybridSearch
Returns {answer: str, sources: [{name, title, similarity, excerpt}]}. config = Config.load()
""" searcher = HybridSearch(config, db)
return searcher.search_only(question, top_k=top_k)
def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
"""Run Claude synthesis (costs tokens, result is cached permanently)."""
from ietf_analyzer.config import Config from ietf_analyzer.config import Config
from ietf_analyzer.search import HybridSearch from ietf_analyzer.search import HybridSearch

View File

@@ -13,24 +13,17 @@
background: linear-gradient(135deg, rgba(30, 41, 59, 0.8), rgba(30, 41, 59, 0.4)); background: linear-gradient(135deg, rgba(30, 41, 59, 0.8), rgba(30, 41, 59, 0.4));
backdrop-filter: blur(10px); backdrop-filter: blur(10px);
} }
.source-row { .source-row { transition: all 0.15s ease; }
transition: all 0.15s ease; .source-row:hover { background: rgba(59, 130, 246, 0.05); }
}
.source-row:hover {
background: rgba(59, 130, 246, 0.05);
}
.loading-spinner { .loading-spinner {
border: 3px solid rgba(59, 130, 246, 0.2); border: 3px solid rgba(59, 130, 246, 0.2);
border-top-color: #3b82f6; border-top-color: #3b82f6;
border-radius: 50%; border-radius: 50%;
width: 24px; width: 20px; height: 20px;
height: 24px;
animation: spin 0.8s linear infinite; animation: spin 0.8s linear infinite;
display: inline-block; display: inline-block;
} }
@keyframes spin { @keyframes spin { to { transform: rotate(360deg); } }
to { transform: rotate(360deg); }
}
</style> </style>
{% endblock %} {% endblock %}
@@ -38,7 +31,7 @@
<!-- Header --> <!-- Header -->
<div class="mb-8 text-center"> <div class="mb-8 text-center">
<h1 class="text-3xl font-bold text-white">Ask the Draft Corpus</h1> <h1 class="text-3xl font-bold text-white">Ask the Draft Corpus</h1>
<p class="text-slate-400 text-sm mt-2">Ask natural language questions about IETF AI/agent drafts. Answers are synthesized from the most relevant documents.</p> <p class="text-slate-400 text-sm mt-2">Search across {{ "{:,}".format(stats.total if stats is defined and stats else 434) }} drafts using keyword + semantic similarity. AI synthesis is optional.</p>
</div> </div>
<!-- Search Bar --> <!-- Search Bar -->
@@ -52,7 +45,7 @@
class="flex-1 bg-transparent border-0 px-3 py-3 text-base text-slate-200 placeholder-slate-500 focus:outline-none" class="flex-1 bg-transparent border-0 px-3 py-3 text-base text-slate-200 placeholder-slate-500 focus:outline-none"
autofocus> autofocus>
<button type="submit" class="px-6 py-3 bg-blue-600 text-white rounded-lg text-sm font-medium hover:bg-blue-500 transition-colors flex-shrink-0"> <button type="submit" class="px-6 py-3 bg-blue-600 text-white rounded-lg text-sm font-medium hover:bg-blue-500 transition-colors flex-shrink-0">
Ask Search
</button> </button>
</div> </div>
<div class="flex items-center gap-4 mt-3 px-2"> <div class="flex items-center gap-4 mt-3 px-2">
@@ -64,7 +57,7 @@
<option value="10" {% if request.args.get('top', '5') == '10' %}selected{% endif %}>10</option> <option value="10" {% if request.args.get('top', '5') == '10' %}selected{% endif %}>10</option>
</select> </select>
</label> </label>
<div class="text-xs text-slate-600">Combines keyword search + semantic similarity</div> <div class="text-xs text-slate-600">Combines keyword search + semantic similarity (free, no API calls)</div>
</div> </div>
</form> </form>
</div> </div>
@@ -91,25 +84,48 @@
</div> </div>
{% endif %} {% endif %}
<!-- Answer --> <!-- Results -->
{% if result %} {% if result %}
<div class="max-w-3xl mx-auto"> <div class="max-w-3xl mx-auto">
<!-- Synthesized answer -->
<!-- AI Synthesized Answer (shown if cached or after user clicks synthesize) -->
<div id="answerSection">
{% if result.answer %}
<div class="answer-card rounded-xl border border-slate-800 p-6 mb-6"> <div class="answer-card rounded-xl border border-slate-800 p-6 mb-6">
<div class="flex items-center gap-2 mb-4"> <div class="flex items-center gap-2 mb-4">
<svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24"> <svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/> <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
</svg> </svg>
<h2 class="text-lg font-semibold text-white">Answer</h2> <h2 class="text-lg font-semibold text-white">AI Answer</h2>
<span class="text-xs px-2 py-0.5 rounded-full bg-green-900/30 text-green-400 border border-green-800/30">cached</span>
</div> </div>
<div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">{{ result.answer }}</div> <div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">{{ result.answer }}</div>
</div> </div>
{% else %}
<!-- Synthesize button (costs tokens, result is cached permanently) -->
<div class="answer-card rounded-xl border border-slate-800 p-5 mb-6">
<div class="flex items-center justify-between">
<div>
<div class="text-sm text-slate-300 font-medium">Want an AI-synthesized answer?</div>
<div class="text-xs text-slate-500 mt-0.5">Uses Claude API (Haiku, ~$0.001). Result is cached permanently for all future visitors.</div>
</div>
<button id="synthesizeBtn" onclick="synthesizeAnswer()"
class="px-4 py-2 bg-purple-600 text-white rounded-lg text-sm font-medium hover:bg-purple-500 transition-colors flex items-center gap-2 flex-shrink-0">
<svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
</svg>
Synthesize
</button>
</div>
</div>
{% endif %}
</div>
<!-- Source drafts --> <!-- Source drafts (always shown — free) -->
{% if result.sources %} {% if result.sources %}
<div class="answer-card rounded-xl border border-slate-800 overflow-hidden"> <div class="answer-card rounded-xl border border-slate-800 overflow-hidden">
<div class="px-6 py-4 border-b border-slate-800"> <div class="px-6 py-4 border-b border-slate-800">
<h3 class="text-sm font-semibold text-slate-300">Source Drafts ({{ result.sources|length }})</h3> <h3 class="text-sm font-semibold text-slate-300">Matching Drafts ({{ result.sources|length }})</h3>
</div> </div>
<table class="w-full text-sm"> <table class="w-full text-sm">
<thead> <thead>
@@ -149,5 +165,46 @@
</div> </div>
{% endif %} {% endif %}
</div> </div>
<script>
function synthesizeAnswer() {
const btn = document.getElementById('synthesizeBtn');
const section = document.getElementById('answerSection');
// Show loading state
btn.disabled = true;
btn.innerHTML = '<span class="loading-spinner"></span> Synthesizing...';
fetch('/api/ask/synthesize', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({
question: {{ question | tojson }},
top_k: {{ request.args.get('top', '5') | int }}
})
})
.then(r => r.json())
.then(data => {
if (data.answer) {
section.innerHTML = `
<div class="answer-card rounded-xl border border-slate-800 p-6 mb-6">
<div class="flex items-center gap-2 mb-4">
<svg class="w-5 h-5 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
</svg>
<h2 class="text-lg font-semibold text-white">AI Answer</h2>
<span class="text-xs px-2 py-0.5 rounded-full bg-blue-900/30 text-blue-400 border border-blue-800/30">just generated</span>
</div>
<div class="text-slate-300 text-sm leading-relaxed whitespace-pre-line">${data.answer}</div>
</div>`;
}
})
.catch(err => {
btn.disabled = false;
btn.innerHTML = 'Synthesize (retry)';
section.querySelector('.text-xs.text-slate-500').textContent = 'Error: ' + err.message;
});
}
</script>
{% endif %} {% endif %}
{% endblock %} {% endblock %}