diff --git a/src/ietf_analyzer/search.py b/src/ietf_analyzer/search.py
index cbff081..bb1f84a 100644
--- a/src/ietf_analyzer/search.py
+++ b/src/ietf_analyzer/search.py
@@ -200,11 +200,55 @@ class HybridSearch:
         return results
 
+    def _build_sources(self, search_results: list[dict]) -> tuple[list[dict], str]:
+        """Build source list and context block from search results."""
+        sources_block = ""
+        sources = []
+        for r in search_results:
+            draft = self.db.get_draft(r["name"])
+            if draft is None:
+                continue
+            text_preview = draft.full_text[:500] if draft.full_text else ""
+            sources_block += f"\n---\n**{draft.name}** — {draft.title}\n"
+            sources_block += f"Abstract: {draft.abstract[:500]}\n"
+            if text_preview:
+                sources_block += f"Content excerpt: {text_preview}\n"
+            sources.append({
+                "name": draft.name,
+                "title": draft.title,
+                "similarity": r.get("similarity", r.get("score", 0)),
+                "excerpt": r.get("excerpt", ""),
+                "match_type": r.get("match_type", ""),
+            })
+        return sources, sources_block
+
+    def search_only(self, question: str, top_k: int = 5) -> dict:
+        """Search without Claude synthesis (free — only FTS5 + Ollama).
+
+        Returns {sources: [...], has_cached_answer: bool, answer: str|None}.
+        """
+        search_results = self.search(question, top_k=top_k)
+        if not search_results:
+            return {"sources": [], "has_cached_answer": False, "answer": None}
+
+        sources, sources_block = self._build_sources(search_results)
+
+        # Check if we already have a cached answer for this question
+        prompt = ASK_PROMPT.format(question=question, sources_block=sources_block)
+        phash = _prompt_hash(prompt)
+        cached = self.db.get_cached_response("_ask_", phash)
+
+        return {
+            "sources": sources,
+            "has_cached_answer": cached is not None,
+            "answer": cached,
+        }
+
     def ask(self, question: str, top_k: int = 5, cheap: bool = True) -> dict:
         """Answer a natural language question using search + Claude synthesis.
 
         Returns {answer: str, sources: [{name, title, similarity, excerpt}]}.
-        Caches Claude responses via llm_cache.
+        Caches Claude responses via llm_cache permanently.
         """
         search_results = self.search(question, top_k=top_k)
 
@@ -214,31 +258,7 @@ class HybridSearch:
                 "sources": [],
             }
 
-        # Build context from top results
-        sources_block = ""
-        sources = []
-        for r in search_results:
-            draft = self.db.get_draft(r["name"])
-            if draft is None:
-                continue
-
-            # Title + abstract + first 500 chars of full text
-            text_preview = ""
-            if draft.full_text:
-                text_preview = draft.full_text[:500]
-
-            sources_block += f"\n---\n**{draft.name}** — {draft.title}\n"
-            sources_block += f"Abstract: {draft.abstract[:500]}\n"
-            if text_preview:
-                sources_block += f"Content excerpt: {text_preview}\n"
-
-            sources.append({
-                "name": draft.name,
-                "title": draft.title,
-                "similarity": r.get("similarity", r.get("score", 0)),
-                "excerpt": r.get("excerpt", ""),
-                "match_type": r.get("match_type", ""),
-            })
+        sources, sources_block = self._build_sources(search_results)
 
         prompt = ASK_PROMPT.format(
             question=question,
@@ -246,7 +266,7 @@ class HybridSearch:
         )
         phash = _prompt_hash(prompt)
 
-        # Check cache
+        # Check cache — cached answers are served forever (no TTL)
         cached = self.db.get_cached_response("_ask_", phash)
         if cached:
             return {
@@ -262,7 +282,7 @@ class HybridSearch:
             prompt, max_tokens=1024, cheap=cheap
         )
 
-        # Cache the response
+        # Cache permanently
        self.db.cache_response(
             "_ask_", phash,
             self.config.claude_model_cheap if cheap else self.config.claude_model,
diff --git a/src/webui/app.py b/src/webui/app.py
index 25d77a7..9b7e21a 100644
--- a/src/webui/app.py
+++ b/src/webui/app.py
@@ -45,7 +45,8 @@ from webui.data import (
     get_author_network_full,
     get_citation_graph,
     get_comparison_data,
-    get_ask_data,
+    get_ask_search,
+    get_ask_synthesize,
     global_search,
 )
 
@@ -325,20 +326,32 @@ def ask_page():
     result = None
     if question:
         top_k = request.args.get("top", 5, type=int)
-        result = get_ask_data(db(), question, top_k=top_k)
+        # Search only (free) — returns sources + cached answer if available
+        result = get_ask_search(db(), question, top_k=top_k)
     return render_template("ask.html", question=question, result=result)
 
 
-@app.route("/api/ask", methods=["POST"])
-def api_ask():
-    """Answer a question via hybrid search + Claude. Returns JSON."""
+@app.route("/api/ask/synthesize", methods=["POST"])
+def api_ask_synthesize():
+    """Synthesize an answer via Claude (costs tokens, cached permanently). Returns JSON."""
     data = request.get_json(force=True, silent=True)
     if not data or "question" not in data:
         return jsonify({"error": "Missing 'question' in request body"}), 400
     question = data["question"]
     top_k = data.get("top_k", 5)
-    cheap = data.get("cheap", True)
-    result = get_ask_data(db(), question, top_k=top_k, cheap=cheap)
+    result = get_ask_synthesize(db(), question, top_k=top_k, cheap=True)
+    return jsonify(result)
+
+
+@app.route("/api/ask", methods=["POST"])
+def api_ask():
+    """Search only (free). Returns JSON with sources + cached answer if available."""
+    data = request.get_json(force=True, silent=True)
+    if not data or "question" not in data:
+        return jsonify({"error": "Missing 'question' in request body"}), 400
+    question = data["question"]
+    top_k = data.get("top_k", 5)
+    result = get_ask_search(db(), question, top_k=top_k)
     return jsonify(result)
 
 
diff --git a/src/webui/data.py b/src/webui/data.py
index d4dbd1c..915bd9a 100644
--- a/src/webui/data.py
+++ b/src/webui/data.py
@@ -1204,11 +1204,18 @@ def get_comparison_data(db: Database, names: list[str]) -> dict | None:
     }
 
 
-def get_ask_data(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
-    """Run hybrid search + Claude synthesis for a question.
+def get_ask_search(db: Database, question: str, top_k: int = 5) -> dict:
+    """Search-only (free) — returns sources + cached answer if available."""
+    from ietf_analyzer.config import Config
+    from ietf_analyzer.search import HybridSearch
 
-    Returns {answer: str, sources: [{name, title, similarity, excerpt}]}.
-    """
+    config = Config.load()
+    searcher = HybridSearch(config, db)
+    return searcher.search_only(question, top_k=top_k)
+
+
+def get_ask_synthesize(db: Database, question: str, top_k: int = 5, cheap: bool = True) -> dict:
+    """Run Claude synthesis (costs tokens, result is cached permanently)."""
     from ietf_analyzer.config import Config
     from ietf_analyzer.search import HybridSearch
 
diff --git a/src/webui/templates/ask.html b/src/webui/templates/ask.html
index 9e3f571..a44e6c2 100644
--- a/src/webui/templates/ask.html
+++ b/src/webui/templates/ask.html
@@ -13,24 +13,17 @@
     background: linear-gradient(135deg, rgba(30, 41, 59, 0.8), rgba(30, 41, 59, 0.4));
     backdrop-filter: blur(10px);
   }
-  .source-row {
-    transition: all 0.15s ease;
-  }
-  .source-row:hover {
-    background: rgba(59, 130, 246, 0.05);
-  }
+  .source-row { transition: all 0.15s ease; }
+  .source-row:hover { background: rgba(59, 130, 246, 0.05); }
   .loading-spinner {
     border: 3px solid rgba(59, 130, 246, 0.2);
     border-top-color: #3b82f6;
     border-radius: 50%;
-    width: 24px;
-    height: 24px;
+    width: 20px; height: 20px;
     animation: spin 0.8s linear infinite;
     display: inline-block;
   }
-  @keyframes spin {
-    to { transform: rotate(360deg); }
-  }
+  @keyframes spin { to { transform: rotate(360deg); } }
 
 
 {% endblock %}
@@ -38,7 +31,7 @@
-Ask natural language questions about IETF AI/agent drafts. Answers are synthesized from the most relevant documents.
+Search across {{ "{:,}".format(stats.total if stats is defined and stats else 434) }} drafts using keyword + semantic similarity. AI synthesis is optional.