#!/usr/bin/env python3 """ Save a Claude Code chat session to {project}/transcripts/ as: ---.json — all JSONL records (100% fidelity) ---.md — human-readable markdown Usage: python3 save-transcript.py [output_dir] # manual python3 save-transcript.py --verify # verify saved JSON vs source (stdin) # called from Stop hook """ import hashlib import json import os import re import sys from datetime import datetime, timezone from pathlib import Path # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def slugify(text: str, max_len: int = 40) -> str: text = text.lower() text = re.sub(r"[^\w\s-]", "", text) text = re.sub(r"[\s_]+", "-", text) text = text.strip("-") return text[:max_len].rstrip("-") def load_jsonl(path: str) -> list: records = [] with open(path, "r", encoding="utf-8") as fh: for line in fh: line = line.strip() if line: try: records.append(json.loads(line)) except json.JSONDecodeError as exc: print(f" [warn] bad JSON line: {exc}", file=sys.stderr) return records def find_jsonl_by_session(session_id: str) -> str | None: projects_dir = Path.home() / ".claude" / "projects" for p in projects_dir.rglob(f"{session_id}.jsonl"): return str(p) return None # --------------------------------------------------------------------------- # Content extraction # --------------------------------------------------------------------------- def content_to_text(content) -> str: """Flatten message content (str or list of blocks) to plain text / md.""" if isinstance(content, str): return content if not isinstance(content, list): return "" parts = [] for block in content: if not isinstance(block, dict): continue t = block.get("type", "") if t == "text": parts.append(block.get("text", "")) elif t == "tool_use": name = block.get("name", "tool") inp = json.dumps(block.get("input", {}), indent=2, ensure_ascii=False) parts.append(f"**Tool call:** `{name}`\n```json\n{inp}\n```") elif t == "tool_result": inner = block.get("content", "") if isinstance(inner, list): inner = "\n".join( b.get("text", "") for b in inner if isinstance(b, dict) and b.get("type") == "text" ) status = " *(error)*" if block.get("is_error") else "" parts.append(f"**Tool result**{status}\n```\n{inner}\n```") return "\n\n".join(p for p in parts if p) def strip_system_tags(text: str) -> str: """Remove , , and similar injected tags.""" text = re.sub(r".*?", "", text, flags=re.DOTALL) text = re.sub(r"]*>.*?]*>", "", text, flags=re.DOTALL) text = re.sub(r"", "", text) return text.strip() # --------------------------------------------------------------------------- # Metadata derivation # --------------------------------------------------------------------------- def derive_project_name(cwd: str) -> str: return slugify(Path(cwd.rstrip("/")).name or "project") def derive_task_name(records: list) -> str: for r in records: if r.get("type") != "user": continue content = r.get("message", {}).get("content", "") text = content_to_text(content) text = strip_system_tags(text) text = re.sub(r"\[Request interrupted[^\]]*\]", "", text) text = text.strip() if len(text) > 5: first_line = text.split("\n")[0].strip() return slugify(first_line) or "session" return "session" def derive_timestamp(records: list) -> datetime: for r in records: ts = r.get("timestamp", "") if ts: try: return datetime.fromisoformat(ts.replace("Z", "+00:00")) except ValueError: pass return datetime.now(timezone.utc) # --------------------------------------------------------------------------- # Markdown rendering # --------------------------------------------------------------------------- def render_markdown(records: list, session_id: str, cwd: str) -> str: messages = [r for r in records if r.get("type") in ("user", "assistant")] lines = [ f"# Transcript: {session_id}", "", f"**Project:** {cwd} ", f"**Messages:** {len(messages)}", "", "---", "", ] for msg in messages: role = "User" if msg.get("type") == "user" else "Assistant" ts = msg.get("timestamp", "") content = msg.get("message", {}).get("content", "") text = content_to_text(content) if role == "User": text = strip_system_tags(text) text = text.strip() if not text: continue header = f"### {role}" if ts: header += f" *({ts})*" lines.append(header) lines.append("") lines.append(text) lines.append("") lines.append("---") lines.append("") return "\n".join(lines) # --------------------------------------------------------------------------- # Save # --------------------------------------------------------------------------- def save(jsonl_path: str, output_dir: str | None) -> tuple[str, str]: records = load_jsonl(jsonl_path) if not records: raise ValueError(f"No records found in {jsonl_path}") # Find session metadata from first record that has it meta = next( (r for r in records if r.get("sessionId") or r.get("cwd")), records[0], ) session_id = meta.get("sessionId") or Path(jsonl_path).stem cwd = meta.get("cwd") or os.getcwd() project = derive_project_name(cwd) task = derive_task_name(records) dt = derive_timestamp(records) ts_str = dt.strftime("%Y%m%d-%H%M%S") base = f"{project}-{task}-{ts_str}" if output_dir is None: output_dir = os.path.join(cwd, "transcripts") os.makedirs(output_dir, exist_ok=True) json_path = os.path.join(output_dir, f"{base}.json") md_path = os.path.join(output_dir, f"{base}.md") # JSON: all records (100% fidelity — identical content to source JSONL) with open(json_path, "w", encoding="utf-8") as fh: json.dump(records, fh, indent=2, ensure_ascii=False) # Verify: re-read and compare checksums source_hash = _jsonl_hash(jsonl_path) saved_hash = _json_array_hash(json_path) if source_hash != saved_hash: raise RuntimeError( f"Fidelity check FAILED: source={source_hash} saved={saved_hash}\n" f" Source: {jsonl_path}\n Saved: {json_path}" ) # Markdown: human-readable rendering md = render_markdown(records, session_id, cwd) with open(md_path, "w", encoding="utf-8") as fh: fh.write(md) return md_path, json_path # --------------------------------------------------------------------------- # Fidelity verification # --------------------------------------------------------------------------- def _jsonl_hash(path: str) -> str: """Hash the ordered sequence of parsed records from a JSONL file.""" records = load_jsonl(path) # Normalise to canonical JSON so key order doesn't matter canonical = json.dumps(records, sort_keys=True, ensure_ascii=False) return hashlib.sha256(canonical.encode()).hexdigest() def _json_array_hash(path: str) -> str: """Hash the record array stored in a .json transcript file.""" with open(path, "r", encoding="utf-8") as fh: records = json.load(fh) canonical = json.dumps(records, sort_keys=True, ensure_ascii=False) return hashlib.sha256(canonical.encode()).hexdigest() def verify(json_path: str) -> bool: """Verify a saved .json transcript against its source JSONL.""" with open(json_path, "r", encoding="utf-8") as fh: records = json.load(fh) meta = next( (r for r in records if r.get("sessionId")), None, ) if not meta: print("ERROR: no sessionId found in saved transcript", file=sys.stderr) return False session_id = meta["sessionId"] jsonl_path = find_jsonl_by_session(session_id) if not jsonl_path: print(f"ERROR: source JSONL not found for session {session_id}", file=sys.stderr) return False source_hash = _jsonl_hash(jsonl_path) saved_hash = _json_array_hash(json_path) ok = source_hash == saved_hash status = "OK" if ok else "MISMATCH" print(f"[{status}] {json_path}") print(f" source: {jsonl_path}") print(f" source hash: {source_hash}") print(f" saved hash: {saved_hash}") return ok # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- def main(): # --verify mode if len(sys.argv) >= 3 and sys.argv[1] == "--verify": ok = verify(sys.argv[2]) sys.exit(0 if ok else 1) # Manual invocation if len(sys.argv) >= 2: jsonl_path = sys.argv[1] output_dir = sys.argv[2] if len(sys.argv) >= 3 else None md_path, json_path = save(jsonl_path, output_dir) print(f"Saved:\n {md_path}\n {json_path}") return # Called from Stop hook (JSON on stdin) if not sys.stdin.isatty(): try: hook_data = json.load(sys.stdin) except json.JSONDecodeError as exc: print(f"save-transcript: bad hook JSON: {exc}", file=sys.stderr) sys.exit(1) jsonl_path = hook_data.get("transcript_path") if not jsonl_path or not os.path.exists(jsonl_path): session_id = hook_data.get("session_id", "") jsonl_path = find_jsonl_by_session(session_id) if session_id else None if not jsonl_path or not os.path.exists(jsonl_path): print("save-transcript: cannot locate JSONL for this session", file=sys.stderr) sys.exit(1) cwd = hook_data.get("cwd") or os.getcwd() output_dir = os.path.join(cwd, "transcripts") try: md_path, json_path = save(jsonl_path, output_dir) print(f"Transcript saved:\n {md_path}\n {json_path}") except Exception as exc: print(f"save-transcript ERROR: {exc}", file=sys.stderr) sys.exit(1) return print(__doc__) sys.exit(1) if __name__ == "__main__": main()