17 KiB
Auto-saving Claude Code Session Transcripts
Automatically saves every Claude Code chat session to a transcripts/ folder in your project directory, in both machine-readable JSON and human-readable Markdown.
What gets saved
For each session, two files are written to <project-dir>/transcripts/:
| File | Contents |
|---|---|
| `<project>-<task>-<yyyymmdd>-<hhmmss>.json` | All raw JSONL records as a JSON array — 100% fidelity, SHA-256 verified |
| `<project>-<task>-<yyyymmdd>-<hhmmss>.md` | Human-readable rendering: user + assistant turns, tool calls in code blocks |
The project name is derived from the working directory basename; the task name from the first user message (slugified).
Setup
1. Create the script
Create ~/.claude/scripts/save-transcript.py:
mkdir -p ~/.claude/scripts
Paste the full script from the source below, or copy it directly:
cp /path/to/save-transcript.py ~/.claude/scripts/save-transcript.py
chmod +x ~/.claude/scripts/save-transcript.py
Full script — ~/.claude/scripts/save-transcript.py
#!/usr/bin/env python3
"""
Save a Claude Code chat session to {project}/transcripts/ as:
<project>-<task>-<yyyymmdd>-<hhmmss>.json — all JSONL records (100% fidelity)
<project>-<task>-<yyyymmdd>-<hhmmss>.md — human-readable markdown
Usage:
python3 save-transcript.py <jsonl_path> [output_dir] # manual
python3 save-transcript.py --verify <json_path> # verify saved JSON vs source
(stdin) # called from Stop hook
"""
import hashlib
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
def slugify(text: str, max_len: int = 40) -> str:
    """Lower-case *text* and reduce it to a filesystem-friendly slug.

    Non-word characters are dropped, runs of whitespace/underscores collapse
    to single hyphens, and the result is truncated to *max_len* characters
    (with any dangling hyphen removed).
    """
    cleaned = re.sub(r"[^\w\s-]", "", text.lower())
    hyphenated = re.sub(r"[\s_]+", "-", cleaned).strip("-")
    return hyphenated[:max_len].rstrip("-")
def load_jsonl(path: str) -> list:
    """Parse *path* as JSON Lines and return one decoded object per line.

    Blank lines are skipped; lines that fail to parse are reported to
    stderr and otherwise ignored, so one corrupt record never loses the rest.
    """
    out: list = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue
            try:
                out.append(json.loads(raw))
            except json.JSONDecodeError as exc:
                print(f" [warn] bad JSON line: {exc}", file=sys.stderr)
    return out
def find_jsonl_by_session(session_id: str) -> str | None:
projects_dir = Path.home() / ".claude" / "projects"
for p in projects_dir.rglob(f"{session_id}.jsonl"):
return str(p)
return None
def content_to_text(content) -> str:
    """Flatten a message ``content`` field (string or block list) to markdown.

    Plain strings pass through; for block lists, ``text`` blocks contribute
    their text, ``tool_use`` blocks render as a fenced-JSON call, and
    ``tool_result`` blocks render as fenced output (flagged when is_error).
    Anything else — including non-dict blocks — is ignored.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    rendered: list[str] = []
    for block in content:
        if not isinstance(block, dict):
            continue
        kind = block.get("type", "")
        if kind == "text":
            rendered.append(block.get("text", ""))
        elif kind == "tool_use":
            tool = block.get("name", "tool")
            args = json.dumps(block.get("input", {}), indent=2, ensure_ascii=False)
            rendered.append(f"**Tool call:** `{tool}`\n```json\n{args}\n```")
        elif kind == "tool_result":
            body = block.get("content", "")
            if isinstance(body, list):
                body = "\n".join(
                    b.get("text", "")
                    for b in body
                    if isinstance(b, dict) and b.get("type") == "text"
                )
            flag = " *(error)*" if block.get("is_error") else ""
            rendered.append(f"**Tool result**{flag}\n```\n{body}\n```")
    return "\n\n".join(piece for piece in rendered if piece)
def strip_system_tags(text: str) -> str:
    """Remove injected system/IDE markup from a user message and trim it."""
    # Order matters: paired tags (with bodies) first, then any stray
    # self-closing or unclosed system-reminder tag.
    for pattern, flags in (
        (r"<system-reminder>.*?</system-reminder>", re.DOTALL),
        (r"<ide_[^>]*>.*?</ide_[^>]*>", re.DOTALL),
        (r"<system-reminder[^/]*/?>", 0),
    ):
        text = re.sub(pattern, "", text, flags=flags)
    return text.strip()
def derive_project_name(cwd: str) -> str:
    """Slug of the working directory's basename ("project" when empty)."""
    basename = Path(cwd.rstrip("/")).name
    return slugify(basename or "project")
def derive_task_name(records: list) -> str:
    """Slug of the first substantive user message, or "session".

    Scans *records* in order for the first ``user`` record whose message
    text — after stripping injected system tags and "[Request interrupted…]"
    markers — is longer than five characters, then slugifies its first line.

    Records come from external JSONL, so a record may carry ``"message":
    null`` or a non-dict message; such records are skipped instead of
    raising AttributeError (which the previous ``r.get("message", {})``
    did when the key was present but null).
    """
    for record in records:
        if record.get("type") != "user":
            continue
        message = record.get("message")
        if not isinstance(message, dict):
            # Absent, null, or malformed message payload — keep scanning.
            continue
        text = content_to_text(message.get("content", ""))
        text = strip_system_tags(text)
        text = re.sub(r"\[Request interrupted[^\]]*\]", "", text).strip()
        if len(text) > 5:
            first_line = text.split("\n")[0].strip()
            return slugify(first_line) or "session"
    return "session"
def derive_timestamp(records: list) -> datetime:
    """Return the first parseable ISO-8601 ``timestamp`` in *records*.

    Trailing ``Z`` is normalized to ``+00:00`` for fromisoformat; when no
    record yields a valid timestamp, the current UTC time is returned.
    """
    for record in records:
        stamp = record.get("timestamp", "")
        if not stamp:
            continue
        try:
            return datetime.fromisoformat(stamp.replace("Z", "+00:00"))
        except ValueError:
            continue
    return datetime.now(timezone.utc)
def render_markdown(records: list, session_id: str, cwd: str) -> str:
    """Render the session's user/assistant turns as one markdown document.

    Produces a header (session id, project cwd, turn count) followed by one
    ``### User`` / ``### Assistant`` section per non-empty turn, separated
    by horizontal rules.  System tags are stripped from user turns only;
    turns whose text is empty after flattening are dropped.
    """
    turns = [r for r in records if r.get("type") in ("user", "assistant")]
    out: list[str] = [
        f"# Transcript: {session_id}",
        "",
        f"**Project:** {cwd} ",
        f"**Messages:** {len(turns)}",
        "",
        "---",
        "",
    ]
    for turn in turns:
        is_user = turn.get("type") == "user"
        body = content_to_text(turn.get("message", {}).get("content", ""))
        if is_user:
            body = strip_system_tags(body)
        body = body.strip()
        if not body:
            continue
        heading = "### User" if is_user else "### Assistant"
        stamp = turn.get("timestamp", "")
        if stamp:
            heading += f" *({stamp})*"
        out.extend([heading, "", body, "", "---", ""])
    return "\n".join(out)
def _jsonl_hash(path: str) -> str:
    """SHA-256 of the canonical (sorted-keys) JSON form of a JSONL file."""
    canonical = json.dumps(load_jsonl(path), sort_keys=True, ensure_ascii=False)
    return hashlib.sha256(canonical.encode()).hexdigest()
def _json_array_hash(path: str) -> str:
with open(path, "r", encoding="utf-8") as fh:
records = json.load(fh)
canonical = json.dumps(records, sort_keys=True, ensure_ascii=False)
return hashlib.sha256(canonical.encode()).hexdigest()
def save(jsonl_path: str, output_dir: str | None) -> tuple[str, str]:
    """Persist one session to ``<output_dir>/<project>-<task>-<ts>.{json,md}``.

    Reads every record from *jsonl_path*, writes them verbatim as a JSON
    array (fidelity-checked by SHA-256 against the source) plus a
    human-readable markdown rendering.  When *output_dir* is None, a
    ``transcripts/`` folder under the session's recorded cwd is used.

    Returns ``(md_path, json_path)``.  Raises ValueError when the source
    holds no records and RuntimeError when the round-tripped JSON does not
    hash-match the source.
    """
    records = load_jsonl(jsonl_path)
    if not records:
        raise ValueError(f"No records found in {jsonl_path}")

    # Prefer the first record carrying session metadata; fall back to record 0.
    meta = next(
        (r for r in records if r.get("sessionId") or r.get("cwd")),
        records[0],
    )
    session_id = meta.get("sessionId") or Path(jsonl_path).stem
    cwd = meta.get("cwd") or os.getcwd()

    stamp = derive_timestamp(records).strftime("%Y%m%d-%H%M%S")
    base = f"{derive_project_name(cwd)}-{derive_task_name(records)}-{stamp}"

    target_dir = output_dir if output_dir is not None else os.path.join(cwd, "transcripts")
    os.makedirs(target_dir, exist_ok=True)
    json_path = os.path.join(target_dir, f"{base}.json")
    md_path = os.path.join(target_dir, f"{base}.md")

    with open(json_path, "w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=2, ensure_ascii=False)

    # Round-trip fidelity check: the saved array must hash-match the source.
    source_hash = _jsonl_hash(jsonl_path)
    saved_hash = _json_array_hash(json_path)
    if source_hash != saved_hash:
        raise RuntimeError(
            f"Fidelity check FAILED: source={source_hash} saved={saved_hash}\n"
            f" Source: {jsonl_path}\n Saved: {json_path}"
        )

    with open(md_path, "w", encoding="utf-8") as handle:
        handle.write(render_markdown(records, session_id, cwd))
    return md_path, json_path
def verify(json_path: str) -> bool:
    """Re-hash a saved transcript against its source JSONL and report.

    Looks up the source file by the sessionId embedded in the saved records
    and prints an ``[OK]``/``[MISMATCH]`` report with both hashes.  Returns
    True only when the lookup succeeds and the canonical hashes agree.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        records = json.load(handle)
    meta = next((r for r in records if r.get("sessionId")), None)
    if not meta:
        print("ERROR: no sessionId found in saved transcript", file=sys.stderr)
        return False
    session_id = meta["sessionId"]
    source = find_jsonl_by_session(session_id)
    if not source:
        print(f"ERROR: source JSONL not found for session {session_id}", file=sys.stderr)
        return False
    want = _jsonl_hash(source)
    have = _json_array_hash(json_path)
    matched = want == have
    status = "OK" if matched else "MISMATCH"
    print(f"[{status}] {json_path}")
    print(f" source: {source}")
    print(f" source hash: {want}")
    print(f" saved hash: {have}")
    return matched
def main():
    """CLI / hook entry point.

    Dispatch, in order:
      * ``--verify <json>``      -> verify a saved transcript, exit 0/1
      * ``<jsonl> [output_dir]`` -> manual save from an explicit path
      * piped stdin              -> Stop-hook mode: JSON payload carrying
                                    transcript_path / session_id / cwd
      * otherwise                -> print usage (module docstring), exit 1
    """
    argv = sys.argv
    if len(argv) >= 3 and argv[1] == "--verify":
        sys.exit(0 if verify(argv[2]) else 1)

    if len(argv) >= 2:
        md_path, json_path = save(argv[1], argv[2] if len(argv) >= 3 else None)
        print(f"Saved:\n {md_path}\n {json_path}")
        return

    if not sys.stdin.isatty():
        try:
            payload = json.load(sys.stdin)
        except json.JSONDecodeError as exc:
            print(f"save-transcript: bad hook JSON: {exc}", file=sys.stderr)
            sys.exit(1)
        jsonl_path = payload.get("transcript_path")
        if not jsonl_path or not os.path.exists(jsonl_path):
            # Older hook payloads lack transcript_path; fall back to a scan.
            sid = payload.get("session_id", "")
            jsonl_path = find_jsonl_by_session(sid) if sid else None
        if not jsonl_path or not os.path.exists(jsonl_path):
            print("save-transcript: cannot locate JSONL for this session", file=sys.stderr)
            sys.exit(1)
        out_dir = os.path.join(payload.get("cwd") or os.getcwd(), "transcripts")
        try:
            md_path, json_path = save(jsonl_path, out_dir)
            print(f"Transcript saved:\n {md_path}\n {json_path}")
        except Exception as exc:
            # Hook mode: surface the failure without a traceback.
            print(f"save-transcript ERROR: {exc}", file=sys.stderr)
            sys.exit(1)
        return

    print(__doc__)
    sys.exit(1)
# Run the CLI/hook dispatcher only when executed as a script, not on import.
if __name__ == "__main__":
    main()
2. Register the Stop hook
Claude Code runs hooks defined in ~/.claude/settings.json. The Stop event fires each time Claude finishes a response turn.
Edit (or create) ~/.claude/settings.json:
{
"hooks": {
"Stop": [
{
"hooks": [
{
"type": "command",
"command": "python3 /YOUR_HOME/.claude/scripts/save-transcript.py"
}
]
}
]
}
}
Replace /YOUR_HOME with your actual home directory (e.g. /Users/alice). Using ~ in the command path does not expand reliably in hooks.
If settings.json already has content, merge the "hooks" key into the existing object — don't replace the whole file.
3. (Optional) Add a reminder to CLAUDE.md
Add this section to ~/.claude/CLAUDE.md so Claude knows transcripts are being saved:
## Transcripts
Every session is auto-saved to `transcripts/` in the project's working directory via the `Stop` hook.
File naming: `<project>-<task>-<yyyymmdd>-<hhmmss>.<ext>`
- `.json` — all JSONL records as a JSON array (100% fidelity, SHA-256 verified)
- `.md` — human-readable markdown (user + assistant turns, tool calls shown)
Manual save:
python3 ~/.claude/scripts/save-transcript.py <jsonl_path> [output_dir]
Verify saved transcript against source:
python3 ~/.claude/scripts/save-transcript.py --verify <transcript.json>
Manual usage
Save any session by JSONL path:
python3 ~/.claude/scripts/save-transcript.py \
~/.claude/projects/-Users-alice-myproject/abc123.jsonl
Save to a custom directory:
python3 ~/.claude/scripts/save-transcript.py \
~/.claude/projects/.../abc123.jsonl \
/path/to/output/dir
Verify a saved transcript has not drifted from its source:
python3 ~/.claude/scripts/save-transcript.py --verify \
myproject/transcripts/myproject-some-task-20260222-114800.json
# → [OK] ... or [MISMATCH] ...
How it works
Claude Code stores every session as a .jsonl file at:
~/.claude/projects/<encoded-cwd>/<session-uuid>.jsonl
where <encoded-cwd> is the project path with / replaced by -.
Each line is a JSON record of type user, assistant, queue-operation, file-history-snapshot, etc. The script:
- Reads all records from the source JSONL.
- Writes them as a JSON array to `<project>/transcripts/<name>.json`.
- Computes a SHA-256 hash of the canonical (sort-keys) JSON for both source and saved file, and raises an error if they differ.
- Renders a `.md` file from the `user` and `assistant` records, stripping injected system tags and formatting tool calls as fenced code blocks.
The Stop hook passes a JSON object on stdin with at minimum session_id and (in recent Claude Code versions) transcript_path. The script falls back to scanning ~/.claude/projects/ by session ID if transcript_path is absent.
Notes
- One file per session start, overwritten each turn. The timestamp in the filename comes from the session's first message, so all saves within a session share the same base name. The final state after the last turn is what remains.
- Python 3.10+ required (uses `str | None` union syntax). Test with `python3 --version`.
- No third-party dependencies — stdlib only.
- The hook fires on every assistant turn, not just at explicit session close, so transcripts are incrementally up to date even if a session is force-quit.
TODO
1. Security — encryption at rest
Transcripts are currently saved as plain text JSON and Markdown. For sessions that include sensitive context (credentials, private code, personal data), this is a risk on shared or unencrypted disks.
Options to explore:
- Symmetric encryption at save time — encrypt the `.json` and `.md` files with a key derived from a passphrase (e.g. via `cryptography`/Fernet, or `age`). Requires a decryption step before viewing.
- Selective redaction — pre-process records before saving: strip tool results that match patterns (API keys, tokens, file paths outside the project), log a redaction marker in their place.
- Disk-level encryption — rely on macOS FileVault or a dedicated encrypted volume for the `transcripts/` directory. Simpler operationally, but doesn't protect against an already-unlocked session.
- Opt-out per project — check for a `.no-transcript` marker file in the project root and skip saving if found. Useful for projects where you never want anything persisted.
Key question to answer: is the threat model "protect files if the laptop is stolen" (disk encryption is enough) or "protect against an attacker with filesystem access to a running system" (requires key management)?
2. Interval saves and crash recovery
The current design saves on every Stop event (each assistant turn), which gives reasonable durability — but a very long single turn (e.g. a multi-minute agentic task) won't be checkpointed mid-flight, and force-quitting before Claude responds at all will leave the last user message unsaved.
Options to explore:
- Periodic background save — a `launchd` plist (macOS) or `cron` job that runs the save script every N minutes, scanning `~/.claude/projects/` for JSONL files newer than the last saved transcript. This is the most robust recovery path for power interruptions.
- `PreToolUse` hook checkpoint — fire a lightweight checkpoint save before each tool call. Already partially covered by the current approach (tool results appear in subsequent turns), but worth testing for long agentic sessions.
- Append-only saves — instead of overwriting the same filename, write a new file each save cycle with an incrementing suffix or current timestamp. Lets you reconstruct sessions from partial saves. Trade-off: many small files.
- Signal handler in the script — trap `SIGTERM`/`SIGINT` and flush a partial save before exit. Only helps if the process is killed cleanly.
Worth testing: how large do JSONL files get in a 2-hour agentic session? That determines whether the "overwrite on each turn" approach is fast enough not to lag behind the session.
3. Automatic git commit of transcripts
Persisting transcripts in a git repository gives versioned history, remote backup, and a searchable audit trail.
Options to explore:
- Dedicated transcripts repo — a separate bare repo (local or remote) that only holds transcripts. The save script commits and pushes after each save. Clean separation from project code; no risk of transcript noise in project history.
- Subdirectory in the project repo — commit `transcripts/` as part of the existing project. Simpler, but means transcript commits appear in the project log. Mitigate with a branch (`transcripts`) or a `.gitattributes` rule to exclude from `git log --oneline`.
- Post-save hook extension — extend `save-transcript.py` to optionally run `git -C <transcripts_dir> add . && git commit -m "transcript: <base_name>"` after a successful save. Add a `--git-push` flag to also push to a remote.
- Rclone / cloud sync — if the transcripts directory is synced to S3, Backblaze, or similar via `rclone`, git may be unnecessary. Simpler for backup; loses diff history.
Key decisions: (a) same repo or separate, (b) commit on every turn or only at session end, (c) whether to push automatically (requires network access and auth on every save, which will slow the hook).