320 lines
11 KiB
Python
Executable File
320 lines
11 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Save a Claude Code chat session to {project}/transcripts/ as:
  <project>-<task>-<yyyymmdd>-<hhmmss>.json  — all JSONL records (100% fidelity)
  <project>-<task>-<yyyymmdd>-<hhmmss>.md    — human-readable markdown

Usage:
  python3 save-transcript.py <jsonl_path> [output_dir]   # manual
  python3 save-transcript.py --verify <json_path>        # verify saved JSON vs source
  (hook JSON on stdin)                                   # called from Stop hook
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def slugify(text: str, max_len: int = 40) -> str:
    """Turn free-form text into a lowercase, hyphen-separated slug.

    Characters other than word characters, whitespace and hyphens are
    dropped, and each run of whitespace/underscores becomes one hyphen.
    Leading and trailing hyphens are removed, then the result is cut to
    ``max_len`` characters.
    """
    lowered = text.lower()
    cleaned = re.sub(r"[^\w\s-]", "", lowered)
    hyphenated = re.sub(r"[\s_]+", "-", cleaned).strip("-")
    # Truncation can leave a hyphen at the cut point, so trim the end again.
    truncated = hyphenated[:max_len]
    return truncated.rstrip("-")
|
|
|
|
|
|
def load_jsonl(path: str) -> list:
    """Parse a JSONL file into a list of decoded records.

    Blank lines are ignored. A line that is not valid JSON is reported on
    stderr and skipped, so the returned list may hold fewer entries than
    the file has non-empty lines.
    """
    parsed = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            stripped = raw.strip()
            if not stripped:
                continue
            try:
                record = json.loads(stripped)
            except json.JSONDecodeError as exc:
                print(f" [warn] bad JSON line: {exc}", file=sys.stderr)
            else:
                parsed.append(record)
    return parsed
|
|
|
|
|
|
def find_jsonl_by_session(session_id: str) -> str | None:
|
|
projects_dir = Path.home() / ".claude" / "projects"
|
|
for p in projects_dir.rglob(f"{session_id}.jsonl"):
|
|
return str(p)
|
|
return None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Content extraction
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def content_to_text(content) -> str:
    """Render message content as plain text / markdown.

    ``content`` may be a plain string (returned unchanged) or a list of
    block dicts. ``text``, ``tool_use`` and ``tool_result`` blocks are
    rendered; other block types and non-dict entries are ignored. Any other
    kind of ``content`` yields an empty string. Rendered blocks are joined
    with a blank line between them.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    rendered = [
        _render_content_block(entry) for entry in content if isinstance(entry, dict)
    ]
    return "\n\n".join(piece for piece in rendered if piece)


def _render_content_block(block: dict):
    """Render one content block; returns a falsy value for blocks to skip."""
    kind = block.get("type", "")
    if kind == "text":
        return block.get("text", "")
    if kind == "tool_use":
        tool_name = block.get("name", "tool")
        payload = json.dumps(block.get("input", {}), indent=2, ensure_ascii=False)
        return f"**Tool call:** `{tool_name}`\n```json\n{payload}\n```"
    if kind == "tool_result":
        body = block.get("content", "")
        if isinstance(body, list):
            # Only the text parts of a structured result are kept.
            body = "\n".join(
                item.get("text", "")
                for item in body
                if isinstance(item, dict) and item.get("type") == "text"
            )
        suffix = " *(error)*" if block.get("is_error") else ""
        return f"**Tool result**{suffix}\n```\n{body}\n```"
    return None
|
|
|
|
|
|
def strip_system_tags(text: str) -> str:
    """Remove injected ``<system-reminder>`` and ``<ide_*>`` markup from text.

    Three passes are applied in order:

    1. complete ``<system-reminder>...</system-reminder>`` sections,
    2. complete ``<ide_*>...</ide_*>`` sections,
    3. any leftover ``<system-reminder ...>`` opening or self-closing tag.

    Returns the remaining text with surrounding whitespace stripped.
    """
    text = re.sub(r"<system-reminder>.*?</system-reminder>", "", text, flags=re.DOTALL)
    text = re.sub(r"<ide_[^>]*>.*?</ide_[^>]*>", "", text, flags=re.DOTALL)
    # Stop at the tag's own ">": the previous "[^/]*" class also matched ">"
    # and newlines, so an unclosed tag swallowed real content up to the last
    # ">" that preceded a "/" (or the end of the text).
    text = re.sub(r"<system-reminder[^>]*>", "", text)
    return text.strip()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Metadata derivation
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def derive_project_name(cwd: str) -> str:
    """Return a slug for the project, taken from the last component of ``cwd``.

    Falls back to ``"project"`` when ``cwd`` has no final component (for
    example ``"/"`` or ``""``) or when that component slugifies to an empty
    string (for example a directory named ``"!!!"``).
    """
    dir_name = Path(cwd.rstrip("/")).name
    # Apply the fallback after slugifying so a punctuation-only directory
    # name cannot produce an empty project part in the output filename.
    return slugify(dir_name) or "project"
|
|
|
|
|
|
def derive_task_name(records: list) -> str:
    """Build a task slug from the first meaningful user message.

    Records whose ``type`` is ``"user"`` are scanned in order. Each message
    is flattened to text, injected system tags and ``[Request interrupted
    ...]`` markers are removed, and the first message with more than five
    characters left decides the result: its first line is slugified.
    ``"session"`` is returned when that slug is empty or no message
    qualifies.
    """
    user_records = (rec for rec in records if rec.get("type") == "user")
    for rec in user_records:
        raw = content_to_text(rec.get("message", {}).get("content", ""))
        cleaned = strip_system_tags(raw)
        cleaned = re.sub(r"\[Request interrupted[^\]]*\]", "", cleaned).strip()
        if len(cleaned) <= 5:
            continue
        headline = cleaned.split("\n")[0].strip()
        return slugify(headline) or "session"
    return "session"
|
|
|
|
|
|
def derive_timestamp(records: list) -> datetime:
    """Return the first parseable record timestamp, or the current UTC time.

    Records are scanned in order; empty or missing ``timestamp`` values and
    values that ``datetime.fromisoformat`` rejects are skipped. When no
    record yields a timestamp, the current time in UTC is returned.
    """
    for record in records:
        stamp = record.get("timestamp", "")
        if not stamp:
            continue
        # "Z" is rewritten as an explicit "+00:00" offset before parsing.
        iso_text = stamp.replace("Z", "+00:00")
        try:
            return datetime.fromisoformat(iso_text)
        except ValueError:
            continue
    return datetime.now(timezone.utc)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Markdown rendering
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def render_markdown(records: list, session_id: str, cwd: str) -> str:
    """Render the user/assistant messages of a session as a markdown document.

    The document starts with a title, the project path and the number of
    user/assistant records, followed by one section per message with
    non-empty text. Injected system tags are stripped from user messages
    only. The message count in the header includes records that are later
    skipped because their rendered text is empty.

    Args:
        records: Parsed JSONL records of the session.
        session_id: Identifier shown in the document title.
        cwd: Project path shown in the header.

    Returns:
        The markdown text, lines joined with ``"\\n"``.
    """
    messages = [r for r in records if r.get("type") in ("user", "assistant")]
    lines = [
        f"# Transcript: {session_id}",
        "",
        # Two trailing spaces are a Markdown hard line break; with a single
        # space the Project and Messages lines render as one paragraph line.
        f"**Project:** {cwd}  ",
        f"**Messages:** {len(messages)}",
        "",
        "---",
        "",
    ]
    for msg in messages:
        role = "User" if msg.get("type") == "user" else "Assistant"
        ts = msg.get("timestamp", "")
        content = msg.get("message", {}).get("content", "")
        text = content_to_text(content)
        if role == "User":
            text = strip_system_tags(text)
        text = text.strip()
        if not text:
            # Nothing printable (e.g. only stripped tags or unsupported blocks).
            continue
        header = f"### {role}"
        if ts:
            header += f" *({ts})*"
        lines.extend([header, "", text, "", "---", ""])
    return "\n".join(lines)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Save
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def save(jsonl_path: str, output_dir: str | None) -> tuple[str, str]:
    """Write a session's JSONL records as a ``.json`` and a ``.md`` transcript.

    The output base name is ``<project>-<task>-<yyyymmdd>-<hhmmss>``, derived
    from the session's working directory, first meaningful user message and
    first record timestamp.

    Args:
        jsonl_path: Path of the source JSONL session file.
        output_dir: Directory to write into; when ``None``, the
            ``transcripts`` directory under the session's ``cwd`` is used.
            The directory is created if missing.

    Returns:
        ``(md_path, json_path)`` of the files written.

    Raises:
        ValueError: If the JSONL file yields no records.
        RuntimeError: If the saved JSON does not reproduce the loaded
            records (fidelity check).
    """
    records = load_jsonl(jsonl_path)
    if not records:
        raise ValueError(f"No records found in {jsonl_path}")

    # Find session metadata from first record that has it
    meta = next(
        (r for r in records if r.get("sessionId") or r.get("cwd")),
        records[0],
    )
    session_id = meta.get("sessionId") or Path(jsonl_path).stem
    cwd = meta.get("cwd") or os.getcwd()

    project = derive_project_name(cwd)
    task = derive_task_name(records)
    dt = derive_timestamp(records)
    ts_str = dt.strftime("%Y%m%d-%H%M%S")
    base = f"{project}-{task}-{ts_str}"

    if output_dir is None:
        output_dir = os.path.join(cwd, "transcripts")
    os.makedirs(output_dir, exist_ok=True)

    json_path = os.path.join(output_dir, f"{base}.json")
    md_path = os.path.join(output_dir, f"{base}.md")

    # JSON: all records (100% fidelity — identical content to source JSONL)
    with open(json_path, "w", encoding="utf-8") as fh:
        json.dump(records, fh, indent=2, ensure_ascii=False)

    # Verify: hash the records already in memory and compare with the file
    # just written. Re-reading the source JSONL here would print every
    # bad-line warning a second time and, if the session file is still being
    # appended to, could fail the check although the save itself was correct.
    # The canonical form (sorted keys, ensure_ascii=False) must stay the same
    # as the one used for the saved file's hash.
    canonical = json.dumps(records, sort_keys=True, ensure_ascii=False)
    source_hash = hashlib.sha256(canonical.encode()).hexdigest()
    saved_hash = _json_array_hash(json_path)
    if source_hash != saved_hash:
        raise RuntimeError(
            f"Fidelity check FAILED: source={source_hash} saved={saved_hash}\n"
            f" Source: {jsonl_path}\n Saved: {json_path}"
        )

    # Markdown: human-readable rendering
    md = render_markdown(records, session_id, cwd)
    with open(md_path, "w", encoding="utf-8") as fh:
        fh.write(md)

    return md_path, json_path
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fidelity verification
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _jsonl_hash(path: str) -> str:
    """Return the SHA-256 hex digest of a JSONL file's parsed records.

    The records are loaded in file order and serialised as one JSON array
    before hashing.
    """
    parsed = load_jsonl(path)
    # sort_keys makes the digest independent of key order within a record.
    serialized = json.dumps(parsed, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256()
    digest.update(serialized.encode())
    return digest.hexdigest()
|
|
|
|
|
|
def _json_array_hash(path: str) -> str:
|
|
"""Hash the record array stored in a .json transcript file."""
|
|
with open(path, "r", encoding="utf-8") as fh:
|
|
records = json.load(fh)
|
|
canonical = json.dumps(records, sort_keys=True, ensure_ascii=False)
|
|
return hashlib.sha256(canonical.encode()).hexdigest()
|
|
|
|
|
|
def verify(json_path: str) -> bool:
    """Check a saved ``.json`` transcript against its source JSONL file.

    The session id is taken from the first saved record that carries a
    ``sessionId``; the source file is then looked up by that id and both
    sides are hashed. A report is printed to stdout; lookup failures are
    reported on stderr.

    Returns:
        ``True`` when the hashes match, ``False`` on mismatch or when the
        session id or the source file cannot be found.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        saved_records = json.load(handle)

    session_id = None
    for rec in saved_records:
        if rec.get("sessionId"):
            session_id = rec["sessionId"]
            break
    if session_id is None:
        print("ERROR: no sessionId found in saved transcript", file=sys.stderr)
        return False

    jsonl_path = find_jsonl_by_session(session_id)
    if not jsonl_path:
        print(f"ERROR: source JSONL not found for session {session_id}", file=sys.stderr)
        return False

    source_hash = _jsonl_hash(jsonl_path)
    saved_hash = _json_array_hash(json_path)
    matches = source_hash == saved_hash

    label = "OK" if matches else "MISMATCH"
    report = (
        f"[{label}] {json_path}",
        f" source: {jsonl_path}",
        f" source hash: {source_hash}",
        f" saved hash: {saved_hash}",
    )
    for report_line in report:
        print(report_line)
    return matches
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entry point
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def main():
    """Command-line entry point.

    Modes, checked in this order:

    * ``--verify <json_path>``: verify a saved transcript; exits 0 on a
      match and 1 otherwise. ``--verify`` without a path prints the usage
      text and exits 1.
    * ``<jsonl_path> [output_dir]``: save the given session file.
    * no arguments, stdin not a TTY: read hook JSON from stdin and save the
      session it names. The keys read are ``transcript_path``,
      ``session_id`` and ``cwd``.

    With no arguments and a TTY on stdin, the usage text is printed and the
    process exits 1.
    """
    # --verify mode
    if len(sys.argv) >= 2 and sys.argv[1] == "--verify":
        if len(sys.argv) < 3:
            # Without this guard the call fell through to manual mode and
            # tried to open a file literally named "--verify".
            print(__doc__)
            sys.exit(1)
        ok = verify(sys.argv[2])
        sys.exit(0 if ok else 1)

    # Manual invocation
    if len(sys.argv) >= 2:
        jsonl_path = sys.argv[1]
        output_dir = sys.argv[2] if len(sys.argv) >= 3 else None
        md_path, json_path = save(jsonl_path, output_dir)
        print(f"Saved:\n {md_path}\n {json_path}")
        return

    # Called from Stop hook (JSON on stdin)
    if not sys.stdin.isatty():
        try:
            hook_data = json.load(sys.stdin)
        except json.JSONDecodeError as exc:
            print(f"save-transcript: bad hook JSON: {exc}", file=sys.stderr)
            sys.exit(1)
        if not isinstance(hook_data, dict):
            # Valid JSON that is not an object has no keys to read.
            print("save-transcript: bad hook JSON: expected an object", file=sys.stderr)
            sys.exit(1)

        jsonl_path = hook_data.get("transcript_path")
        if not jsonl_path or not os.path.exists(jsonl_path):
            # Fall back to searching by session id when the path is unusable.
            session_id = hook_data.get("session_id", "")
            jsonl_path = find_jsonl_by_session(session_id) if session_id else None

        if not jsonl_path or not os.path.exists(jsonl_path):
            print("save-transcript: cannot locate JSONL for this session", file=sys.stderr)
            sys.exit(1)

        cwd = hook_data.get("cwd") or os.getcwd()
        output_dir = os.path.join(cwd, "transcripts")
        try:
            md_path, json_path = save(jsonl_path, output_dir)
            print(f"Transcript saved:\n {md_path}\n {json_path}")
        except Exception as exc:
            # Top-level boundary: report any failure on stderr and exit 1.
            print(f"save-transcript ERROR: {exc}", file=sys.stderr)
            sys.exit(1)
        return

    print(__doc__)
    sys.exit(1)
|
|
|
|
|
|
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|