feat: add memory injection audit trail for effectiveness tracking

2026-04-04 07:36:55 +02:00
parent 8af9db2c12
commit dd82944529
3 changed files with 193 additions and 2 deletions
--- a/lib/archeflow-memory.sh
+++ b/lib/archeflow-memory.sh
@@ -201,6 +201,16 @@ cmd_inject() {
  local domain="${1:-}"
  local archetype="${2:-}"

+  # Parse optional --audit <run_id>
+  local audit_run_id=""
+  shift 2 2>/dev/null || true
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --audit) audit_run_id="$2"; shift 2 ;;
+      *) shift ;;
+    esac
+  done
+
  if [[ ! -f "$LESSONS_FILE" ]]; then
    return 0
  fi
@@ -237,14 +247,118 @@ cmd_inject() {
    return 0
  fi

+  # Collect injected lesson IDs for audit
+  local injected_ids=()
+
  echo "## Known Issues (from past runs)"
  while IFS= read -r lesson; do
-    local desc freq src
+    local desc freq src lid
    desc=$(echo "$lesson" | jq -r '.description')
    freq=$(echo "$lesson" | jq -r '.frequency')
    src=$(echo "$lesson" | jq -r '.source')
+    lid=$(echo "$lesson" | jq -r '.id')
+    injected_ids+=("$lid")
    echo "- ${desc} [seen ${freq}x, ${src}]"
  done <<< "$lessons"
+
+  # Write audit record if --audit was passed
+  if [[ -n "$audit_run_id" && ${#injected_ids[@]} -gt 0 ]]; then
+    ensure_dir
+    local AUDIT_FILE="${MEMORY_DIR}/audit.jsonl"
+    local ids_json
+    ids_json=$(printf '%s\n' "${injected_ids[@]}" | jq -R . | jq -sc .)
+    jq -cn \
+      --arg ts "$(now_ts)" \
+      --arg run_id "$audit_run_id" \
+      --arg domain "$domain" \
+      --arg archetype "$archetype" \
+      --argjson lessons_injected "$ids_json" \
+      --argjson lesson_count "${#injected_ids[@]}" \
+      '{ts:$ts,run_id:$run_id,domain:$domain,archetype:$archetype,lessons_injected:$lessons_injected,lesson_count:$lesson_count}' \
+      >> "$AUDIT_FILE"
+  fi
+}
+
+# Score every lesson injected into a run against that run's review findings
+# and append one "effectiveness_check" record per lesson to audit.jsonl.
+# A lesson whose description tokens overlap the findings by >= 30% is
+# recorded as "ineffective" (the issue recurred), otherwise as "helpful".
+# Arguments: $1 - run id (required). Reads MEMORY_DIR and LESSONS_FILE.
+# Outputs:   a per-lesson summary on stderr. Returns 0 even when the audit
+#            log, the events file, or matching audit records are missing.
+cmd_audit_check() {
+  local run_id="${1:?Usage: $0 audit-check <run_id>}"
+  local AUDIT_FILE="${MEMORY_DIR}/audit.jsonl"
+  local EVENTS_FILE=".archeflow/events/${run_id}.jsonl"
+
+  if [[ ! -f "$AUDIT_FILE" ]]; then
+    echo "No audit records found." >&2
+    return 0
+  fi
+  if [[ ! -f "$EVENTS_FILE" ]]; then
+    echo "No events file found for run $run_id." >&2
+    return 0
+  fi
+
+  # Unique lesson IDs from this run's injection records. The run id goes in
+  # via --arg so it is compared as data, never spliced into the jq program;
+  # `[]?` skips effectiveness_check rows, which carry no lessons_injected.
+  local lesson_ids
+  lesson_ids=$(jq -r --arg run_id "$run_id" \
+    'select(.run_id == $run_id) | .lessons_injected[]?' \
+    "$AUDIT_FILE" 2>/dev/null | sort -u || true)
+  if [[ -z "$lesson_ids" ]]; then
+    echo "No audit records for run $run_id." >&2
+    return 0
+  fi
+
+  # Token set of all review findings: lowercased alphanumeric words of three
+  # or more characters. Built once because it is the same for every lesson.
+  local finding_tokens
+  finding_tokens=$(jq -r '
+    select(.type == "review.verdict") |
+    .data.findings[]? | .description // empty
+  ' "$EVENTS_FILE" 2>/dev/null | tr '[:upper:]' '[:lower:]' \
+    | tr -cs '[:alnum:]' '\n' | awk 'length >= 3' | sort -u || true)
+
+  local lid lesson_desc lesson_tokens common total overlap effectiveness
+  while IFS= read -r lid; do
+    [[ -z "$lid" ]] && continue
+    lesson_desc=$(jq -r --arg id "$lid" 'select(.id == $id) | .description // empty' \
+      "$LESSONS_FILE" 2>/dev/null | head -1 || true)
+    [[ -z "$lesson_desc" ]] && continue
+
+    # comm(1) needs both inputs sorted, so sort and de-duplicate the lesson
+    # tokens here rather than relying on the order tokenize emits; grep -c .
+    # counts only non-empty lines, so a token-less lesson gives total=0, not 1.
+    lesson_tokens=$(tokenize "$lesson_desc" | sort -u)
+    total=$(grep -c . <<< "$lesson_tokens" || true)
+    overlap=0
+    if [[ -n "$finding_tokens" && "$total" -gt 0 ]]; then
+      common=$(comm -12 <(printf '%s\n' "$lesson_tokens") \
+        <(printf '%s\n' "$finding_tokens") | wc -l)
+      overlap=$(( common * 100 / total ))
+    fi
+
+    # 30%+ of the lesson's tokens showing up in findings counts as a repeat.
+    if [[ "$overlap" -ge 30 ]]; then
+      effectiveness="ineffective"  # Issue repeated despite lesson injection
+    else
+      effectiveness="helpful"  # Issue was prevented (no matching finding)
+    fi
+
+    jq -cn \
+      --arg ts "$(now_ts)" \
+      --arg run_id "$run_id" \
+      --arg lesson_id "$lid" \
+      --arg lesson_desc "$lesson_desc" \
+      --arg effectiveness "$effectiveness" \
+      --argjson overlap "$overlap" \
+      '{ts:$ts,run_id:$run_id,type:"effectiveness_check",lesson_id:$lesson_id,lesson_desc:$lesson_desc,effectiveness:$effectiveness,keyword_overlap_pct:$overlap}' \
+      >> "$AUDIT_FILE"
+
+    echo "[archeflow-memory] Lesson $lid ($effectiveness): $lesson_desc" >&2
+  done <<< "$lesson_ids"
 }

 cmd_add() {
@@ -383,11 +497,12 @@ if [[ $# -lt 1 ]]; then
  echo "" >&2
  echo "Commands:" >&2
  echo "  extract <events.jsonl>       Extract lessons from a completed run" >&2
-  echo "  inject <domain> <archetype>  Output relevant lessons for injection" >&2
+  echo "  inject <domain> <archetype> [--audit <run_id>]  Output relevant lessons for injection" >&2
  echo "  add <type> <description>     Manually add a lesson" >&2
  echo "  list                         List all active lessons" >&2
  echo "  decay                        Apply decay to all lessons" >&2
  echo "  forget <id>                  Archive a lesson by ID" >&2
+  echo "  audit-check <run_id>         Check lesson effectiveness for a run" >&2
  exit 1
 fi

@@ -416,6 +531,10 @@ case "$COMMAND" in
    [[ $# -lt 1 ]] && { echo "Usage: $0 forget <id>" >&2; exit 1; }
    cmd_forget "$1"
    ;;
+  audit-check)
+    [[ $# -lt 1 ]] && { echo "Usage: $0 audit-check <run_id>" >&2; exit 1; }
+    cmd_audit_check "$1"
+    ;;
  *)
    echo "Unknown command: $COMMAND" >&2
    exit 1
--- a/skills/memory/SKILL.md
+++ b/skills/memory/SKILL.md
@@ -215,6 +215,59 @@ Moves the lesson to `archive.jsonl` regardless of frequency.
 | Before agent spawn (run start) | Inject relevant lessons | `archeflow-memory.sh inject <domain> <archetype>` |
 | User command | Add/list/forget lessons | `archeflow-memory.sh add/list/forget` |

+## Audit Trail
+
+Track which lessons are injected into each run and whether they were effective.
+
+### Storage
+
+```
+.archeflow/memory/audit.jsonl    # Append-only audit log
+```
+
+### Injection Audit Record
+
+When `--audit <run_id>` is passed to the `inject` command and at least one lesson is injected, an audit record is appended to `audit.jsonl`:
+
+```jsonl
+{"ts":"2026-04-04T10:00:00Z","run_id":"2026-04-04-auth-fix","domain":"code","archetype":"","lessons_injected":["m-001","m-003"],"lesson_count":2}
+```
+
+Usage:
+```bash
+./lib/archeflow-memory.sh inject "$DOMAIN" "" --audit "$RUN_ID"
+```
+
+### Effectiveness Check
+
+After a run completes, check whether injected lessons prevented issues:
+
+```bash
+./lib/archeflow-memory.sh audit-check <run_id>
+```
+
+This command:
+1. Reads `audit.jsonl` for lessons injected in the given run
+2. Reads the run's event file for `review.verdict` events
+3. For each injected lesson, computes the percentage of the lesson's description keywords that also appear in the review findings
+4. **No matching finding** (overlap below 30%) = `helpful` (the lesson likely prevented the issue)
+5. **Matching finding** (overlap of 30% or more) = `ineffective` (the issue repeated despite the lesson being injected)
+6. Appends effectiveness results to `audit.jsonl`
+
+### Effectiveness Over Time
+
+By querying `audit.jsonl` for effectiveness records, you can measure:
+- Which lessons consistently prevent issues (high `helpful` count)
+- Which lessons are not working (high `ineffective` count — consider rewording or removing)
+- Overall memory system ROI (ratio of helpful to ineffective across all runs)
+
+```bash
+# Count effectiveness per lesson
+jq -r 'select(.type == "effectiveness_check") | [.lesson_id, .effectiveness] | @tsv' .archeflow/memory/audit.jsonl | sort | uniq -c
+```
+
+---
+
 ## Design Principles

 1. **Append-only storage.** `lessons.jsonl` is append-only during writes; decay rewrites the file in place but preserves all data (archived lessons move to `archive.jsonl`).
--- a/skills/run/SKILL.md
+++ b/skills/run/SKILL.md
@@ -63,6 +63,25 @@ After emitting `run.start`, record `SEQ_RUN_START=1`.

 If `--start-from` is specified, verify that the required prior artifacts exist in `.archeflow/artifacts/${RUN_ID}/` before skipping phases. If missing, abort with an error.

+#### 0b. Memory Injection
+
+Load cross-run memory lessons and inject them into the agent prompts. Pass `--audit "$RUN_ID"` so that the lessons injected for this run are recorded in the audit trail:
+
+```bash
+# Load cross-run memory for this domain (with audit trail)
+MEMORY_LESSONS=$(./lib/archeflow-memory.sh inject "$DOMAIN" "" --audit "$RUN_ID")
+
+# Inject into Explorer/Creator prompts if non-empty
+if [[ -n "$MEMORY_LESSONS" ]]; then
+  EXPLORER_PROMPT="${EXPLORER_PROMPT}
+
+${MEMORY_LESSONS}"
+  CREATOR_PROMPT="${CREATOR_PROMPT}
+
+${MEMORY_LESSONS}"
+fi
+```
+
 ---

 ### 1. Plan Phase