feat: add memory injection audit trail for effectiveness tracking
This commit is contained in:
@@ -201,6 +201,16 @@ cmd_inject() {
|
||||
local domain="${1:-}"
|
||||
local archetype="${2:-}"
|
||||
|
||||
# Parse optional --audit <run_id>
|
||||
local audit_run_id=""
|
||||
shift 2 2>/dev/null || true
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--audit) audit_run_id="$2"; shift 2 ;;
|
||||
*) shift ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ! -f "$LESSONS_FILE" ]]; then
|
||||
return 0
|
||||
fi
|
||||
@@ -237,14 +247,118 @@ cmd_inject() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Collect injected lesson IDs for audit
|
||||
local injected_ids=()
|
||||
|
||||
echo "## Known Issues (from past runs)"
|
||||
while IFS= read -r lesson; do
|
||||
local desc freq src
|
||||
local desc freq src lid
|
||||
desc=$(echo "$lesson" | jq -r '.description')
|
||||
freq=$(echo "$lesson" | jq -r '.frequency')
|
||||
src=$(echo "$lesson" | jq -r '.source')
|
||||
lid=$(echo "$lesson" | jq -r '.id')
|
||||
injected_ids+=("$lid")
|
||||
echo "- ${desc} [seen ${freq}x, ${src}]"
|
||||
done <<< "$lessons"
|
||||
|
||||
# Write audit record if --audit was passed
|
||||
if [[ -n "$audit_run_id" && ${#injected_ids[@]} -gt 0 ]]; then
|
||||
ensure_dir
|
||||
local AUDIT_FILE="${MEMORY_DIR}/audit.jsonl"
|
||||
local ids_json
|
||||
ids_json=$(printf '%s\n' "${injected_ids[@]}" | jq -R . | jq -sc .)
|
||||
jq -cn \
|
||||
--arg ts "$(now_ts)" \
|
||||
--arg run_id "$audit_run_id" \
|
||||
--arg domain "$domain" \
|
||||
--arg archetype "$archetype" \
|
||||
--argjson lessons_injected "$ids_json" \
|
||||
--argjson lesson_count "${#injected_ids[@]}" \
|
||||
'{ts:$ts,run_id:$run_id,domain:$domain,archetype:$archetype,lessons_injected:$lessons_injected,lesson_count:$lesson_count}' \
|
||||
>> "$AUDIT_FILE"
|
||||
fi
|
||||
}
|
||||
|
||||
#######################################
# Check whether the lessons injected into a run were effective.
#
# For every lesson id recorded in the run's injection audit record(s), the
# lesson description is compared with the descriptions of the findings in the
# run's `review.verdict` events. If at least 30% of the lesson's tokens also
# occur in the findings, the lesson is rated "ineffective" (the issue showed
# up again); otherwise it is rated "helpful". One `effectiveness_check`
# record per lesson is appended to the audit file.
#
# Globals:
#   MEMORY_DIR   (read) directory that holds audit.jsonl
#   LESSONS_FILE (read) JSONL file with the lessons (fields: id, description)
# Arguments:
#   $1 - run id; also used to locate .archeflow/events/<run_id>.jsonl
# Outputs:
#   Appends JSON lines to ${MEMORY_DIR}/audit.jsonl; one status line per
#   lesson (and any "nothing to do" notice) on stderr.
# Returns:
#   0 also when there is nothing to check (missing files / no records).
# Depends on:
#   now_ts and tokenize, which are defined outside this block.
#######################################
cmd_audit_check() {
  local run_id="${1:?Usage: $0 audit-check <run_id>}"
  local AUDIT_FILE="${MEMORY_DIR}/audit.jsonl"
  local EVENTS_FILE=".archeflow/events/${run_id}.jsonl"

  if [[ ! -f "$AUDIT_FILE" ]]; then
    echo "No audit records found." >&2
    return 0
  fi

  if [[ ! -f "$EVENTS_FILE" ]]; then
    echo "No events file found for run $run_id." >&2
    return 0
  fi

  # All audit records of this run. The run id is handed to jq via --arg so
  # that quotes or backslashes in it cannot alter the jq program.
  # Best effort: a malformed audit line must not abort the check.
  local injected
  injected=$(jq -c --arg run_id "$run_id" \
    'select(.run_id == $run_id)' "$AUDIT_FILE" 2>/dev/null || true)

  if [[ -z "$injected" ]]; then
    echo "No audit records for run $run_id." >&2
    return 0
  fi

  # Lower-cased descriptions of all findings in review.verdict events.
  local finding_descs
  finding_descs=$(jq -r '
    select(.type == "review.verdict") |
    .data.findings[]? | .description // empty
  ' "$EVENTS_FILE" 2>/dev/null | tr '[:upper:]' '[:lower:]' || true)

  # The finding tokens do not depend on the lesson, so build them once.
  # LC_ALL=C gives comm(1) the byte-wise ordering it is later run with.
  local finding_tokens=""
  if [[ -n "$finding_descs" ]]; then
    finding_tokens=$(printf '%s\n' "$finding_descs" \
      | tr -cs '[:alnum:]' '\n' \
      | awk 'length >= 3' \
      | LC_ALL=C sort -u)
  fi

  # Unique lesson ids of this run. `[]?` skips records without a
  # lessons_injected array, e.g. effectiveness_check records that an earlier
  # audit-check appended for the same run id.
  local lesson_ids
  lesson_ids=$(printf '%s\n' "$injected" \
    | jq -r '.lessons_injected[]?' 2>/dev/null \
    | sort -u || true)

  local lid lesson_desc lesson_tokens finding_overlap common total effectiveness
  while IFS= read -r lid; do
    [[ -z "$lid" ]] && continue

    # First lesson with this id; lessons that no longer exist are skipped.
    lesson_desc=$(jq -r --arg id "$lid" \
      'select(.id == $id) | .description' "$LESSONS_FILE" 2>/dev/null \
      | head -n 1 || true)
    [[ -z "$lesson_desc" ]] && continue

    # tokenize is defined elsewhere; assumed to print one token per line
    # (TODO confirm). Re-sorting guarantees the order comm(1) requires.
    lesson_tokens=$(tokenize "$lesson_desc" | LC_ALL=C sort -u)
    finding_overlap=0

    if [[ -n "$finding_descs" ]]; then
      common=$(LC_ALL=C comm -12 \
        <(printf '%s\n' "$lesson_tokens") \
        <(printf '%s\n' "$finding_tokens") \
        | awk 'NF { n++ } END { print n + 0 }')
      # Count only non-empty lines so an empty token list yields 0, not 1.
      total=$(printf '%s\n' "$lesson_tokens" \
        | awk 'NF { n++ } END { print n + 0 }')
      if [[ "$total" -gt 0 ]]; then
        # Share of the lesson's tokens that re-appear in the findings.
        finding_overlap=$(( common * 100 / total ))
      fi
    fi

    if [[ "$finding_overlap" -ge 30 ]]; then
      effectiveness="ineffective"  # Issue repeated despite lesson injection
    else
      effectiveness="helpful"      # Issue was prevented (no matching finding)
    fi

    # Append result to audit.jsonl
    jq -cn \
      --arg ts "$(now_ts)" \
      --arg run_id "$run_id" \
      --arg lesson_id "$lid" \
      --arg lesson_desc "$lesson_desc" \
      --arg effectiveness "$effectiveness" \
      --argjson overlap "$finding_overlap" \
      '{ts:$ts,run_id:$run_id,type:"effectiveness_check",lesson_id:$lesson_id,lesson_desc:$lesson_desc,effectiveness:$effectiveness,keyword_overlap_pct:$overlap}' \
      >> "$AUDIT_FILE"

    echo "[archeflow-memory] Lesson $lid ($effectiveness): $lesson_desc" >&2
  done <<< "$lesson_ids"
}
|
||||
|
||||
cmd_add() {
|
||||
@@ -383,11 +497,12 @@ if [[ $# -lt 1 ]]; then
|
||||
echo "" >&2
|
||||
echo "Commands:" >&2
|
||||
echo " extract <events.jsonl> Extract lessons from a completed run" >&2
|
||||
echo " inject <domain> <archetype> Output relevant lessons for injection" >&2
|
||||
echo " inject <domain> <archetype> [--audit <run_id>] Output relevant lessons for injection" >&2
|
||||
echo " add <type> <description> Manually add a lesson" >&2
|
||||
echo " list List all active lessons" >&2
|
||||
echo " decay Apply decay to all lessons" >&2
|
||||
echo " forget <id> Archive a lesson by ID" >&2
|
||||
echo " audit-check <run_id> Check lesson effectiveness for a run" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -416,6 +531,10 @@ case "$COMMAND" in
|
||||
[[ $# -lt 1 ]] && { echo "Usage: $0 forget <id>" >&2; exit 1; }
|
||||
cmd_forget "$1"
|
||||
;;
|
||||
audit-check)
|
||||
[[ $# -lt 1 ]] && { echo "Usage: $0 audit-check <run_id>" >&2; exit 1; }
|
||||
cmd_audit_check "$1"
|
||||
;;
|
||||
*)
|
||||
echo "Unknown command: $COMMAND" >&2
|
||||
exit 1
|
||||
|
||||
@@ -215,6 +215,59 @@ Moves the lesson to `archive.jsonl` regardless of frequency.
|
||||
| Before agent spawn (run start) | Inject relevant lessons | `archeflow-memory.sh inject <domain> <archetype>` |
|
||||
| User command | Add/list/forget lessons | `archeflow-memory.sh add/list/forget` |
|
||||
|
||||
## Audit Trail
|
||||
|
||||
The audit trail tracks which lessons were injected into each run and whether they were effective.
|
||||
|
||||
### Storage
|
||||
|
||||
```
|
||||
.archeflow/memory/audit.jsonl # Append-only audit log
|
||||
```
|
||||
|
||||
### Injection Audit Record
|
||||
|
||||
When `--audit <run_id>` is passed to the `inject` command, an audit record is written:
|
||||
|
||||
```jsonl
|
||||
{"ts":"2026-04-04T10:00:00Z","run_id":"2026-04-04-auth-fix","domain":"code","archetype":"","lessons_injected":["m-001","m-003"],"lesson_count":2}
|
||||
```
|
||||
|
||||
Usage:
|
||||
```bash
|
||||
./lib/archeflow-memory.sh inject "$DOMAIN" "" --audit "$RUN_ID"
|
||||
```
|
||||
|
||||
### Effectiveness Check
|
||||
|
||||
After a run completes, check whether injected lessons prevented issues:
|
||||
|
||||
```bash
|
||||
./lib/archeflow-memory.sh audit-check <run_id>
|
||||
```
|
||||
|
||||
This command:
|
||||
1. Reads `audit.jsonl` for lessons injected in the given run
|
||||
2. Reads the run's event file for `review.verdict` events
|
||||
3. For each injected lesson, checks keyword overlap between the lesson's description and review findings
|
||||
4. **No matching finding** (fewer than 30% of the lesson's keywords appear in the review findings) = `helpful` (the lesson likely prevented the issue)
|
||||
5. **Matching finding** (30% or more of the lesson's keywords appear in the review findings) = `ineffective` (the issue repeated despite the lesson being injected)
|
||||
6. Appends effectiveness results to `audit.jsonl`
|
||||
|
||||
### Effectiveness Over Time
|
||||
|
||||
By querying `audit.jsonl` for effectiveness records, you can measure:
|
||||
- Which lessons consistently prevent issues (high `helpful` count)
|
||||
- Which lessons are not working (high `ineffective` count — consider rewording or removing)
|
||||
- Overall memory system ROI (ratio of helpful to ineffective across all runs)
|
||||
|
||||
```bash
|
||||
# Count effectiveness per lesson
|
||||
jq -r 'select(.type == "effectiveness_check") | [.lesson_id, .effectiveness] | @tsv' .archeflow/memory/audit.jsonl | sort | uniq -c
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Design Principles
|
||||
|
||||
1. **Append-only storage.** `lessons.jsonl` is append-only during writes; decay rewrites the file in place but preserves all data (archived lessons move to `archive.jsonl`).
|
||||
|
||||
@@ -63,6 +63,25 @@ After emitting `run.start`, record `SEQ_RUN_START=1`.
|
||||
|
||||
If `--start-from` is specified, verify that the required prior artifacts exist in `.archeflow/artifacts/${RUN_ID}/` before skipping phases. If missing, abort with an error.
|
||||
|
||||
#### 0b. Memory Injection
|
||||
|
||||
Load cross-run memory lessons and inject them into agent prompts. Pass `--audit` to record which lessons were injected for this run:
|
||||
|
||||
```bash
|
||||
# Load cross-run memory for this domain (with audit trail)
|
||||
MEMORY_LESSONS=$(./lib/archeflow-memory.sh inject "$DOMAIN" "" --audit "$RUN_ID")
|
||||
|
||||
# Inject into Explorer/Creator prompts if non-empty
|
||||
if [[ -n "$MEMORY_LESSONS" ]]; then
|
||||
EXPLORER_PROMPT="${EXPLORER_PROMPT}
|
||||
|
||||
${MEMORY_LESSONS}"
|
||||
CREATOR_PROMPT="${CREATOR_PROMPT}
|
||||
|
||||
${MEMORY_LESSONS}"
|
||||
fi
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 1. Plan Phase
|
||||
|
||||
Reference in New Issue
Block a user