feat: add process logging with DAG-based event sourcing

Event-sourced orchestration logging: JSONL events with parent
relationships form a DAG for causal reconstruction of agent flows.
Includes bash event emitter (jq-based) and markdown report generator.
This commit is contained in:
2026-04-03 11:06:02 +02:00
parent 8dec44d199
commit 1753e69a9f
4 changed files with 605 additions and 1 deletions

72
lib/archeflow-event.sh Executable file
View File

@@ -0,0 +1,72 @@
#!/usr/bin/env bash
# archeflow-event.sh — Append a structured event to an ArcheFlow run's JSONL log.
#
# Usage: ./lib/archeflow-event.sh <run_id> <type> <phase> <agent> '<json_data>' [parent_seqs]
#
# Examples:
#   ./lib/archeflow-event.sh 2026-04-03-der-huster run.start plan "" '{"task":"Write Der Huster"}'
#   ./lib/archeflow-event.sh 2026-04-03-der-huster agent.complete plan creator '{"duration_ms":167522}' 2
#   ./lib/archeflow-event.sh 2026-04-03-der-huster phase.transition do "" '{"from":"plan","to":"do"}' 3,4
#   ./lib/archeflow-event.sh 2026-04-03-der-huster fix.applied act "" '{"source":"guardian"}' 8
#
# Parent seqs: comma-separated seq numbers of causal parent events (DAG).
#   "2"   → single parent [2]
#   "3,4" → multiple parents [3,4] (fan-in)
#   ""    → root event []
#
# Events are appended to .archeflow/events/<run_id>.jsonl
# If the events directory doesn't exist, it is created automatically.
set -euo pipefail

if [[ $# -lt 4 ]]; then
  echo "Usage: $0 <run_id> <type> <phase> <agent> [json_data] [parent_seqs]" >&2
  exit 1
fi

# jq does all JSON assembly; fail early with a clear message if it's missing
# (same check archeflow-report.sh performs).
if ! command -v jq &> /dev/null; then
  echo "Error: jq is required but not installed." >&2
  exit 1
fi

RUN_ID="$1"
TYPE="$2"
PHASE="$3"
AGENT="$4"
DATA="${5:-"{}"}"
PARENT_RAW="${6:-}"

# Validate the payload up front — a clearer error than the --argjson failure below.
if ! jq empty <<<"$DATA" 2>/dev/null; then
  echo "Error: json_data is not valid JSON: $DATA" >&2
  exit 1
fi

# Validate parent seqs so "[${PARENT_RAW}]" below is guaranteed to be valid JSON.
if [[ -n "$PARENT_RAW" && ! "$PARENT_RAW" =~ ^[0-9]+(,[0-9]+)*$ ]]; then
  echo "Error: parent_seqs must be comma-separated integers, got: $PARENT_RAW" >&2
  exit 1
fi

EVENTS_DIR=".archeflow/events"
EVENT_FILE="${EVENTS_DIR}/${RUN_ID}.jsonl"
mkdir -p "$EVENTS_DIR"

# Sequence number = count of existing events + 1 (one event per line)
if [[ -f "$EVENT_FILE" ]]; then
  SEQ=$(( $(wc -l < "$EVENT_FILE") + 1 ))
else
  SEQ=1
fi

TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)

# Build the parent array from comma-separated seq numbers ("" → root event [])
if [[ -z "$PARENT_RAW" ]]; then
  PARENT_JSON="[]"
else
  PARENT_JSON="[${PARENT_RAW}]"
fi

# Construct the event with jq for reliable JSON assembly/escaping.
# Agent is passed as --arg (string), then converted to null if empty via jq expression.
EVENT=$(jq -cn \
  --arg ts "$TS" \
  --arg run_id "$RUN_ID" \
  --argjson seq "$SEQ" \
  --argjson parent "$PARENT_JSON" \
  --arg type "$TYPE" \
  --arg phase "$PHASE" \
  --arg agent_raw "$AGENT" \
  --argjson data "$DATA" \
  '{ts:$ts, run_id:$run_id, seq:$seq, parent:$parent, type:$type, phase:$phase, agent:(if $agent_raw == "" then null else $agent_raw end), data:$data}'
)
echo "$EVENT" >> "$EVENT_FILE"

# Confirmation goes to stderr so stdout stays clean for pipelines.
echo "[archeflow-event] #${SEQ} ${TYPE} (${PHASE}/${AGENT:-_})" >&2

222
lib/archeflow-report.sh Executable file
View File

@@ -0,0 +1,222 @@
#!/usr/bin/env bash
# archeflow-report.sh — Generate a Markdown process report from ArcheFlow JSONL events.
#
# Usage: ./lib/archeflow-report.sh <events.jsonl> [--output <file.md>]
#
# Reads a JSONL event file and produces a structured Markdown report showing
# the full orchestration process: phases, decisions, reviews, fixes, metrics.
#
# Requires: jq
set -euo pipefail

if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <events.jsonl> [--output <file.md>]" >&2
  exit 1
fi

EVENT_FILE="$1"
OUTPUT=""
# Parse the optional --output flag. A dangling --output (no filename) is an
# error rather than being silently ignored.
if [[ "${2:-}" == "--output" ]]; then
  if [[ -n "${3:-}" ]]; then
    OUTPUT="$3"
  else
    echo "Error: --output requires a filename." >&2
    exit 1
  fi
fi

if ! command -v jq &> /dev/null; then
  echo "Error: jq is required but not installed." >&2
  exit 1
fi

if [[ ! -f "$EVENT_FILE" ]]; then
  echo "Error: Event file not found: $EVENT_FILE" >&2
  exit 1
fi
# Helper: print all events (one JSON object per line) whose .type equals $1.
# The type is passed via --arg instead of being interpolated into the jq
# program, so quotes/backslashes in $1 cannot break or inject into the filter.
events_of_type() {
  jq -c --arg t "$1" 'select(.type == $t)' "$EVENT_FILE"
}
# Extract run metadata from the first run.start / run.complete events.
# "|| true": under pipefail, head closing the pipe early can kill jq with
# SIGPIPE on large files; the captured first line is still what we want.
RUN_START=$(events_of_type "run.start" | head -n 1 || true)
RUN_COMPLETE=$(events_of_type "run.complete" | head -n 1 || true)
# Feed "null" when the event is missing — jq produces no output at all for
# empty input, so the // defaults below would otherwise never fire.
RUN_ID=$(jq -r '.run_id // "unknown"' <<<"${RUN_START:-null}")
TASK=$(jq -r '.data.task // "unknown"' <<<"${RUN_START:-null}")
WORKFLOW=$(jq -r '.data.workflow // "unknown"' <<<"${RUN_START:-null}")
TEAM=$(jq -r '.data.team // "unknown"' <<<"${RUN_START:-null}")
# Generate the full Markdown report on stdout.
# Reads globals: EVENT_FILE, RUN_START, RUN_COMPLETE, RUN_ID, TASK, WORKFLOW, TEAM.
generate_report() {
  cat <<HEADER
# Process Report: ${TASK}
> Auto-generated from ArcheFlow event log.
> Run: \`${RUN_ID}\` | Workflow: \`${WORKFLOW}\` | Team: \`${TEAM}\`
---
## Overview
HEADER
  # Overview table — only available once run.complete has been emitted
  if [[ -n "$RUN_COMPLETE" ]]; then
    STATUS=$(echo "$RUN_COMPLETE" | jq -r '.data.status // "unknown"')
    CYCLES=$(echo "$RUN_COMPLETE" | jq -r '.data.cycles // "?"')
    AGENTS=$(echo "$RUN_COMPLETE" | jq -r '.data.agents_total // "?"')
    FIXES=$(echo "$RUN_COMPLETE" | jq -r '.data.fixes_total // "?"')
    SHADOWS=$(echo "$RUN_COMPLETE" | jq -r '.data.shadows // "0"')
    DURATION_MS=$(echo "$RUN_COMPLETE" | jq -r '.data.duration_ms // "0"')
    # Guard the arithmetic: non-numeric duration_ms would abort under set -e
    [[ "$DURATION_MS" =~ ^[0-9]+$ ]] || DURATION_MS=0
    DURATION_MIN=$(( DURATION_MS / 60000 ))
    cat <<TABLE
| Field | Value |
|-------|-------|
| **Status** | ${STATUS} |
| **PDCA Cycles** | ${CYCLES} |
| **Agents** | ${AGENTS} |
| **Fixes** | ${FIXES} |
| **Shadows** | ${SHADOWS} |
| **Duration** | ~${DURATION_MIN} min |
TABLE
  fi
  # Optional configuration dump from run.start
  CONFIG=$(echo "$RUN_START" | jq -r '.data.config // empty')
  if [[ -n "$CONFIG" ]]; then
    echo "### Configuration"
    echo '```json'
    echo "$CONFIG" | jq .
    echo '```'
    echo ""
  fi
  echo "---"
  echo ""
  # Phase sections — a header is printed each time the phase changes
  echo "## Phases"
  echo ""
  CURRENT_PHASE=""
  # Walk all events chronologically (the JSONL log is append-only, so file
  # order is time order)
  while IFS= read -r event; do
    TYPE=$(echo "$event" | jq -r '.type')
    PHASE=$(echo "$event" | jq -r '.phase')
    AGENT=$(echo "$event" | jq -r '.agent // ""')
    # Phase header on transition (run.start/run.complete belong to no phase section)
    if [[ "$PHASE" != "$CURRENT_PHASE" && "$TYPE" != "run.start" && "$TYPE" != "run.complete" ]]; then
      CURRENT_PHASE="$PHASE"
      PHASE_UPPER=$(echo "$PHASE" | tr '[:lower:]' '[:upper:]')
      echo "### ${PHASE_UPPER}"
      echo ""
    fi
    case "$TYPE" in
      agent.complete)
        ARCHETYPE=$(echo "$event" | jq -r '.data.archetype // .agent // "unknown"')
        DURATION=$(echo "$event" | jq -r '.data.duration_ms // 0')
        TOKENS=$(echo "$event" | jq -r '.data.tokens // 0')
        SUMMARY=$(echo "$event" | jq -r '.data.summary // "no summary"')
        ARTIFACTS=$(echo "$event" | jq -r '(.data.artifacts // []) | join(", ")')
        # Guard the arithmetic against malformed duration values (set -e)
        [[ "$DURATION" =~ ^[0-9]+$ ]] || DURATION=0
        DURATION_S=$(( DURATION / 1000 ))
        echo "**${ARCHETYPE}** (${DURATION_S}s, ${TOKENS} tokens)"
        echo ": ${SUMMARY}"
        if [[ -n "$ARTIFACTS" ]]; then
          echo ": Artifacts: ${ARTIFACTS}"
        fi
        echo ""
        ;;
      decision)
        WHAT=$(echo "$event" | jq -r '.data.what // "unknown"')
        CHOSEN=$(echo "$event" | jq -r '.data.chosen // "unknown"')
        RATIONALE=$(echo "$event" | jq -r '.data.rationale // ""')
        echo "**Decision: ${WHAT}**"
        echo ": Chosen: ${CHOSEN}"
        if [[ -n "$RATIONALE" ]]; then
          echo ": Rationale: ${RATIONALE}"
        fi
        # List alternatives if present. Defaults + || true keep a malformed
        # alternative (missing id/label/reason) from aborting under set -e,
        # matching the tolerant style of the review.verdict arm below.
        ALTS=$(echo "$event" | jq -r '(.data.alternatives // [])[] | " - ~" + (.id // "?") + "~ " + (.label // "") + " — " + (.reason_rejected // "")' 2>/dev/null || true)
        if [[ -n "$ALTS" ]]; then
          echo ": Rejected:"
          echo "$ALTS"
        fi
        echo ""
        ;;
      review.verdict)
        ARCHETYPE=$(echo "$event" | jq -r '.data.archetype // .agent // "unknown"')
        VERDICT=$(echo "$event" | jq -r '.data.verdict // "unknown"')
        VERDICT_UPPER=$(echo "$VERDICT" | tr '[:lower:]' '[:upper:]' | tr '_' ' ')
        echo "**${ARCHETYPE}** → ${VERDICT_UPPER}"
        # List findings (best-effort: malformed findings are skipped)
        echo "$event" | jq -r '(.data.findings // [])[] | " - [" + .severity + "] " + .description' 2>/dev/null || true
        echo ""
        ;;
      fix.applied)
        SOURCE=$(echo "$event" | jq -r '.data.source // "unknown"')
        FINDING=$(echo "$event" | jq -r '.data.finding // "unknown"')
        FILE=$(echo "$event" | jq -r '.data.file // ""')
        LINE=$(echo "$event" | jq -r '.data.line // ""')
        if [[ -n "$FILE" && "$LINE" != "null" && -n "$LINE" ]]; then
          echo "- **Fix** (${SOURCE}): ${FINDING} — \`${FILE}:${LINE}\`"
        else
          echo "- **Fix** (${SOURCE}): ${FINDING}"
        fi
        ;;
      shadow.detected)
        ARCHETYPE=$(echo "$event" | jq -r '.data.archetype // "unknown"')
        SHADOW=$(echo "$event" | jq -r '.data.shadow // "unknown"')
        ACTION=$(echo "$event" | jq -r '.data.action // "unknown"')
        # Separator between shadow name and action (they were run together)
        echo "- **Shadow** ⚠️ ${ARCHETYPE}: ${SHADOW} — action: ${ACTION}"
        echo ""
        ;;
      cycle.boundary)
        CYCLE=$(echo "$event" | jq -r '.data.cycle // "?"')
        MAX=$(echo "$event" | jq -r '.data.max_cycles // "?"')
        MET=$(echo "$event" | jq -r '.data.met // false')
        NEXT=$(echo "$event" | jq -r '.data.next_action // "unknown"')
        echo ""
        echo "---"
        echo ""
        # Separator between the met-flag and next action (they were run together)
        echo "**Cycle ${CYCLE}/${MAX}** — exit condition met: ${MET} — next: ${NEXT}"
        echo ""
        ;;
    esac
  done < "$EVENT_FILE"
  # Artifacts list from run.complete
  if [[ -n "$RUN_COMPLETE" ]]; then
    echo ""
    echo "---"
    echo ""
    echo "## Artifacts"
    echo ""
    echo "$RUN_COMPLETE" | jq -r '(.data.artifacts // [])[] | "- `" + . + "`"'
  fi
}
# Emit the report: to the requested file when --output was given, else to stdout.
if [[ -z "$OUTPUT" ]]; then
  generate_report
else
  generate_report > "$OUTPUT"
  echo "Report written to: $OUTPUT" >&2
fi

View File

@@ -62,6 +62,33 @@ Guardian completes → A2 (fast-path check) → if clean, skip other reviewers
Check phase done → A1 (escalation check) → if 2+ CRITICALs in fast, next cycle is standard
```
## Process Logging
If `.archeflow/events/` exists (create it on first use if it doesn't), emit structured events throughout orchestration. See `archeflow:process-log` skill for full schema.
**Quick reference — emit at these points:**
```
run.start → After workflow selection, before first agent
agent.start → Before each Agent tool call
agent.complete → After each Agent returns (include duration, tokens, summary, artifacts)
decision → When choosing between alternatives (plot direction, approach, fix strategy)
phase.transition → At Plan→Do, Do→Check, Check→Act boundaries
review.verdict → After each reviewer delivers verdict
fix.applied → After each edit addressing a review finding
cycle.boundary → End of PDCA cycle
shadow.detected → When shadow threshold triggers
run.complete → After final Act phase (include totals)
```
**Helper:** `./lib/archeflow-event.sh <run_id> <type> <phase> <agent> '<json>'`
**Report:** `./lib/archeflow-report.sh .archeflow/events/<run_id>.jsonl`
Events are optional — if the events dir doesn't exist, skip logging. Never let logging block orchestration.
---
## Step 1: Plan Phase
Spawn agents sequentially — Creator needs Explorer's findings.
@@ -409,7 +436,9 @@ After each task completes (success or failure):
### Session Log Protocol
Write to `.archeflow/session-log.md` after each task:
**Primary:** Emit `run.complete` event to `.archeflow/events/<run_id>.jsonl` (see Process Logging section above). The event stream is the source of truth.
**Secondary:** Also write a human-readable summary to `.archeflow/session-log.md`:
```markdown
## Task N: <description>
@@ -419,8 +448,11 @@ Write to `.archeflow/session-log.md` after each task:
**Files changed:** 5 | **Tests added:** 12
**Branch:** merged to main (commit abc1234) | OR: archeflow/maker-xyz (NOT merged)
**Duration:** 8 min
**Events:** `.archeflow/events/<run_id>.jsonl` (full process log)
```
Generate the full Markdown report: `./lib/archeflow-report.sh .archeflow/events/<run_id>.jsonl`
### Safety Rules
- Never force-push. Never modify main history.
- All work stays on worktree branches until explicitly merged

278
skills/process-log/SKILL.md Normal file
View File

@@ -0,0 +1,278 @@
---
name: process-log
description: |
Event-based process logging for ArcheFlow orchestrations. Captures every phase transition,
agent output, decision, and fix as structured JSONL events. Enables post-hoc reports,
dashboards, and process archaeology.
<example>Automatically loaded during orchestration</example>
<example>User: "Show me how this story was made"</example>
---
# Process Log — Event-Sourced Orchestration History
Every ArcheFlow orchestration writes structured events to a JSONL file. Events are the **single source of truth** — all reports (Markdown, dashboards, timelines) are generated views.
## Event Storage
```
.archeflow/events/<run-id>.jsonl # One file per orchestration run
.archeflow/events/index.jsonl # Run index (one line per run, for listing)
```
**Run ID format:** `<date>-<slug>` (e.g., `2026-04-03-der-huster`)
## When to Emit Events
Emit an event at each of these points during orchestration:
| Moment | Event Type | Trigger |
|--------|-----------|---------|
| Orchestration starts | `run.start` | After workflow selection, before first agent |
| Agent spawned | `agent.start` | Before each Agent tool call |
| Agent completes | `agent.complete` | After each Agent returns |
| Phase transition | `phase.transition` | Plan→Do, Do→Check, Check→Act |
| Decision made | `decision` | Plot direction chosen, fix applied, workflow adapted |
| Review verdict | `review.verdict` | Guardian/Sage/Skeptic delivers verdict |
| Fix applied | `fix.applied` | After each edit that addresses a review finding |
| Cycle boundary | `cycle.boundary` | End of PDCA cycle, before next (or exit) |
| Shadow detected | `shadow.detected` | Shadow threshold triggered |
| Orchestration ends | `run.complete` | After final Act phase |
## Event Schema
Every event is one JSON line with these required fields:
```jsonl
{
"ts": "2026-04-03T14:32:07Z",
"run_id": "2026-04-03-der-huster",
"seq": 4,
"parent": [2],
"type": "agent.complete",
"phase": "plan",
"agent": "creator",
"data": { ... }
}
```
| Field | Type | Description |
|-------|------|-------------|
| `ts` | ISO 8601 | Timestamp |
| `run_id` | string | Unique run identifier |
| `seq` | integer | Monotonically increasing sequence number within run |
| `parent` | int[] | Seq numbers of causal parent events. Forms a DAG. `[]` for root events. |
| `type` | string | Event type (see table above) |
| `phase` | string | Current PDCA phase: `plan`, `do`, `check`, `act` |
| `agent` | string or null | Agent archetype that triggered the event |
| `data` | object | Event-type-specific payload (see below) |
### Parent Relationships (DAG)
The `parent` field turns the flat event stream into a directed acyclic graph (agent call graph). This enables:
- **Causal reconstruction:** which agent output caused which downstream action
- **Parallel visualization:** agents sharing a parent ran concurrently
- **Blame tracking:** trace a fix back through review → draft → outline → research
Rules:
- `run.start` has `parent: []` (root node)
- An agent has `parent: [seq of event that triggered it]`
- A phase transition has `parent: [seq of all completing events in prior phase]`
- A fix has `parent: [seq of the review that found the issue]`
- A decision has `parent: [seq of the agent that produced the alternatives]`
- Parallel agents share the same parent (fan-out), phase transitions collect them (fan-in)
Example DAG from a writing workflow:
```
#1 run.start []
├── #2 agent.complete (explorer) [1]
│ └── #3 decision (plot direction) [2]
├── #4 agent.complete (creator) [2] ← explorer informs creator
├── #5 phase.transition (plan→do) [3,4] ← fan-in
│ └── #6 agent.complete (maker) [5]
├── #7 phase.transition (do→check) [6]
│ ├── #8 review (guardian) [7] ← parallel (fan-out)
│ └── #9 review (sage) [7] ← parallel (fan-out)
├── #10 phase.transition (check→act) [8,9] ← fan-in
├── #11 fix (timeline) [8] ← caused by guardian
├── #12 fix (voice drift) [9] ← caused by sage
├── … (#13–#17: remaining fixes and re-review, elided)
└── #18 run.complete [17]
```
## Event Payloads by Type
### `run.start`
```json
{
"task": "Write short story 'Der Huster'",
"workflow": "kurzgeschichte",
"team": "story-development",
"max_cycles": 2,
"config": {
"voice_profile": "vp-giesing-gschichten-v1",
"persona": "giesinger",
"target_words": 6000
}
}
```
### `agent.start`
```json
{
"archetype": "story-explorer",
"model": "haiku",
"prompt_summary": "Research premise, find emotional core, suggest 3 plot directions"
}
```
### `agent.complete`
```json
{
"archetype": "story-explorer",
"duration_ms": 87605,
"tokens": 21645,
"artifacts": ["docs/01-der-huster-research.md"],
"summary": "3 plot directions developed, recommended C (Mo krank + Koffer)"
}
```
### `decision`
```json
{
"what": "plot_direction",
"chosen": "C — Mo krank + Koffer aus B",
"alternatives": [
{"id": "A", "label": "Mo ist weg", "reason_rejected": "Zu passiv für 6k-Story"},
{"id": "B", "label": "Huster gehört nicht Mo", "reason_rejected": "Zu Krimi-nah"}
],
"rationale": "Stärkster emotionaler Kern, passt zum Voice Profile"
}
```
### `review.verdict`
```json
{
"archetype": "guardian",
"verdict": "approved_with_fixes",
"findings": [
{"severity": "bug", "description": "Timeline: 'Montag' referenced but story starts Dienstag", "fix_required": true},
{"severity": "recommendation", "description": "Gentrification monologue too long for Alex register", "fix_required": false}
]
}
```
### `fix.applied`
```json
{
"source": "guardian",
"finding": "Timeline: Montag → Dienstag",
"file": "stories/01-der-huster.md",
"line": 302,
"before": "das Gegenteil von Montag",
"after": "das Gegenteil von Dienstag"
}
```
### `phase.transition`
```json
{
"from": "plan",
"to": "do",
"artifacts_so_far": ["research.md", "outline.md"],
"notes": "Explorer recommended direction C, Creator produced 6-scene outline"
}
```
### `cycle.boundary`
```json
{
"cycle": 1,
"max_cycles": 2,
"exit_condition": "all_approved",
"met": true,
"fixes_applied": 6,
"next_action": "complete"
}
```
### `shadow.detected`
```json
{
"archetype": "story-explorer",
"shadow": "endless_research",
"trigger": "output >2000 words without recommendation",
"action": "correction_prompt_applied",
"occurrence": 1
}
```
### `run.complete`
```json
{
"status": "completed",
"cycles": 1,
"agents_total": 5,
"fixes_total": 6,
"shadows": 0,
"duration_ms": 1295519,
"artifacts": [
"docs/01-der-huster-research.md",
"docs/01-der-huster-outline.md",
"stories/01-der-huster.md",
"docs/01-der-huster-guardian-review.md",
"docs/01-der-huster-sage-review.md",
"docs/01-der-huster-process.md"
]
}
```
## How to Emit Events
During orchestration, write events using this pattern:
```bash
# Append one event to the run's JSONL file
echo '{"ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","run_id":"RUN_ID","seq":SEQ,"parent":[PARENT_SEQS],"type":"TYPE","phase":"PHASE","agent":"AGENT","data":{...}}' >> .archeflow/events/RUN_ID.jsonl
```
Or use the helper script:
```bash
./lib/archeflow-event.sh RUN_ID TYPE PHASE AGENT '{"key":"value"}'
```
The orchestration skill should call the event emitter at each trigger point listed in the table above.
## Generating Reports
After orchestration completes (or during, for live progress):
```bash
# Generate markdown process report
./lib/archeflow-report.sh .archeflow/events/2026-04-03-der-huster.jsonl > docs/process-report.md
# List all runs
cat .archeflow/events/index.jsonl | jq -r '[.run_id, .status, .task] | @tsv'
```
## Run Index
After each `run.complete`, append a summary line to `.archeflow/events/index.jsonl`:
```jsonl
{"run_id":"2026-04-03-der-huster","ts":"2026-04-03T16:00:00Z","task":"Write Der Huster","workflow":"kurzgeschichte","status":"completed","cycles":1,"agents":5,"fixes":6,"duration_ms":1295519}
```
## Integration with Existing Skills
- **`orchestration`**: Emit events at phase transitions and after each agent
- **`shadow-detection`**: Emit `shadow.detected` when thresholds trigger
- **`autonomous-mode`**: Use `index.jsonl` for session summaries instead of separate session-log
- **`workflow-design`**: Custom workflows inherit logging automatically
## Design Principles
1. **Append-only.** Never modify or delete events. They are immutable facts.
2. **Self-contained.** Each event has enough context to be understood alone (no forward references).
3. **Cheap.** One `echo >>` per event. No database, no service, no dependencies.
4. **Optional.** If events dir doesn't exist, orchestration works fine without logging. Events are observation, not control flow.