From 1753e69a9f4f9d74b6a8a8f961b45cbdd3051bee Mon Sep 17 00:00:00 2001 From: Christian Nennemann Date: Fri, 3 Apr 2026 11:06:02 +0200 Subject: [PATCH] feat: add process logging with DAG-based event sourcing Event-sourced orchestration logging: JSONL events with parent relationships form a DAG for causal reconstruction of agent flows. Includes bash event emitter (jq-based) and markdown report generator. --- lib/archeflow-event.sh | 72 +++++++++ lib/archeflow-report.sh | 222 +++++++++++++++++++++++++++ skills/orchestration/SKILL.md | 34 ++++- skills/process-log/SKILL.md | 278 ++++++++++++++++++++++++++++++++++ 4 files changed, 605 insertions(+), 1 deletion(-) create mode 100755 lib/archeflow-event.sh create mode 100755 lib/archeflow-report.sh create mode 100644 skills/process-log/SKILL.md diff --git a/lib/archeflow-event.sh b/lib/archeflow-event.sh new file mode 100755 index 0000000..dfec718 --- /dev/null +++ b/lib/archeflow-event.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# archeflow-event.sh — Append a structured event to an ArcheFlow run's JSONL log. +# +# Usage: ./lib/archeflow-event.sh '' [parent_seqs] +# +# Examples: +# ./lib/archeflow-event.sh 2026-04-03-der-huster run.start plan "" '{"task":"Write Der Huster"}' +# ./lib/archeflow-event.sh 2026-04-03-der-huster agent.complete plan creator '{"duration_ms":167522}' 2 +# ./lib/archeflow-event.sh 2026-04-03-der-huster phase.transition do "" '{"from":"plan","to":"do"}' 3,4 +# ./lib/archeflow-event.sh 2026-04-03-der-huster fix.applied act "" '{"source":"guardian"}' 8 +# +# Parent seqs: comma-separated seq numbers of causal parent events (DAG). +# "2" → single parent [2] +# "3,4" → multiple parents [3,4] (fan-in) +# "" → root event [] +# +# Events are appended to .archeflow/events/.jsonl +# If the events directory doesn't exist, it is created automatically. + +set -euo pipefail + +if [[ $# -lt 4 ]]; then + echo "Usage: $0 [json_data] [parent_seqs]" >&2 + exit 1 +fi + +RUN_ID="$1" +TYPE="$2" +PHASE="$3" +AGENT="$4" +DATA="${5:-"{}"}" +PARENT_RAW="${6:-}" + +EVENTS_DIR=".archeflow/events" +EVENT_FILE="${EVENTS_DIR}/${RUN_ID}.jsonl" + +mkdir -p "$EVENTS_DIR" + +# Determine sequence number (count existing lines + 1) +if [[ -f "$EVENT_FILE" ]]; then + SEQ=$(( $(wc -l < "$EVENT_FILE") + 1 )) +else + SEQ=1 +fi + +TS=$(date -u +%Y-%m-%dT%H:%M:%SZ) + +# Build parent array from comma-separated seq numbers +if [[ -z "$PARENT_RAW" ]]; then + PARENT_JSON="[]" +else + PARENT_JSON="[${PARENT_RAW}]" +fi + +# Construct the event using jq for reliable JSON assembly +# Agent is passed as --arg (string), then converted to null if empty via jq expression +EVENT=$(jq -cn \ + --arg ts "$TS" \ + --arg run_id "$RUN_ID" \ + --argjson seq "$SEQ" \ + --argjson parent "$PARENT_JSON" \ + --arg type "$TYPE" \ + --arg phase "$PHASE" \ + --arg agent_raw "$AGENT" \ + --argjson data "$DATA" \ + '{ts:$ts, run_id:$run_id, seq:$seq, parent:$parent, type:$type, phase:$phase, agent:(if $agent_raw == "" then null else $agent_raw end), data:$data}' +) + +echo "$EVENT" >> "$EVENT_FILE" + +# Print confirmation to stderr (non-intrusive) +echo "[archeflow-event] #${SEQ} ${TYPE} (${PHASE}/${AGENT:-_})" >&2 diff --git a/lib/archeflow-report.sh b/lib/archeflow-report.sh new file mode 100755 index 0000000..37ecba9 --- /dev/null +++ b/lib/archeflow-report.sh @@ -0,0 +1,222 @@ +#!/usr/bin/env bash +# archeflow-report.sh — Generate a Markdown process report from ArcheFlow JSONL events. +# +# Usage: ./lib/archeflow-report.sh [--output ] +# +# Reads a JSONL event file and produces a structured Markdown report showing +# the full orchestration process: phases, decisions, reviews, fixes, metrics. +# +# Requires: jq + +set -euo pipefail + +if [[ $# -lt 1 ]]; then + echo "Usage: $0 [--output ]" >&2 + exit 1 +fi + +EVENT_FILE="$1" +OUTPUT="" + +if [[ "${2:-}" == "--output" && -n "${3:-}" ]]; then + OUTPUT="$3" +fi + +if ! command -v jq &> /dev/null; then + echo "Error: jq is required but not installed." >&2 + exit 1 +fi + +if [[ ! -f "$EVENT_FILE" ]]; then + echo "Error: Event file not found: $EVENT_FILE" >&2 + exit 1 +fi + +# Helper: extract events by type +events_of_type() { + jq -c "select(.type == \"$1\")" "$EVENT_FILE" +} + +# Extract run metadata +RUN_START=$(events_of_type "run.start" | head -1) +RUN_COMPLETE=$(events_of_type "run.complete" | head -1) +RUN_ID=$(echo "$RUN_START" | jq -r '.run_id // "unknown"') +TASK=$(echo "$RUN_START" | jq -r '.data.task // "unknown"') +WORKFLOW=$(echo "$RUN_START" | jq -r '.data.workflow // "unknown"') +TEAM=$(echo "$RUN_START" | jq -r '.data.team // "unknown"') + +# Generate report +generate_report() { + cat <
Auto-generated from ArcheFlow event log. +> Run: \`${RUN_ID}\` | Workflow: \`${WORKFLOW}\` | Team: \`${TEAM}\` + +--- + +## Overview + +HEADER + + # Overview table from run.complete + if [[ -n "$RUN_COMPLETE" ]]; then + STATUS=$(echo "$RUN_COMPLETE" | jq -r '.data.status // "unknown"') + CYCLES=$(echo "$RUN_COMPLETE" | jq -r '.data.cycles // "?"') + AGENTS=$(echo "$RUN_COMPLETE" | jq -r '.data.agents_total // "?"') + FIXES=$(echo "$RUN_COMPLETE" | jq -r '.data.fixes_total // "?"') + SHADOWS=$(echo "$RUN_COMPLETE" | jq -r '.data.shadows // "0"') + DURATION_MS=$(echo "$RUN_COMPLETE" | jq -r '.data.duration_ms // "0"') + DURATION_MIN=$(( DURATION_MS / 60000 )) + + cat </dev/null || true + echo "" + ;; + + fix.applied) + SOURCE=$(echo "$event" | jq -r '.data.source // "unknown"') + FINDING=$(echo "$event" | jq -r '.data.finding // "unknown"') + FILE=$(echo "$event" | jq -r '.data.file // ""') + LINE=$(echo "$event" | jq -r '.data.line // ""') + + if [[ -n "$FILE" && "$LINE" != "null" && -n "$LINE" ]]; then + echo "- **Fix** (${SOURCE}): ${FINDING} — \`${FILE}:${LINE}\`" + else + echo "- **Fix** (${SOURCE}): ${FINDING}" + fi + ;; + + shadow.detected) + ARCHETYPE=$(echo "$event" | jq -r '.data.archetype // "unknown"') + SHADOW=$(echo "$event" | jq -r '.data.shadow // "unknown"') + ACTION=$(echo "$event" | jq -r '.data.action // "unknown"') + + echo "- **Shadow** ⚠️ ${ARCHETYPE}: ${SHADOW} → ${ACTION}" + echo "" + ;; + + cycle.boundary) + CYCLE=$(echo "$event" | jq -r '.data.cycle // "?"') + MAX=$(echo "$event" | jq -r '.data.max_cycles // "?"') + MET=$(echo "$event" | jq -r '.data.met // false') + NEXT=$(echo "$event" | jq -r '.data.next_action // "unknown"') + + echo "" + echo "---" + echo "" + echo "**Cycle ${CYCLE}/${MAX}** — exit condition met: ${MET} → ${NEXT}" + echo "" + ;; + esac + + done < "$EVENT_FILE" + + # Artifacts list from run.complete + if [[ -n "$RUN_COMPLETE" ]]; then + echo "" + echo "---" + echo "" + echo "## Artifacts" + echo "" + echo "$RUN_COMPLETE" | jq -r '(.data.artifacts // [])[] | "- `" + . + "`"' + fi +} + +if [[ -n "$OUTPUT" ]]; then + generate_report > "$OUTPUT" + echo "Report written to: $OUTPUT" >&2 +else + generate_report +fi diff --git a/skills/orchestration/SKILL.md b/skills/orchestration/SKILL.md index eb680df..306481e 100644 --- a/skills/orchestration/SKILL.md +++ b/skills/orchestration/SKILL.md @@ -62,6 +62,33 @@ Guardian completes → A2 (fast-path check) → if clean, skip other reviewers Check phase done → A1 (escalation check) → if 2+ CRITICALs in fast, next cycle is standard ``` +## Process Logging + +If `.archeflow/events/` exists (or should be created), emit structured events throughout orchestration. See `archeflow:process-log` skill for full schema. + +**Quick reference — emit at these points:** + +``` +run.start → After workflow selection, before first agent +agent.start → Before each Agent tool call +agent.complete → After each Agent returns (include duration, tokens, summary, artifacts) +decision → When choosing between alternatives (plot direction, approach, fix strategy) +phase.transition → At Plan→Do, Do→Check, Check→Act boundaries +review.verdict → After each reviewer delivers verdict +fix.applied → After each edit addressing a review finding +cycle.boundary → End of PDCA cycle +shadow.detected → When shadow threshold triggers +run.complete → After final Act phase (include totals) +``` + +**Helper:** `./lib/archeflow-event.sh ''` + +**Report:** `./lib/archeflow-report.sh .archeflow/events/.jsonl` + +Events are optional — if the events dir doesn't exist, skip logging. Never let logging block orchestration. + +--- + ## Step 1: Plan Phase Spawn agents sequentially — Creator needs Explorer's findings. @@ -409,7 +436,9 @@ After each task completes (success or failure): ### Session Log Protocol -Write to `.archeflow/session-log.md` after each task: +**Primary:** Emit `run.complete` event to `.archeflow/events/.jsonl` (see Process Logging section above). The event stream is the source of truth. + +**Secondary:** Also write a human-readable summary to `.archeflow/session-log.md`: ```markdown ## Task N: @@ -419,8 +448,11 @@ Write to `.archeflow/session-log.md` after each task: **Files changed:** 5 | **Tests added:** 12 **Branch:** merged to main (commit abc1234) | OR: archeflow/maker-xyz (NOT merged) **Duration:** 8 min +**Events:** `.archeflow/events/.jsonl` (full process log) ``` +Generate the full Markdown report: `./lib/archeflow-report.sh .archeflow/events/.jsonl` + ### Safety Rules - Never force-push. Never modify main history. - All work stays on worktree branches until explicitly merged diff --git a/skills/process-log/SKILL.md b/skills/process-log/SKILL.md new file mode 100644 index 0000000..4dce56e --- /dev/null +++ b/skills/process-log/SKILL.md @@ -0,0 +1,278 @@ +--- +name: process-log +description: | + Event-based process logging for ArcheFlow orchestrations. Captures every phase transition, + agent output, decision, and fix as structured JSONL events. Enables post-hoc reports, + dashboards, and process archaeology. + Automatically loaded during orchestration + User: "Show me how this story was made" +--- + +# Process Log — Event-Sourced Orchestration History + +Every ArcheFlow orchestration writes structured events to a JSONL file. Events are the **single source of truth** — all reports (Markdown, dashboards, timelines) are generated views. + +## Event Storage + +``` +.archeflow/events/.jsonl # One file per orchestration run +.archeflow/events/index.jsonl # Run index (one line per run, for listing) +``` + +**Run ID format:** `-` (e.g., `2026-04-03-der-huster`) + +## When to Emit Events + +Emit an event at each of these points during orchestration: + +| Moment | Event Type | Trigger | +|--------|-----------|---------| +| Orchestration starts | `run.start` | After workflow selection, before first agent | +| Agent spawned | `agent.start` | Before each Agent tool call | +| Agent completes | `agent.complete` | After each Agent returns | +| Phase transition | `phase.transition` | Plan→Do, Do→Check, Check→Act | +| Decision made | `decision` | Plot direction chosen, fix applied, workflow adapted | +| Review verdict | `review.verdict` | Guardian/Sage/Skeptic delivers verdict | +| Fix applied | `fix.applied` | After each edit that addresses a review finding | +| Cycle boundary | `cycle.boundary` | End of PDCA cycle, before next (or exit) | +| Shadow detected | `shadow.detected` | Shadow threshold triggered | +| Orchestration ends | `run.complete` | After final Act phase | + +## Event Schema + +Every event is one JSON line with these required fields: + +```jsonl +{ + "ts": "2026-04-03T14:32:07Z", + "run_id": "2026-04-03-der-huster", + "seq": 4, + "parent": [2], + "type": "agent.complete", + "phase": "plan", + "agent": "creator", + "data": { ... } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `ts` | ISO 8601 | Timestamp | +| `run_id` | string | Unique run identifier | +| `seq` | integer | Monotonically increasing sequence number within run | +| `parent` | int[] | Seq numbers of causal parent events. Forms a DAG. `[]` for root events. | +| `type` | string | Event type (see table above) | +| `phase` | string | Current PDCA phase: `plan`, `do`, `check`, `act` | +| `agent` | string or null | Agent archetype that triggered the event | +| `data` | object | Event-type-specific payload (see below) | + +### Parent Relationships (DAG) + +The `parent` field turns the flat event stream into a directed acyclic graph (agent call graph). This enables: + +- **Causal reconstruction:** which agent output caused which downstream action +- **Parallel visualization:** agents sharing a parent ran concurrently +- **Blame tracking:** trace a fix back through review → draft → outline → research + +Rules: +- `run.start` has `parent: []` (root node) +- An agent has `parent: [seq of event that triggered it]` +- A phase transition has `parent: [seq of all completing events in prior phase]` +- A fix has `parent: [seq of the review that found the issue]` +- A decision has `parent: [seq of the agent that produced the alternatives]` +- Parallel agents share the same parent (fan-out), phase transitions collect them (fan-in) + +Example DAG from a writing workflow: +``` +#1 run.start [] +├── #2 agent.complete (explorer) [1] +│ └── #3 decision (plot direction) [2] +├── #4 agent.complete (creator) [2] ← explorer informs creator +├── #5 phase.transition (plan→do) [3,4] ← fan-in +│ └── #6 agent.complete (maker) [5] +├── #7 phase.transition (do→check) [6] +│ ├── #8 review (guardian) [7] ← parallel (fan-out) +│ └── #9 review (sage) [7] ← parallel (fan-out) +├── #10 phase.transition (check→act) [8,9] ← fan-in +├── #11 fix (timeline) [8] ← caused by guardian +├── #12 fix (voice drift) [9] ← caused by sage +└── #18 run.complete [17] +``` + +## Event Payloads by Type + +### `run.start` +```json +{ + "task": "Write short story 'Der Huster'", + "workflow": "kurzgeschichte", + "team": "story-development", + "max_cycles": 2, + "config": { + "voice_profile": "vp-giesing-gschichten-v1", + "persona": "giesinger", + "target_words": 6000 + } +} +``` + +### `agent.start` +```json +{ + "archetype": "story-explorer", + "model": "haiku", + "prompt_summary": "Research premise, find emotional core, suggest 3 plot directions" +} +``` + +### `agent.complete` +```json +{ + "archetype": "story-explorer", + "duration_ms": 87605, + "tokens": 21645, + "artifacts": ["docs/01-der-huster-research.md"], + "summary": "3 plot directions developed, recommended C (Mo krank + Koffer)" +} +``` + +### `decision` +```json +{ + "what": "plot_direction", + "chosen": "C — Mo krank + Koffer aus B", + "alternatives": [ + {"id": "A", "label": "Mo ist weg", "reason_rejected": "Zu passiv für 6k-Story"}, + {"id": "B", "label": "Huster gehört nicht Mo", "reason_rejected": "Zu Krimi-nah"} + ], + "rationale": "Stärkster emotionaler Kern, passt zum Voice Profile" +} +``` + +### `review.verdict` +```json +{ + "archetype": "guardian", + "verdict": "approved_with_fixes", + "findings": [ + {"severity": "bug", "description": "Timeline: 'Montag' referenced but story starts Dienstag", "fix_required": true}, + {"severity": "recommendation", "description": "Gentrification monologue too long for Alex register", "fix_required": false} + ] +} +``` + +### `fix.applied` +```json +{ + "source": "guardian", + "finding": "Timeline: Montag → Dienstag", + "file": "stories/01-der-huster.md", + "line": 302, + "before": "das Gegenteil von Montag", + "after": "das Gegenteil von Dienstag" +} +``` + +### `phase.transition` +```json +{ + "from": "plan", + "to": "do", + "artifacts_so_far": ["research.md", "outline.md"], + "notes": "Explorer recommended direction C, Creator produced 6-scene outline" +} +``` + +### `cycle.boundary` +```json +{ + "cycle": 1, + "max_cycles": 2, + "exit_condition": "all_approved", + "met": true, + "fixes_applied": 6, + "next_action": "complete" +} +``` + +### `shadow.detected` +```json +{ + "archetype": "story-explorer", + "shadow": "endless_research", + "trigger": "output >2000 words without recommendation", + "action": "correction_prompt_applied", + "occurrence": 1 +} +``` + +### `run.complete` +```json +{ + "status": "completed", + "cycles": 1, + "agents_total": 5, + "fixes_total": 6, + "shadows": 0, + "duration_ms": 1295519, + "artifacts": [ + "docs/01-der-huster-research.md", + "docs/01-der-huster-outline.md", + "stories/01-der-huster.md", + "docs/01-der-huster-guardian-review.md", + "docs/01-der-huster-sage-review.md", + "docs/01-der-huster-process.md" + ] +} +``` + +## How to Emit Events + +During orchestration, write events using this pattern: + +```bash +# Append one event to the run's JSONL file +echo '{"ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","run_id":"RUN_ID","seq":SEQ,"type":"TYPE","phase":"PHASE","agent":"AGENT","data":{...}}' >> .archeflow/events/RUN_ID.jsonl +``` + +Or use the helper script: + +```bash +./lib/archeflow-event.sh RUN_ID TYPE PHASE AGENT '{"key":"value"}' +``` + +The orchestration skill should call the event emitter at each trigger point listed in the table above. + +## Generating Reports + +After orchestration completes (or during, for live progress): + +```bash +# Generate markdown process report +./lib/archeflow-report.sh .archeflow/events/2026-04-03-der-huster.jsonl > docs/process-report.md + +# List all runs +cat .archeflow/events/index.jsonl | jq -r '[.run_id, .status, .task] | @tsv' +``` + +## Run Index + +After each `run.complete`, append a summary line to `.archeflow/events/index.jsonl`: + +```jsonl +{"run_id":"2026-04-03-der-huster","ts":"2026-04-03T16:00:00Z","task":"Write Der Huster","workflow":"kurzgeschichte","status":"completed","cycles":1,"agents":5,"fixes":6,"duration_ms":1295519} +``` + +## Integration with Existing Skills + +- **`orchestration`**: Emit events at phase transitions and after each agent +- **`shadow-detection`**: Emit `shadow.detected` when thresholds trigger +- **`autonomous-mode`**: Use `index.jsonl` for session summaries instead of separate session-log +- **`workflow-design`**: Custom workflows inherit logging automatically + +## Design Principles + +1. **Append-only.** Never modify or delete events. They are immutable facts. +2. **Self-contained.** Each event has enough context to be understood alone (no forward references). +3. **Cheap.** One `echo >>` per event. No database, no service, no dependencies. +4. **Optional.** If events dir doesn't exist, orchestration works fine without logging. Events are observation, not control flow.