feat: add automated PDCA loop, domain adapters, cost tracking, DAG renderer
- skills/run: automated PDCA execution loop with --start-from, --dry-run
- skills/artifact-routing: inter-phase artifact protocol with context injection
- skills/act-phase: structured review→fix pipeline with cycle feedback
- skills/domains: domain adapter system (writing, code, research)
- skills/cost-tracking: per-agent cost estimation, budget enforcement
- lib/archeflow-dag.sh: ASCII DAG renderer from JSONL events
- lib/archeflow-report.sh: updated with DAG section, cycle diff, --dag/--summary flags
This commit is contained in:
261
lib/archeflow-dag.sh
Executable file
261
lib/archeflow-dag.sh
Executable file
@@ -0,0 +1,261 @@
|
||||
#!/usr/bin/env bash
# archeflow-dag.sh — Render an ASCII DAG from ArcheFlow JSONL events.
#
# Usage: ./lib/archeflow-dag.sh <events.jsonl> [--color] [--no-color]
#
# Reads a JSONL event file and renders the causal DAG as ASCII art.
# Each event shows: #seq description (phase) [metadata]
# Tree drawing uses Unicode box-drawing characters for branches.
#
# The rendering uses a "logical grouping" strategy: phase transitions and
# structural events appear as top-level siblings under root, with agents
# and sub-events nested beneath their phase section. This gives a readable
# timeline view while preserving DAG relationships.
#
# Requires: jq

set -euo pipefail

# Guard clauses: bad invocation, missing jq, or missing input file all abort.
(( $# >= 1 )) || { echo "Usage: $0 <events.jsonl> [--color] [--no-color]" >&2; exit 1; }

EVENT_FILE="$1"
shift

command -v jq &> /dev/null || { echo "Error: jq is required but not installed." >&2; exit 1; }

[[ -f "$EVENT_FILE" ]] || { echo "Error: Event file not found: $EVENT_FILE" >&2; exit 1; }
|
||||
|
||||
# Color support: auto-detect terminal, allow override.
# Scan remaining CLI flags for an explicit color override; later flags win.
USE_COLOR=auto
for arg in "$@"; do
  if [[ "$arg" == "--color" ]]; then
    USE_COLOR=yes
  elif [[ "$arg" == "--no-color" ]]; then
    USE_COLOR=no
  fi
done

# No override: enable color only when stdout is a terminal.
if [[ "$USE_COLOR" == "auto" ]]; then
  if [[ -t 1 ]]; then USE_COLOR=yes; else USE_COLOR=no; fi
fi

# ANSI escape sequences. When color is off, every variable is the empty
# string so interpolation sites can use them unconditionally.
if [[ "$USE_COLOR" == "yes" ]]; then
  C_RESET="\033[0m"
  C_SEQ="\033[1;37m"      # bold white for seq numbers
  C_PLAN="\033[1;34m"     # blue for plan phase
  C_DO="\033[1;32m"       # green for do phase
  C_CHECK="\033[1;33m"    # yellow for check phase
  C_ACT="\033[1;35m"      # magenta for act phase
  C_TRANS="\033[0;36m"    # cyan for phase transitions
  C_DIM="\033[0;90m"      # dim for metadata
  C_DECISION="\033[1;33m" # yellow for decisions
  C_VERDICT="\033[1;31m"  # red for verdicts
else
  C_RESET="" C_SEQ="" C_PLAN="" C_DO="" C_CHECK="" C_ACT=""
  C_TRANS="" C_DIM="" C_DECISION="" C_VERDICT=""
fi
|
||||
|
||||
# Map a PDCA phase name to its ANSI color code.
# Arguments: $1 - phase name (plan|do|check|act; anything else gets C_RESET)
# Outputs:   the color escape string (empty when colors are disabled)
phase_color() {
  local phase="$1"
  if [[ "$phase" == "plan" ]]; then
    printf "%s" "$C_PLAN"
  elif [[ "$phase" == "do" ]]; then
    printf "%s" "$C_DO"
  elif [[ "$phase" == "check" ]]; then
    printf "%s" "$C_CHECK"
  elif [[ "$phase" == "act" ]]; then
    printf "%s" "$C_ACT"
  else
    printf "%s" "$C_RESET"
  fi
}
|
||||
|
||||
# Pre-process all events with jq into a structured format for bash consumption.
# Output: one line per event, fields joined with '§' (NOT '|' — labels may
# contain pipes, spaces, and arrows):
#   seq§type§phase§agent§parents_csv§label
# Absent agent/parents fields are emitted as the sentinel "_NONE_" so the
# reader below can distinguish "missing" from "empty".
# This avoids calling jq per-event in a loop.
EVENTS_PARSED=$(jq -r '
  def mklabel:
    if .type == "run.start" then "run.start"
    elif .type == "agent.complete" then
      (.data.archetype // .agent // "unknown") + " (" + .phase + ")" +
      (if (.data.tokens // 0) > 0 then " [" + (.data.tokens | tostring) + " tok]" else "" end)
    elif .type == "decision" then
      "decision: " + (.data.what // "unknown") + " → " + (.data.chosen // "unknown")
    elif .type == "phase.transition" then
      "─── " + (.data.from // "?") + " → " + (.data.to // "?") + " ───"
    elif .type == "review.verdict" then
      (.data.archetype // .agent // "unknown") + " (" + .phase + ") → " +
      ((.data.verdict // "unknown") | ascii_upcase | gsub("_"; " "))
    elif .type == "fix.applied" then
      "fix (" + (.data.source // "unknown") + "): " + (.data.finding // "unknown")
    elif .type == "cycle.boundary" then
      "cycle " + ((.data.cycle // 0) | tostring) + "/" + ((.data.max_cycles // 0) | tostring) +
      " → " + (.data.next_action // "continue")
    elif .type == "shadow.detected" then
      "shadow: " + (.data.archetype // "unknown") + " — " + (.data.shadow // "unknown")
    elif .type == "run.complete" then
      "run.complete [" + ((.data.agents_total // .data.agents // 0) | tostring) +
      " agents, " + ((.data.fixes_total // .data.fixes // 0) | tostring) + " fixes]"
    else .type
    end;
  [.seq, .type, .phase,
   (.agent // "_NONE_"),
   (((.parent // []) | map(tostring) | join(",")) | if . == "" then "_NONE_" else . end),
   mklabel]
  | join("§")
' "$EVENT_FILE")
|
||||
|
||||
# Parse into arrays.
# EVENT_*: per-event lookup tables keyed by the event's seq number.
declare -A EVENT_TYPE EVENT_PHASE EVENT_LABEL EVENT_PARENTS
declare -A CHILDREN_OF # parent_seq -> space-separated child seqs
MAX_SEQ=0

# Split each '§'-separated record produced by the jq pre-pass above.
# NOTE(review): IFS='§' is a multi-byte UTF-8 character; bash honors it in
# UTF-8 locales, but splitting may degrade byte-wise under C/POSIX locales —
# confirm the runtime locale.
while IFS='§' read -r seq type phase agent parents label; do
  # "_NONE_" is the sentinel the jq pass emits for absent fields.
  [[ "$agent" == "_NONE_" ]] && agent=""
  [[ "$parents" == "_NONE_" ]] && parents=""
  EVENT_TYPE[$seq]="$type"
  EVENT_PHASE[$seq]="$phase"
  EVENT_LABEL[$seq]="$label"
  EVENT_PARENTS[$seq]="$parents"

  # Register parent-child relationships.
  if [[ -z "$parents" ]]; then
    # Parentless events hang off the synthetic root key 0.
    CHILDREN_OF[0]="${CHILDREN_OF[0]:-} $seq"
  else
    # Multi-parent events are registered under every listed parent.
    IFS=',' read -ra parent_arr <<< "$parents"
    for p in "${parent_arr[@]}"; do
      CHILDREN_OF[$p]="${CHILDREN_OF[$p]:-} $seq"
    done
  fi

  # Track the highest seq so later passes can iterate 1..MAX_SEQ.
  if (( seq > MAX_SEQ )); then
    MAX_SEQ=$seq
  fi
done <<< "$EVENTS_PARSED"

# Sort and deduplicate children.
# sort -un = numeric + unique; the trailing xargs trims stray whitespace.
for key in "${!CHILDREN_OF[@]}"; do
  CHILDREN_OF[$key]=$(echo "${CHILDREN_OF[$key]}" | tr ' ' '\n' | sort -un | tr '\n' ' ' | xargs)
done
|
||||
|
||||
# Determine display parent for each event.
# Strategy: structural events (phase.transition, cycle.boundary, run.complete)
# are promoted to be direct children of #1 (run.start), creating a flat
# timeline backbone. All other events use their first (lowest-numbered)
# parent for display.
declare -A DISPLAY_PARENT   # seq -> parent seq for display (0 = root)
declare -A DISPLAY_CHILDREN # parent -> ordered children for display

for ((seq_i = 1; seq_i <= MAX_SEQ; seq_i++)); do
  [[ -z "${EVENT_TYPE[$seq_i]:-}" ]] && continue
  ev_type="${EVENT_TYPE[$seq_i]}"
  parents_csv="${EVENT_PARENTS[$seq_i]:-}"

  if [[ -z "$parents_csv" ]]; then
    # Parentless: this is the root event (run.start).
    DISPLAY_PARENT[$seq_i]=0
  else
    case "$ev_type" in
      phase.transition|cycle.boundary|run.complete)
        # Structural events are promoted under run.start (#1).
        DISPLAY_PARENT[$seq_i]=1
        ;;
      *)
        # Everything else hangs under its first (lowest) parent.
        IFS=',' read -ra parr <<< "$parents_csv"
        DISPLAY_PARENT[$seq_i]="${parr[0]}"
        ;;
    esac
  fi

  dp="${DISPLAY_PARENT[$seq_i]}"
  DISPLAY_CHILDREN[$dp]="${DISPLAY_CHILDREN[$dp]:-} $seq_i"
done

# Order each display-child list numerically.
for key in "${!DISPLAY_CHILDREN[@]}"; do
  DISPLAY_CHILDREN[$key]=$(echo "${DISPLAY_CHILDREN[$key]}" | tr ' ' '\n' | sort -n | tr '\n' ' ' | xargs)
done
|
||||
|
||||
# Render the tree recursively using the display hierarchy.
# Arguments: $1 - seq number of the event to render
#            $2 - accumulated indentation prefix for this depth
#            $3 - "true" when this node is its parent's last child
# Outputs:   one line per event to stdout, then recurses into children.
# Globals:   reads EVENT_LABEL, EVENT_PHASE, EVENT_TYPE, DISPLAY_CHILDREN
#            and the C_* color variables.
render_node() {
  local seq="$1"
  local prefix="$2"
  local is_last="$3"

  local label="${EVENT_LABEL[$seq]:-unknown}"
  local phase="${EVENT_PHASE[$seq]:-}"
  local type="${EVENT_TYPE[$seq]:-}"
  local pc
  pc=$(phase_color "$phase")

  # Format seq number with padding (left-aligned in 3 columns).
  local seq_str
  seq_str=$(printf "#%-3s" "${seq}")

  # Connector: root gets none; last child gets └──, others ├──.
  local connector
  if [[ -z "$prefix" && "$seq" == "1" ]]; then
    connector=""
  elif [[ "$is_last" == "true" ]]; then
    connector="└── "
  else
    connector="├── "
  fi

  # Color the label based on type; default falls back to the phase color.
  local colored_label
  case "$type" in
    phase.transition) colored_label="${C_TRANS}${label}${C_RESET}" ;;
    decision) colored_label="${C_DECISION}${label}${C_RESET}" ;;
    review.verdict) colored_label="${C_VERDICT}${label}${C_RESET}" ;;
    *) colored_label="${pc}${label}${C_RESET}" ;;
  esac

  # %b so the \033 escapes stored in the C_* variables are interpreted.
  if [[ "$seq" == "1" ]]; then
    printf "%b\n" "${C_SEQ}#1${C_RESET} ${colored_label}"
  else
    printf "%b\n" "${prefix}${connector}${C_SEQ}${seq_str}${C_RESET}${colored_label}"
  fi

  # Render children (leaf: nothing to do).
  local children="${DISPLAY_CHILDREN[$seq]:-}"
  if [[ -z "$children" ]]; then
    return
  fi

  # Intentional word-splitting: children is a space-separated seq list.
  local child_arr=($children)
  local count=${#child_arr[@]}
  local i=0

  for c in "${child_arr[@]}"; do
    i=$((i + 1))
    local child_is_last="false"
    if [[ $i -eq $count ]]; then
      child_is_last="true"
    fi

    # Children of root (#1) start at column 0; otherwise extend the prefix
    # with a 4-column pad that matches the connector width — a continuation
    # bar when more siblings follow, blanks after the last sibling.
    local child_prefix
    if [[ "$seq" == "1" ]]; then
      child_prefix=""
    elif [[ "$is_last" == "true" ]]; then
      child_prefix="${prefix}    "
    else
      child_prefix="${prefix}│   "
    fi

    render_node "$c" "$child_prefix" "$child_is_last"
  done
}
|
||||
|
||||
# Entry point: DISPLAY_CHILDREN[0] holds the top-level events; abort with an
# error if the event file produced none.
root_children="${DISPLAY_CHILDREN[0]:-}"
[[ -n "$root_children" ]] || { echo "No events found." >&2; exit 1; }

# Event #1 (run.start) is the backbone root — render the whole tree from it.
render_node 1 "" "true"
|
||||
@@ -1,26 +1,52 @@
|
||||
#!/usr/bin/env bash
|
||||
# archeflow-report.sh — Generate a Markdown process report from ArcheFlow JSONL events.
|
||||
#
|
||||
# Usage: ./lib/archeflow-report.sh <events.jsonl> [--output <file.md>]
|
||||
# Usage: ./lib/archeflow-report.sh <events.jsonl> [--output <file.md>] [--dag] [--summary]
|
||||
#
|
||||
# Reads a JSONL event file and produces a structured Markdown report showing
|
||||
# the full orchestration process: phases, decisions, reviews, fixes, metrics.
|
||||
#
|
||||
# Flags:
|
||||
# --output <file.md> Write report to file instead of stdout
|
||||
# --dag Output ONLY the ASCII DAG (for quick terminal viewing)
|
||||
# --summary Output a one-line summary (for session logs)
|
||||
#
|
||||
# Requires: jq
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
if [[ $# -lt 1 ]]; then
|
||||
echo "Usage: $0 <events.jsonl> [--output <file.md>]" >&2
|
||||
echo "Usage: $0 <events.jsonl> [--output <file.md>] [--dag] [--summary]" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
EVENT_FILE="$1"
|
||||
OUTPUT=""
|
||||
shift
|
||||
|
||||
if [[ "${2:-}" == "--output" && -n "${3:-}" ]]; then
|
||||
OUTPUT="$3"
|
||||
fi
|
||||
OUTPUT=""
|
||||
MODE="full" # full | dag | summary
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--output)
|
||||
OUTPUT="${2:-}"
|
||||
shift 2
|
||||
;;
|
||||
--dag)
|
||||
MODE="dag"
|
||||
shift
|
||||
;;
|
||||
--summary)
|
||||
MODE="summary"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if ! command -v jq &> /dev/null; then
|
||||
echo "Error: jq is required but not installed." >&2
|
||||
@@ -45,7 +71,74 @@ TASK=$(echo "$RUN_START" | jq -r '.data.task // "unknown"')
|
||||
WORKFLOW=$(echo "$RUN_START" | jq -r '.data.workflow // "unknown"')
|
||||
TEAM=$(echo "$RUN_START" | jq -r '.data.team // "unknown"')
|
||||
|
||||
# Generate report
|
||||
# --summary mode: one-line output and exit
|
||||
if [[ "$MODE" == "summary" ]]; then
|
||||
if [[ -n "$RUN_COMPLETE" ]]; then
|
||||
STATUS=$(echo "$RUN_COMPLETE" | jq -r '.data.status // "unknown"')
|
||||
CYCLES=$(echo "$RUN_COMPLETE" | jq -r '.data.cycles // "?"')
|
||||
# Handle both agents_total and agents field names
|
||||
AGENTS=$(echo "$RUN_COMPLETE" | jq -r '.data.agents_total // .data.agents // "?"')
|
||||
FIXES=$(echo "$RUN_COMPLETE" | jq -r '.data.fixes_total // .data.fixes // "?"')
|
||||
DURATION_MS=$(echo "$RUN_COMPLETE" | jq -r '.data.duration_ms // "0"')
|
||||
if [[ "$DURATION_MS" != "0" && "$DURATION_MS" != "null" ]]; then
|
||||
DURATION_MIN=$(( DURATION_MS / 60000 ))
|
||||
echo "[${STATUS}] ${TASK} — ${CYCLES} cycles, ${AGENTS} agents, ${FIXES} fixes (~${DURATION_MIN}min) [${RUN_ID}]"
|
||||
else
|
||||
echo "[${STATUS}] ${TASK} — ${CYCLES} cycles, ${AGENTS} agents, ${FIXES} fixes [${RUN_ID}]"
|
||||
fi
|
||||
else
|
||||
echo "[in-progress] ${TASK} [${RUN_ID}]"
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --dag mode: output DAG and exit
|
||||
if [[ "$MODE" == "dag" ]]; then
|
||||
if [[ -x "${SCRIPT_DIR}/archeflow-dag.sh" ]]; then
|
||||
"${SCRIPT_DIR}/archeflow-dag.sh" "$EVENT_FILE" "$@"
|
||||
else
|
||||
echo "Error: archeflow-dag.sh not found at ${SCRIPT_DIR}/archeflow-dag.sh" >&2
|
||||
exit 1
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Full report mode ---
|
||||
|
||||
# Collect cycle data for cycle diff section.
# NOTE(review): events_of_type is defined earlier in this file (outside this
# view); presumably it filters $EVENT_FILE by event .type — confirm.
CYCLE_BOUNDARIES=$(events_of_type "cycle.boundary" | jq -r '.data.cycle' 2>/dev/null || true)
# Count how many cycle.boundary events carried a numeric cycle value.
CYCLE_COUNT=0
if [[ -n "$CYCLE_BOUNDARIES" ]]; then
  # grep -c exits non-zero on zero matches; '|| true' keeps set -e happy.
  CYCLE_COUNT=$(echo "$CYCLE_BOUNDARIES" | grep -c '[0-9]' 2>/dev/null || true)
  CYCLE_COUNT=${CYCLE_COUNT:-0}
fi
|
||||
|
||||
# Collect review findings per cycle for diff
|
||||
# A cycle's reviews are between two cycle.boundary events (or between start and first boundary)
|
||||
# Collect review findings per cycle for diff.
# A cycle's reviews are between two cycle.boundary events (or between start
# and first boundary).
collect_cycle_findings() {
  # Returns a JSON array of {seq, archetype, verdict, findings[], cycle} for
  # all review.verdict events in the run.
  # Globals: reads $EVENT_FILE. Requires jq.
  # Cycle assignment: a verdict before the first boundary is cycle 1; a
  # verdict after boundary index k (0-based) is cycle k+2.
  jq -s '
    # Assign cycle number to each event based on cycle.boundary positions
    (
      [.[] | select(.type == "cycle.boundary") | .seq] | sort
    ) as $boundaries |
    [.[] | select(.type == "review.verdict")] |
    [.[] | {
      seq: .seq,
      archetype: (.data.archetype // .agent // "unknown"),
      verdict: .data.verdict,
      findings: (.data.findings // []),
      cycle: (
        .seq as $s |
        if ($boundaries | length) == 0 then 1
        else
          ([1] + [$boundaries | to_entries[] | select(.value < $s) | .key + 2] | max)
        end
      )
    }]
  ' "$EVENT_FILE"
}
|
||||
|
||||
generate_report() {
|
||||
cat <<HEADER
|
||||
# Process Report: ${TASK}
|
||||
@@ -63,11 +156,17 @@ HEADER
|
||||
if [[ -n "$RUN_COMPLETE" ]]; then
|
||||
STATUS=$(echo "$RUN_COMPLETE" | jq -r '.data.status // "unknown"')
|
||||
CYCLES=$(echo "$RUN_COMPLETE" | jq -r '.data.cycles // "?"')
|
||||
AGENTS=$(echo "$RUN_COMPLETE" | jq -r '.data.agents_total // "?"')
|
||||
FIXES=$(echo "$RUN_COMPLETE" | jq -r '.data.fixes_total // "?"')
|
||||
# Handle both agents_total and agents field names
|
||||
AGENTS=$(echo "$RUN_COMPLETE" | jq -r '.data.agents_total // .data.agents // "?"')
|
||||
FIXES=$(echo "$RUN_COMPLETE" | jq -r '.data.fixes_total // .data.fixes // "?"')
|
||||
SHADOWS=$(echo "$RUN_COMPLETE" | jq -r '.data.shadows // "0"')
|
||||
DURATION_MS=$(echo "$RUN_COMPLETE" | jq -r '.data.duration_ms // "0"')
|
||||
DURATION_MIN=$(( DURATION_MS / 60000 ))
|
||||
if [[ "$DURATION_MS" != "0" && "$DURATION_MS" != "null" ]]; then
|
||||
DURATION_MIN=$(( DURATION_MS / 60000 ))
|
||||
DURATION_DISPLAY="~${DURATION_MIN} min"
|
||||
else
|
||||
DURATION_DISPLAY="n/a"
|
||||
fi
|
||||
|
||||
cat <<TABLE
|
||||
| Field | Value |
|
||||
@@ -77,7 +176,7 @@ HEADER
|
||||
| **Agents** | ${AGENTS} |
|
||||
| **Fixes** | ${FIXES} |
|
||||
| **Shadows** | ${SHADOWS} |
|
||||
| **Duration** | ~${DURATION_MIN} min |
|
||||
| **Duration** | ${DURATION_DISPLAY} |
|
||||
|
||||
TABLE
|
||||
fi
|
||||
@@ -95,6 +194,21 @@ TABLE
|
||||
echo "---"
|
||||
echo ""
|
||||
|
||||
# Process Flow (DAG)
|
||||
echo "## Process Flow"
|
||||
echo ""
|
||||
echo '```'
|
||||
if [[ -x "${SCRIPT_DIR}/archeflow-dag.sh" ]]; then
|
||||
"${SCRIPT_DIR}/archeflow-dag.sh" "$EVENT_FILE" --no-color
|
||||
else
|
||||
echo "(DAG renderer not available)"
|
||||
fi
|
||||
echo '```'
|
||||
echo ""
|
||||
|
||||
echo "---"
|
||||
echo ""
|
||||
|
||||
# Phase sections — iterate through phase transitions
|
||||
echo "## Phases"
|
||||
echo ""
|
||||
@@ -183,7 +297,7 @@ TABLE
|
||||
SHADOW=$(echo "$event" | jq -r '.data.shadow // "unknown"')
|
||||
ACTION=$(echo "$event" | jq -r '.data.action // "unknown"')
|
||||
|
||||
echo "- **Shadow** ⚠️ ${ARCHETYPE}: ${SHADOW} → ${ACTION}"
|
||||
echo "- **Shadow** ${ARCHETYPE}: ${SHADOW} → ${ACTION}"
|
||||
echo ""
|
||||
;;
|
||||
|
||||
@@ -203,6 +317,65 @@ TABLE
|
||||
|
||||
done < "$EVENT_FILE"
|
||||
|
||||
# Cycle Comparison section (only if multiple cycles detected)
|
||||
if [[ "$CYCLE_COUNT" -ge 2 ]]; then
|
||||
echo ""
|
||||
echo "---"
|
||||
echo ""
|
||||
echo "## Cycle Comparison"
|
||||
echo ""
|
||||
|
||||
# Collect all review findings with cycle assignment
|
||||
CYCLE_FINDINGS=$(collect_cycle_findings)
|
||||
|
||||
# Get unique cycle numbers
|
||||
CYCLE_NUMS=$(echo "$CYCLE_FINDINGS" | jq -r '[.[].cycle] | unique | .[]')
|
||||
|
||||
# Compare consecutive cycles
|
||||
PREV_CYCLE=""
|
||||
for CURR_CYCLE in $CYCLE_NUMS; do
|
||||
if [[ -n "$PREV_CYCLE" ]]; then
|
||||
echo "### Cycle ${PREV_CYCLE} → Cycle ${CURR_CYCLE}"
|
||||
echo ""
|
||||
|
||||
# Get findings for each cycle as JSON arrays
|
||||
PREV_FINDINGS=$(echo "$CYCLE_FINDINGS" | jq --argjson c "$PREV_CYCLE" \
|
||||
'[.[] | select(.cycle == $c) | .findings[] | {desc: .description, sev: .severity}]' 2>/dev/null || echo "[]")
|
||||
CURR_FINDINGS=$(echo "$CYCLE_FINDINGS" | jq --argjson c "$CURR_CYCLE" \
|
||||
'[.[] | select(.cycle == $c) | .findings[] | {desc: .description, sev: .severity}]' 2>/dev/null || echo "[]")
|
||||
|
||||
# Compute new, resolved, and persistent findings
|
||||
DIFF_OUTPUT=$(jq -rn --argjson prev "$PREV_FINDINGS" --argjson curr "$CURR_FINDINGS" '
|
||||
def descs: [.[].desc];
|
||||
($prev | descs) as $pd |
|
||||
($curr | descs) as $cd |
|
||||
($curr | [.[] | select(.desc as $d | $pd | all(. != $d))]) as $new |
|
||||
($prev | [.[] | select(.desc as $d | $cd | all(. != $d))]) as $resolved |
|
||||
($curr | [.[] | select(.desc as $d | $pd | any(. == $d))]) as $persistent |
|
||||
(
|
||||
(if ($new | length) > 0 then
|
||||
["**New findings:**"] + [$new[] | "- [" + .sev + "] " + .desc]
|
||||
else [] end) +
|
||||
(if ($resolved | length) > 0 then
|
||||
["", "**Resolved findings:**"] + [$resolved[] | "- [" + .sev + "] " + .desc]
|
||||
else [] end) +
|
||||
(if ($persistent | length) > 0 then
|
||||
["", "**Persistent findings:**"] + [$persistent[] | "- [" + .sev + "] " + .desc]
|
||||
else [] end)
|
||||
) | .[]
|
||||
' 2>/dev/null || true)
|
||||
|
||||
if [[ -n "$DIFF_OUTPUT" ]]; then
|
||||
echo "$DIFF_OUTPUT"
|
||||
else
|
||||
echo "(No findings to compare)"
|
||||
fi
|
||||
echo ""
|
||||
fi
|
||||
PREV_CYCLE="$CURR_CYCLE"
|
||||
done
|
||||
fi
|
||||
|
||||
# Artifacts list from run.complete
|
||||
if [[ -n "$RUN_COMPLETE" ]]; then
|
||||
echo ""
|
||||
|
||||
369
skills/act-phase/SKILL.md
Normal file
369
skills/act-phase/SKILL.md
Normal file
@@ -0,0 +1,369 @@
|
||||
---
|
||||
name: act-phase
|
||||
description: |
|
||||
Use after the Check phase completes. Collects reviewer findings, prioritizes them, routes fixes to the right agent or tool, applies fixes systematically, and decides whether to exit or cycle.
|
||||
<example>Automatically loaded during orchestration after Check phase</example>
|
||||
<example>User: "Run just the act phase on existing findings"</example>
|
||||
---
|
||||
|
||||
# Act Phase
|
||||
|
||||
After all reviewers complete, the Act phase turns findings into fixes and decides whether the cycle is done. This is the bridge between "what's wrong" and "what we do about it."
|
||||
|
||||
## Overview
|
||||
|
||||
```
|
||||
Check phase output → Collect → Prioritize → Route → Fix → Verify → Exit or Cycle
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Finding Collection
|
||||
|
||||
Parse all reviewer outputs into one consolidated findings table. Use the standardized format from the `check-phase` skill.
|
||||
|
||||
```markdown
|
||||
## Findings Summary — Cycle N
|
||||
|
||||
### CRITICAL (must fix before next cycle)
|
||||
| # | Source | Location | Category | Description | Suggested Fix |
|
||||
|---|--------|----------|----------|-------------|---------------|
|
||||
| 1 | guardian | src/auth/handler.ts:48 | security | Empty string bypasses validation | Add length check |
|
||||
| 2 | trickster | src/api/parse.ts:92 | reliability | Null input causes crash | Guard with null check |
|
||||
|
||||
### WARNING (should fix)
|
||||
| # | Source | Location | Category | Description | Suggested Fix |
|
||||
|---|--------|----------|----------|-------------|---------------|
|
||||
| 3 | sage | tests/auth.test.ts:15 | testing | Test names don't describe behavior | Rename to "should reject expired tokens" |
|
||||
| 4 | guardian | src/auth/handler.ts:52 | security | Missing rate limit | Add rate limiter middleware |
|
||||
|
||||
### INFO (nice to have)
|
||||
| # | Source | Location | Category | Description | Suggested Fix |
|
||||
|---|--------|----------|----------|-------------|---------------|
|
||||
| 5 | skeptic | src/auth/handler.ts:30 | design | Consider caching validated tokens | Add TTL cache |
|
||||
```
|
||||
|
||||
### Deduplication
|
||||
|
||||
Before listing findings, deduplicate across reviewers (same rule as `check-phase`):
|
||||
- Same file + same category + similar description = one finding
|
||||
- Use the higher severity
|
||||
- Credit all sources: `guardian + skeptic`
|
||||
- Don't double-count in severity tallies
|
||||
|
||||
### Cross-Cycle Tracking
|
||||
|
||||
Compare against prior cycle findings (if cycle > 1):
|
||||
- **Resolved:** Finding from cycle N-1 no longer present → mark resolved, do not re-raise
|
||||
- **Persisting:** Same location + category still present → increment `cycle_count`
|
||||
- **New:** Finding not seen before → add with `cycle_count: 1`
|
||||
|
||||
If a finding persists for 2+ consecutive cycles, flag for user escalation (see Step 5).
|
||||
|
||||
---
|
||||
|
||||
## Step 2: Fix Routing
|
||||
|
||||
Not all findings are fixed the same way. Route each finding based on its nature:
|
||||
|
||||
| Category | Fix Route | Rationale |
|
||||
|----------|-----------|-----------|
|
||||
| `security` | Spawn Maker with targeted instructions | Security fixes need tested code changes |
|
||||
| `reliability` | Spawn Maker with targeted instructions | Same — code-level fix with test |
|
||||
| `breaking-change` | Route to Creator in next cycle | Design decision needed |
|
||||
| `design` | Route to Creator in next cycle | Architecture change, not a patch |
|
||||
| `dependency` | Spawn Maker with targeted instructions | Package update or removal |
|
||||
| `quality` | Spawn Maker or apply directly | Depends on scope (see below) |
|
||||
| `testing` | Spawn Maker with targeted instructions | Tests need to be written and run |
|
||||
| `consistency` | Apply directly or spawn Maker | Naming/style → direct. Pattern change → Maker |
|
||||
|
||||
### Direct Fix (no agent)
|
||||
|
||||
Apply directly with Edit tool when **all** of these are true:
|
||||
- The fix is mechanical (typo, naming, formatting, import order)
|
||||
- No behavioral change
|
||||
- No test update needed
|
||||
- Exactly one file affected
|
||||
|
||||
Examples: rename a variable, fix a typo in a string, reorder imports, fix indentation.
|
||||
|
||||
### Maker Fix (spawn agent)
|
||||
|
||||
Spawn a targeted Maker when the fix involves:
|
||||
- Code logic changes
|
||||
- New or modified tests
|
||||
- Multiple files
|
||||
- Any behavioral change
|
||||
|
||||
Provide the Maker with:
|
||||
1. The specific finding(s) to address (not all findings — just the routed ones)
|
||||
2. The file and line location
|
||||
3. The suggested fix from the reviewer
|
||||
4. The Maker's original branch (to apply fixes on top)
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "Fix: <finding description>",
|
||||
prompt: "You are the MAKER archetype.
|
||||
Apply this fix on branch: <maker's branch>
|
||||
|
||||
Finding: <source> | <severity> | <category>
|
||||
Location: <file:line>
|
||||
Issue: <description>
|
||||
Suggested fix: <fix>
|
||||
|
||||
Rules:
|
||||
1. Fix ONLY this issue — no other changes
|
||||
2. Add/update tests if the fix changes behavior
|
||||
3. Run existing tests — nothing may break
|
||||
4. Commit with message: 'fix: <description>'
|
||||
Do NOT refactor surrounding code.",
|
||||
isolation: "worktree",
|
||||
mode: "bypassPermissions"
|
||||
)
|
||||
```
|
||||
|
||||
### Writing/Prose Fix (domain-specific)
|
||||
|
||||
For writing projects (books, stories), voice or prose findings need special context:
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "Fix: voice drift in <file>",
|
||||
prompt: "You are the MAKER archetype.
|
||||
Apply this prose fix on branch: <maker's branch>
|
||||
|
||||
Finding: <source> | <severity> | <category>
|
||||
Location: <file:line>
|
||||
Issue: <description>
|
||||
|
||||
Voice profile to match: <load from .archeflow/config.yaml or project voice profile>
|
||||
|
||||
Rules:
|
||||
1. Fix the flagged passage to match the voice profile
|
||||
2. Do not rewrite surrounding paragraphs
|
||||
3. Preserve the narrative intent — only change voice/style
|
||||
4. Commit with message: 'fix: <description>'",
|
||||
isolation: "worktree",
|
||||
mode: "bypassPermissions"
|
||||
)
|
||||
```
|
||||
|
||||
### Design Fix (route to next cycle)
|
||||
|
||||
Findings that require design changes are NOT fixed in the Act phase. They become structured feedback for the Creator in the next PDCA cycle. Collect them into `act-feedback.md` (see Step 5).
|
||||
|
||||
---
|
||||
|
||||
## Step 3: Fix Application Protocol
|
||||
|
||||
Apply fixes in severity order: CRITICAL first, then WARNING, then INFO. Within the same severity, fix in file order (reduces context switching).
|
||||
|
||||
### For each fix:
|
||||
|
||||
1. **Apply the change** (direct edit or via Maker agent)
|
||||
2. **Emit `fix.applied` event:**
|
||||
```json
|
||||
{
|
||||
"type": "fix.applied",
|
||||
"phase": "act",
|
||||
"agent": "maker",
|
||||
"data": {
|
||||
"source": "guardian",
|
||||
"finding": "Empty string bypasses validation",
|
||||
"file": "src/auth/handler.ts",
|
||||
"line": 48,
|
||||
"severity": "CRITICAL",
|
||||
"before": "<old code>",
|
||||
"after": "<new code>"
|
||||
},
|
||||
"parent": [<seq of the review.verdict that found it>]
|
||||
}
|
||||
```
|
||||
3. **Targeted re-check** (if the fix is non-trivial):
|
||||
- Re-run only the reviewer that raised the finding
|
||||
- Scope the re-check to just the changed file(s)
|
||||
- If the re-check raises new findings → add them to the findings list with source `re-check:<reviewer>`
|
||||
|
||||
### Batching Maker Fixes
|
||||
|
||||
If multiple findings route to the same Maker and affect the same file or tightly coupled files, batch them into a single Maker spawn:
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "Fix: 3 findings in src/auth/",
|
||||
prompt: "You are the MAKER archetype.
|
||||
Apply these fixes on branch: <maker's branch>
|
||||
|
||||
1. [CRITICAL] src/auth/handler.ts:48 — Empty string bypass → Add length check
|
||||
2. [WARNING] src/auth/handler.ts:52 — Missing rate limit → Add middleware
|
||||
3. [WARNING] tests/auth.test.ts:15 — Bad test names → Rename to behavior descriptions
|
||||
|
||||
Fix all three. Commit each as a separate commit.
|
||||
Run tests after all fixes."
|
||||
)
|
||||
```
|
||||
|
||||
Batch only within the same functional area. Don't batch unrelated fixes — the Maker loses focus.
|
||||
|
||||
---
|
||||
|
||||
## Step 4: Exit Decision
|
||||
|
||||
After all fixes are applied, evaluate exit conditions:
|
||||
|
||||
### Decision Tree
|
||||
|
||||
```
|
||||
┌─ Count remaining CRITICAL findings (including from re-checks)
|
||||
│
|
||||
├─ CRITICAL = 0 AND completion criteria met (if defined)
|
||||
│ └─ EXIT: Proceed to merge
|
||||
│
|
||||
├─ CRITICAL = 0 AND completion criteria NOT met
|
||||
│ └─ CYCLE: Feed back "completion criteria failing" to Creator
|
||||
│
|
||||
├─ CRITICAL > 0 AND cycles_remaining > 0
|
||||
│ └─ CYCLE: Build feedback, go to Plan phase
|
||||
│
|
||||
├─ CRITICAL > 0 AND cycles_remaining = 0
|
||||
│ └─ STOP: Report to user with unresolved findings
|
||||
│
|
||||
└─ Same CRITICAL finding persisted 2+ cycles
|
||||
└─ ESCALATE: Stop and ask user for guidance
|
||||
```
|
||||
|
||||
### Emit `cycle.boundary` event:
|
||||
```json
|
||||
{
|
||||
"type": "cycle.boundary",
|
||||
"phase": "act",
|
||||
"data": {
|
||||
"cycle": 1,
|
||||
"max_cycles": 2,
|
||||
"exit_condition": "all_approved",
|
||||
"met": false,
|
||||
"critical_remaining": 1,
|
||||
"warning_remaining": 2,
|
||||
"info_remaining": 1,
|
||||
"fixes_applied": 3,
|
||||
"design_issues_forwarded": 1,
|
||||
"next_action": "cycle"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Step 5: Cycle Feedback Protocol
|
||||
|
||||
When cycling back, produce `act-feedback.md` as a structured handoff. This replaces dumping raw findings.
|
||||
|
||||
```markdown
|
||||
## Cycle N Feedback → Cycle N+1
|
||||
|
||||
### For Creator (design changes needed)
|
||||
| # | Source | Severity | Category | Issue | Cycles Open |
|
||||
|---|--------|----------|----------|-------|-------------|
|
||||
| 1 | guardian | CRITICAL | security | SQL injection in user input | 1 |
|
||||
| 2 | skeptic | WARNING | design | Assumes single-tenant only | 1 |
|
||||
|
||||
### For Maker (implementation fixes needed)
|
||||
| # | Source | Severity | Category | Issue | Cycles Open |
|
||||
|---|--------|----------|----------|-------|-------------|
|
||||
| 3 | sage | WARNING | testing | Test assertions too weak | 1 |
|
||||
| 4 | trickster | WARNING | reliability | Error path not tested | 1 |
|
||||
|
||||
### Resolved in This Cycle
|
||||
| # | Source | Issue | How Resolved |
|
||||
|---|--------|-------|--------------|
|
||||
| 5 | guardian | Missing rate limit | Added rate limiter middleware (commit abc123) |
|
||||
| 6 | sage | Test names unclear | Renamed to behavior descriptions (commit def456) |
|
||||
|
||||
### Persisting Issues (escalation candidates)
|
||||
| # | Source | Issue | Cycles Open | Action |
|
||||
|---|--------|-------|-------------|--------|
|
||||
| — | — | — | — | — |
|
||||
```
|
||||
|
||||
**Routing rules** (same as orchestration skill, repeated here for self-containment):
|
||||
|
||||
| Finding Source | Routes to | When |
|
||||
|----------------|-----------|------|
|
||||
| Guardian (security, breaking-change) | Creator | Design must change |
|
||||
| Skeptic (design, scalability) | Creator | Assumptions need revision |
|
||||
| Sage (quality, consistency) | Maker | Implementation refinement |
|
||||
| Sage (design) | Creator | If it's an architectural concern |
|
||||
| Trickster (reliability) | Creator | If root cause is a design flaw |
|
||||
| Trickster (testing) | Maker | If root cause is a test gap |
|
||||
|
||||
When in doubt about routing: if the fix requires changing the approach, route to Creator. If the fix requires changing the code within the existing approach, route to Maker.
|
||||
|
||||
---
|
||||
|
||||
## Step 6: Incremental Runs
|
||||
|
||||
Support starting the orchestration from any phase by reusing existing artifacts.
|
||||
|
||||
### `--start-from check`
|
||||
|
||||
Re-run Check + Act on existing Do artifacts:
|
||||
1. Read `.archeflow/artifacts/<run_id>/` for Maker branch and implementation summary
|
||||
2. Verify the Maker branch still exists (`git branch --list`)
|
||||
3. Spawn reviewers against the existing branch
|
||||
4. Proceed through Act phase normally
|
||||
|
||||
### `--start-from act`
|
||||
|
||||
Re-run Act with existing Check findings:
|
||||
1. Read `.archeflow/artifacts/<run_id>/` for Check phase consolidated output
|
||||
2. Parse findings from the stored reviewer outputs
|
||||
3. Skip finding collection (already done) — proceed from Step 2 (Fix Routing)
|
||||
|
||||
### `--start-from do`
|
||||
|
||||
Re-run Do + Check + Act with existing Plan:
|
||||
1. Read `.archeflow/artifacts/<run_id>/` for Creator's proposal
|
||||
2. Verify proposal exists and is parseable
|
||||
3. Spawn Maker with the existing proposal
|
||||
4. Proceed through Check and Act normally
|
||||
|
||||
### Artifact Verification
|
||||
|
||||
Before starting from a mid-point, verify required artifacts exist:
|
||||
|
||||
```
|
||||
--start-from do → needs: proposal (Creator output)
|
||||
--start-from check → needs: proposal + implementation (Maker branch + summary)
|
||||
--start-from act → needs: proposal + implementation + review outputs
|
||||
```
|
||||
|
||||
If artifacts are missing, report which ones and abort. Don't guess or generate placeholders.
|
||||
|
||||
### Event Continuity
|
||||
|
||||
For incremental runs, emit events with `parent` pointing to the existing artifacts' events:
|
||||
1. Read the existing `<run_id>.jsonl` to find the last `seq` number
|
||||
2. Continue sequence numbering from there
|
||||
3. Set `parent` on the first new event to point to the last event of the prior phase
|
||||
|
||||
---
|
||||
|
||||
## Act Phase Checklist (Quick Reference)
|
||||
|
||||
```
|
||||
□ Parse all reviewer outputs into consolidated findings table
|
||||
□ Deduplicate across reviewers
|
||||
□ Compare against prior cycle findings (if cycle > 1)
|
||||
□ Route each finding: direct fix / Maker / Creator feedback
|
||||
□ Apply direct fixes first (fastest)
|
||||
□ Spawn Maker(s) for code fixes (batch by file area)
|
||||
□ Emit fix.applied event for each fix
|
||||
□ Re-check non-trivial fixes with the originating reviewer
|
||||
□ Count remaining CRITICALs after all fixes
|
||||
□ Check completion criteria (if defined)
|
||||
□ Decide: exit / cycle / escalate
|
||||
□ If cycling: produce act-feedback.md with routed findings
|
||||
□ If exiting: proceed to merge (see orchestration skill Step 4)
|
||||
□ Emit cycle.boundary event
|
||||
```
|
||||
285
skills/artifact-routing/SKILL.md
Normal file
285
skills/artifact-routing/SKILL.md
Normal file
@@ -0,0 +1,285 @@
|
||||
---
|
||||
name: artifact-routing
|
||||
description: |
|
||||
Inter-phase artifact protocol for ArcheFlow runs. Defines how artifacts are named, stored,
|
||||
routed between agents, and archived across PDCA cycles. Ensures each agent receives exactly
|
||||
the context it needs — no more, no less.
|
||||
<example>Automatically loaded by archeflow:run</example>
|
||||
<example>User: "What does the Maker receive as context?"</example>
|
||||
---
|
||||
|
||||
# Artifact Routing — Inter-Phase Context Protocol
|
||||
|
||||
Every ArcheFlow run produces artifacts — research notes, proposals, diffs, reviews, feedback. This skill defines how those artifacts are named, where they live, what each agent receives, and how they are preserved across cycles.
|
||||
|
||||
## Artifact Directory Structure
|
||||
|
||||
```
|
||||
.archeflow/artifacts/<run_id>/
|
||||
├── plan-explorer.md # Explorer research output
|
||||
├── plan-creator.md # Creator proposal/outline
|
||||
├── do-maker.md # Maker implementation summary
|
||||
├── do-maker-files.txt # List of files created/modified (one path per line)
|
||||
├── check-guardian.md # Guardian review verdict + findings
|
||||
├── check-sage.md # Sage review (if present)
|
||||
├── check-skeptic.md # Skeptic review (if present)
|
||||
├── check-trickster.md # Trickster review (if present)
|
||||
├── act-feedback.md # Structured feedback for next cycle (Cycle Feedback Protocol)
|
||||
├── act-fixes.jsonl # Applied fixes log (one JSON line per fix)
|
||||
├── cycle-1/ # Archived artifacts from cycle 1
|
||||
│ ├── plan-explorer.md
|
||||
│ ├── plan-creator.md
|
||||
│ ├── do-maker.md
|
||||
│ ├── do-maker-files.txt
|
||||
│ ├── check-guardian.md
|
||||
│ ├── check-sage.md
|
||||
│ └── act-feedback.md
|
||||
└── cycle-2/ # Archived artifacts from cycle 2 (if cycle 3 starts)
|
||||
└── ...
|
||||
```
|
||||
|
||||
## Naming Convention
|
||||
|
||||
Artifacts follow the pattern: `<phase>-<agent>.<ext>`
|
||||
|
||||
| Phase | Agent | Filename | Format |
|
||||
|-------|-------|----------|--------|
|
||||
| plan | explorer | `plan-explorer.md` | Markdown research report |
|
||||
| plan | creator | `plan-creator.md` | Markdown proposal with confidence scores |
|
||||
| do | maker | `do-maker.md` | Markdown implementation summary |
|
||||
| do | maker | `do-maker-files.txt` | Plain text, one file path per line |
|
||||
| check | guardian | `check-guardian.md` | Markdown verdict + findings table |
|
||||
| check | sage | `check-sage.md` | Markdown verdict + findings table |
|
||||
| check | skeptic | `check-skeptic.md` | Markdown verdict + findings table |
|
||||
| check | trickster | `check-trickster.md` | Markdown verdict + findings table |
|
||||
| act | (orchestrator) | `act-feedback.md` | Structured feedback (see Cycle Feedback Protocol) |
|
||||
| act | (orchestrator) | `act-fixes.jsonl` | JSONL fix log |
|
||||
|
||||
**Rule:** Never invent new artifact names during a run. If a reviewer is skipped (A2 fast-path, reviewer profile), its artifact simply does not exist. Downstream phases check for file existence before reading.
|
||||
|
||||
---
|
||||
|
||||
## Context Injection Rules
|
||||
|
||||
Each agent receives a filtered subset of artifacts. This is the **attention filter** — it controls what context is injected into the agent's prompt.
|
||||
|
||||
### Plan Phase
|
||||
|
||||
| Agent | Receives | Does NOT receive |
|
||||
|-------|----------|-----------------|
|
||||
| **Explorer** | Task description, relevant file paths, codebase access | Prior proposals, review outputs, implementation details |
|
||||
| **Creator** (cycle 1) | Task description, `plan-explorer.md` (if exists) | Raw file contents (Explorer summarized them), git diffs |
|
||||
| **Creator** (cycle 2+) | Task description, `plan-explorer.md`, `act-feedback.md` (Creator-routed findings only) | Raw reviewer outputs, Maker-routed findings |
|
||||
|
||||
**Creator context injection template (cycle 2+):**
|
||||
```markdown
|
||||
## Task
|
||||
<task description>
|
||||
|
||||
## Research (from Explorer)
|
||||
<contents of plan-explorer.md>
|
||||
|
||||
## Feedback from Prior Cycle
|
||||
<Creator-routed section of act-feedback.md only>
|
||||
|
||||
Note: Address each unresolved issue listed above. Explain how your revised proposal resolves it.
|
||||
```
|
||||
|
||||
### Do Phase
|
||||
|
||||
| Agent | Receives | Does NOT receive |
|
||||
|-------|----------|-----------------|
|
||||
| **Maker** (cycle 1) | `plan-creator.md` (the proposal) | `plan-explorer.md`, reviewer outputs, raw task description |
|
||||
| **Maker** (cycle 2+) | `plan-creator.md`, Maker-routed findings from `act-feedback.md` | Explorer research, Guardian/Skeptic findings (those went to Creator) |
|
||||
|
||||
**Maker context injection template (cycle 2+):**
|
||||
```markdown
|
||||
## Proposal
|
||||
<contents of plan-creator.md>
|
||||
|
||||
## Implementation Feedback from Prior Cycle
|
||||
<Maker-routed section of act-feedback.md only>
|
||||
|
||||
Note: The proposal has been revised to address design-level issues. Focus on the implementation
|
||||
feedback items above (code quality, test gaps, consistency).
|
||||
```
|
||||
|
||||
**Why Maker doesn't get Explorer output:** The Creator already distilled Explorer's research into a concrete proposal. Giving Maker raw research causes scope creep and "Rogue" shadow activation.
|
||||
|
||||
### Check Phase
|
||||
|
||||
| Agent | Receives | Does NOT receive |
|
||||
|-------|----------|-----------------|
|
||||
| **Guardian** | Maker's git diff, risk section from `plan-creator.md` | Full proposal, Explorer research, other reviewer outputs |
|
||||
| **Skeptic** | `plan-creator.md` (assumptions focus) | Git diff details, Explorer research, other reviewer outputs |
|
||||
| **Sage** | `plan-creator.md`, Maker's git diff, `do-maker.md` | Explorer research, other reviewer outputs |
|
||||
| **Trickster** | Maker's git diff only | Everything else |
|
||||
|
||||
**Guardian context injection template:**
|
||||
```markdown
|
||||
## Changes to Review
|
||||
<git diff from Maker's branch>
|
||||
|
||||
## Risk Assessment (from proposal)
|
||||
<risks section extracted from plan-creator.md>
|
||||
|
||||
Review these changes for security, reliability, breaking changes, and dependency risks.
|
||||
```
|
||||
|
||||
**Skeptic context injection template:**
|
||||
```markdown
|
||||
## Proposal to Challenge
|
||||
<contents of plan-creator.md>
|
||||
|
||||
Focus on assumptions, alternatives not considered, edge cases, and scalability.
|
||||
```
|
||||
|
||||
**Sage context injection template:**
|
||||
```markdown
|
||||
## Proposal
|
||||
<contents of plan-creator.md>
|
||||
|
||||
## Implementation Summary
|
||||
<contents of do-maker.md>
|
||||
|
||||
## Changes
|
||||
<git diff from Maker's branch>
|
||||
|
||||
Evaluate code quality, test coverage, documentation, and codebase consistency.
|
||||
```
|
||||
|
||||
**Trickster context injection template:**
|
||||
```markdown
|
||||
## Changes to Attack
|
||||
<git diff from Maker's branch>
|
||||
|
||||
Try to break this. Malformed input, boundaries, concurrency, error paths, dependency failures.
|
||||
```
|
||||
|
||||
### Act Phase
|
||||
|
||||
No reviewer agents are spawned in Act. The orchestrator reads all `check-*.md` artifacts directly (Makers may still be spawned to apply routed fixes — see the act-phase skill).
|
||||
|
||||
---
|
||||
|
||||
## Feedback Routing
|
||||
|
||||
When building `act-feedback.md` after the Check phase, route each finding to the right agent for the next cycle:
|
||||
|
||||
| Finding Source | Finding Category | Routes To | Rationale |
|
||||
|---------------|-----------------|-----------|-----------|
|
||||
| Guardian | security, breaking-change | **Creator** | Design must change |
|
||||
| Guardian | reliability, dependency | **Creator** | Architectural decision needed |
|
||||
| Skeptic | design, scalability | **Creator** | Assumptions need revision |
|
||||
| Sage | quality, consistency | **Maker** | Implementation refinement |
|
||||
| Sage | testing | **Maker** | Test gap, not design flaw |
|
||||
| Trickster | reliability (design flaw) | **Creator** | Needs redesign |
|
||||
| Trickster | reliability (test gap) | **Maker** | Needs more tests |
|
||||
| Trickster | testing | **Maker** | Edge case not covered |
|
||||
|
||||
**Ambiguous cases:** If a Trickster finding could be either a design flaw or a test gap, check: does the fix require changing the proposal's architecture/approach, or just adding a test/validation? Architecture change → Creator. Additional test → Maker.
|
||||
|
||||
### Feedback File Format
|
||||
|
||||
`act-feedback.md` is split into two routed sections — Creator-Routed and Maker-Routed — plus resolution-tracking sections, so each agent can be given only its portion:
|
||||
|
||||
```markdown
|
||||
# Cycle <N> Feedback
|
||||
|
||||
## Creator-Routed Issues
|
||||
| # | Source | Severity | Category | Issue | Suggested Fix |
|
||||
|---|--------|----------|----------|-------|---------------|
|
||||
| 1 | Guardian | CRITICAL | security | SQL injection in user input | Add parameterized queries |
|
||||
| 2 | Skeptic | WARNING | design | Assumes single-tenant only | Add tenant isolation |
|
||||
|
||||
## Maker-Routed Issues
|
||||
| # | Source | Severity | Category | Issue | Suggested Fix |
|
||||
|---|--------|----------|----------|-------|---------------|
|
||||
| 3 | Sage | WARNING | quality | Test names don't describe behavior | Rename to describe expected outcome |
|
||||
| 4 | Sage | INFO | consistency | Import order doesn't match codebase style | Re-order imports |
|
||||
|
||||
## Resolved (from prior cycles)
|
||||
| # | Source | Issue | Resolution | Resolved In |
|
||||
|---|--------|-------|------------|-------------|
|
||||
| 1 | Guardian | Missing rate limit | Added rate limiter middleware | Cycle 1 |
|
||||
|
||||
## Convergence Warnings
|
||||
<any finding that appeared unresolved in 2+ consecutive cycles — requires user input>
|
||||
```
|
||||
|
||||
When injecting feedback into Creator's prompt, include **only** the "Creator-Routed Issues" section.
|
||||
When injecting feedback into Maker's prompt, include **only** the "Maker-Routed Issues" section.
|
||||
|
||||
---
|
||||
|
||||
## Cycle Archiving
|
||||
|
||||
When a PDCA cycle completes and a new cycle begins, archive the current artifacts so they are preserved and the working directory is clean for the next iteration.
|
||||
|
||||
### Archive Procedure
|
||||
|
||||
At the end of each cycle (before starting the next):
|
||||
|
||||
```bash
|
||||
RUN_DIR=".archeflow/artifacts/${RUN_ID}"
|
||||
ARCHIVE_DIR="${RUN_DIR}/cycle-${CYCLE}"
|
||||
|
||||
mkdir -p "$ARCHIVE_DIR"
|
||||
|
||||
# Copy all phase artifacts to archive
|
||||
cp "${RUN_DIR}"/plan-*.md "$ARCHIVE_DIR/" 2>/dev/null || true
|
||||
cp "${RUN_DIR}"/do-*.md "$ARCHIVE_DIR/" 2>/dev/null || true
|
||||
cp "${RUN_DIR}"/do-*.txt "$ARCHIVE_DIR/" 2>/dev/null || true
|
||||
cp "${RUN_DIR}"/check-*.md "$ARCHIVE_DIR/" 2>/dev/null || true
|
||||
cp "${RUN_DIR}"/act-feedback.md "$ARCHIVE_DIR/" 2>/dev/null || true
|
||||
```
|
||||
|
||||
**Do NOT delete** the working-level artifacts after archiving. The next cycle's agents need `act-feedback.md` and `plan-explorer.md` (Explorer cache may reuse prior research). Old artifacts in the working directory get overwritten when the new cycle's agents produce their outputs.
|
||||
|
||||
### Archive Access
|
||||
|
||||
Archived artifacts are read-only references. Use them for:
|
||||
- **Resolution tracking:** Compare `cycle-1/check-guardian.md` findings against `cycle-2/check-guardian.md` to detect resolved/persisting issues
|
||||
- **Convergence detection:** Same finding in `cycle-N/act-feedback.md` and `cycle-N+1/act-feedback.md` → escalate to user
|
||||
- **Post-hoc analysis:** Understanding how a solution evolved across cycles
|
||||
|
||||
---
|
||||
|
||||
## Artifact Existence Checks
|
||||
|
||||
Before injecting an artifact into an agent's context, always check if the file exists. Missing artifacts are expected in certain workflows:
|
||||
|
||||
| Artifact | Missing when |
|
||||
|----------|-------------|
|
||||
| `plan-explorer.md` | Fast workflow (no Explorer) |
|
||||
| `check-skeptic.md` | Fast workflow, or A2 fast-path taken |
|
||||
| `check-sage.md` | Fast workflow, or A2 fast-path taken |
|
||||
| `check-trickster.md` | Non-thorough workflow, or A2 fast-path taken |
|
||||
| `act-feedback.md` | Cycle 1 (no prior feedback exists) |
|
||||
| `act-fixes.jsonl` | Cycle 1, or no fixes applied |
|
||||
|
||||
**Rule:** Never fail because an optional artifact is missing. Check existence, skip injection if absent, and note what was skipped in the event data.
|
||||
|
||||
---
|
||||
|
||||
## Git Diff as Artifact
|
||||
|
||||
The Maker's git diff is not saved as a file — it is generated on-the-fly from the Maker's worktree branch:
|
||||
|
||||
```bash
|
||||
git diff main...<maker-branch>
|
||||
```
|
||||
|
||||
This ensures reviewers always see the actual current diff, not a stale snapshot. The diff is injected directly into reviewer prompts, not saved to disk.
|
||||
|
||||
Exception: `do-maker-files.txt` IS saved to disk (just the file list, not the full diff) for quick reference by the orchestrator and for archiving purposes.
|
||||
|
||||
---
|
||||
|
||||
## Design Principles
|
||||
|
||||
1. **Minimal context per agent.** Each agent gets only what it needs. Over-injection causes distraction, shadow activation, and wasted tokens.
|
||||
2. **Artifacts are the handoff mechanism.** Agents never communicate directly. All inter-agent data flows through saved artifacts.
|
||||
3. **Files over memory.** Everything is on disk. If a session crashes, artifacts survive. A `--start-from` resume reads artifacts, not session state.
|
||||
4. **Overwrite, don't accumulate.** Working-level artifacts get overwritten each cycle. Archives preserve history. This keeps the working directory simple.
|
||||
5. **Check before inject.** Always verify artifact existence. Gracefully handle missing optional artifacts.
|
||||
327
skills/cost-tracking/SKILL.md
Normal file
327
skills/cost-tracking/SKILL.md
Normal file
@@ -0,0 +1,327 @@
|
||||
---
|
||||
name: cost-tracking
|
||||
description: |
|
||||
Cost aggregation, budget enforcement, and model selection for ArcheFlow orchestrations.
|
||||
Tracks per-agent and per-run token costs, enforces budgets, and recommends the cheapest
|
||||
model that meets quality requirements per archetype and domain.
|
||||
<example>User: "How much did that orchestration cost?"</example>
|
||||
<example>Automatically active when budget is configured</example>
|
||||
---
|
||||
|
||||
# Cost Tracking — Budget-Aware Orchestration
|
||||
|
||||
Every ArcheFlow orchestration consumes LLM tokens. This skill tracks costs per agent and per run, enforces budgets, and recommends cost-optimal model assignments.
|
||||
|
||||
## Model Pricing Table
|
||||
|
||||
Current pricing (update when models change):
|
||||
|
||||
| Model | Input ($/M tokens) | Output ($/M tokens) | Notes |
|
||||
|-------|--------------------:|---------------------:|-------|
|
||||
| `claude-opus-4-6` | 15.00 | 75.00 | Highest quality, use sparingly |
|
||||
| `claude-sonnet-4-6` | 3.00 | 15.00 | Good balance of quality and cost |
|
||||
| `claude-haiku-4-5` | 0.80 | 4.00 | Cheap, fast, good for structured tasks |
|
||||
|
||||
**Prompt caching** (when applicable): 90% discount on cached input tokens. The orchestrator should structure system prompts to maximize cache hits (archetype instructions, voice profiles, and domain context are cache-friendly since they repeat across agents in a run).
|
||||
|
||||
**Batches API**: 50% discount on all tokens. Use for non-time-sensitive bulk operations (validation passes, consistency checks).
|
||||
|
||||
## Per-Agent Cost Tracking
|
||||
|
||||
Every `agent.complete` event includes cost data:
|
||||
|
||||
```jsonl
|
||||
{
|
||||
"type": "agent.complete",
|
||||
"data": {
|
||||
"archetype": "story-explorer",
|
||||
"duration_ms": 87605,
|
||||
"tokens_input": 15000,
|
||||
"tokens_output": 6000,
|
||||
"tokens_cache_read": 8000,
|
||||
"model": "haiku",
|
||||
"estimated_cost_usd": 0.02,
|
||||
"summary": "3 plot directions developed, recommended C"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Cost Calculation
|
||||
|
||||
```
|
||||
cost = (tokens_input - tokens_cache_read) * input_price / 1_000_000
|
||||
+ tokens_cache_read * input_price * 0.10 / 1_000_000
|
||||
+ tokens_output * output_price / 1_000_000
|
||||
```
|
||||
|
||||
If exact token counts are unavailable (Claude Code doesn't always expose them), estimate based on character count:
|
||||
|
||||
```
|
||||
estimated_tokens = character_count / 4 # rough heuristic
|
||||
```
|
||||
|
||||
Mark estimated costs with `"cost_estimated": true` in the event data so reports can distinguish measured from estimated values.
|
||||
|
||||
## Run-Level Aggregation
|
||||
|
||||
The `run.complete` event includes cost totals:
|
||||
|
||||
```jsonl
|
||||
{
|
||||
"type": "run.complete",
|
||||
"data": {
|
||||
"status": "completed",
|
||||
"total_tokens_input": 95000,
|
||||
"total_tokens_output": 33000,
|
||||
"total_tokens_cache_read": 42000,
|
||||
"total_cost_usd": 1.45,
|
||||
"budget_usd": 10.00,
|
||||
"budget_remaining_usd": 8.55,
|
||||
"agents_total": 5,
|
||||
"cost_by_phase": {
|
||||
"plan": 0.35,
|
||||
"do": 0.72,
|
||||
"check": 0.38
|
||||
},
|
||||
"cost_by_model": {
|
||||
"haiku": 0.12,
|
||||
"sonnet": 1.33
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Cost Summary in Orchestration Report
|
||||
|
||||
After each orchestration, the report includes a cost section:
|
||||
|
||||
```markdown
|
||||
## Cost Summary
|
||||
| Phase | Model(s) | Tokens (in/out) | Cost |
|
||||
|-------|----------|-----------------|------|
|
||||
| Plan | haiku, sonnet | 32k / 12k | $0.35 |
|
||||
| Do | sonnet | 40k / 15k | $0.72 |
|
||||
| Check | haiku, sonnet | 23k / 6k | $0.38 |
|
||||
| **Total** | | **95k / 33k** | **$1.45** |
|
||||
|
||||
Budget: $10.00 | Spent: $1.45 | Remaining: $8.55
|
||||
```
|
||||
|
||||
## Budget Configuration
|
||||
|
||||
Budgets are defined in team presets or `.archeflow/config.yaml`:
|
||||
|
||||
```yaml
|
||||
# .archeflow/config.yaml
|
||||
budget:
|
||||
per_run_usd: 10.00 # Max cost per orchestration run
|
||||
per_agent_usd: 3.00 # Max cost per individual agent
|
||||
daily_usd: 50.00 # Max daily spend across all runs
|
||||
warn_at_percent: 75 # Warn when this % of budget is consumed
|
||||
```
|
||||
|
||||
```yaml
|
||||
# Team preset override
|
||||
name: story-development
|
||||
domain: writing
|
||||
budget:
|
||||
per_run_usd: 5.00 # Writing runs are usually cheaper
|
||||
```
|
||||
|
||||
Team preset budget overrides the global config for that run.
|
||||
|
||||
### Budget Precedence
|
||||
|
||||
1. Team preset `budget` (if set)
|
||||
2. `.archeflow/config.yaml` `budget`
|
||||
3. No budget (unlimited) — costs are still tracked, but no limit is enforced
|
||||
|
||||
## Budget Enforcement
|
||||
|
||||
Budget checks happen at two points:
|
||||
|
||||
### 1. Pre-Agent Check (before spawning)
|
||||
|
||||
Before each agent is spawned, estimate its cost and check against remaining budget:
|
||||
|
||||
```
|
||||
estimated_agent_cost = estimate_tokens(archetype, task_complexity) * model_price
|
||||
remaining_budget = budget - sum(costs_so_far)
|
||||
|
||||
if estimated_agent_cost > remaining_budget:
|
||||
WARN: "Estimated cost for {archetype} (${estimated}) would exceed remaining budget (${remaining}). Continue? [y/N]"
|
||||
```
|
||||
|
||||
**In autonomous mode**: if budget would be exceeded, STOP the run and report. Do not prompt — there is no one to answer.
|
||||
|
||||
**In attended mode**: warn and ask the user. They can approve the overage or stop.
|
||||
|
||||
### 2. Post-Agent Check (after completion)
|
||||
|
||||
After each agent completes, update the running total and check:
|
||||
|
||||
```
|
||||
if total_cost > budget * warn_at_percent / 100:
|
||||
WARN: "Budget ${warn_at_percent}% consumed (${total_cost} of ${budget})"
|
||||
|
||||
if total_cost > budget:
|
||||
STOP: "Budget exceeded (${total_cost} of ${budget}). Run halted."
|
||||
```
|
||||
|
||||
### Pre-Agent Cost Estimation
|
||||
|
||||
Rough token estimates by archetype (calibrate over time with actual data from `metrics.jsonl`):
|
||||
|
||||
| Archetype | Typical Input | Typical Output | Notes |
|
||||
|-----------|-------------:|---------------:|-------|
|
||||
| Explorer | 8k | 4k | Research, reads many files |
|
||||
| Creator | 12k | 6k | Receives Explorer output, produces plan |
|
||||
| Maker | 15k | 12k | Largest output (implementation/prose) |
|
||||
| Guardian | 10k | 3k | Reads diff, structured output |
|
||||
| Skeptic | 8k | 3k | Reads proposal, structured challenges |
|
||||
| Sage | 12k | 4k | Reads diff + proposal |
|
||||
| Trickster | 8k | 4k | Reads diff, generates test cases |
|
||||
|
||||
These are starting estimates. After 10+ runs, use actual averages from `metrics.jsonl` instead.
|
||||
|
||||
## Cost-Aware Model Selection
|
||||
|
||||
Each archetype has a recommended model tier based on the quality requirements of its role:
|
||||
|
||||
### Default Model Assignments (Code Domain)
|
||||
|
||||
| Archetype | Model | Rationale |
|
||||
|-----------|-------|-----------|
|
||||
| Explorer | haiku | Research is structured extraction — cheap model handles it well |
|
||||
| Creator | sonnet | Design decisions need reasoning quality |
|
||||
| Maker | sonnet | Implementation needs quality to avoid rework cycles |
|
||||
| Guardian | haiku | Security/risk review is checklist-driven — structured and cheap |
|
||||
| Skeptic | haiku | Challenge generation follows patterns — cheap |
|
||||
| Sage | sonnet | Holistic quality judgment needs nuance |
|
||||
| Trickster | haiku | Adversarial testing is systematic — cheap |
|
||||
|
||||
### Writing Domain Overrides
|
||||
|
||||
Writing tasks need higher quality for prose-generating agents:
|
||||
|
||||
| Archetype | Model | Rationale |
|
||||
|-----------|-------|-----------|
|
||||
| Explorer / story-explorer | haiku | Research is still cheap |
|
||||
| Creator | sonnet | Outline design needs narrative judgment |
|
||||
| Maker | **sonnet** | Prose quality is the product — cannot be cheap |
|
||||
| Guardian | haiku | Plot/continuity checks are structured |
|
||||
| Skeptic | haiku | Premise challenges are structured |
|
||||
| Sage / story-sage | **sonnet** | Voice and craft judgment need taste |
|
||||
| Trickster | haiku | Reader-confusion analysis is systematic |
|
||||
|
||||
**When to escalate to opus**: Only for final-pass prose polishing on high-stakes content (book manuscripts, not short stories). Never for review or research agents. The user must explicitly opt in via:
|
||||
|
||||
```yaml
|
||||
# Team preset
|
||||
model_overrides:
|
||||
maker: opus # Only for final polish pass
|
||||
```
|
||||
|
||||
### Domain-Driven Model Selection
|
||||
|
||||
The effective model for each agent is resolved in this order:
|
||||
|
||||
1. **Team preset `model_overrides`** (highest priority — explicit choice)
|
||||
2. **Domain `model_overrides`** (from `.archeflow/domains/<name>.yaml`)
|
||||
3. **Custom archetype `model` field** (from archetype YAML frontmatter)
|
||||
4. **Archetype default** (from the table above — fallback when nothing else specifies a model)
|
||||
|
||||
Example resolution for `story-sage` in a writing run:
|
||||
- Team preset says nothing about story-sage → skip
|
||||
- Writing domain says `story-sage: sonnet` → **use sonnet**
|
||||
- Archetype YAML says `model: sonnet` → would have been used if domain didn't specify
|
||||
|
||||
## Cost Optimization Strategies
|
||||
|
||||
### 1. Prompt Caching
|
||||
|
||||
Structure prompts so that stable content comes first (maximizes cache prefix hits):
|
||||
|
||||
```
|
||||
[System prompt — archetype instructions] ← cached across agents in same run
|
||||
[Domain context — voice profile, persona] ← cached across agents in same run
|
||||
[Phase context — Explorer output, proposal] ← changes per agent
|
||||
[Task-specific instructions] ← changes per agent
|
||||
```
|
||||
|
||||
Estimated savings: 30-50% on input tokens for runs with 5+ agents.
|
||||
|
||||
### 2. Guardian Fast-Path (A2)
|
||||
|
||||
When Guardian approves with 0 issues, skip Skeptic/Sage/Trickster. This saves 2-3 agent calls per cycle. See `archeflow:orchestration` skill, rule A2.
|
||||
|
||||
Typical savings: $0.30-0.80 per skipped cycle (depending on models).
|
||||
|
||||
### 3. Explorer Cache
|
||||
|
||||
Reuse recent Explorer research instead of re-running. See `archeflow:orchestration` skill, Explorer Cache section.
|
||||
|
||||
Typical savings: $0.02-0.05 per cache hit (haiku Explorer).
|
||||
|
||||
### 4. Batches API for Bulk Operations
|
||||
|
||||
When running consistency checks, validation passes, or other non-time-sensitive work across multiple files, use the Batches API (50% discount):
|
||||
|
||||
```yaml
|
||||
# Mark agents as batch-eligible in team presets
|
||||
batch_eligible:
|
||||
- guardian # Structured review, can wait
|
||||
- skeptic # Challenge generation, can wait
|
||||
```
|
||||
|
||||
Only use batches when the user is not waiting for real-time results (overnight runs, autonomous mode).
|
||||
|
||||
### 5. Early Termination
|
||||
|
||||
If the first cycle produces a clean Guardian pass (A2 fast-path) AND the Maker's self-review checklist is clean, skip the remaining cycles even if `max_cycles > 1`. This avoids spending tokens on unnecessary verification.
|
||||
|
||||
## Daily Cost Tracking
|
||||
|
||||
Across runs, maintain a daily cost ledger:
|
||||
|
||||
```
|
||||
.archeflow/costs/<YYYY-MM-DD>.jsonl
|
||||
```
|
||||
|
||||
Each line is one run's cost summary:
|
||||
|
||||
```jsonl
|
||||
{"run_id":"2026-04-03-der-huster","cost_usd":1.45,"tokens_input":95000,"tokens_output":33000,"models":{"haiku":2,"sonnet":3},"domain":"writing"}
|
||||
{"run_id":"2026-04-03-auth-refactor","cost_usd":2.10,"tokens_input":120000,"tokens_output":45000,"models":{"haiku":3,"sonnet":2},"domain":"code"}
|
||||
```
|
||||
|
||||
Daily budget enforcement reads this file to check `daily_usd` limits before starting new runs.
|
||||
|
||||
### Cost Report Command
|
||||
|
||||
```bash
|
||||
# Show today's costs
|
||||
./lib/archeflow-costs.sh today
|
||||
|
||||
# Show costs for a date range
|
||||
./lib/archeflow-costs.sh 2026-04-01 2026-04-03
|
||||
|
||||
# Show costs for a specific run
|
||||
./lib/archeflow-costs.sh run 2026-04-03-der-huster
|
||||
```
|
||||
|
||||
## Integration with Other Skills
|
||||
|
||||
- **`orchestration`**: Calls pre-agent and post-agent budget checks. Includes cost summary in orchestration report.
|
||||
- **`process-log`**: Cost data is embedded in `agent.complete` and `run.complete` events. No separate cost events needed.
|
||||
- **`domains`**: Reads `model_overrides` from the active domain to determine effective model per agent.
|
||||
- **`autonomous-mode`**: Enforces budget strictly (no prompts — just stop on budget exceeded). Uses daily budget to limit overnight spend.
|
||||
- **`workflow-design`**: Custom workflows can specify per-phase model assignments that override domain defaults.
|
||||
|
||||
## Design Principles
|
||||
|
||||
1. **Track always, enforce optionally.** Cost data is in every event regardless of whether a budget is set. Budget enforcement is opt-in.
|
||||
2. **Estimate before spend.** Always estimate before spawning an agent. Surprises are worse than slightly inaccurate estimates.
|
||||
3. **Cheapest model that works.** Default to haiku. Upgrade to sonnet only when the task demonstrably needs it. Opus is user-opt-in only.
|
||||
4. **Transparent.** Every cost shows up in the orchestration report. No hidden token spend.
|
||||
5. **Learn from history.** After enough runs, replace estimates with actual averages from `metrics.jsonl`.
|
||||
372
skills/domains/SKILL.md
Normal file
372
skills/domains/SKILL.md
Normal file
@@ -0,0 +1,372 @@
|
||||
---
|
||||
name: domains
|
||||
description: |
|
||||
Domain adapter system that maps ArcheFlow concepts (code-oriented by default) to domain-specific
|
||||
equivalents. Enables writing, research, and other non-code workflows to use the same PDCA pipeline
|
||||
with domain-appropriate terminology, metrics, review focus, and context injection.
|
||||
<example>User: "Use ArcheFlow for my short story"</example>
|
||||
<example>Automatically loaded when colette.yaml is detected</example>
|
||||
---
|
||||
|
||||
# Domain Adapter System
|
||||
|
||||
ArcheFlow's PDCA pipeline and archetype system are domain-agnostic. This skill defines how to adapt them to specific domains (writing, code, research, etc.) so that events, metrics, reviews, and context use terminology that makes sense for the work being done.
|
||||
|
||||
## Domain Registry
|
||||
|
||||
Domain definitions live in `.archeflow/domains/<name>.yaml`. Each domain maps ArcheFlow's generic concepts to domain-specific equivalents and configures what metrics to track, what reviewers should focus on, and what context agents need.
|
||||
|
||||
### Writing Domain
|
||||
|
||||
```yaml
|
||||
# .archeflow/domains/writing.yaml
|
||||
name: writing
|
||||
description: "Creative writing — stories, novels, non-fiction"
|
||||
|
||||
# Concept mapping — how generic ArcheFlow terms translate
|
||||
concepts:
|
||||
implementation: "draft/prose"
|
||||
tests: "consistency checks"
|
||||
files_changed: "word count delta"
|
||||
test_coverage: "voice drift score"
|
||||
code_review: "prose review"
|
||||
build: "compile/export"
|
||||
deploy: "publish"
|
||||
refactor: "revision"
|
||||
bug: "continuity error"
|
||||
feature: "scene/chapter"
|
||||
PR: "manuscript submission"
|
||||
|
||||
# Metrics — what to track instead of lines/files/tests
|
||||
metrics:
|
||||
- word_count
|
||||
- voice_drift_score
|
||||
- dialect_density
|
||||
- essen_count # Giesing Gschichten rule: food in every scene
|
||||
- scene_count
|
||||
- dialogue_ratio
|
||||
|
||||
# Review focus areas — override default Guardian/Sage lenses
|
||||
review_focus:
|
||||
guardian:
|
||||
- plot_coherence
|
||||
- character_consistency
|
||||
- timeline_accuracy
|
||||
- continuity
|
||||
sage:
|
||||
- voice_consistency
|
||||
- prose_quality
|
||||
- dialect_authenticity
|
||||
- forbidden_pattern_violations
|
||||
skeptic:
|
||||
- premise_strength
|
||||
- character_motivation
|
||||
- ending_satisfaction
|
||||
trickster:
|
||||
- reader_confusion_points
|
||||
- pacing_dead_spots
|
||||
- suspension_of_disbelief_breaks
|
||||
|
||||
# Context injection — what extra files agents should read per phase
|
||||
context:
|
||||
always:
|
||||
- "voice profile YAML (profiles/*.yaml)"
|
||||
- "persona YAML (personas/*.yaml)"
|
||||
- "character sheets (characters/*.yaml)"
|
||||
plan_phase:
|
||||
- "series config (colette.yaml)"
|
||||
- "previous stories (if series, for continuity)"
|
||||
- "story brief / premise"
|
||||
do_phase:
|
||||
- "scene outline from Creator"
|
||||
- "voice profile (for style reference)"
|
||||
check_phase:
|
||||
- "voice profile (for Sage drift scoring)"
|
||||
- "outline (for Guardian coherence check)"
|
||||
- "character sheets (for consistency)"
|
||||
|
||||
# Model preferences — domain-specific overrides
|
||||
model_overrides:
|
||||
maker: sonnet # Prose quality matters more than for code
|
||||
story-sage: sonnet # Needs taste for voice evaluation
|
||||
```
|
||||
|
||||
### Code Domain (Default)
|
||||
|
||||
```yaml
|
||||
# .archeflow/domains/code.yaml
|
||||
name: code
|
||||
description: "Software development — applications, libraries, infrastructure"
|
||||
|
||||
concepts:
|
||||
implementation: "code changes"
|
||||
tests: "automated tests"
|
||||
files_changed: "files changed"
|
||||
test_coverage: "test coverage %"
|
||||
code_review: "code review"
|
||||
build: "build/compile"
|
||||
deploy: "deploy"
|
||||
refactor: "refactor"
|
||||
bug: "bug"
|
||||
feature: "feature"
|
||||
PR: "pull request"
|
||||
|
||||
metrics:
|
||||
- files_changed
|
||||
- lines_added
|
||||
- lines_removed
|
||||
- tests_added
|
||||
- tests_passing
|
||||
- coverage_delta
|
||||
|
||||
review_focus:
|
||||
guardian:
|
||||
- security_vulnerabilities
|
||||
- breaking_changes
|
||||
- dependency_risks
|
||||
- error_handling
|
||||
sage:
|
||||
- code_quality
|
||||
- test_coverage
|
||||
- documentation
|
||||
- pattern_consistency
|
||||
skeptic:
|
||||
- design_assumptions
|
||||
- scalability
|
||||
- alternative_approaches
|
||||
- edge_cases
|
||||
trickster:
|
||||
- malformed_input
|
||||
- concurrency_races
|
||||
- error_path_exploitation
|
||||
- dependency_failures
|
||||
|
||||
context:
|
||||
always:
|
||||
- "README.md"
|
||||
- ".archeflow/config.yaml"
|
||||
plan_phase:
|
||||
- "relevant source files (Explorer identifies)"
|
||||
- "existing tests for affected area"
|
||||
do_phase:
|
||||
- "Creator's proposal"
|
||||
- "test fixtures and helpers"
|
||||
check_phase:
|
||||
- "git diff from Maker"
|
||||
- "proposal risk section"
|
||||
|
||||
model_overrides: {}
|
||||
# Code domain uses default archetype model assignments
|
||||
```
|
||||
|
||||
### Research Domain (Example Extension)
|
||||
|
||||
```yaml
|
||||
# .archeflow/domains/research.yaml
|
||||
name: research
|
||||
description: "Academic or technical research — papers, analysis, literature review"
|
||||
|
||||
concepts:
|
||||
implementation: "draft/analysis"
|
||||
tests: "citation verification"
|
||||
files_changed: "section count"
|
||||
test_coverage: "source coverage"
|
||||
code_review: "peer review"
|
||||
build: "compile (LaTeX/PDF)"
|
||||
deploy: "submit/publish"
|
||||
|
||||
metrics:
|
||||
- word_count
|
||||
- citation_count
|
||||
- source_diversity
|
||||
- claim_count
|
||||
- unsupported_claims
|
||||
|
||||
review_focus:
|
||||
guardian:
|
||||
- factual_accuracy
|
||||
- citation_validity
|
||||
- logical_coherence
|
||||
- methodology_soundness
|
||||
sage:
|
||||
- argument_structure
|
||||
- prose_clarity
|
||||
- academic_tone
|
||||
- completeness
|
||||
|
||||
context:
|
||||
always:
|
||||
- "bibliography/references"
|
||||
- "research brief"
|
||||
plan_phase:
|
||||
- "prior literature notes"
|
||||
- "methodology constraints"
|
||||
check_phase:
|
||||
- "citation database"
|
||||
- "claims vs. evidence mapping"
|
||||
|
||||
model_overrides:
|
||||
maker: sonnet # Research writing needs quality
|
||||
```
|
||||
|
||||
## Domain Detection
|
||||
|
||||
ArcheFlow auto-detects the domain based on project markers. Detection runs once at `run.start` and the result is stored in the run's event stream.
|
||||
|
||||
### Detection Priority (highest first)
|
||||
|
||||
| Priority | Signal | Domain | Rationale |
|
||||
|----------|--------|--------|-----------|
|
||||
| 1 | CLI flag `--domain <name>` | as specified | Explicit override always wins |
|
||||
| 2 | Team preset has `domain: <name>` | as specified | Preset knows its domain |
|
||||
| 3 | `colette.yaml` exists in project root | `writing` | Colette is the writing platform |
|
||||
| 4 | `*.bib` or `references/` exists | `research` | Bibliography signals research |
|
||||
| 5 | `package.json` exists | `code` | Node.js project |
|
||||
| 6 | `Cargo.toml` exists | `code` | Rust project |
|
||||
| 7 | `pyproject.toml` exists | `code` | Python project |
|
||||
| 8 | `go.mod` exists | `code` | Go project |
|
||||
| 9 | `Makefile` or `CMakeLists.txt` exists | `code` | C/C++ project |
|
||||
| 10 | No markers found | `code` | Default fallback |
|
||||
|
||||
### Detection in Team Presets
|
||||
|
||||
Team presets can declare their domain explicitly:
|
||||
|
||||
```yaml
|
||||
# .archeflow/teams/story-development.yaml
|
||||
name: story-development
|
||||
domain: writing # <-- explicit domain
|
||||
description: "Kurzgeschichten-Entwicklung"
|
||||
plan: [story-explorer, creator]
|
||||
do: [maker]
|
||||
check: [guardian, story-sage]
|
||||
```
|
||||
|
||||
When `domain` is set in the preset, detection is skipped entirely.
|
||||
|
||||
### Detection Event
|
||||
|
||||
Domain detection emits a decision event:
|
||||
|
||||
```jsonl
|
||||
{"ts":"...","run_id":"...","seq":1,"parent":[],"type":"decision","phase":"init","agent":null,"data":{"what":"domain_detection","chosen":"writing","signal":"colette.yaml exists","alternatives":[{"id":"code","reason_rejected":"No code project markers found"}]}}
|
||||
```
|
||||
|
||||
## How Domains Affect Orchestration
|
||||
|
||||
### 1. Concept Translation in Reports
|
||||
|
||||
The orchestration report and session log use domain-translated terms:
|
||||
|
||||
```markdown
|
||||
# Code domain report
|
||||
- **Files changed:** 4 files, +120 -30 lines
|
||||
- **Tests added:** 8 new tests
|
||||
|
||||
# Writing domain report (same data, different framing)
|
||||
- **Word count delta:** +6004 words across 7 scenes
|
||||
- **Consistency checks:** voice drift 0.12, 2 continuity fixes applied
|
||||
```
|
||||
|
||||
### 2. Domain-Specific Event Data
|
||||
|
||||
Events include domain-relevant metrics in their `data` payload:
|
||||
|
||||
```jsonl
|
||||
// Writing domain — agent.complete
|
||||
{"type":"agent.complete","data":{"archetype":"maker","duration_ms":180000,"word_count":6004,"voice_drift":0.12,"scenes":7,"dialogue_ratio":0.35,"essen_count":4}}
|
||||
|
||||
// Code domain — agent.complete
|
||||
{"type":"agent.complete","data":{"archetype":"maker","duration_ms":90000,"files_changed":5,"tests_added":12,"coverage_delta":"+3%","lines_added":245,"lines_removed":80}}
|
||||
|
||||
// Writing domain — run.complete
|
||||
{"type":"run.complete","data":{"status":"completed","word_count":6004,"voice_drift_final":0.08,"scenes":7,"dialect_density":0.15,"cycles":1}}
|
||||
|
||||
// Code domain — run.complete
|
||||
{"type":"run.complete","data":{"status":"completed","files_changed":4,"tests_total":20,"coverage":"87%","cycles":2}}
|
||||
```
|
||||
|
||||
### 3. Review Focus Override
|
||||
|
||||
When a domain defines `review_focus`, reviewers receive domain-specific instructions instead of the defaults:
|
||||
|
||||
```
|
||||
# Without domain adapter (code defaults):
|
||||
Guardian → "Check for security vulnerabilities, breaking changes..."
|
||||
|
||||
# With writing domain adapter:
|
||||
Guardian → "Check for plot coherence, character consistency, timeline accuracy, continuity..."
|
||||
```
|
||||
|
||||
The orchestration skill reads the domain's `review_focus` and injects it into the reviewer prompt. The archetype's base personality (virtue, shadow, lens) stays the same — only the checklist changes.
|
||||
|
||||
### 4. Context Injection
|
||||
|
||||
The domain's `context` config tells the orchestrator which additional files to pass to each agent:
|
||||
|
||||
```
|
||||
# Plan phase in writing domain:
|
||||
# Orchestrator automatically includes voice profile, persona, character sheets, series config
|
||||
# alongside the standard task description and Explorer output
|
||||
|
||||
# Check phase in writing domain:
|
||||
# Guardian gets the outline (for coherence)
|
||||
# Sage gets the voice profile (for drift scoring)
|
||||
```
|
||||
|
||||
Context injection is additive — domain context is added on top of ArcheFlow's standard context rules (task description, prior phase output, etc.).
|
||||
|
||||
### 5. Model Overrides
|
||||
|
||||
If the domain specifies `model_overrides`, those override the default model assignment for the listed archetypes:
|
||||
|
||||
```
|
||||
# Default: Maker uses whatever the workflow assigns (often haiku for cheap tasks)
|
||||
# Writing domain: Maker uses sonnet (prose quality matters)
|
||||
# Research domain: Maker uses sonnet (analysis quality matters)
|
||||
```
|
||||
|
||||
Model overrides interact with cost tracking — the cost-tracking skill reads the effective model assignment (after domain overrides) for its estimates.
|
||||
|
||||
## Adding a New Domain
|
||||
|
||||
1. Create `.archeflow/domains/<name>.yaml` following the schema above
|
||||
2. Add detection signals to the priority table (or rely on `--domain` / team preset)
|
||||
3. Define custom archetypes if needed (e.g., `story-explorer` for writing)
|
||||
4. Test with `--domain <name> --dry-run` to verify detection and context injection
|
||||
|
||||
### Minimum Viable Domain
|
||||
|
||||
Only `name`, `concepts`, and `metrics` are required. Everything else has sensible defaults:
|
||||
|
||||
```yaml
|
||||
name: legal
|
||||
description: "Legal document drafting and review"
|
||||
|
||||
concepts:
|
||||
implementation: "draft"
|
||||
tests: "compliance checks"
|
||||
code_review: "legal review"
|
||||
|
||||
metrics:
|
||||
- clause_count
|
||||
- citation_count
|
||||
- compliance_score
|
||||
```
|
||||
|
||||
Missing sections fall back to the `code` domain defaults.
|
||||
|
||||
## Integration with Other Skills
|
||||
|
||||
- **`orchestration`**: Reads domain config at `run.start`, applies concept translation, context injection, model overrides, and review focus throughout the run
|
||||
- **`process-log`**: Domain-specific event data fields are included in `agent.complete` and `run.complete` payloads
|
||||
- **`cost-tracking`**: Reads `model_overrides` from the active domain to calculate accurate cost estimates
|
||||
- **`custom-archetypes`**: Domain-specific archetypes (e.g., `story-explorer`, `story-sage`) are defined per-project and referenced in team presets
|
||||
- **`workflow-design`**: Custom workflows can reference a domain explicitly
|
||||
|
||||
## Design Principles
|
||||
|
||||
1. **Additive, not replacing.** Domains add context and translate terms. They do not change the PDCA cycle, archetype system, or event schema.
|
||||
2. **Graceful degradation.** If no domain config exists, everything works as before (code domain defaults).
|
||||
3. **One domain per run.** A run operates in exactly one domain. Multi-domain projects use separate runs.
|
||||
4. **Domain config is data, not code.** YAML files, no scripts. Portable across projects.
|
||||
460
skills/run/SKILL.md
Normal file
460
skills/run/SKILL.md
Normal file
@@ -0,0 +1,460 @@
|
||||
---
|
||||
name: run
|
||||
description: |
|
||||
Automated PDCA execution loop. Single-command orchestration that initializes a run, flows through
|
||||
Plan/Do/Check/Act phases, emits events at every step, saves artifacts to disk, and handles
|
||||
cycle-back with structured feedback. Use instead of manually following orchestration steps.
|
||||
<example>User: "archeflow:run"</example>
|
||||
<example>User: "Run this through ArcheFlow"</example>
|
||||
<example>User: "archeflow:run --start-from check"</example>
|
||||
<example>User: "archeflow:run --dry-run"</example>
|
||||
---
|
||||
|
||||
# ArcheFlow Run — Automated PDCA Execution Loop
|
||||
|
||||
This skill automates the full orchestration cycle. When invoked, Claude executes all PDCA phases end-to-end, emitting events and saving artifacts at every step. No manual phase-by-phase intervention needed.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Load these skills (they are referenced throughout):
|
||||
- `archeflow:orchestration` — agent prompts, workflow selection, adaptation rules
|
||||
- `archeflow:process-log` — event schema and DAG parent rules
|
||||
- `archeflow:artifact-routing` — artifact naming, context injection, cycle archiving
|
||||
|
||||
## Invocation
|
||||
|
||||
```
|
||||
archeflow:run # Full run, auto-select workflow
|
||||
archeflow:run --workflow standard # Force a specific workflow
|
||||
archeflow:run --start-from do # Resume from Do phase (requires prior artifacts)
|
||||
archeflow:run --start-from check # Resume from Check phase
|
||||
archeflow:run --dry-run # Plan phase only, show cost estimate
|
||||
archeflow:run --max-cycles 1 # Override max cycles
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Execution Steps
|
||||
|
||||
### 0. Initialize
|
||||
|
||||
Generate a run ID and set up the artifact directory.
|
||||
|
||||
```bash
|
||||
# Generate run_id
|
||||
RUN_ID="$(date -u +%Y-%m-%d)-<task-slug>"
|
||||
|
||||
# Create artifact directory
|
||||
mkdir -p .archeflow/artifacts/${RUN_ID}
|
||||
|
||||
# Emit run.start event (seq=1, parent=[])
|
||||
./lib/archeflow-event.sh "$RUN_ID" run.start plan "" \
|
||||
'{"task":"<task description>","workflow":"<fast|standard|thorough>","max_cycles":<N>}'
|
||||
```
|
||||
|
||||
**Track state:** Maintain these variables throughout the run:
|
||||
- `RUN_ID` — unique run identifier
|
||||
- `SEQ` — current sequence number (read from event file line count after each emit)
|
||||
- `CYCLE` — current PDCA cycle number (starts at 1)
|
||||
- `WORKFLOW` — fast/standard/thorough (may change via adaptation rules)
|
||||
- `ESCALATED` — boolean, set true if A1 triggers
|
||||
|
||||
After emitting `run.start`, record `SEQ_RUN_START=1`.
|
||||
|
||||
If `--start-from` is specified, verify that the required prior artifacts exist in `.archeflow/artifacts/${RUN_ID}/` before skipping phases. If missing, abort with an error.
|
||||
|
||||
---
|
||||
|
||||
### 1. Plan Phase
|
||||
|
||||
#### 1a. Explorer (if standard or thorough)
|
||||
|
||||
```bash
|
||||
# Emit agent.start
|
||||
./lib/archeflow-event.sh "$RUN_ID" agent.start plan explorer \
|
||||
'{"archetype":"explorer","prompt_summary":"Research codebase context for task"}' "$SEQ_RUN_START"
|
||||
```
|
||||
|
||||
Spawn the Explorer agent using the prompt from `archeflow:orchestration` Step 1.
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "Explorer: research context for <task>",
|
||||
prompt: "<Explorer prompt from orchestration skill>",
|
||||
subagent_type: "Explore"
|
||||
)
|
||||
```
|
||||
|
||||
After Explorer returns:
|
||||
1. Save output to `.archeflow/artifacts/${RUN_ID}/plan-explorer.md`
|
||||
2. Emit `agent.complete`:
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" agent.complete plan explorer \
|
||||
'{"archetype":"explorer","duration_ms":<ms>,"artifacts":["plan-explorer.md"],"summary":"<1-line summary>"}' "$SEQ_EXPLORER_START"
|
||||
```
|
||||
3. Record `SEQ_EXPLORER_COMPLETE` for DAG references.
|
||||
|
||||
#### 1b. Creator
|
||||
|
||||
The Creator receives Explorer output (if it exists) or performs Mini-Reflect (fast workflow).
|
||||
|
||||
```bash
|
||||
# Emit agent.start — parent is explorer.complete (or run.start for fast)
|
||||
./lib/archeflow-event.sh "$RUN_ID" agent.start plan creator \
|
||||
'{"archetype":"creator","prompt_summary":"Design solution proposal"}' "$SEQ_EXPLORER_COMPLETE"
|
||||
```
|
||||
|
||||
Spawn the Creator agent using the prompt from `archeflow:orchestration` Step 1.
|
||||
|
||||
**Context injection (from artifact-routing skill):**
|
||||
- Fast workflow: task description only
|
||||
- Standard/thorough: task description + contents of `plan-explorer.md`
|
||||
- Cycle 2+: task description + `plan-explorer.md` + `act-feedback.md` from prior cycle
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "Creator: design proposal for <task>",
|
||||
prompt: "<Creator prompt from orchestration skill, with context injected per above>",
|
||||
subagent_type: "Plan"
|
||||
)
|
||||
```
|
||||
|
||||
After Creator returns:
|
||||
1. Save output to `.archeflow/artifacts/${RUN_ID}/plan-creator.md`
|
||||
2. Emit `agent.complete`
|
||||
3. Record `SEQ_CREATOR_COMPLETE`
|
||||
|
||||
#### 1c. Confidence Gate (Adaptation Rule A3)
|
||||
|
||||
Read Creator's confidence scores from `plan-creator.md`. Apply A3 per `archeflow:orchestration`:
|
||||
- Task understanding < 0.5 → **Pause**, ask user
|
||||
- Solution completeness < 0.5 → **Upgrade** to standard, spawn Explorer
|
||||
- Risk coverage < 0.5 → **Spawn mini-Explorer** for risky area (parallel, 5 min max)
|
||||
|
||||
If A3 triggers, emit a `decision` event:
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" decision plan "" \
|
||||
'{"what":"confidence_gate","chosen":"<action>","rationale":"<axis> scored <score>"}' "$SEQ_CREATOR_COMPLETE"
|
||||
```
|
||||
|
||||
#### 1d. Phase Transition: Plan to Do
|
||||
|
||||
```bash
|
||||
# Parent = all completing events in Plan phase
|
||||
./lib/archeflow-event.sh "$RUN_ID" phase.transition do "" \
|
||||
'{"from":"plan","to":"do","artifacts_so_far":["plan-explorer.md","plan-creator.md"]}' "$SEQ_CREATOR_COMPLETE"
|
||||
```
|
||||
|
||||
Record `SEQ_PLAN_TO_DO`.
|
||||
|
||||
---
|
||||
|
||||
### 2. Do Phase
|
||||
|
||||
#### 2a. Maker
|
||||
|
||||
**Context injection (from artifact-routing skill):**
|
||||
- Contents of `plan-creator.md` (the proposal)
|
||||
- Cycle 2+: also contents of `act-feedback.md` filtered to Maker-routed findings only
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" agent.start do maker \
|
||||
'{"archetype":"maker","prompt_summary":"Implement proposal in isolated worktree"}' "$SEQ_PLAN_TO_DO"
|
||||
```
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "Maker: implement <task>",
|
||||
prompt: "<Maker prompt from orchestration skill, with Creator proposal injected>
|
||||
<if cycle 2+: Implementation feedback: <Maker-routed findings from act-feedback.md>>",
|
||||
isolation: "worktree",
|
||||
mode: "bypassPermissions"
|
||||
)
|
||||
```
|
||||
|
||||
After Maker returns:
|
||||
1. Save implementation summary to `.archeflow/artifacts/${RUN_ID}/do-maker.md`
|
||||
2. Capture list of changed files: `git diff --name-only` on the Maker's branch, save to `.archeflow/artifacts/${RUN_ID}/do-maker-files.txt`
|
||||
3. Emit `agent.complete`:
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" agent.complete do maker \
|
||||
'{"archetype":"maker","duration_ms":<ms>,"artifacts":["do-maker.md","do-maker-files.txt"],"summary":"<files changed, tests added>"}' "$SEQ_MAKER_START"
|
||||
```
|
||||
4. Record `SEQ_MAKER_COMPLETE`
|
||||
|
||||
**Critical:** Verify the Maker committed its changes before proceeding. If uncommitted changes exist, instruct the Maker to commit.
|
||||
|
||||
#### 2b. Phase Transition: Do to Check
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" phase.transition check "" \
|
||||
'{"from":"do","to":"check","artifacts_so_far":["plan-explorer.md","plan-creator.md","do-maker.md","do-maker-files.txt"]}' "$SEQ_MAKER_COMPLETE"
|
||||
```
|
||||
|
||||
Record `SEQ_DO_TO_CHECK`.
|
||||
|
||||
---
|
||||
|
||||
### 3. Check Phase
|
||||
|
||||
**Important:** Spawn Guardian FIRST, then evaluate A2 before spawning other reviewers.
|
||||
|
||||
#### 3a. Guardian (always first)
|
||||
|
||||
**Context injection:** Maker's git diff + proposal risk section only (not full proposal, not Explorer research).
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" agent.start check guardian \
|
||||
'{"archetype":"guardian","prompt_summary":"Security and risk review of changes"}' "$SEQ_DO_TO_CHECK"
|
||||
```
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "Guardian: security review for <task>",
|
||||
prompt: "<Guardian prompt from orchestration skill, with Maker's diff injected>"
|
||||
)
|
||||
```
|
||||
|
||||
After Guardian returns:
|
||||
1. Save to `.archeflow/artifacts/${RUN_ID}/check-guardian.md`
|
||||
2. Emit `review.verdict`:
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" review.verdict check guardian \
|
||||
'{"archetype":"guardian","verdict":"<approved|rejected|approved_with_fixes>","findings":[...]}' "$SEQ_GUARDIAN_START"
|
||||
```
|
||||
3. Record `SEQ_GUARDIAN_VERDICT`
|
||||
|
||||
#### 3b. Guardian Fast-Path Check (Adaptation Rule A2)
|
||||
|
||||
Parse Guardian's output. If **0 CRITICAL and 0 WARNING** AND workflow is not escalated AND not first cycle of thorough:
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" decision check "" \
|
||||
'{"what":"guardian_fast_path","chosen":"skip_remaining_reviewers","rationale":"0 CRITICAL, 0 WARNING"}' "$SEQ_GUARDIAN_VERDICT"
|
||||
```
|
||||
|
||||
Skip to Phase Transition (3d). Log "Guardian fast-path taken" in report.
|
||||
|
||||
Otherwise, proceed to spawn remaining reviewers.
|
||||
|
||||
#### 3c. Remaining Reviewers (in parallel)
|
||||
|
||||
Spawn these based on workflow (see `archeflow:orchestration` for which reviewers apply):
|
||||
|
||||
**Skeptic** (standard/thorough):
|
||||
- Context: Creator's proposal (assumptions section focus)
|
||||
- Save to: `check-skeptic.md`
|
||||
|
||||
**Sage** (standard/thorough):
|
||||
- Context: Creator's proposal + Maker's diff + implementation summary
|
||||
- Save to: `check-sage.md`
|
||||
|
||||
**Trickster** (thorough only):
|
||||
- Context: Maker's diff only
|
||||
- Save to: `check-trickster.md`
|
||||
|
||||
Spawn all applicable reviewers in parallel (multiple Agent calls in one message). For each:
|
||||
|
||||
```bash
|
||||
# Emit agent.start with parent = SEQ_DO_TO_CHECK
|
||||
./lib/archeflow-event.sh "$RUN_ID" agent.start check <archetype> \
|
||||
'{"archetype":"<archetype>","prompt_summary":"<review focus>"}' "$SEQ_DO_TO_CHECK"
|
||||
```
|
||||
|
||||
After each returns, emit `review.verdict` and save artifact.
|
||||
|
||||
#### 3d. Phase Transition: Check to Act
|
||||
|
||||
Collect all verdict seq numbers for the parent array.
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" phase.transition act "" \
|
||||
'{"from":"check","to":"act"}' "<all_verdict_seqs>"
|
||||
```
|
||||
|
||||
Record `SEQ_CHECK_TO_ACT`.
|
||||
|
||||
---
|
||||
|
||||
### 4. Act Phase
|
||||
|
||||
#### 4a. Collect Verdicts
|
||||
|
||||
Read all `check-*.md` artifacts. Tally findings:
|
||||
- Count CRITICAL, WARNING, INFO per reviewer
|
||||
- Check for unanimous approval
|
||||
|
||||
#### 4b. Escalation Check (Adaptation Rule A1)
|
||||
|
||||
If workflow is `fast` and Guardian found 2+ CRITICAL:
|
||||
- Set `ESCALATED=true`
|
||||
- Upgrade next cycle to `standard` (add Skeptic + Sage)
|
||||
- Emit decision event
|
||||
|
||||
#### 4c. Branch: All Approved
|
||||
|
||||
If all reviewers approved (and completion criteria met, if defined):
|
||||
|
||||
1. Emit `cycle.boundary`:
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" cycle.boundary act "" \
|
||||
'{"cycle":<N>,"max_cycles":<M>,"exit_condition":"all_approved","met":true,"next_action":"complete"}' "$SEQ_CHECK_TO_ACT"
|
||||
```
|
||||
|
||||
2. Run pre-merge hooks (check `.archeflow/hooks.yaml`)
|
||||
3. Merge Maker's worktree branch: `git merge --no-ff <branch>`
|
||||
4. Run post-merge hooks + test suite
|
||||
- Tests pass → continue
|
||||
- Tests fail → auto-revert, cycle back with "integration test failure" feedback
|
||||
5. Clean up worktree
|
||||
6. Proceed to Completion (step 5)
|
||||
|
||||
#### 4d. Branch: Issues Found (cycles remaining)
|
||||
|
||||
If any reviewer rejected and `CYCLE < MAX_CYCLES`:
|
||||
|
||||
1. Build structured feedback using the Cycle Feedback Protocol from `archeflow:orchestration`:
|
||||
- Extract findings from all `check-*.md` artifacts
|
||||
- Route findings: Guardian/Skeptic issues → Creator, Sage issues → Maker
|
||||
- Check convergence: same finding in 2 consecutive cycles → escalate to user
|
||||
- Dedup cross-archetype findings
|
||||
|
||||
2. Save to `.archeflow/artifacts/${RUN_ID}/act-feedback.md`
|
||||
|
||||
3. Save applied fixes log (initially empty, populated during next Do phase):
|
||||
```bash
|
||||
touch .archeflow/artifacts/${RUN_ID}/act-fixes.jsonl
|
||||
```
|
||||
|
||||
4. Emit `cycle.boundary`:
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" cycle.boundary act "" \
|
||||
'{"cycle":<N>,"max_cycles":<M>,"exit_condition":"all_approved","met":false,"next_action":"cycle_back"}' "$SEQ_CHECK_TO_ACT"
|
||||
```
|
||||
|
||||
5. Archive current cycle artifacts:
|
||||
```bash
|
||||
mkdir -p .archeflow/artifacts/${RUN_ID}/cycle-${CYCLE}
|
||||
cp .archeflow/artifacts/${RUN_ID}/plan-*.md .archeflow/artifacts/${RUN_ID}/cycle-${CYCLE}/
|
||||
cp .archeflow/artifacts/${RUN_ID}/do-*.md .archeflow/artifacts/${RUN_ID}/do-*.txt .archeflow/artifacts/${RUN_ID}/cycle-${CYCLE}/ 2>/dev/null || true
|
||||
cp .archeflow/artifacts/${RUN_ID}/check-*.md .archeflow/artifacts/${RUN_ID}/cycle-${CYCLE}/
|
||||
```
|
||||
|
||||
6. Increment `CYCLE`, go back to Step 1 (Plan Phase)
|
||||
|
||||
#### 4e. Branch: Max Cycles Reached
|
||||
|
||||
If `CYCLE >= MAX_CYCLES` and issues remain:
|
||||
|
||||
1. Report all unresolved findings to the user
|
||||
2. Present the best implementation (on its branch, not merged)
|
||||
3. Let the user decide: merge as-is, fix manually, or abandon
|
||||
4. Emit `cycle.boundary` with `"met": false, "next_action": "user_decision"`
|
||||
|
||||
---
|
||||
|
||||
### 5. Completion
|
||||
|
||||
```bash
|
||||
# Emit run.complete
|
||||
./lib/archeflow-event.sh "$RUN_ID" run.complete act "" \
|
||||
'{"status":"completed","cycles":<N>,"agents_total":<count>,"fixes_total":<count>,"shadows":0,"artifacts":[<list>]}'
|
||||
|
||||
# Generate report
|
||||
./lib/archeflow-report.sh .archeflow/events/${RUN_ID}.jsonl
|
||||
|
||||
# Update run index
|
||||
printf '{"run_id":"%s","ts":"%s","task":"<task>","workflow":"<wf>","status":"completed","cycles":<N>}\n' \
  "$RUN_ID" "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
  >> .archeflow/events/index.jsonl
|
||||
```
|
||||
|
||||
Display the orchestration report to the user (see `archeflow:orchestration` report format).
|
||||
|
||||
---
|
||||
|
||||
## Fix Tracking
|
||||
|
||||
When the Maker addresses review findings in cycle 2+, emit `fix.applied` for each:
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" fix.applied act "" \
|
||||
'{"source":"<reviewer>","finding":"<description>","file":"<path>","line":<n>}' "$SEQ_OF_REVIEW"
|
||||
```
|
||||
|
||||
Also append to `.archeflow/artifacts/${RUN_ID}/act-fixes.jsonl`:
|
||||
```jsonl
|
||||
{"source":"guardian","finding":"SQL injection","file":"src/auth.ts","line":48,"fixed_in_cycle":2}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dry-Run Mode
|
||||
|
||||
When `--dry-run` is specified:
|
||||
|
||||
1. Run **only the Plan phase** (Explorer + Creator)
|
||||
2. Display:
|
||||
```
|
||||
Dry run for: "<task>"
|
||||
Workflow: <standard> (<N> cycles max)
|
||||
Agents per cycle: <count>
|
||||
Max agents total: <count * cycles>
|
||||
Plan phase result: see .archeflow/artifacts/<run_id>/plan-creator.md
|
||||
Creator confidence: <scores>
|
||||
Estimated phases: Plan (done) -> Do -> Check -> Act
|
||||
Proceed with full run? [y/n]
|
||||
```
|
||||
3. Do NOT emit `run.complete` — the run is paused, not finished
|
||||
4. If user says yes, continue from `--start-from do` using the saved artifacts
|
||||
|
||||
---
|
||||
|
||||
## Start-From Mode
|
||||
|
||||
When `--start-from <phase>` is specified:
|
||||
|
||||
| Start from | Required artifacts in `.archeflow/artifacts/<run_id>/` |
|
||||
|------------|-------------------------------------------------------|
|
||||
| `plan` | None (equivalent to full run) |
|
||||
| `do` | `plan-creator.md` |
|
||||
| `check` | `plan-creator.md`, `do-maker.md`, `do-maker-files.txt` |
|
||||
| `act` | All `check-*.md` files |
|
||||
|
||||
Validate required artifacts exist. If missing, error:
|
||||
```
|
||||
Cannot start from <phase>: missing artifact <name>. Run the prior phase first.
|
||||
```
|
||||
|
||||
When resuming, emit a `run.start` event with `{"resumed_from":"<phase>"}` in data.
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
- **Agent fails to return:** Wait up to 5 minutes. If no response, emit `agent.complete` with `"error": true`, log the failure, and abort the run. Do not retry blindly.
|
||||
- **Event emitter fails:** Log a warning but do not block orchestration. Events are observation, not control flow.
|
||||
- **Artifact write fails:** This IS blocking. Artifacts are required for phase handoff. Abort and report.
|
||||
- **Merge conflict:** Do not force-resolve. Report the conflict, leave the branch intact, let the user decide.
|
||||
|
||||
---
|
||||
|
||||
## Progress Display
|
||||
|
||||
Throughout the run, display live progress using the format from `archeflow:using-archeflow`:
|
||||
|
||||
```
|
||||
━━━ ArcheFlow Run: <task> ━━━━━━━━━━━━━━━━━━━
|
||||
Run ID: <run_id> | Workflow: <standard> | Cycle: 1/<max>
|
||||
|
||||
[Plan] Explorer researching... -> done (35s)
|
||||
[Plan] Creator designing proposal... -> done (25s, confidence: 0.8)
|
||||
[Do] Maker implementing... -> done (90s, 4 files, 8 tests)
|
||||
[Check] Guardian reviewing... -> APPROVED
|
||||
[Check] Skeptic challenging... -> APPROVED (1 INFO)
|
||||
[Check] Sage reviewing... -> APPROVED
|
||||
[Act] All approved — merging... -> merged to main
|
||||
|
||||
━━━ Complete: 3m 10s, 1 cycle ━━━━━━━━━━━━━━━
|
||||
Artifacts: .archeflow/artifacts/<run_id>/
|
||||
Events:    .archeflow/events/<run_id>.jsonl
|
||||
```
|
||||
Reference in New Issue
Block a user