Compare commits
39 Commits
6bc5e48357
...
refactor/c
| Author | SHA1 | Date | |
|---|---|---|---|
| af1f4e7da7 | |||
| 55a6ba14c9 | |||
| da13dfba85 | |||
| e19ff0acc3 | |||
| 1bf1376a80 | |||
| 6309614bfa | |||
| aebf55a9a7 | |||
| b72eed3157 | |||
| 35c9f8269b | |||
| 6854e858a4 | |||
| 44f0896e3c | |||
| cfd3267272 | |||
| 29762a8464 | |||
| a6dcd2c956 | |||
| 516fe11710 | |||
| f10e853d8e | |||
| eabf13b9b0 | |||
| 9b2b4b3527 | |||
| 6cb7dad600 | |||
| 57e95ba151 | |||
| 4e20dc277c | |||
| 3c7d336c93 | |||
| 12575b5a47 | |||
| 362fb9ada9 | |||
| c3f5df8161 | |||
| c5174e88eb | |||
| 5e2117c9be | |||
| 30ddc6a2c4 | |||
| e09538e5e0 | |||
| 92b56e714b | |||
| 008315b0c4 | |||
| d9ec148bb3 | |||
| f2b886880a | |||
| dd82944529 | |||
| 8af9db2c12 | |||
| 7f99d52a09 | |||
| 34f101c166 | |||
| 960aba5faa | |||
| 2247e52ae4 |
@@ -1,7 +1,10 @@
|
||||
# ArcheFlow Configuration
|
||||
# Copy to your project's .archeflow/config.yaml and customize
|
||||
|
||||
version: "0.3.0"
|
||||
version: "0.7.0"
|
||||
|
||||
# Strategy — execution shape: pdca (cyclic), pipeline (linear), auto (task-based selection)
|
||||
strategy: auto
|
||||
|
||||
# Budget
|
||||
costs:
|
||||
@@ -26,7 +29,54 @@ memory:
|
||||
max_lessons: 10
|
||||
decay_after_runs: 10
|
||||
|
||||
# Models — default and per-archetype/per-workflow model selection.
|
||||
# ArcheFlow reads this to assign models to agents. The default applies unless overridden.
|
||||
models:
|
||||
default: sonnet
|
||||
# Per-archetype overrides (uncomment to customize):
|
||||
# archetypes:
|
||||
# explorer: haiku # Cheap model for research/exploration
|
||||
# creator: sonnet # Creative tasks need stronger model
|
||||
# maker: sonnet # Implementation needs full capability
|
||||
# guardian: sonnet # Security review — don't skimp
|
||||
# skeptic: haiku # Assumption checking is analytical
|
||||
# sage: haiku # Quality review can use cheaper model
|
||||
# trickster: sonnet # Adversarial testing benefits from stronger model
|
||||
# Per-workflow overrides (uncomment to customize):
|
||||
# workflows:
|
||||
# fast:
|
||||
# default: haiku # Fast workflow uses cheaper models by default
|
||||
# archetypes:
|
||||
# guardian: sonnet # Except Guardian — always needs strong model
|
||||
# standard:
|
||||
# default: sonnet
|
||||
# thorough:
|
||||
# default: sonnet
|
||||
|
||||
# Progress
|
||||
progress:
|
||||
enabled: true
|
||||
file: .archeflow/progress.md
|
||||
|
||||
# Hooks — commands to run at orchestration lifecycle events.
|
||||
# Uncomment and customize as needed.
|
||||
#
|
||||
# hooks:
|
||||
# run-start:
|
||||
# command: "echo 'ArcheFlow run starting'"
|
||||
# fail_action: warn # warn | abort
|
||||
# phase-complete:
|
||||
# command: "./scripts/on-phase-complete.sh"
|
||||
# fail_action: warn
|
||||
# agent-complete:
|
||||
# command: "./scripts/on-agent-complete.sh"
|
||||
# fail_action: warn
|
||||
# pre-merge:
|
||||
# command: "./scripts/pre-merge-checks.sh"
|
||||
# fail_action: abort # abort recommended — blocks bad merges
|
||||
# post-merge:
|
||||
# command: "./scripts/post-merge-notify.sh"
|
||||
# fail_action: warn
|
||||
# run-complete:
|
||||
# command: "./scripts/on-run-complete.sh"
|
||||
# fail_action: warn
|
||||
|
||||
16
.claude-plugin/marketplace.json
Normal file
16
.claude-plugin/marketplace.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"name": "claude-archeflow-plugin",
|
||||
"description": "ArcheFlow plugin marketplace",
|
||||
"plugins": [
|
||||
{
|
||||
"name": "archeflow",
|
||||
"description": "Multi-agent orchestration with Jungian archetypes. PDCA quality cycles, shadow detection, git worktree isolation.",
|
||||
"version": "0.3.0",
|
||||
"path": ".",
|
||||
"keywords": [
|
||||
"orchestration", "multi-agent", "archetypes", "pdca",
|
||||
"code-review", "quality", "worktrees", "shadow-detection"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "archeflow",
|
||||
"description": "Multi-agent orchestration with Jungian archetypes. PDCA quality cycles, shadow detection, git worktree isolation. Zero dependencies — works with any Claude Code session.",
|
||||
"version": "0.3.0",
|
||||
"version": "0.7.0",
|
||||
"author": {
|
||||
"name": "Chris Nennemann"
|
||||
},
|
||||
@@ -12,5 +12,14 @@
|
||||
"orchestration", "multi-agent", "archetypes", "pdca",
|
||||
"code-review", "quality", "worktrees", "jungian",
|
||||
"shadow-detection", "workflows"
|
||||
]
|
||||
],
|
||||
"skills": [
|
||||
"run", "orchestration", "plan-phase", "do-phase", "check-phase", "act-phase",
|
||||
"shadow-detection", "convergence", "artifact-routing",
|
||||
"process-log", "memory", "effectiveness", "progress",
|
||||
"colette-bridge", "git-integration", "multi-project",
|
||||
"custom-archetypes", "workflow-design", "domains", "cost-tracking",
|
||||
"templates", "autonomous-mode", "using-archeflow", "presence"
|
||||
],
|
||||
"hooks": "hooks/hooks.json"
|
||||
}
|
||||
|
||||
43
CHANGELOG.md
43
CHANGELOG.md
@@ -2,6 +2,49 @@
|
||||
|
||||
All notable changes to ArcheFlow are documented in this file.
|
||||
|
||||
## [0.7.0] -- 2026-04-04
|
||||
|
||||
### Added
|
||||
- Context isolation protocol in attention-filters skill and all 7 agent personas — agents receive only orchestrator-constructed context, no session bleed or cross-agent contamination
|
||||
- Structured status tokens (`STATUS: DONE`, `DONE_WITH_CONCERNS`, `NEEDS_CONTEXT`, `BLOCKED`) for all agents with orchestrator parsing protocol in run skill
|
||||
- Evidence-gated verification in check-phase — CRITICAL/WARNING findings require concrete evidence (command output, code citations, reproduction steps); banned speculative phrases auto-downgrade to INFO
|
||||
- Plan granularity constraint in plan-phase and Creator — each change item must be a 2-5 minute task with exact file path, code block, and verify command
|
||||
- Strategy abstraction with `pdca` (cyclic) and `pipeline` (linear) execution strategies, auto-selection by task type, and pipeline execution flow in run skill
|
||||
- Experimental status and interdisciplinary framing in README
|
||||
|
||||
## [0.6.0] -- 2026-04-04
|
||||
|
||||
### Added
|
||||
- Expanded attention-filters skill with prompt templates, token budgets, cycle-back filtering, and verification checklist
|
||||
- Explorer skip heuristic in plan-phase with decision table for when to skip/require research
|
||||
- Runnable quickstart example (`examples/runnable-quickstart.md`)
|
||||
|
||||
### Fixed
|
||||
- Normalized agent persona frontmatter: added examples, moved isolation note to Rules, documented model choices
|
||||
|
||||
## [0.5.0] -- 2026-04-04
|
||||
|
||||
### Added
|
||||
- Lib script validation at run initialization — fail fast if required scripts or `jq` are missing
|
||||
- Hook points documentation with 6 lifecycle events (run-start, phase-complete, agent-complete, pre-merge, post-merge, run-complete) and config template
|
||||
- Phase rollback support in `archeflow-rollback.sh` via `--to <phase>` flag
|
||||
- Per-workflow model assignment configuration with fallback chain (per-workflow per-archetype > per-workflow default > per-archetype > global default)
|
||||
- Cross-run finding regression detection in `archeflow-memory.sh` — compares current findings against previously resolved fixes
|
||||
- Check-phase parallel reviewer spawning protocol with Guardian-first sequence, A2 fast-path evaluation, timeout handling, and re-check protocol
|
||||
|
||||
## [0.4.0] -- 2026-04-04
|
||||
|
||||
### Added
|
||||
- Confidence gate parsing with bash snippets for extracting scores from `plan-creator.md`
|
||||
- Mini-Explorer spawning when risk coverage < 0.5
|
||||
- Worktree merge flow with explicit pre-merge hooks and post-merge test validation
|
||||
- `archeflow-rollback.sh` for post-merge test failure auto-revert
|
||||
- Test-first validation gate in Do phase
|
||||
- Memory injection audit trail with `--audit` flag and `audit-check` command
|
||||
|
||||
### Fixed
|
||||
- Unified feedback routing tables across orchestration, act-phase, artifact-routing
|
||||
|
||||
## [0.3.0] -- 2026-04-03
|
||||
|
||||
### Added
|
||||
|
||||
71
CLAUDE.md
Normal file
71
CLAUDE.md
Normal file
@@ -0,0 +1,71 @@
|
||||
# archeflow — Multi-Agent Orchestration Plugin for Claude Code
|
||||
|
||||
Workspace-level orchestration: parallel agent teams across project portfolios, PDCA cycles with Jungian archetype roles, sprint runner, and post-implementation review. Installed as a Claude Code plugin.
|
||||
|
||||
## Tech Stack
|
||||
|
||||
- **Runtime:** Bash (lib scripts) + Claude Code skill system (Markdown skills)
|
||||
- **No build step, no dependencies** — pure bash + markdown
|
||||
- **Plugin format:** Claude Code plugin (skills/, hooks/, agents/, templates/)
|
||||
|
||||
## Key Commands
|
||||
|
||||
```bash
|
||||
# Use via Claude Code slash commands:
|
||||
/af-sprint # Main mode: work the queue across projects
|
||||
/af-run <task> # Deep orchestration with PDCA cycles
|
||||
/af-review # Post-implementation security/quality review
|
||||
/af-status # Current run status
|
||||
/af-init # Initialize ArcheFlow in a project
|
||||
/af-score # Archetype effectiveness scores
|
||||
/af-memory # Cross-run lesson memory
|
||||
/af-report # Full process report
|
||||
/af-fanout # Colette book fanout via agents
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
skills/ Slash command implementations (one dir per skill)
|
||||
sprint/ /af-sprint — queue-driven parallel agent runner
|
||||
run/ /af-run — PDCA orchestration
|
||||
review/ /af-review — Guardian-led code review
|
||||
plan-phase/ PDCA Plan phase
|
||||
do-phase/ PDCA Do phase
|
||||
check-phase/ PDCA Check phase
|
||||
act-phase/ PDCA Act phase
|
||||
memory/ Cross-run lessons learned
|
||||
cost-tracking/ Token/cost awareness
|
||||
domains/ Domain detection (code, writing, research)
|
||||
... ~25 skill directories
|
||||
hooks/
|
||||
hooks.json Hook definitions
|
||||
session-start/ Auto-activation on session start
|
||||
agents/ Archetype agent definitions
|
||||
explorer.md Divergent thinking, research
|
||||
creator.md Design, architecture
|
||||
maker.md Implementation
|
||||
guardian.md Security, risk, quality gates
|
||||
sage.md Wisdom, patterns, trade-offs
|
||||
skeptic.md Devil's advocate
|
||||
trickster.md Edge cases, unconventional approaches
|
||||
lib/ Bash helper scripts (git, DAG, events, progress, etc.)
|
||||
templates/bundles/ Pre-configured workflow bundles
|
||||
docs/ Roadmap, dogfood notes, test reports
|
||||
```
|
||||
|
||||
## Domain Rules
|
||||
|
||||
- Skills are Markdown files with frontmatter — follow existing skill format exactly
|
||||
- Agents are archetype personas — maintain their distinct voice and perspective
|
||||
- Dogfood observations go to `archeflow/.archeflow/memory/lessons.jsonl`
|
||||
- Cost tracking: prefer cheap models for bulk ops, expensive for creative/review
|
||||
- PDCA cycle order is mandatory: Plan -> Do -> Check -> Act
|
||||
|
||||
## Do NOT
|
||||
|
||||
- Add runtime dependencies — this must stay zero-dependency
|
||||
- Change archetype personalities without updating all referencing skills
|
||||
- Skip the Check phase in PDCA cycles (quality gate)
|
||||
- Modify hooks.json format without testing plugin reload
|
||||
- Use ArcheFlow to orchestrate simple single-file tasks (overhead not justified)
|
||||
136
README.md
136
README.md
@@ -1,73 +1,135 @@
|
||||
# ArcheFlow -- Multi-Agent Orchestration for Claude Code
|
||||
# ArcheFlow -- Workspace Orchestration for Claude Code
|
||||
|
||||
**Structured quality through archetypal collaboration.** ArcheFlow coordinates multiple Claude Code agents through PDCA cycles, where each agent embodies a Jungian archetype with defined strengths and known failure modes.
|
||||
**Run parallel agent teams across your entire project portfolio.** ArcheFlow reads a task queue, spawns agents across multiple projects simultaneously, collects results, commits, and keeps going. Built for developers managing 10-30 repos who want throughput, not ceremony.
|
||||
|
||||
Zero dependencies. No build step. Install and go.
|
||||
|
||||
> **Status: Experimental.** ArcheFlow is a research prototype exploring the intersection of
|
||||
> analytical psychology (Jungian archetypes), process engineering (PDCA cycles), and
|
||||
> multi-agent software engineering. It is functional and actively developed, but not production-ready.
|
||||
> APIs, skill formats, and orchestration behavior may change between versions.
|
||||
|
||||
## What It Does
|
||||
|
||||
Large coding tasks benefit from multiple perspectives, but "just spawn more agents" creates chaos. Agents duplicate work, miss each other's output, argue in circles, or go rogue. The problem is not intelligence -- it is coordination.
|
||||
ArcheFlow solves three problems:
|
||||
|
||||
ArcheFlow solves this by giving each agent an *archetype*: a behavioral protocol that defines what the agent cares about, what context it receives, and how its output feeds into the next phase. Seven archetypes collaborate through **Plan-Do-Check-Act cycles**, where each iteration builds on structured feedback from the last. No unreviewed code reaches your main branch.
|
||||
**1. Workspace Sprint Runner** (`/af-sprint`) -- The primary mode. Reads your task queue, picks the highest-priority items across different projects, spawns 3-5 agents in parallel, collects results, commits+pushes, and immediately starts the next batch. Turns a 25-item backlog into done work while you watch (or don't).
|
||||
|
||||
The key insight: archetypes are not just system prompts. Each one has a **virtue** (its unique contribution) and a **shadow** (the dysfunction it falls into when pushed too far). ArcheFlow monitors for shadow activation and course-corrects automatically -- replacing an agent that blocks everything, reining in one that researches forever, or escalating when a maker goes off-script.
|
||||
**2. Post-Implementation Review** (`/af-review`) -- Run security and quality review on any diff, branch, or commit range. No planning, no implementation orchestration -- just Guardian analysis of what could go wrong. The highest-ROI mode for catching design-level bugs that linters miss.
|
||||
|
||||
**3. Deep Orchestration** (`/af-run`) -- For complex tasks that need structured exploration, design, implementation, and multi-perspective review. Uses archetypal roles (Explorer, Creator, Maker, Guardian) through PDCA cycles. Best for security-sensitive changes, multi-module refactors, and creative writing.
|
||||
|
||||
### When to use what
|
||||
|
||||
| Situation | Command | Why |
|
||||
|-----------|---------|-----|
|
||||
| Work the backlog | `/af-sprint` | Parallel agents, maximum throughput |
|
||||
| Review before merging | `/af-review` | Catch design bugs, not style nits |
|
||||
| Complex feature (L/XL) | `/af-run` or `feature-dev` | Structured exploration + review |
|
||||
| Simple fix (S/M) | Just do it | No orchestration overhead needed |
|
||||
| Creative writing | `/af-run --domain writing` | Archetypes shine here -- no linters exist for prose |
|
||||
|
||||
### What ArcheFlow is NOT
|
||||
|
||||
ArcheFlow is not a feature development tool. For single-feature implementation with user interaction at every step (clarify requirements, choose architecture, review), use Claude Code's `feature-dev` plugin or work directly. ArcheFlow adds value through **parallel execution across projects** and **domain-specific quality review** (writing, research), not by competing with single-task development tools.
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Install
|
||||
|
||||
```bash
|
||||
# From Git
|
||||
claude plugin install --url https://git.xorwell.de/c/claude-archeflow-plugin
|
||||
**From the marketplace** (recommended):
|
||||
|
||||
# Local development
|
||||
```bash
|
||||
# Add the marketplace (one time)
|
||||
/plugin marketplace add https://git.xorwell.de/c/claude-archeflow-plugin
|
||||
|
||||
# Install the plugin
|
||||
/plugin install archeflow@claude-archeflow-plugin
|
||||
```
|
||||
|
||||
**From Git URL directly:**
|
||||
|
||||
```bash
|
||||
/plugin marketplace add https://git.xorwell.de/c/claude-archeflow-plugin.git
|
||||
/plugin install archeflow --scope user
|
||||
```
|
||||
|
||||
**Local development:**
|
||||
|
||||
```bash
|
||||
claude --plugin-dir ./archeflow
|
||||
```
|
||||
|
||||
### 2. Run your first orchestration
|
||||
After installing, run `/reload-plugins` or restart Claude Code. ArcheFlow activates automatically on session start.
|
||||
|
||||
Just describe a task. ArcheFlow activates automatically for multi-file changes:
|
||||
#### Verify installation
|
||||
|
||||
```
|
||||
> Add input validation to all API endpoints
|
||||
/plugin # Opens plugin manager — check "Installed" tab
|
||||
/af-status # Should show "no active run"
|
||||
```
|
||||
|
||||
Or invoke it explicitly:
|
||||
#### Scopes
|
||||
|
||||
- `--scope user` — available in all your projects (recommended)
|
||||
- `--scope project` — only in the current project
|
||||
- `--scope local` — only in the current directory
|
||||
|
||||
### 2. Run your first sprint
|
||||
|
||||
```
|
||||
> archeflow:run "Add JWT authentication" --workflow standard
|
||||
> /af-sprint
|
||||
```
|
||||
|
||||
### 3. What happens
|
||||
|
||||
ArcheFlow selects a workflow (fast, standard, or thorough) and runs a PDCA cycle:
|
||||
ArcheFlow reads your task queue (`docs/orchestra/queue.json`), picks the highest-priority items, and spawns parallel agents:
|
||||
|
||||
```
|
||||
Plan --> Explorer researches codebase context, Creator designs a proposal
|
||||
Do --> Maker implements in an isolated git worktree
|
||||
Check --> Reviewers assess in parallel (Guardian, Skeptic, Sage, Trickster)
|
||||
Act --> All approved? Merge. Issues found? Cycle back with structured feedback.
|
||||
── af-sprint: Batch 1 ──────────────────────────
|
||||
🔸 writing.colette config parser expansion [P2, M] running
|
||||
🔸 product.jobradar search API endpoint [P3, M] running
|
||||
🔸 tool.git-alm SVG export + minimap [P3, M] running
|
||||
🔸 product.game-factory completion tracking [P3, S] running
|
||||
────────────────────────────────────────────────
|
||||
|
||||
Each cycle catches what the last one missed.
|
||||
[5 min later]
|
||||
|
||||
── Batch 1 complete ────────────────────────────
|
||||
✓ writing.colette config parser done (3m24s)
|
||||
✓ product.jobradar search API done (5m01s)
|
||||
✓ tool.git-alm SVG export done (4m30s)
|
||||
✓ product.game-factory tracking done (2m15s)
|
||||
|
||||
4 tasks · 4 projects · all committed + pushed
|
||||
Next batch: 2 items ready → dispatching...
|
||||
────────────────────────────────────────────────
|
||||
```
|
||||
|
||||
Progress is visible in real time:
|
||||
### 3. Review before merging
|
||||
|
||||
```
|
||||
--- ArcheFlow: Add JWT authentication ---------
|
||||
Workflow: standard (2 cycles max)
|
||||
|
||||
🔍 [Plan] Explorer researching... done (35s)
|
||||
🏗️ [Plan] Creator designing proposal... done (25s, confidence: 0.8)
|
||||
⚒️ [Do] Maker implementing... done (90s, 4 files, 8 tests)
|
||||
🛡️ [Check] Guardian reviewing... APPROVED
|
||||
🤔 [Check] Skeptic challenging... APPROVED (1 INFO)
|
||||
📚 [Check] Sage reviewing... APPROVED
|
||||
[Act] All approved -- merging... merged to main
|
||||
|
||||
--- Complete: 3m 10s, 1 cycle -----------------
|
||||
> /af-review --branch feat/batch-api
|
||||
```
|
||||
|
||||
Guardian analyzes the diff for error handling gaps, security issues, and data loss scenarios:
|
||||
|
||||
```
|
||||
── af-review: writing.colette ─────────────────
|
||||
🛡️ Guardian: 2 findings (1 HIGH, 1 MEDIUM)
|
||||
[HIGH] Timeout marks variant as done — loses batch state (fanout.py:552)
|
||||
[MEDIUM] No JSON error handling on corrupted state (batch.py:310)
|
||||
────────────────────────────────────────────────
|
||||
```
|
||||
|
||||
### 4. Deep orchestration (when needed)
|
||||
|
||||
For complex, security-sensitive, or creative tasks:
|
||||
|
||||
```
|
||||
> /af-run "Add JWT authentication" --workflow standard
|
||||
```
|
||||
|
||||
This runs the full PDCA cycle with archetypal roles. See "Deep Orchestration" below for details.
|
||||
|
||||
## The Seven Archetypes
|
||||
|
||||
| Archetype | Phase | Virtue | Shadow | Role |
|
||||
@@ -102,7 +164,6 @@ ArcheFlow ships with 24 skills organized by function.
|
||||
| Skill | Description |
|
||||
|-------|-------------|
|
||||
| `archeflow:shadow-detection` | Quantitative dysfunction detection and automatic correction |
|
||||
| `archeflow:attention-filters` | Context optimization per archetype -- each agent gets only what it needs |
|
||||
| `archeflow:convergence` | Detects convergence, stalling, and oscillation in multi-cycle runs |
|
||||
| `archeflow:artifact-routing` | Inter-phase artifact protocol -- naming, storage, routing, archiving |
|
||||
|
||||
@@ -279,7 +340,7 @@ archetypes: [explorer, creator, maker, guardian, db-specialist]
|
||||
|
||||
```
|
||||
archeflow/
|
||||
├── .claude-plugin/plugin.json # Plugin manifest (v0.3.0)
|
||||
├── .claude-plugin/plugin.json # Plugin manifest (v0.5.0)
|
||||
├── agents/ # 7 archetype personas (behavioral protocols)
|
||||
│ ├── explorer.md # Plan: research and context mapping
|
||||
│ ├── creator.md # Plan: solution design and proposals
|
||||
@@ -296,7 +357,6 @@ archeflow/
|
||||
│ ├── check-phase/ # Check protocols
|
||||
│ ├── act-phase/ # Act phase decision logic
|
||||
│ ├── shadow-detection/ # Dysfunction detection
|
||||
│ ├── attention-filters/ # Context optimization
|
||||
│ ├── convergence/ # Cycle convergence detection
|
||||
│ ├── artifact-routing/ # Inter-phase artifact protocol
|
||||
│ ├── process-log/ # Event-sourced JSONL logging
|
||||
|
||||
@@ -46,8 +46,16 @@ For the full output format (including Mini-Reflect, Alternatives Considered, and
|
||||
| <option B> | <reason> |
|
||||
|
||||
### Changes
|
||||
1. **`path/file.ext`** — What changes and why
|
||||
1. **`path/file.ext:line`** — What changes and why
|
||||
```language
|
||||
<target code state>
|
||||
```
|
||||
**Verify:** `<command to confirm correctness>`
|
||||
2. **`path/test.ext`** — What tests to add
|
||||
```language
|
||||
<test code>
|
||||
```
|
||||
**Verify:** `<test command>`
|
||||
|
||||
### Test Strategy
|
||||
- <specific test cases>
|
||||
@@ -67,11 +75,24 @@ For the full output format (including Mini-Reflect, Alternatives Considered, and
|
||||
```
|
||||
|
||||
## Rules
|
||||
- **Context isolation:** You receive only what the orchestrator provides. Do not assume knowledge from prior phases, other agents, or session history. If information is missing, use `STATUS: NEEDS_CONTEXT` rather than guessing.
|
||||
- Be decisive. One proposal, not three alternatives (but list alternatives you rejected).
|
||||
- Name every file. The Maker needs exact paths.
|
||||
- Scope ruthlessly. Adjacent problems go under "Not Doing."
|
||||
- Include test strategy. No proposal is complete without it.
|
||||
- **Granularity:** Each change item must be a 2-5 minute task with exact file path, code block showing the target state, and a verify command. If an item would take >5 minutes, split it. If a non-trivial task has <2 items, you under-specified.
|
||||
- Any Confidence axis < 0.5? Flag it — the orchestrator may pause or escalate.
|
||||
|
||||
## Status Token
|
||||
|
||||
End your output with exactly one status line:
|
||||
|
||||
- `STATUS: DONE` — proposal ready with confidence scores
|
||||
- `STATUS: DONE_WITH_CONCERNS` — proposal ready but low confidence on one or more axes
|
||||
- `STATUS: NEEDS_CONTEXT` — cannot proceed without additional information (describe what is missing)
|
||||
- `STATUS: BLOCKED` — unresolvable obstacle (describe it)
|
||||
|
||||
This line MUST be the last non-empty line of your output.
|
||||
|
||||
## Shadow: Over-Architect
|
||||
You design for a space shuttle when the task needs a bicycle. Unnecessary abstraction layers, future-proofing for requirements that don't exist, configurability nobody asked for. If the proposal has more infrastructure than business logic — simplify. Design for the current order of magnitude, not 100x.
|
||||
|
||||
@@ -4,7 +4,7 @@ description: |
|
||||
Spawn as the Explorer archetype for the Plan phase — researches codebase context, maps dependencies, identifies patterns, and synthesizes findings.
|
||||
<example>User: "Research the auth module before we redesign it"</example>
|
||||
<example>Part of ArcheFlow Plan phase</example>
|
||||
model: haiku
|
||||
model: haiku # Cost optimization: research/exploration is analytical, cheaper model suffices
|
||||
---
|
||||
|
||||
You are the **Explorer** archetype 🔍. You gather context so the team can make informed decisions.
|
||||
@@ -45,9 +45,21 @@ You see the landscape before anyone acts. You map dependencies, spot existing pa
|
||||
```
|
||||
|
||||
## Rules
|
||||
- **Context isolation:** You receive only what the orchestrator provides. Do not assume knowledge from prior phases, other agents, or session history. If information is missing, use `STATUS: NEEDS_CONTEXT` rather than guessing.
|
||||
- Synthesize, don't dump. Raw file lists are useless.
|
||||
- Stay focused on the task. Interesting tangents go in a "See Also" footnote, not the main report.
|
||||
- Cap your research at 15 files. If you need more, the task is too broad.
|
||||
|
||||
## Status Token
|
||||
|
||||
End your output with exactly one status line:
|
||||
|
||||
- `STATUS: DONE` — research complete, findings ready
|
||||
- `STATUS: DONE_WITH_CONCERNS` — research complete but gaps remain (noted in output)
|
||||
- `STATUS: NEEDS_CONTEXT` — cannot proceed without additional information (describe what is missing)
|
||||
- `STATUS: BLOCKED` — unresolvable obstacle (describe it)
|
||||
|
||||
This line MUST be the last non-empty line of your output.
|
||||
|
||||
## Shadow: Rabbit Hole
|
||||
Your curiosity becomes compulsive investigation. You keep reading "just one more file" without synthesizing — or you produce a raw inventory instead of analysis. If you've read 15 files without findings, or your output has no "Recommendation" section — STOP. Synthesize what you have. A dump is not research. Good-enough now beats perfect never.
|
||||
|
||||
@@ -36,9 +36,22 @@ You see attack surfaces others walk past. You calibrate your response to actual
|
||||
- **INFO** — Minor hardening opportunity.
|
||||
|
||||
## Rules
|
||||
- **Context isolation:** You receive only what the orchestrator provides. Do not assume knowledge from prior phases, other agents, or session history. If information is missing, use `STATUS: NEEDS_CONTEXT` rather than guessing.
|
||||
- APPROVED = zero CRITICAL findings
|
||||
- Every finding needs a suggested fix, not just a complaint
|
||||
- **Evidence required:** Every CRITICAL or WARNING must cite a specific command output, exit code, or exact code with file path and line numbers. Findings without evidence are downgraded to INFO by the orchestrator.
|
||||
- Be rigorous but practical — flag real risks, not science fiction
|
||||
|
||||
## Status Token
|
||||
|
||||
End your output with exactly one status line:
|
||||
|
||||
- `STATUS: DONE` — review complete, verdict and findings ready
|
||||
- `STATUS: DONE_WITH_CONCERNS` — review complete but some areas could not be fully assessed
|
||||
- `STATUS: NEEDS_CONTEXT` — cannot proceed without additional information (describe what is missing)
|
||||
- `STATUS: BLOCKED` — unresolvable obstacle (describe it)
|
||||
|
||||
This line MUST be the last non-empty line of your output.
|
||||
|
||||
## Shadow: Paranoid
|
||||
Your risk awareness becomes blocking everything. Every finding is CRITICAL, every risk is existential, and you reject without suggesting how to fix it. Ask: "Would a senior engineer block this PR for this?" If no, downgrade. Every rejection MUST include a specific fix — if you can't suggest one, you don't understand the problem well enough to reject.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
name: maker
|
||||
description: |
|
||||
Spawn as the Maker archetype for the Do phase — implements code from the Creator's proposal in an isolated git worktree. Always use with isolation: "worktree".
|
||||
Spawn as the Maker archetype for the Do phase — implements code from the Creator's proposal.
|
||||
<example>Part of ArcheFlow Do phase</example>
|
||||
model: inherit
|
||||
---
|
||||
@@ -45,6 +45,8 @@ You turn plans into working, tested, committed code. Small steps, steady progres
|
||||
```
|
||||
|
||||
## Rules
|
||||
- **Context isolation:** You receive only what the orchestrator provides. Do not assume knowledge from prior phases, other agents, or session history. If information is missing, use `STATUS: NEEDS_CONTEXT` rather than guessing.
|
||||
- **Isolation:** Always spawn with `isolation: "worktree"` to work in a dedicated git worktree.
|
||||
- Follow the proposal. Don't redesign.
|
||||
- Tests before implementation. Always.
|
||||
- Commit after each logical step. Not one big commit at the end.
|
||||
@@ -52,5 +54,16 @@ You turn plans into working, tested, committed code. Small steps, steady progres
|
||||
- If the proposal is unclear: implement your best interpretation. Note what you assumed.
|
||||
- If you find a blocker: document it and stop. Don't silently work around it.
|
||||
|
||||
## Status Token
|
||||
|
||||
End your output with exactly one status line:
|
||||
|
||||
- `STATUS: DONE` — implementation complete, all commits made
|
||||
- `STATUS: DONE_WITH_CONCERNS` — implementation complete but assumptions were made (noted in output)
|
||||
- `STATUS: NEEDS_CONTEXT` — cannot proceed without additional information (describe what is missing)
|
||||
- `STATUS: BLOCKED` — unresolvable obstacle (describe it)
|
||||
|
||||
This line MUST be the last non-empty line of your output.
|
||||
|
||||
## Shadow: Rogue
|
||||
Your bias for action becomes reckless shipping. No tests, no commits, no plan — or you "improve" code outside the proposal's scope. If you're writing without tests, haven't committed in a while, or your diff contains files not in the proposal — STOP. Read the proposal. Write a test. Commit. Revert extras.
|
||||
|
||||
@@ -46,10 +46,23 @@ You see the forest, not just the trees. "Will a new team member understand this
|
||||
- Are existing docs/comments still accurate after the change?
|
||||
|
||||
## Rules
|
||||
- **Context isolation:** You receive only what the orchestrator provides. Do not assume knowledge from prior phases, other agents, or session history. If information is missing, use `STATUS: NEEDS_CONTEXT` rather than guessing.
|
||||
- APPROVED = code is readable, tested, consistent, and complete
|
||||
- REJECTED = significant quality issues that affect maintainability
|
||||
- **Evidence required:** Quality findings must cite specific code (file:line, exact construct) or measurable criteria. Do not raise vague suggestions — if you cannot point to the code, do not raise the finding.
|
||||
- Focus on the next 6 months. Not the next 6 years.
|
||||
- Your review should be shorter than the code change. If it's not, you're over-reviewing.
|
||||
|
||||
## Status Token
|
||||
|
||||
End your output with exactly one status line:
|
||||
|
||||
- `STATUS: DONE` — review complete, verdict and findings ready
|
||||
- `STATUS: DONE_WITH_CONCERNS` — review complete but some quality dimensions could not be assessed
|
||||
- `STATUS: NEEDS_CONTEXT` — cannot proceed without additional information (describe what is missing)
|
||||
- `STATUS: BLOCKED` — unresolvable obstacle (describe it)
|
||||
|
||||
This line MUST be the last non-empty line of your output.
|
||||
|
||||
## Shadow: Bureaucrat
|
||||
Your thoroughness becomes bloat. Your review is longer than the code change, you're suggesting improvements to untouched code, or producing deep-sounding analysis without actionable findings. If you can't state the consequence of NOT fixing it, don't raise it. If a finding doesn't end with a specific action, delete it. Insight without action is noise.
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
name: skeptic
|
||||
description: |
|
||||
Spawn as the Skeptic archetype for the Check phase — challenges assumptions, identifies untested scenarios, and proposes alternatives the team hasn't considered.
|
||||
<example>User: "Challenge the assumptions in this proposal"</example>
|
||||
<example>Part of ArcheFlow Check phase</example>
|
||||
model: inherit
|
||||
---
|
||||
@@ -32,11 +33,24 @@ You make the implicit explicit. "The plan assumes X — but does X actually hold
|
||||
```
|
||||
|
||||
## Rules
|
||||
- **Context isolation:** You receive only what the orchestrator provides. Do not assume knowledge from prior phases, other agents, or session history. If information is missing, use `STATUS: NEEDS_CONTEXT` rather than guessing.
|
||||
- Every challenge MUST include an alternative. "This might not work" alone is not helpful.
|
||||
- Limit to 3-5 challenges. More than 7 is shadow behavior.
|
||||
- **Evidence required:** Every challenge must reference specific code (file:line) or describe a concrete scenario with reproduction steps. Vague concerns without evidence are downgraded to INFO by the orchestrator.
|
||||
- Stay in scope. Challenge the task's assumptions, not the universe's.
|
||||
- APPROVED = no fundamental design flaws
|
||||
- REJECTED = the approach is wrong, and you have a better one
|
||||
|
||||
## Status Token
|
||||
|
||||
End your output with exactly one status line:
|
||||
|
||||
- `STATUS: DONE` — review complete, verdict and findings ready
|
||||
- `STATUS: DONE_WITH_CONCERNS` — review complete but some assumptions could not be verified
|
||||
- `STATUS: NEEDS_CONTEXT` — cannot proceed without additional information (describe what is missing)
|
||||
- `STATUS: BLOCKED` — unresolvable obstacle (describe it)
|
||||
|
||||
This line MUST be the last non-empty line of your output.
|
||||
|
||||
## Shadow: Paralytic
|
||||
Your critical thinking becomes inability to approve anything. You list 7+ challenges, chain "what about X?" tangents, or question things outside the task — each plausible alone, none actionable together. STOP. Rank by impact. Keep top 3. Each must include an alternative. Delete the rest.
|
||||
|
||||
@@ -4,7 +4,7 @@ description: |
|
||||
Spawn as the Trickster archetype for the Check phase (thorough workflow only) — adversarial testing, boundary attacks, edge case exploitation, and chaos engineering.
|
||||
<example>User: "Try to break the new input handler"</example>
|
||||
<example>Part of ArcheFlow thorough Check phase</example>
|
||||
model: haiku
|
||||
model: haiku # Cost optimization: adversarial testing is pattern-matching, cheaper model suffices
|
||||
---
|
||||
|
||||
You are the **Trickster** archetype 🃏. You break things so users don't have to.
|
||||
@@ -39,10 +39,22 @@ You think like an attacker, a clumsy user, a failing network. You find the edges
|
||||
```
|
||||
|
||||
## Rules
|
||||
- **Context isolation:** You receive only what the orchestrator provides. Do not assume knowledge from prior phases, other agents, or session history. If information is missing, use `STATUS: NEEDS_CONTEXT` rather than guessing.
|
||||
- Test ONLY the changed code, not the entire system
|
||||
- Every finding needs exact reproduction steps
|
||||
- If you can't break it after 5 serious attempts — APPROVED. The code is resilient.
|
||||
- Constructive chaos only. Your goal is quality, not destruction.
|
||||
|
||||
## Status Token
|
||||
|
||||
End your output with exactly one status line:
|
||||
|
||||
- `STATUS: DONE` — review complete, verdict and findings ready
|
||||
- `STATUS: DONE_WITH_CONCERNS` — testing complete but some attack vectors could not be exercised
|
||||
- `STATUS: NEEDS_CONTEXT` — cannot proceed without additional information (describe what is missing)
|
||||
- `STATUS: BLOCKED` — unresolvable obstacle (describe it)
|
||||
|
||||
This line MUST be the last non-empty line of your output.
|
||||
|
||||
## Shadow: False Alarm
|
||||
You flood with low-signal findings. Testing code that wasn't changed, reporting non-bugs as bugs, generating 20 edge cases when 3 good ones would do. If your findings reference files not in the Maker's diff — delete them. Quality over quantity. Three real findings beat twenty noise.
|
||||
|
||||
181
docs/dogfood-2026-04-04-batch.md
Normal file
181
docs/dogfood-2026-04-04-batch.md
Normal file
@@ -0,0 +1,181 @@
|
||||
# ArcheFlow Dogfood Report #2: Batch API Integration
|
||||
|
||||
Date: 2026-04-04
|
||||
Task: Wire Anthropic Batch API into Colette's fanout pipeline with CLI commands and state persistence
|
||||
Project: writing.colette (Python, 27 modules, 457 tests)
|
||||
Complexity: High — 4 files, async API, state persistence, error recovery, CLI commands
|
||||
|
||||
## Experimental Setup
|
||||
|
||||
Same task, same starting commit, two conditions:
|
||||
1. **Baseline**: Plain Claude, no orchestration, single pass
|
||||
2. **ArcheFlow**: PDCA standard workflow (Maker + Guardian review)
|
||||
|
||||
No Explorer or Creator used this time — task scope was clear enough to skip planning and go directly to Maker + Guardian (effectively a fast workflow).
|
||||
|
||||
## Quantitative Comparison
|
||||
|
||||
| Metric | Baseline | ArcheFlow | Delta |
|
||||
|--------|----------|-----------|-------|
|
||||
| Lines added | 189 | 279 | +48% |
|
||||
| Files touched | 4 | 4 | same |
|
||||
| Time | ~5 min | ~12 min | +140% |
|
||||
| Commits | 1 | 4 | cleaner history |
|
||||
| Tests written | 1 | 2 | +1 |
|
||||
| Tests passing | 13/13 | 14/14 | +1 |
|
||||
| Bugs introduced | 0 | 1 | worse |
|
||||
| Bugs caught by review | 0 | 5 | better |
|
||||
| **Real bugs in final code** | **1** | **0** (after fix) | **ArcheFlow wins** |
|
||||
|
||||
## Bug Analysis
|
||||
|
||||
### Bugs found only by Guardian (not present in baseline)
|
||||
|
||||
| # | Bug | Severity | Impact |
|
||||
|---|-----|----------|--------|
|
||||
| 3 | `hash()` non-deterministic across processes for chapter index mapping | HIGH | Data loss on resume — chapters mapped to wrong files |
|
||||
|
||||
This bug was **introduced by ArcheFlow's Maker** and caught by the Guardian. Baseline used `enumerate(i)` and avoided it entirely. Net: zero value.
|
||||
|
||||
### Bugs present in BOTH versions, caught only by Guardian
|
||||
|
||||
| # | Bug | Severity | Impact |
|
||||
|---|-----|----------|--------|
|
||||
| 4 | Timeout marks variant as "done" — permanently loses batch state | HIGH | Silent data loss — timed-out batches can never be resumed |
|
||||
|
||||
This is the **key finding**. Both implementations had this design-level bug. Only ArcheFlow's Guardian caught it. Plain Claude missed it because there was no review step.
|
||||
|
||||
### Bugs in both, not caught by either initially
|
||||
|
||||
| # | Bug | Severity | Impact |
|
||||
|---|-----|----------|--------|
|
||||
| 1 | API key resolution inconsistency (env vs config) | CRITICAL | Wrong key used under mixed-key environments |
|
||||
| 5 | No JSON error handling on corrupted state files | HIGH | Crash on truncated state file |
|
||||
|
||||
Guardian flagged these. Baseline would have shipped them silently.
|
||||
|
||||
## Qualitative Observations
|
||||
|
||||
### Where Guardian added real value
|
||||
|
||||
1. **Error path analysis**: Guardian systematically checked "what happens when X fails?" for timeout, cancellation, corruption, and cross-process resume. Plain Claude focused on the happy path.
|
||||
2. **Cross-process state**: The `hash()` non-determinism finding required reasoning about Python's hash randomization across interpreter invocations — a subtle runtime property that isn't visible from reading the code in isolation.
|
||||
3. **Data loss scenarios**: Finding #4 (timeout → "done" → lost forever) requires understanding the interaction between `wait_and_retrieve`'s timeout branch and the caller's unconditional status assignment. This is a 2-module interaction that single-pass implementation doesn't systematically check.
|
||||
|
||||
### Where Guardian added noise
|
||||
|
||||
1. **Finding #2 (batch_id validation)**: Technically valid but the Anthropic SDK already rejects malformed IDs. Low practical risk.
|
||||
2. **Finding #1 (API key source)**: Valid but matches existing patterns throughout the codebase — flagging it here without flagging it elsewhere is inconsistent.
|
||||
|
||||
### The Maker problem
|
||||
|
||||
The ArcheFlow Maker introduced a bug (hash-based indexing) that the baseline avoided. This happened because:
|
||||
- The Maker was working from a task description, not reading the existing sequential rewrite code as closely
|
||||
- The Creator's plan (when used in dogfood #1) over-specified some things and under-specified others
|
||||
- Working through an intermediary (plan → implementation) introduces information loss
|
||||
|
||||
This is a structural weakness of the PDCA model: the Plan-to-Do handoff can corrupt information.
|
||||
|
||||
## Conclusions
|
||||
|
||||
### Complexity threshold confirmed
|
||||
|
||||
| Task type | Orchestration value |
|
||||
|-----------|-------------------|
|
||||
| Simple (pattern-following, single file) | **Negative** — adds cost, Maker introduces bugs |
|
||||
| Medium (multi-file feature, clear scope) | **Neutral** — extra code but similar outcome |
|
||||
| Complex (error handling, state, async, resume) | **Positive** — Guardian catches design-level bugs |
|
||||
|
||||
The differentiator is **error path coverage**. Guardian's systematic "what if this fails?" analysis catches bugs that single-pass implementation misses because implementers focus on making things work, not on making failures safe.
|
||||
|
||||
### The honest ROI question
|
||||
|
||||
For this task: Guardian caught 1 bug the baseline missed (timeout data loss). That bug would have caused real data loss in production when a batch times out. The cost was ~7 extra minutes and a Maker-introduced bug that had to be fixed.
|
||||
|
||||
Is preventing a production data loss bug worth 7 extra minutes? Yes. But only because this was a task where data loss was possible. For a pure UI change or a refactor with no persistence, the answer would be no.
|
||||
|
||||
---
|
||||
|
||||
## Improvement Hypotheses
|
||||
|
||||
Based on both dogfood runs, here are concrete hypotheses about how to improve ArcheFlow's value-to-cost ratio:
|
||||
|
||||
### H1: Guardian-Only Mode (skip Plan/Do orchestration)
|
||||
|
||||
**Observation**: In both dogfoods, the Maker produced equivalent-or-worse code than plain Claude. The value came entirely from the Guardian review.
|
||||
|
||||
**Hypothesis**: A "review-only" mode where the user implements normally and then runs ArcheFlow as a post-implementation review would capture the Guardian's value without the Maker's overhead.
|
||||
|
||||
**Test**: Implement the same task plain, then run `af-review` (Guardian + Skeptic on the diff). Compare bug catch rate to full PDCA.
|
||||
|
||||
**Expected outcome**: Same bug catch rate, ~60% less cost.
|
||||
|
||||
### H2: Pre-Implementation Threat Modeling (Guardian before Maker)
|
||||
|
||||
**Observation**: Guardian found error-handling bugs (timeout, corruption) that the Maker didn't anticipate. If Guardian's "what could go wrong?" analysis ran BEFORE implementation, the Maker could build in error handling from the start.
|
||||
|
||||
**Hypothesis**: Running a lightweight Guardian analysis on the Creator's plan (not the code) would produce a "threat list" that the Maker addresses during implementation, eliminating the need for a fix cycle.
|
||||
|
||||
**Sequence**: Creator → Guardian(plan) → Maker(plan + threats) → Guardian(code)
|
||||
|
||||
**Expected outcome**: Fewer Maker-introduced bugs, shorter fix cycle, Guardian's code review focuses on implementation correctness rather than missing error paths.
|
||||
|
||||
### H3: Differential Review (only review what the Maker DIDN'T get from the plan)
|
||||
|
||||
**Observation**: The Maker copies most of the plan correctly. The bugs are in the gaps — things the plan didn't specify (error handling, cross-process state, timeout recovery).
|
||||
|
||||
**Hypothesis**: Instead of reviewing the entire diff, focus the Guardian on the delta between the plan and the implementation — what the Maker added, changed, or skipped that wasn't in the plan.
|
||||
|
||||
**Test**: Extract the plan's explicit instructions, diff against the implementation, and give Guardian only the unplanned additions.
|
||||
|
||||
**Expected outcome**: Higher signal-to-noise ratio (fewer false positives on code that correctly follows the plan), focused attention on the dangerous gaps.
|
||||
|
||||
### H4: Project Convention Calibration (reduce false positives)
|
||||
|
||||
**Observation**: Guardian flagged API key handling (finding #1) and batch_id validation (finding #2) — both valid in absolute terms but inconsistent with the project's existing patterns. The project doesn't validate IDs or centralize key management anywhere else.
|
||||
|
||||
**Hypothesis**: Injecting a "project conventions" summary before Guardian review (e.g., "this project uses env vars for API keys, does not validate external IDs, handles errors via outer try/except") would let Guardian calibrate its expectations and only flag deviations from convention, not the convention itself.
|
||||
|
||||
**Test**: Run Guardian with and without convention context on the same diff. Count false positives.
|
||||
|
||||
**Expected outcome**: 30-50% reduction in noise findings without missing real bugs.
|
||||
|
||||
### H5: Abandon PDCA for Implementation, Keep It for Review
|
||||
|
||||
**Observation**: Across both dogfoods, the cycle-back mechanism (Plan→Do→Check→Act→cycle back) never triggered. All reviews were APPROVED_WITH_FIXES, and fixes were applied in a single pass. The cyclic model added structural overhead (event tracking, artifact routing, convergence detection) that was never used.
|
||||
|
||||
**Hypothesis**: For most tasks, a linear pipeline (implement → multi-reviewer check → targeted fix) is sufficient. Reserve cyclic PDCA for tasks where reviewers fundamentally reject the approach (not just the implementation).
|
||||
|
||||
**Test**: Compare PDCA standard (cycle-back enabled) vs pipeline (no cycle-back) on 10 tasks. Measure: how often does cycle-back actually improve the outcome?
|
||||
|
||||
**Expected outcome**: Cycle-back triggers in <10% of tasks. Pipeline matches PDCA quality for 90%+ of cases at lower cost.
|
||||
|
||||
### H6: Evidence-Gated Findings Actually Work
|
||||
|
||||
**Observation**: Of Guardian's 5 findings in this dogfood, 3 were substantive (timeout data loss, hash non-determinism, no JSON error handling) and 2 were low-value (API key pattern, batch_id format). The substantive ones cited specific code paths and failure scenarios. The low-value ones cited general principles without evidence of actual exploitation.
|
||||
|
||||
**Hypothesis**: The evidence-gating mechanism added in v0.7.0 (ban hedged phrases, require command output or code citation) would have automatically downgraded finding #2 ("could corrupt log output") while preserving findings #3 and #4 (which cite specific code paths and failure mechanisms).
|
||||
|
||||
**Test**: Re-run the Guardian review with evidence-gating active. Count how many findings survive vs. get downgraded.
|
||||
|
||||
**Expected outcome**: 1-2 findings correctly downgraded, 0 real bugs missed.
|
||||
|
||||
### H7: Shadow Detection for the Maker
|
||||
|
||||
**Observation**: The Maker introduced a bug (hash-based indexing) because it deviated from the existing codebase pattern (enumerate-based indexing). This is the "Rogue" shadow — the Maker going off-script from what the codebase already does.
|
||||
|
||||
**Hypothesis**: A pre-commit check that compares the Maker's implementation against the existing codebase patterns (e.g., "how are chapter indices computed elsewhere in fanout.py?") would catch Rogue deviations before the Guardian review.
|
||||
|
||||
**Test**: Add a "pattern conformance" check to the Do phase that greps for how the modified variables/functions are used elsewhere in the file.
|
||||
|
||||
**Expected outcome**: Catches Rogue shadow bugs at implementation time rather than review time, saving a review cycle.
|
||||
|
||||
---
|
||||
|
||||
## Recommended Next Steps (Priority Order)
|
||||
|
||||
1. **H1**: Build `af-review` mode (Guardian-only on existing diff) — lowest effort, highest expected ROI
|
||||
2. **H4**: Project convention injection — reduce noise without missing signal
|
||||
3. **H2**: Pre-implementation threat modeling — address the root cause of missing error handling
|
||||
4. **H5**: Default to pipeline strategy, reserve PDCA for rejections
|
||||
5. **H7**: Maker pattern conformance check — reduce Maker-introduced bugs
|
||||
78
docs/dogfood-2026-04-04.md
Normal file
78
docs/dogfood-2026-04-04.md
Normal file
@@ -0,0 +1,78 @@
|
||||
# ArcheFlow Dogfood Report: Colette Expose/Pitch Generation
|
||||
|
||||
Date: 2026-04-04
|
||||
Task: Implement expose and pitch generation steps in Colette's fanout pipeline
|
||||
Project: writing.colette (Python, 27 modules, 457 tests)
|
||||
|
||||
## Task Description
|
||||
|
||||
The fanout pipeline in `src/colette/fanout.py` had two placeholder steps (`generate_expose`, `generate_pitch`) that logged "not yet implemented". The task was to replace them with real LLM-powered implementations that generate publishing proposals and pitch letters.
|
||||
|
||||
## Conditions
|
||||
|
||||
| Condition | Strategy | Agents | Time | Lines |
|
||||
|-----------|----------|--------|------|-------|
|
||||
| **Plain Claude** (no orchestration) | None | 0 | ~3 min | 107 (+75 impl, +32 test) |
|
||||
| **ArcheFlow PDCA** (standard workflow) | pdca | 4 (Explorer, Creator, Maker, Guardian) | ~15 min | 230 (+145 impl, +85 test) |
|
||||
|
||||
## Findings
|
||||
|
||||
### Bugs introduced
|
||||
|
||||
| Condition | Bug | Caught by | Severity |
|
||||
|-----------|-----|-----------|----------|
|
||||
| Plain Claude | None | N/A | N/A |
|
||||
| ArcheFlow | `task_type`/`file_path` kwargs passed to `LLMClient.create()` but only exist on `GuardedLLMClient` | Guardian review | CRITICAL (runtime crash on non-guarded clients) |
|
||||
|
||||
**Key observation:** ArcheFlow's Maker introduced a bug that plain Claude avoided. The Guardian caught it, but the net result was: introduce bug + catch bug = extra work for the same outcome.
|
||||
|
||||
### Code comparison
|
||||
|
||||
| Metric | Plain Claude | ArcheFlow |
|
||||
|--------|-------------|-----------|
|
||||
| Implementation lines | 75 | 145 |
|
||||
| Test lines | 32 | 85 |
|
||||
| LLMClient compatibility | Clean (protocol args only) | Needed fix (extra kwargs) |
|
||||
| Prompt detail | Adequate (10 sections listed) | More detailed (explicit section descriptions) |
|
||||
| Defensive coding | Minimal (follows existing patterns) | More (mkdir guards, fallback paths) |
|
||||
| Test thoroughness | Basic (file existence, call count) | More thorough (token accumulation, error states) |
|
||||
|
||||
### Process overhead
|
||||
|
||||
| Phase | Time | Value added |
|
||||
|-------|------|-------------|
|
||||
| Explorer research | ~60s | Low — task was well-scoped, pattern was obvious from reading 2 lines |
|
||||
| Creator proposal | ~45s | Low — 300-line plan for 75-line task, mostly restated what the code already showed |
|
||||
| Maker implementation | ~90s | Same as plain Claude, but produced more verbose code + a bug |
|
||||
| Guardian review | ~30s | Mixed — caught 1 real bug (out of 5 findings, 80% noise) |
|
||||
|
||||
### Why plain Claude won
|
||||
|
||||
1. **Pattern-following task.** Two placeholder functions, one existing pattern to copy. No ambiguity, no design decisions, no security concerns.
|
||||
2. **Direct protocol reading.** Plain Claude checked the `LLMClient.create()` signature and used only standard args. The Maker, working from the Creator's plan (which didn't mention the protocol), used extra kwargs it saw in the `GuardedLLMClient`.
|
||||
3. **Less indirection = fewer errors.** The Creator-to-Maker handoff introduced information loss. The Creator specified "call llm_client.create()" but didn't specify the exact signature constraints. Plain Claude read the source of truth directly.
|
||||
|
||||
### When ArcheFlow would have been worth it
|
||||
|
||||
This task had none of these signals:
|
||||
- Ambiguous requirements (need Explorer)
|
||||
- Multiple valid approaches (need Creator to evaluate)
|
||||
- Security-sensitive code (need Guardian for real threats)
|
||||
- Cross-cutting changes (5+ files, interaction risks)
|
||||
- Unfamiliar codebase (need research phase)
|
||||
|
||||
### Improvement opportunities
|
||||
|
||||
1. **Auto-select should skip orchestration** for pattern-following tasks (placeholder + existing pattern in same file)
|
||||
2. **Creator compact mode** — for simple tasks, emit a 10-line diff-style plan, not a 300-line essay
|
||||
3. **Explorer budget cap** — 60s max for single-file tasks
|
||||
4. **Guardian calibration** — inject project conventions to reduce false positives from 80% to ~40%
|
||||
5. **Baseline capture** — run the same task without ArcheFlow to enable A/B comparison
|
||||
|
||||
## Conclusion
|
||||
|
||||
For this specific task (simple, pattern-following, single-file, well-scoped), ArcheFlow added cost without adding quality. Plain Claude was faster, produced less code, and avoided a bug that the Maker introduced.
|
||||
|
||||
This is not a failure of ArcheFlow's design — it's a calibration problem. The auto-select heuristic should have detected this as a skip-orchestration task. The complexity threshold for ArcheFlow activation needs to be higher than "touches 2+ files."
|
||||
|
||||
**Honest assessment:** ArcheFlow's value-add starts at tasks requiring genuine design decisions, security review, or cross-module coordination. Below that threshold, it's ceremony.
|
||||
88
docs/hooks.md
Normal file
88
docs/hooks.md
Normal file
@@ -0,0 +1,88 @@
|
||||
# ArcheFlow Hook Points
|
||||
|
||||
Hooks let you run custom commands at key points during an ArcheFlow orchestration run. Use them for notifications, custom validation, CI integration, or project-specific checks.
|
||||
|
||||
## Available Hooks
|
||||
|
||||
| Hook | When | Env Vars | Default `fail_action` |
|
||||
|------|------|----------|----------------------|
|
||||
| `run-start` | After initialization, before Plan phase begins | `ARCHEFLOW_RUN_ID`, `ARCHEFLOW_WORKFLOW`, `ARCHEFLOW_TASK` | `warn` |
|
||||
| `phase-complete` | After each PDCA phase finishes | `ARCHEFLOW_RUN_ID`, `ARCHEFLOW_PHASE`, `ARCHEFLOW_CYCLE` | `warn` |
|
||||
| `agent-complete` | After each agent returns | `ARCHEFLOW_RUN_ID`, `ARCHEFLOW_AGENT`, `ARCHEFLOW_PHASE`, `ARCHEFLOW_DURATION_MS` | `warn` |
|
||||
| `pre-merge` | After all reviewers approve, before merging to target branch | `ARCHEFLOW_RUN_ID`, `ARCHEFLOW_BRANCH`, `ARCHEFLOW_TARGET` | `abort` |
|
||||
| `post-merge` | After successful merge to target branch | `ARCHEFLOW_RUN_ID`, `ARCHEFLOW_BRANCH`, `ARCHEFLOW_MERGE_COMMIT` | `warn` |
|
||||
| `run-complete` | After the run finishes (success or failure) | `ARCHEFLOW_RUN_ID`, `ARCHEFLOW_STATUS`, `ARCHEFLOW_CYCLES`, `ARCHEFLOW_DURATION_S` | `warn` |
|
||||
|
||||
## Configuration
|
||||
|
||||
Add a `hooks:` section to your project's `.archeflow/config.yaml`:
|
||||
|
||||
```yaml
|
||||
hooks:
|
||||
run-start:
|
||||
command: "echo 'Run starting: $ARCHEFLOW_RUN_ID'"
|
||||
fail_action: warn
|
||||
pre-merge:
|
||||
command: "./scripts/lint-check.sh"
|
||||
fail_action: abort
|
||||
run-complete:
|
||||
command: "curl -X POST https://slack.example.com/webhook -d '{\"text\": \"ArcheFlow run $ARCHEFLOW_STATUS\"}'"
|
||||
fail_action: warn
|
||||
```
|
||||
|
||||
Each hook entry has two fields:
|
||||
|
||||
- **`command`** -- shell command to execute. Env vars are available. Runs with `bash -c`.
|
||||
- **`fail_action`** -- what happens if the command exits non-zero:
|
||||
- `warn` -- log a warning, continue the run
|
||||
- `abort` -- stop the run immediately, report the failure
|
||||
|
||||
## `fail_action` Semantics
|
||||
|
||||
| `fail_action` | On command exit 0 | On command exit non-zero |
|
||||
|---------------|-------------------|------------------------|
|
||||
| `warn` | Continue silently | Log warning, continue |
|
||||
| `abort` | Continue silently | Emit `decision` event with `"chosen":"hook_abort"`, halt run, report to user |
|
||||
|
||||
**Recommended settings:**
|
||||
- Use `abort` for `pre-merge` -- a failing pre-merge check should block the merge
|
||||
- Use `warn` for informational hooks (`run-start`, `run-complete`, `post-merge`)
|
||||
- Use `warn` for `agent-complete` and `phase-complete` unless you have strict SLA requirements
|
||||
|
||||
## Examples
|
||||
|
||||
### Slack notification on run complete
|
||||
|
||||
```yaml
|
||||
hooks:
|
||||
run-complete:
|
||||
command: >
|
||||
curl -s -X POST "$SLACK_WEBHOOK_URL"
|
||||
-H 'Content-Type: application/json'
|
||||
-d '{"text":"ArcheFlow run '"$ARCHEFLOW_RUN_ID"' '"$ARCHEFLOW_STATUS"' ('"$ARCHEFLOW_CYCLES"' cycles, '"$ARCHEFLOW_DURATION_S"'s)"}'
|
||||
fail_action: warn
|
||||
```
|
||||
|
||||
### Pre-merge lint gate
|
||||
|
||||
```yaml
|
||||
hooks:
|
||||
pre-merge:
|
||||
command: "npm run lint && npm run typecheck"
|
||||
fail_action: abort
|
||||
```
|
||||
|
||||
### Log phase timing
|
||||
|
||||
```yaml
|
||||
hooks:
|
||||
phase-complete:
|
||||
command: "echo \"$(date -u +%H:%M:%S) phase=$ARCHEFLOW_PHASE cycle=$ARCHEFLOW_CYCLE run=$ARCHEFLOW_RUN_ID\" >> .archeflow/phase-timing.log"
|
||||
fail_action: warn
|
||||
```
|
||||
|
||||
## Hook Execution
|
||||
|
||||
Hooks are executed by the `archeflow:run` skill at the corresponding lifecycle point. The command runs in the project root directory with `bash -c`. A 30-second timeout applies to each hook -- if a hook exceeds this, it is killed and treated as a failure (subject to `fail_action`).
|
||||
|
||||
Hooks are optional. If no `hooks:` section exists in config, no hooks run. If a specific hook event is not configured, it is silently skipped.
|
||||
@@ -2,6 +2,36 @@
|
||||
|
||||
## Completed
|
||||
|
||||
### v0.7.0 (2026-04-04)
|
||||
- [x] Context isolation protocol for attention filters and all agent personas
|
||||
- [x] Structured status tokens with orchestrator parsing protocol
|
||||
- [x] Evidence-gated verification with banned phrases and auto-downgrade
|
||||
- [x] Plan granularity constraint (2-5 min tasks with file path, code block, verify command)
|
||||
- [x] Strategy abstraction (PDCA cyclic, pipeline linear, auto-selection)
|
||||
- [x] Experimental status and interdisciplinary framing in README
|
||||
|
||||
### v0.6.0 (2026-04-04)
|
||||
- [x] Expanded attention-filters skill (prompt templates, token budgets, cycle-back filtering, verification checklist)
|
||||
- [x] Explorer skip heuristic in plan-phase skill
|
||||
- [x] Agent persona normalization (frontmatter examples, model comments, isolation notes)
|
||||
- [x] Runnable quickstart example
|
||||
|
||||
### v0.5.0 (2026-04-04)
|
||||
- [x] Lib script validation at run initialization
|
||||
- [x] Hook points documentation with 6 lifecycle events
|
||||
- [x] Phase rollback support via `--to <phase>` flag
|
||||
- [x] Per-workflow model assignment with fallback chain
|
||||
- [x] Cross-run finding regression detection
|
||||
- [x] Check-phase parallel reviewer spawning protocol
|
||||
|
||||
### v0.4.0 (2026-04-04)
|
||||
- [x] Confidence gate parsing with bash snippets
|
||||
- [x] Mini-Explorer spawning when risk coverage < 0.5
|
||||
- [x] Worktree merge flow with pre-merge hooks and post-merge test validation
|
||||
- [x] `archeflow-rollback.sh` for post-merge test failure auto-revert
|
||||
- [x] Test-first validation gate in Do phase
|
||||
- [x] Memory injection audit trail
|
||||
|
||||
### v0.3.0 (2026-04-03)
|
||||
- [x] Automated PDCA loop (`archeflow:run`) with `--start-from` and `--dry-run`
|
||||
- [x] Event-sourced process logging with DAG parent relationships
|
||||
@@ -52,6 +82,10 @@
|
||||
|
||||
| Date | Version | Changes |
|
||||
|------|---------|---------|
|
||||
| 2026-04-04 | v0.7.0 | Process rigor: context isolation, status tokens, evidence-gated verification, plan granularity, strategy abstraction |
|
||||
| 2026-04-04 | v0.6.0 | Quality/polish: expanded attention filters, Explorer skip heuristic, agent persona normalization, quickstart example |
|
||||
| 2026-04-04 | v0.5.0 | Robustness: lib validation, hook points, phase rollback, per-workflow models, regression detection, parallel reviewers |
|
||||
| 2026-04-04 | v0.4.0 | Confidence gates, mini-Explorer, worktree merge flow, rollback script, test-first gate, memory audit |
|
||||
| 2026-04-03 | v0.3.0 | Process infrastructure: run automation, event sourcing, domain adapters, memory, multi-project, 8 lib scripts |
|
||||
| 2026-04-03 | v0.2.0 | Plugin consolidation, workflow intelligence, quality loop, parallel teams, extensibility |
|
||||
| 2026-04-02 | v0.1.0 | Initial release: 7 archetypes, 9 core skills, PDCA workflows, shadow detection, autonomous mode |
|
||||
|
||||
@@ -1,5 +1,61 @@
|
||||
# ArcheFlow — Status Log
|
||||
|
||||
## 2026-04-04: Quadruple Release Sprint (v0.4 → v0.7)
|
||||
|
||||
### What happened
|
||||
Four ArcheFlow PDCA cycles in one session, each using ArcheFlow's own orchestration to develop itself (dogfooding). Each cycle: Explorer→Creator→Maker→Guardian+Skeptic+Sage→fixes→merge→push.
|
||||
|
||||
### v0.4.0 — Gap Fixes (8 commits, 541 lines, 15 files)
|
||||
- Unified feedback routing tables across 3 skills (canonical 8-row version)
|
||||
- Confidence gate with concrete bash parsing, 3 branches (pause/upgrade/mini-Explorer)
|
||||
- `archeflow-rollback.sh` — post-merge auto-revert with `--mainline 1`
|
||||
- Test-first validation gate in Do phase (word-boundary patterns)
|
||||
- Memory injection audit trail (`--audit` flag, `audit-check` command)
|
||||
- Review fixes: safe jq `--arg`, confidence fallback→0.0, pattern hardening
|
||||
|
||||
### v0.5.0 — Infrastructure (8 commits, 483 lines, 12 files)
|
||||
- Lib script validation at run initialization (0a)
|
||||
- Hook points documentation (`docs/hooks.md` + config template with 6 events)
|
||||
- Phase rollback via `--to <phase>` in rollback script
|
||||
- Per-workflow model assignment configuration
|
||||
- Cross-run finding regression detection
|
||||
- Check-phase fleshed out with parallel reviewer spawning protocol
|
||||
- Review fixes: mutual exclusivity guard, jq --arg everywhere, table-row grep
|
||||
|
||||
### v0.6.0 — Quality Polish (5 commits, 253 lines, 13 files)
|
||||
- Attention-filters expanded from 39-line stub to full skill (prompt templates, token budgets, cycle-back rules, verification checklist)
|
||||
- Explorer skip heuristic in plan-phase skill
|
||||
- Agent persona normalization (4 agents: examples, model comments, isolation note)
|
||||
- Runnable quickstart example (`examples/runnable-quickstart.md`)
|
||||
- CHANGELOG completed with missing v0.4.0 entry + roadmap version history
|
||||
|
||||
### v0.7.0 — Superpowers-Inspired + Strategy Abstraction (8 commits, 485 lines, 20 files)
|
||||
- Context isolation protocol (attention-filters + all 7 agents)
|
||||
- Structured status tokens: DONE/DONE_WITH_CONCERNS/NEEDS_CONTEXT/BLOCKED
|
||||
- Evidence-gated verification: banned phrases, evidence markers, downgrade-to-INFO
|
||||
- Plan granularity constraint: 2-5 min tasks with file:line + code block + verify
|
||||
- Strategy abstraction: `pdca` (cyclic) vs `pipeline` (linear) vs `auto` (selected by task)
|
||||
- README: experimental status + interdisciplinary framing (psychology + process eng + software eng)
|
||||
- Review fixes: fast→pipeline auto-select, merge guard, evidence check completeness
|
||||
|
||||
### Key numbers
|
||||
| Metric | v0.3 → v0.7 delta |
|
||||
|--------|-------------------|
|
||||
| Commits this session | 29 |
|
||||
| Lines added | ~1,762 |
|
||||
| Files touched | 30+ |
|
||||
| Lib scripts | 8 → 9 (archeflow-rollback.sh) |
|
||||
| Skills | 24 (all fleshed out, no stubs remain) |
|
||||
| Review cycles | 4 (v0.4: full, v0.5: full, v0.6: fast, v0.7: Guardian-only) |
|
||||
| Review findings fixed | 15 |
|
||||
|
||||
### What to do next
|
||||
1. **End-to-end dogfood** — run `af-run` on a real task (not ArcheFlow itself) to test both strategies
|
||||
2. **Hook execution runtime** — config documents 6 hook events but no runner yet
|
||||
3. **Pipeline strategy testing** — exercise the `--strategy pipeline` path on a bug fix
|
||||
4. **Publish** — tag v0.7.0, consider claude.com/plugins marketplace listing
|
||||
5. **GitHub Action** — automated PR review (roadmap item, low effort)
|
||||
|
||||
## 2026-04-03: Major Feature Sprint (v0.1 → v0.3)
|
||||
|
||||
### What happened
|
||||
|
||||
109
examples/runnable-quickstart.md
Normal file
109
examples/runnable-quickstart.md
Normal file
@@ -0,0 +1,109 @@
|
||||
# Runnable Quickstart
|
||||
|
||||
A step-by-step walkthrough of an ArcheFlow run from scratch.
|
||||
|
||||
## 1. Create a temp project
|
||||
|
||||
```bash
|
||||
mkdir /tmp/af-demo && cd /tmp/af-demo
|
||||
git init && echo "# Demo" > README.md && git add . && git commit -m "init"
|
||||
```
|
||||
|
||||
## 2. Initialize ArcheFlow
|
||||
|
||||
```
|
||||
/af-init quick-fix
|
||||
```
|
||||
|
||||
This creates `.archeflow/config.yaml` with sensible defaults (fast workflow, budget $5).
|
||||
|
||||
Expected output:
|
||||
```
|
||||
archeflow v0.6.0 initialized (quick-fix bundle)
|
||||
config: .archeflow/config.yaml
|
||||
workflow: fast (Creator -> Maker -> Guardian)
|
||||
```
|
||||
|
||||
## 3. Run a task
|
||||
|
||||
```
|
||||
/af-run "Create a fibonacci function with edge case tests" --workflow fast
|
||||
```
|
||||
|
||||
## 4. Expected output at each phase
|
||||
|
||||
### Plan phase (Creator only -- Explorer skipped)
|
||||
|
||||
The fast workflow skips Explorer because the task is small and specific.
|
||||
Creator produces a proposal:
|
||||
|
||||
```
|
||||
-- archeflow -- Create fibonacci function -- fast --
|
||||
Creator: fibonacci(n) with memoization, handles n<0 and n>46 overflow
|
||||
```
|
||||
|
||||
Behind the scenes, Creator wrote a proposal with:
|
||||
- Architecture decision: iterative approach with memoization
|
||||
- File list: `fibonacci.py`, `test_fibonacci.py`
|
||||
- Confidence: task understanding 0.9, solution completeness 0.9, risk coverage 0.8
|
||||
|
||||
### Do phase (Maker)
|
||||
|
||||
Maker implements in an isolated worktree:
|
||||
|
||||
```
|
||||
Maker: 2 files, 4 tests, all passing
|
||||
```
|
||||
|
||||
Maker followed the proposal: wrote tests first (negative input, zero, small values, large values), then implemented.
|
||||
|
||||
### Check phase (Guardian)
|
||||
|
||||
Guardian reviews the diff:
|
||||
|
||||
```
|
||||
Guardian: APPROVED (1 INFO -- consider adding type hints)
|
||||
```
|
||||
|
||||
### Act phase
|
||||
|
||||
All reviewers approved. Merge to main:
|
||||
|
||||
```
|
||||
-- done -- 1 cycle . 3 agents . ~4 min --
|
||||
fibonacci.py + test_fibonacci.py merged
|
||||
```
|
||||
|
||||
## 5. Expected file tree
|
||||
|
||||
```
|
||||
/tmp/af-demo/
|
||||
README.md
|
||||
fibonacci.py # iterative fibonacci with memoization
|
||||
test_fibonacci.py # 4 test cases (negative, zero, small, overflow)
|
||||
.archeflow/
|
||||
config.yaml # ArcheFlow configuration
|
||||
runs/
|
||||
run-001.jsonl # event log for this run
|
||||
progress.md # final progress snapshot
|
||||
```
|
||||
|
||||
## 6. What just happened
|
||||
|
||||
Each phase maps to an archetype with a specific role:
|
||||
|
||||
| Phase | Archetype | What it did |
|
||||
|-------|-----------|-------------|
|
||||
| Plan | Creator | Designed the solution: iterative fibonacci, memoization, test cases. Skipped Explorer (task is specific, files are known). |
|
||||
| Do | Maker | Implemented in isolated worktree. Tests first, then code. Committed after each step. |
|
||||
| Check | Guardian | Reviewed the diff for security, correctness, and quality. Found no blockers. |
|
||||
| Act | Orchestrator | All approved -- merged Maker's worktree branch into main. |
|
||||
|
||||
The fast workflow used 3 agents in 1 cycle. A `standard` workflow would add Explorer (research) + Skeptic (assumptions) + Sage (quality). A `thorough` workflow adds Trickster (adversarial testing) on top.
|
||||
|
||||
## Next steps
|
||||
|
||||
- Try `--workflow standard` for a more thorough run
|
||||
- Try `/af-status` to see run details after completion
|
||||
- Try `/af-dag` to see the process DAG
|
||||
- Try `/af-report` for a full markdown report
|
||||
@@ -25,7 +25,10 @@ try {
|
||||
}
|
||||
|
||||
console.log(JSON.stringify({
|
||||
hookSpecificOutput: { additionalContext: stripped }
|
||||
hookSpecificOutput: {
|
||||
hookEventName: "SessionStart",
|
||||
additionalContext: stripped
|
||||
}
|
||||
}));
|
||||
} catch (e) {
|
||||
console.log("{}");
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
# ./lib/archeflow-memory.sh list # List all active lessons
|
||||
# ./lib/archeflow-memory.sh decay # Apply decay to all lessons
|
||||
# ./lib/archeflow-memory.sh forget <id> # Archive a lesson by ID
|
||||
# ./lib/archeflow-memory.sh regression-check <events> # Detect regressions from previously fixed findings
|
||||
#
|
||||
# Dependencies: jq, bash 4+
|
||||
|
||||
@@ -140,14 +141,14 @@ cmd_extract() {
|
||||
if [[ "$overlap" -ge 50 ]]; then
|
||||
# Match found — update existing lesson
|
||||
local tmp_file="${LESSONS_FILE}.tmp"
|
||||
jq -c "
|
||||
if .id == \"$lesson_id\" then
|
||||
jq -c --arg lid "$lesson_id" --arg ts "$(now_ts)" --arg rid "$run_id" '
|
||||
if .id == $lid then
|
||||
.frequency += 1 |
|
||||
.ts = \"$(now_ts)\" |
|
||||
.last_seen_run = \"$run_id\" |
|
||||
.ts = $ts |
|
||||
.last_seen_run = $rid |
|
||||
.runs_since_last_seen = 0
|
||||
else . end
|
||||
" "$LESSONS_FILE" > "$tmp_file"
|
||||
' "$LESSONS_FILE" > "$tmp_file"
|
||||
mv "$tmp_file" "$LESSONS_FILE"
|
||||
matched=true
|
||||
updated=$((updated + 1))
|
||||
@@ -201,6 +202,16 @@ cmd_inject() {
|
||||
local domain="${1:-}"
|
||||
local archetype="${2:-}"
|
||||
|
||||
# Parse optional --audit <run_id>
|
||||
local audit_run_id=""
|
||||
shift 2 2>/dev/null || true
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--audit) audit_run_id="$2"; shift 2 ;;
|
||||
*) shift ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ! -f "$LESSONS_FILE" ]]; then
|
||||
return 0
|
||||
fi
|
||||
@@ -213,38 +224,224 @@ cmd_inject() {
|
||||
# - Filter by domain (match or "general") and archetype (if provided)
|
||||
# - Sort by frequency desc, cap at 10
|
||||
local lessons
|
||||
lessons=$(jq -c "
|
||||
lessons=$(jq -c --arg domain "$domain" --arg archetype "$archetype" '
|
||||
select(
|
||||
(.type == \"preference\") or
|
||||
(.type == "preference") or
|
||||
(.frequency >= 5) or
|
||||
(
|
||||
(.frequency >= 2) and
|
||||
(
|
||||
(\"$domain\" == \"\") or
|
||||
(.domain == \"$domain\") or
|
||||
(.domain == \"general\")
|
||||
($domain == "") or
|
||||
(.domain == $domain) or
|
||||
(.domain == "general")
|
||||
) and
|
||||
(
|
||||
(\"$archetype\" == \"\") or
|
||||
($archetype == "") or
|
||||
(.archetype == null) or
|
||||
(.archetype == \"$archetype\")
|
||||
(.archetype == $archetype)
|
||||
)
|
||||
)
|
||||
)
|
||||
" "$LESSONS_FILE" 2>/dev/null | jq -sc 'sort_by(-.frequency) | .[:10][]' 2>/dev/null || true)
|
||||
' "$LESSONS_FILE" 2>/dev/null | jq -sc 'sort_by(-.frequency) | .[:10][]' 2>/dev/null || true)
|
||||
|
||||
if [[ -z "$lessons" ]]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Collect injected lesson IDs for audit
|
||||
local injected_ids=()
|
||||
|
||||
echo "## Known Issues (from past runs)"
|
||||
while IFS= read -r lesson; do
|
||||
local desc freq src
|
||||
local desc freq src lid
|
||||
desc=$(echo "$lesson" | jq -r '.description')
|
||||
freq=$(echo "$lesson" | jq -r '.frequency')
|
||||
src=$(echo "$lesson" | jq -r '.source')
|
||||
lid=$(echo "$lesson" | jq -r '.id')
|
||||
injected_ids+=("$lid")
|
||||
echo "- ${desc} [seen ${freq}x, ${src}]"
|
||||
done <<< "$lessons"
|
||||
|
||||
# Write audit record if --audit was passed
|
||||
if [[ -n "$audit_run_id" && ${#injected_ids[@]} -gt 0 ]]; then
|
||||
ensure_dir
|
||||
local AUDIT_FILE="${MEMORY_DIR}/audit.jsonl"
|
||||
local ids_json
|
||||
ids_json=$(printf '%s\n' "${injected_ids[@]}" | jq -R . | jq -sc .)
|
||||
jq -cn \
|
||||
--arg ts "$(now_ts)" \
|
||||
--arg run_id "$audit_run_id" \
|
||||
--arg domain "$domain" \
|
||||
--arg archetype "$archetype" \
|
||||
--argjson lessons_injected "$ids_json" \
|
||||
--argjson lesson_count "${#injected_ids[@]}" \
|
||||
'{ts:$ts,run_id:$run_id,domain:$domain,archetype:$archetype,lessons_injected:$lessons_injected,lesson_count:$lesson_count}' \
|
||||
>> "$AUDIT_FILE"
|
||||
fi
|
||||
}
|
||||
|
||||
cmd_audit_check() {
|
||||
local run_id="${1:?Usage: $0 audit-check <run_id>}"
|
||||
local AUDIT_FILE="${MEMORY_DIR}/audit.jsonl"
|
||||
local EVENTS_FILE=".archeflow/events/${run_id}.jsonl"
|
||||
|
||||
if [[ ! -f "$AUDIT_FILE" ]]; then
|
||||
echo "No audit records found." >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [[ ! -f "$EVENTS_FILE" ]]; then
|
||||
echo "No events file found for run $run_id." >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Get lessons injected for this run
|
||||
local injected
|
||||
injected=$(jq -c --arg rid "$run_id" 'select(.run_id == $rid)' "$AUDIT_FILE" 2>/dev/null || true)
|
||||
|
||||
if [[ -z "$injected" ]]; then
|
||||
echo "No audit records for run $run_id." >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Get all finding descriptions from review.verdict events
|
||||
local finding_descs
|
||||
finding_descs=$(jq -r '
|
||||
select(.type == "review.verdict") |
|
||||
.data.findings[]? | .description // empty
|
||||
' "$EVENTS_FILE" 2>/dev/null | tr '[:upper:]' '[:lower:]' || true)
|
||||
|
||||
# For each injected lesson, check if findings match the lesson's topic
|
||||
local lesson_ids
|
||||
lesson_ids=$(echo "$injected" | jq -r '.lessons_injected[]' 2>/dev/null | sort -u)
|
||||
|
||||
while IFS= read -r lid; do
|
||||
[[ -z "$lid" ]] && continue
|
||||
|
||||
# Get lesson description
|
||||
local lesson_desc
|
||||
lesson_desc=$(jq -r --arg lid "$lid" 'select(.id == $lid) | .description' "$LESSONS_FILE" 2>/dev/null | head -1)
|
||||
[[ -z "$lesson_desc" ]] && continue
|
||||
|
||||
# Check keyword overlap between lesson and findings
|
||||
local lesson_tokens finding_overlap
|
||||
lesson_tokens=$(tokenize "$lesson_desc")
|
||||
finding_overlap=0
|
||||
|
||||
if [[ -n "$finding_descs" ]]; then
|
||||
local finding_tokens
|
||||
finding_tokens=$(echo "$finding_descs" | tr -cs '[:alnum:]' '\n' | awk 'length >= 3' | sort -u)
|
||||
local common
|
||||
common=$(comm -12 <(echo "$lesson_tokens") <(echo "$finding_tokens") | wc -l)
|
||||
local total
|
||||
total=$(echo "$lesson_tokens" | wc -l)
|
||||
if [[ "$total" -gt 0 ]]; then
|
||||
finding_overlap=$(( common * 100 / total ))
|
||||
fi
|
||||
fi
|
||||
|
||||
local effectiveness
|
||||
if [[ "$finding_overlap" -ge 30 ]]; then
|
||||
effectiveness="ineffective" # Issue repeated despite lesson injection
|
||||
else
|
||||
effectiveness="helpful" # Issue was prevented (no matching finding)
|
||||
fi
|
||||
|
||||
# Append result to audit.jsonl
|
||||
jq -cn \
|
||||
--arg ts "$(now_ts)" \
|
||||
--arg run_id "$run_id" \
|
||||
--arg lesson_id "$lid" \
|
||||
--arg lesson_desc "$lesson_desc" \
|
||||
--arg effectiveness "$effectiveness" \
|
||||
--argjson overlap "$finding_overlap" \
|
||||
'{ts:$ts,run_id:$run_id,type:"effectiveness_check",lesson_id:$lesson_id,lesson_desc:$lesson_desc,effectiveness:$effectiveness,keyword_overlap_pct:$overlap}' \
|
||||
>> "$AUDIT_FILE"
|
||||
|
||||
echo "[archeflow-memory] Lesson $lid ($effectiveness): $lesson_desc" >&2
|
||||
done <<< "$lesson_ids"
|
||||
}
|
||||
|
||||
cmd_regression_check() {
|
||||
local events_file="${1:?Usage: $0 regression-check <events.jsonl>}"
|
||||
|
||||
if [[ ! -f "$events_file" ]]; then
|
||||
echo "Error: events file not found: $events_file" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Extract current run_id
|
||||
local run_id
|
||||
run_id=$(jq -r '.run_id' "$events_file" | head -1)
|
||||
|
||||
# Find the previous run from index.jsonl
|
||||
local INDEX_FILE=".archeflow/events/index.jsonl"
|
||||
if [[ ! -f "$INDEX_FILE" ]]; then
|
||||
echo "[archeflow-memory] No index.jsonl found — skipping regression check." >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
local prev_run_id
|
||||
# Get the most recent run that is not the current one (index is append-newest-last)
|
||||
prev_run_id=$(jq -r --arg rid "$run_id" 'select(.run_id != $rid) | .run_id' "$INDEX_FILE" 2>/dev/null | tail -1)
|
||||
# Note: tail -1 gives the last non-current entry, which is the most recent previous run
|
||||
|
||||
if [[ -z "$prev_run_id" ]]; then
|
||||
echo "[archeflow-memory] No previous run found — skipping regression check." >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
local prev_events=".archeflow/events/${prev_run_id}.jsonl"
|
||||
if [[ ! -f "$prev_events" ]]; then
|
||||
echo "[archeflow-memory] Previous run events not found: $prev_events" >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Extract resolved findings from previous run (fix.applied events)
|
||||
local resolved_findings
|
||||
resolved_findings=$(jq -r 'select(.type == "fix.applied") | .data.finding // empty' "$prev_events" 2>/dev/null || true)
|
||||
|
||||
if [[ -z "$resolved_findings" ]]; then
|
||||
echo "[archeflow-memory] No resolved findings in previous run — nothing to regress." >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Extract current run findings from review.verdict events
|
||||
local current_findings
|
||||
current_findings=$(jq -r '
|
||||
select(.type == "review.verdict") |
|
||||
.data.findings[]? | .description // empty
|
||||
' "$events_file" 2>/dev/null || true)
|
||||
|
||||
if [[ -z "$current_findings" ]]; then
|
||||
echo "[archeflow-memory] No findings in current run — no regressions." >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Compare: for each resolved finding, check if it reappeared
|
||||
local regressions=0
|
||||
while IFS= read -r resolved; do
|
||||
[[ -z "$resolved" ]] && continue
|
||||
|
||||
while IFS= read -r current; do
|
||||
[[ -z "$current" ]] && continue
|
||||
local overlap
|
||||
overlap=$(keyword_overlap "$resolved" "$current")
|
||||
if [[ "$overlap" -ge 50 ]]; then
|
||||
echo "REGRESSION: \"$resolved\" (fixed in $prev_run_id) reappeared as \"$current\""
|
||||
regressions=$((regressions + 1))
|
||||
break
|
||||
fi
|
||||
done <<< "$current_findings"
|
||||
done <<< "$resolved_findings"
|
||||
|
||||
if [[ "$regressions" -gt 0 ]]; then
|
||||
echo "[archeflow-memory] $regressions regression(s) detected from run $prev_run_id." >&2
|
||||
return 1
|
||||
else
|
||||
echo "[archeflow-memory] No regressions detected." >&2
|
||||
return 0
|
||||
fi
|
||||
}
|
||||
|
||||
cmd_add() {
|
||||
@@ -360,17 +557,17 @@ cmd_forget() {
|
||||
ensure_dir
|
||||
|
||||
# Check if the lesson exists
|
||||
if ! jq -e "select(.id == \"$target_id\")" "$LESSONS_FILE" > /dev/null 2>&1; then
|
||||
if ! jq -e --arg tid "$target_id" 'select(.id == $tid)' "$LESSONS_FILE" > /dev/null 2>&1; then
|
||||
echo "Error: lesson $target_id not found." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Archive the lesson
|
||||
jq -c "select(.id == \"$target_id\")" "$LESSONS_FILE" >> "$ARCHIVE_FILE"
|
||||
jq -c --arg tid "$target_id" 'select(.id == $tid)' "$LESSONS_FILE" >> "$ARCHIVE_FILE"
|
||||
|
||||
# Remove from lessons
|
||||
local tmp_file="${LESSONS_FILE}.tmp"
|
||||
jq -c "select(.id != \"$target_id\")" "$LESSONS_FILE" > "$tmp_file"
|
||||
jq -c --arg tid "$target_id" 'select(.id != $tid)' "$LESSONS_FILE" > "$tmp_file"
|
||||
mv "$tmp_file" "$LESSONS_FILE"
|
||||
|
||||
echo "[archeflow-memory] Forgot lesson $target_id (moved to archive)" >&2
|
||||
@@ -383,11 +580,13 @@ if [[ $# -lt 1 ]]; then
|
||||
echo "" >&2
|
||||
echo "Commands:" >&2
|
||||
echo " extract <events.jsonl> Extract lessons from a completed run" >&2
|
||||
echo " inject <domain> <archetype> Output relevant lessons for injection" >&2
|
||||
echo " inject <domain> <archetype> [--audit <run_id>] Output relevant lessons for injection" >&2
|
||||
echo " add <type> <description> Manually add a lesson" >&2
|
||||
echo " list List all active lessons" >&2
|
||||
echo " decay Apply decay to all lessons" >&2
|
||||
echo " forget <id> Archive a lesson by ID" >&2
|
||||
echo " audit-check <run_id> Check lesson effectiveness for a run" >&2
|
||||
echo " regression-check <events.jsonl> Detect regressions from previously fixed findings" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -400,7 +599,7 @@ case "$COMMAND" in
|
||||
cmd_extract "$1"
|
||||
;;
|
||||
inject)
|
||||
cmd_inject "${1:-}" "${2:-}"
|
||||
cmd_inject "$@"
|
||||
;;
|
||||
add)
|
||||
[[ $# -lt 2 ]] && { echo "Usage: $0 add <type> <description>" >&2; exit 1; }
|
||||
@@ -416,6 +615,14 @@ case "$COMMAND" in
|
||||
[[ $# -lt 1 ]] && { echo "Usage: $0 forget <id>" >&2; exit 1; }
|
||||
cmd_forget "$1"
|
||||
;;
|
||||
audit-check)
|
||||
[[ $# -lt 1 ]] && { echo "Usage: $0 audit-check <run_id>" >&2; exit 1; }
|
||||
cmd_audit_check "$1"
|
||||
;;
|
||||
regression-check)
|
||||
[[ $# -lt 1 ]] && { echo "Usage: $0 regression-check <events.jsonl>" >&2; exit 1; }
|
||||
cmd_regression_check "$1"
|
||||
;;
|
||||
*)
|
||||
echo "Unknown command: $COMMAND" >&2
|
||||
exit 1
|
||||
|
||||
197
lib/archeflow-review.sh
Executable file
197
lib/archeflow-review.sh
Executable file
@@ -0,0 +1,197 @@
|
||||
#!/usr/bin/env bash
|
||||
# archeflow-review.sh — Get a git diff for Guardian review, with stats.
|
||||
#
|
||||
# Standalone diff helper for af-review. No PDCA orchestration — just extracts
|
||||
# the right diff and reports stats so the Claude Code agent can feed it to
|
||||
# Guardian (or other reviewers).
|
||||
#
|
||||
# Usage:
|
||||
# archeflow-review.sh # Uncommitted changes (staged + unstaged)
|
||||
# archeflow-review.sh --branch feat/batch-api # Branch diff vs main
|
||||
# archeflow-review.sh --commit HEAD~3..HEAD # Commit range
|
||||
# archeflow-review.sh --base develop # Override base branch (default: main)
|
||||
# archeflow-review.sh --stat-only # Only print stats, no diff output
|
||||
#
|
||||
# Output:
|
||||
# Prints the diff to stdout. Stats go to stderr so they don't pollute the diff.
|
||||
# Exit code 0 if diff is non-empty, 1 if empty (nothing to review).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Globals
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
BASE_BRANCH="main"
|
||||
MODE="uncommitted" # uncommitted | branch | commit
|
||||
TARGET=""
|
||||
STAT_ONLY="false"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
die() {
|
||||
echo "[af-review] ERROR: $*" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
info() {
|
||||
echo "[af-review] $*" >&2
|
||||
}
|
||||
|
||||
# Print diff stats (files changed, insertions, deletions) to stderr.
|
||||
print_stats() {
|
||||
local diff_text="$1"
|
||||
|
||||
local files_changed lines_added lines_removed total_lines
|
||||
files_changed=$(echo "$diff_text" | grep -c '^diff --git' || true)
|
||||
lines_added=$(echo "$diff_text" | grep -c '^+[^+]' || true)
|
||||
lines_removed=$(echo "$diff_text" | grep -c '^-[^-]' || true)
|
||||
total_lines=$(echo "$diff_text" | wc -l | tr -d ' ')
|
||||
|
||||
info "--- Review Stats ---"
|
||||
info "Files changed: ${files_changed}"
|
||||
info "Lines added: +${lines_added}"
|
||||
info "Lines removed: -${lines_removed}"
|
||||
info "Diff size: ${total_lines} lines"
|
||||
|
||||
if [[ "$total_lines" -gt 500 ]]; then
|
||||
info "Warning: large diff (>500 lines). Consider reviewing per-file."
|
||||
fi
|
||||
}
|
||||
|
||||
# Detect the default base branch (main or master).
|
||||
detect_base_branch() {
|
||||
if git show-ref --verify --quiet "refs/heads/main" 2>/dev/null; then
|
||||
echo "main"
|
||||
elif git show-ref --verify --quiet "refs/heads/master" 2>/dev/null; then
|
||||
echo "master"
|
||||
else
|
||||
echo "main"
|
||||
fi
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Argument parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
parse_args() {
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--branch)
|
||||
MODE="branch"
|
||||
TARGET="${2:?Missing branch name after --branch}"
|
||||
shift 2
|
||||
;;
|
||||
--commit)
|
||||
MODE="commit"
|
||||
TARGET="${2:?Missing commit range after --commit}"
|
||||
shift 2
|
||||
;;
|
||||
--base)
|
||||
BASE_BRANCH="${2:?Missing base branch after --base}"
|
||||
shift 2
|
||||
;;
|
||||
--stat-only)
|
||||
STAT_ONLY="true"
|
||||
shift
|
||||
;;
|
||||
-h|--help)
|
||||
echo "Usage: $0 [--branch <name>] [--commit <range>] [--base <branch>] [--stat-only]"
|
||||
echo ""
|
||||
echo " (no args) Review uncommitted changes (staged + unstaged)"
|
||||
echo " --branch <name> Review branch diff against base (default: main)"
|
||||
echo " --commit <range> Review a commit range (e.g. HEAD~3..HEAD)"
|
||||
echo " --base <branch> Override base branch (default: auto-detect main/master)"
|
||||
echo " --stat-only Print stats only, no diff output"
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
die "Unknown argument: $1. Use --help for usage."
|
||||
;;
|
||||
esac
|
||||
done
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Diff extraction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
get_diff() {
|
||||
local diff_text=""
|
||||
|
||||
case "$MODE" in
|
||||
uncommitted)
|
||||
# Combine staged and unstaged changes against HEAD
|
||||
diff_text=$(git diff HEAD 2>/dev/null || true)
|
||||
if [[ -z "$diff_text" ]]; then
|
||||
# Maybe everything is staged, try just staged
|
||||
diff_text=$(git diff --cached 2>/dev/null || true)
|
||||
fi
|
||||
;;
|
||||
branch)
|
||||
# Verify target branch exists
|
||||
if ! git show-ref --verify --quiet "refs/heads/${TARGET}" 2>/dev/null; then
|
||||
# Maybe it's a remote branch
|
||||
if ! git rev-parse --verify "${TARGET}" &>/dev/null; then
|
||||
die "Branch '${TARGET}' not found."
|
||||
fi
|
||||
fi
|
||||
diff_text=$(git diff "${BASE_BRANCH}...${TARGET}" 2>/dev/null || true)
|
||||
;;
|
||||
commit)
|
||||
# Validate commit range resolves
|
||||
if ! git rev-parse "${TARGET}" &>/dev/null 2>&1; then
|
||||
die "Invalid commit range: '${TARGET}'"
|
||||
fi
|
||||
diff_text=$(git diff "${TARGET}" 2>/dev/null || true)
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "$diff_text"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
main() {
|
||||
# Verify we're in a git repo
|
||||
if ! git rev-parse --is-inside-work-tree &>/dev/null; then
|
||||
die "Not inside a git repository."
|
||||
fi
|
||||
|
||||
parse_args "$@"
|
||||
|
||||
# Auto-detect base branch if not overridden
|
||||
if [[ "$BASE_BRANCH" == "main" ]]; then
|
||||
BASE_BRANCH=$(detect_base_branch)
|
||||
fi
|
||||
|
||||
# Describe what we're reviewing
|
||||
case "$MODE" in
|
||||
uncommitted) info "Reviewing: uncommitted changes vs HEAD" ;;
|
||||
branch) info "Reviewing: branch '${TARGET}' vs '${BASE_BRANCH}'" ;;
|
||||
commit) info "Reviewing: commit range '${TARGET}'" ;;
|
||||
esac
|
||||
|
||||
local diff_text
|
||||
diff_text=$(get_diff)
|
||||
|
||||
# Validate non-empty
|
||||
if [[ -z "$diff_text" ]]; then
|
||||
info "No changes found. Nothing to review."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Print stats to stderr
|
||||
print_stats "$diff_text"
|
||||
|
||||
# Output the diff to stdout (unless stat-only)
|
||||
if [[ "$STAT_ONLY" != "true" ]]; then
|
||||
echo "$diff_text"
|
||||
fi
|
||||
}
|
||||
|
||||
main "$@"
|
||||
108
lib/archeflow-rollback.sh
Executable file
108
lib/archeflow-rollback.sh
Executable file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env bash
|
||||
# archeflow-rollback.sh — Auto-revert a merge that fails post-merge tests,
|
||||
# or roll back to a specific PDCA phase boundary.
|
||||
#
|
||||
# Usage:
|
||||
# archeflow-rollback.sh <run_id> [--test-cmd <cmd>] # Post-merge test + revert
|
||||
# archeflow-rollback.sh <run_id> --to <phase> # Roll back to phase boundary
|
||||
#
|
||||
# --to <phase>: Roll back to the given phase boundary (plan, do, or check).
|
||||
# Delegates to archeflow-git.sh rollback and emits a decision event.
|
||||
#
|
||||
# If --test-cmd not provided (and --to not used), reads test_command from .archeflow/config.yaml.
|
||||
# Returns 0 if tests pass (or rollback succeeds), 1 if tests fail (merge reverted).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
RUN_ID="${1:?Usage: archeflow-rollback.sh <run_id> [--test-cmd <cmd>] [--to <phase>]}"
|
||||
shift
|
||||
|
||||
# Parse options
|
||||
TEST_CMD=""
|
||||
TARGET_PHASE=""
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--test-cmd) TEST_CMD="$2"; shift 2 ;;
|
||||
--to) TARGET_PHASE="$2"; shift 2 ;;
|
||||
*) echo "Unknown option: $1" >&2; exit 2 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Mutual exclusivity check
|
||||
if [[ -n "$TARGET_PHASE" && -n "$TEST_CMD" ]]; then
|
||||
echo "ERROR: --to and --test-cmd are mutually exclusive." >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# --- Phase rollback mode ---
|
||||
if [[ -n "$TARGET_PHASE" ]]; then
|
||||
# Validate phase name
|
||||
case "$TARGET_PHASE" in
|
||||
plan|do|check) ;;
|
||||
*)
|
||||
echo "ERROR: Invalid phase '$TARGET_PHASE'. Must be one of: plan, do, check" >&2
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "Rolling back run $RUN_ID to phase boundary: $TARGET_PHASE"
|
||||
|
||||
# Delegate to archeflow-git.sh
|
||||
if [[ ! -x "$SCRIPT_DIR/archeflow-git.sh" ]]; then
|
||||
echo "ERROR: archeflow-git.sh not found or not executable" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
"$SCRIPT_DIR/archeflow-git.sh" rollback "$RUN_ID" --to "$TARGET_PHASE"
|
||||
|
||||
# Emit decision event
|
||||
if [[ -x "$SCRIPT_DIR/archeflow-event.sh" ]]; then
|
||||
"$SCRIPT_DIR/archeflow-event.sh" "$RUN_ID" decision act "" \
|
||||
"{\"what\":\"phase_rollback\",\"chosen\":\"rollback_to_${TARGET_PHASE}\",\"rationale\":\"user requested rollback to ${TARGET_PHASE} phase boundary\"}" ""
|
||||
fi
|
||||
|
||||
echo "Rollback to $TARGET_PHASE complete for run $RUN_ID."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# --- Post-merge test mode ---
|
||||
|
||||
# Read test_command from config if not provided
|
||||
if [[ -z "$TEST_CMD" ]]; then
|
||||
if [[ -f ".archeflow/config.yaml" ]]; then
|
||||
TEST_CMD=$(grep -E "^test_command:" .archeflow/config.yaml | sed 's/^test_command:\s*//' | tr -d '"' || true)
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -z "$TEST_CMD" ]]; then
|
||||
echo "ERROR: No test command specified (use --test-cmd or set test_command in .archeflow/config.yaml)" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# Verify HEAD is an ArcheFlow merge
|
||||
HEAD_MSG=$(git log -1 --format=%s HEAD)
|
||||
if [[ "$HEAD_MSG" != *"$RUN_ID"* ]] && [[ "$HEAD_MSG" != *"archeflow"* ]]; then
|
||||
echo "WARNING: HEAD commit does not appear to be an ArcheFlow merge: $HEAD_MSG" >&2
|
||||
echo "Proceeding anyway..." >&2
|
||||
fi
|
||||
|
||||
echo "Running post-merge tests: $TEST_CMD"
|
||||
|
||||
if timeout 300 bash -c "$TEST_CMD"; then
|
||||
echo "Tests passed — merge is good."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "Tests FAILED — reverting merge..."
|
||||
git revert --no-edit --mainline 1 HEAD
|
||||
|
||||
# Emit event if event script exists
|
||||
if [[ -x "$SCRIPT_DIR/archeflow-event.sh" ]]; then
|
||||
"$SCRIPT_DIR/archeflow-event.sh" "$RUN_ID" decision act "" \
|
||||
"{\"what\":\"post_merge_test\",\"chosen\":\"revert\",\"rationale\":\"test suite failed after merge\"}" ""
|
||||
fi
|
||||
|
||||
REVERT_HASH=$(git rev-parse --short HEAD)
|
||||
echo "Merge reverted (commit: $REVERT_HASH). Tests must pass before re-merging."
|
||||
exit 1
|
||||
@@ -286,18 +286,20 @@ When cycling back, produce `act-feedback.md` as a structured handoff. This repla
|
||||
| — | — | — | — | — |
|
||||
```
|
||||
|
||||
**Routing rules** (same as orchestration skill, repeated here for self-containment):
|
||||
**Routing rules** (canonical table — matches orchestration and artifact-routing skills):
|
||||
|
||||
| Finding Source | Routes to | When |
|
||||
|----------------|-----------|------|
|
||||
| Guardian (security, breaking-change) | Creator | Design must change |
|
||||
| Skeptic (design, scalability) | Creator | Assumptions need revision |
|
||||
| Sage (quality, consistency) | Maker | Implementation refinement |
|
||||
| Sage (design) | Creator | If it's an architectural concern |
|
||||
| Trickster (reliability) | Creator | If root cause is a design flaw |
|
||||
| Trickster (testing) | Maker | If root cause is a test gap |
|
||||
| Source | Category | Routes to | Reason |
|
||||
|--------|----------|-----------|--------|
|
||||
| Guardian | security, breaking-change | Creator | Design must change |
|
||||
| Guardian | reliability, dependency | Creator | Architectural decision needed |
|
||||
| Skeptic | design, scalability | Creator | Assumptions need revision |
|
||||
| Sage | quality, consistency | Maker | Implementation refinement |
|
||||
| Sage | testing | Maker | Test gap, not design flaw |
|
||||
| Trickster | reliability (design flaw) | Creator | Needs redesign |
|
||||
| Trickster | reliability (test gap) | Maker | Needs more tests |
|
||||
| Trickster | testing | Maker | Edge case not covered |
|
||||
|
||||
When in doubt about routing: if the fix requires changing the approach, route to Creator. If the fix requires changing the code within the existing approach, route to Maker.
|
||||
**Disambiguation rule:** When in doubt: if the fix requires changing the approach, route to Creator. If it requires changing the code within the existing approach, route to Maker.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ Artifacts follow the pattern: `<phase>-<agent>.<ext>`
|
||||
|-------|-------|----------|--------|
|
||||
| plan | explorer | `plan-explorer.md` | Markdown research report |
|
||||
| plan | creator | `plan-creator.md` | Markdown proposal with confidence scores |
|
||||
| plan | mini-explorer | `plan-mini-explorer.md` | Focused risk research (only if confidence gate triggers) |
|
||||
| do | maker | `do-maker.md` | Markdown implementation summary |
|
||||
| do | maker | `do-maker-files.txt` | Plain text, one file path per line |
|
||||
| check | guardian | `check-guardian.md` | Markdown verdict + findings table |
|
||||
@@ -89,8 +90,8 @@ Note: Address each unresolved issue listed above. Explain how your revised propo
|
||||
|
||||
| Agent | Receives | Does NOT receive |
|
||||
|-------|----------|-----------------|
|
||||
| **Maker** (cycle 1) | `plan-creator.md` (the proposal) | `plan-explorer.md`, reviewer outputs, raw task description |
|
||||
| **Maker** (cycle 2+) | `plan-creator.md`, Maker-routed findings from `act-feedback.md` | Explorer research, Guardian/Skeptic findings (those went to Creator) |
|
||||
| **Maker** (cycle 1) | `plan-creator.md` (the proposal), `plan-mini-explorer.md` (if exists) | `plan-explorer.md`, reviewer outputs, raw task description |
|
||||
| **Maker** (cycle 2+) | `plan-creator.md`, `plan-mini-explorer.md` (if exists), Maker-routed findings from `act-feedback.md` | Explorer research, Guardian/Skeptic findings (those went to Creator) |
|
||||
|
||||
**Maker context injection template (cycle 2+):**
|
||||
```markdown
|
||||
@@ -164,6 +165,8 @@ No agents are spawned in Act. The orchestrator reads all `check-*.md` artifacts
|
||||
|
||||
## Feedback Routing
|
||||
|
||||
> **This is the canonical routing table.** Other skills (orchestration, act-phase) must match this table exactly. When updating routing rules, update this table first, then sync the others.
|
||||
|
||||
When building `act-feedback.md` after the Check phase, route each finding to the right agent for the next cycle:
|
||||
|
||||
| Finding Source | Finding Category | Routes To | Rationale |
|
||||
@@ -177,7 +180,7 @@ When building `act-feedback.md` after the Check phase, route each finding to the
|
||||
| Trickster | reliability (test gap) | **Maker** | Needs more tests |
|
||||
| Trickster | testing | **Maker** | Edge case not covered |
|
||||
|
||||
**Ambiguous cases:** If a Trickster finding could be either a design flaw or a test gap, check: does the fix require changing the proposal's architecture/approach, or just adding a test/validation? Architecture change → Creator. Additional test → Maker.
|
||||
**Disambiguation rule:** When in doubt: if the fix requires changing the approach, route to Creator. If it requires changing the code within the existing approach, route to Maker.
|
||||
|
||||
### Feedback File Format
|
||||
|
||||
@@ -252,6 +255,7 @@ Before injecting an artifact into an agent's context, always check if the file e
|
||||
| Artifact | Missing when |
|
||||
|----------|-------------|
|
||||
| `plan-explorer.md` | Fast workflow (no Explorer) |
|
||||
| `plan-mini-explorer.md` | Confidence gate did not trigger for risk coverage |
|
||||
| `check-skeptic.md` | Fast workflow, or A2 fast-path taken |
|
||||
| `check-sage.md` | Fast workflow, or A2 fast-path taken |
|
||||
| `check-trickster.md` | Non-thorough workflow, or A2 fast-path taken |
|
||||
|
||||
@@ -1,39 +0,0 @@
|
||||
---
|
||||
name: attention-filters
|
||||
description: Use when spawning archetype agents to decide what context each agent receives. Reduces token waste and sharpens focus by passing only relevant artifacts.
|
||||
---
|
||||
|
||||
# Attention Filters
|
||||
|
||||
Each archetype needs different context. Pass only what's relevant — not everything.
|
||||
|
||||
| Archetype | Receives | Does NOT Receive |
|
||||
|-----------|----------|-----------------|
|
||||
| Explorer | Task description, codebase access | Prior proposals or reviews |
|
||||
| Creator | Explorer's research + task description | Implementation details |
|
||||
| Maker | Creator's proposal | Explorer's research, reviews |
|
||||
| Guardian | Maker's git diff + proposal risk section | Explorer's research |
|
||||
| Skeptic | Creator's proposal (focus: assumptions) | Git diff details |
|
||||
| Trickster | Maker's git diff only | Everything else |
|
||||
| Sage | Proposal + implementation + diff | Explorer's raw research |
|
||||
|
||||
## Why This Matters
|
||||
|
||||
- **Token cost:** A Guardian reading the Explorer's 2000-word research wastes ~2600 tokens on irrelevant context
|
||||
- **Focus:** An agent with too much context drifts from its archetype's concern
|
||||
- **Shadow prevention:** Over-loading context encourages rabbit-holing (Explorer) and scope creep (Maker)
|
||||
|
||||
## In Practice
|
||||
|
||||
When spawning a Check-phase agent, include only the filtered context in the prompt:
|
||||
|
||||
```
|
||||
# Guardian receives:
|
||||
"Review these changes: <git diff output>
|
||||
The proposal identified these risks: <risks section only>
|
||||
Verdict: APPROVED or REJECTED with findings."
|
||||
|
||||
# NOT:
|
||||
"Here is the full research, the full proposal, the full implementation,
|
||||
the full git log, and everything else we have..."
|
||||
```
|
||||
@@ -1,85 +1,110 @@
|
||||
---
|
||||
name: check-phase
|
||||
description: Use when you are acting as Guardian, Skeptic, Sage, or Trickster archetype in the Check phase. Defines shared review rules and output format.
|
||||
description: Use when acting as Guardian, Skeptic, Sage, or Trickster in the Check phase. Defines review rules, finding format, attention filters, and spawning protocol.
|
||||
---
|
||||
|
||||
# Check Phase
|
||||
|
||||
Multiple reviewers examine the Maker's implementation in parallel. Each agent definition has its specific protocol — this skill defines the shared rules.
|
||||
Reviewers examine the Maker's implementation. This skill defines shared rules, finding format, and spawning protocol.
|
||||
|
||||
## Shared Rules
|
||||
|
||||
1. **Read the proposal first.** Review against the intended design, not invented requirements.
|
||||
2. **Read the actual code.** Use `git diff` on the Maker's branch. Don't review descriptions alone.
|
||||
3. **Structured findings.** Use the standardized finding format below for every issue.
|
||||
4. **Clear verdict:** `APPROVED` or `REJECTED` with rationale.
|
||||
1. Review against the proposal's intended design, not invented requirements.
|
||||
2. Read actual code via `git diff` on the Maker's branch.
|
||||
3. Use the finding format below for every issue.
|
||||
4. Give a clear verdict: `APPROVED` or `REJECTED` with rationale.
|
||||
5. `STATUS: DONE` signals agent completion. `APPROVED`/`REJECTED` is domain output. Both are parsed independently.
|
||||
|
||||
## Finding Format
|
||||
|
||||
Every finding must use this format for cross-cycle tracking:
|
||||
|
||||
```
|
||||
| Location | Severity | Category | Description | Fix |
|
||||
|----------|----------|----------|-------------|-----|
|
||||
| src/auth/handler.ts:48 | CRITICAL | security | Empty string bypasses validation | Add length check before processing |
|
||||
```
|
||||
| src/auth/handler.ts:48 | CRITICAL | security | Empty string bypasses validation | Add length check |
|
||||
|
||||
**Severity:**
|
||||
- **CRITICAL** — Must fix. Blocks approval.
|
||||
- **WARNING** — Should fix. Doesn't block alone.
|
||||
- **INFO** — Nice to have. Never blocks.
|
||||
**Severity:** CRITICAL = must fix, blocks approval. WARNING = should fix, doesn't block alone. INFO = nice to have, never blocks.
|
||||
|
||||
**Categories** (use consistently for cross-cycle tracking):
|
||||
- `security` — Injection, auth bypass, data exposure, secrets
|
||||
- `reliability` — Error handling, edge cases, race conditions, crashes
|
||||
- `design` — Architecture, assumptions, scalability, coupling
|
||||
- `breaking-change` — API compatibility, schema migrations, removals
|
||||
- `dependency` — New deps, version conflicts, license issues
|
||||
- `quality` — Readability, maintainability, naming, duplication
|
||||
- `testing` — Missing tests, weak assertions, untested paths
|
||||
- `consistency` — Deviates from codebase patterns
|
||||
**Categories:** `security` `reliability` `design` `breaking-change` `dependency` `quality` `testing` `consistency`
|
||||
|
||||
## Consolidated Output
|
||||
## Evidence Requirements
|
||||
|
||||
After all reviewers finish, compile:
|
||||
Every CRITICAL or WARNING must include concrete evidence. Without evidence, downgrade to INFO.
|
||||
|
||||
**Valid evidence:** command output, exit codes, code citations with line numbers, git diff excerpts, reproduction steps.
|
||||
|
||||
**Banned in CRITICAL/WARNING:** "might be", "could potentially", "appears to", "seems like", "may not". Rewrite with evidence or downgrade.
|
||||
|
||||
For each CRITICAL/WARNING, state: (1) what was tested, (2) what was observed, (3) what correct behavior should be.
|
||||
|
||||
## Attention Filters
|
||||
|
||||
Each archetype receives only relevant context. Do not pass everything.
|
||||
|
||||
| Archetype | Receives | Excludes |
|
||||
|-----------|----------|----------|
|
||||
| Guardian | Maker's git diff + proposal risk section + test results | Explorer research, Creator rationale, other reviewers |
|
||||
| Skeptic | Creator's proposal (assumptions + architecture) + confidence scores | Git diff, Explorer research, other reviewers |
|
||||
| Sage | Creator's proposal + Maker's diff + implementation summary + test results | Explorer raw research, other reviewer verdicts |
|
||||
| Trickster | Maker's git diff + attack surface summary (file types + entry points) | Proposal, research, other reviewers |
|
||||
|
||||
**Token budget targets:**
|
||||
|
||||
| Archetype | Fast | Standard | Thorough |
|
||||
|-----------|------|----------|----------|
|
||||
| Guardian | 1500 | 2000 | 2500 |
|
||||
| Skeptic | skip | 1500 | 2000 |
|
||||
| Trickster | skip | skip | 1500 |
|
||||
| Sage | skip | 2500 | 3000 |
|
||||
|
||||
**Context isolation:** Agents receive fresh, controller-constructed context only. No session bleed, no cross-agent contamination, no ambient knowledge. Verify zero references to excluded artifacts before spawning.
|
||||
|
||||
**Cycle-back filtering (cycle 2+):** Pass structured feedback table only (not full reviewer artifacts). Strip resolved items. Cap at 500 tokens — summarize by severity if exceeded.
|
||||
|
||||
## Reviewer Spawning Protocol
|
||||
|
||||
### Step 1: Guardian First (mandatory)
|
||||
|
||||
Guardian always runs first. It receives the Maker's git diff and the proposal's risk section only.
|
||||
|
||||
Save output to `.archeflow/artifacts/${RUN_ID}/check-guardian.md`.
|
||||
|
||||
### Step 2: A2 Fast-Path Evaluation
|
||||
|
||||
After Guardian completes, count CRITICAL and WARNING findings in its output. If both are zero, and not escalated, and not first cycle of a thorough workflow — skip remaining reviewers and proceed to Act phase.
|
||||
|
||||
### Step 3: Parallel Remaining Reviewers
|
||||
|
||||
If A2 does not trigger, spawn remaining reviewers in parallel:
|
||||
|
||||
| Workflow | Reviewers (after Guardian) |
|
||||
|----------|--------------------------|
|
||||
| `fast` | None (Guardian only) |
|
||||
| `fast` (escalated) | Skeptic + Sage |
|
||||
| `standard` | Skeptic + Sage |
|
||||
| `thorough` | Skeptic + Sage + Trickster |
|
||||
|
||||
Each reviewer gets context per the attention filters above.
|
||||
|
||||
### Step 4: Collect and Consolidate
|
||||
|
||||
For each reviewer: save to `.archeflow/artifacts/${RUN_ID}/check-<archetype>.md`, emit `review.verdict` event, record sequence number.
|
||||
|
||||
**Deduplication:** If two reviewers raise the same issue (same file + same category), merge into one finding using the higher severity. Don't double-count.
|
||||
|
||||
**Verdict:** Count CRITICAL findings across all reviewers (after dedup). Any CRITICAL = `REJECTED`. Otherwise `APPROVED`.
|
||||
|
||||
Example consolidated output:
|
||||
|
||||
```markdown
|
||||
## Check Phase Results — Cycle N
|
||||
|
||||
## Check Phase Results — Cycle 1
|
||||
### Guardian: APPROVED
|
||||
| Location | Severity | Category | Description | Fix |
|
||||
|----------|----------|----------|-------------|-----|
|
||||
| src/auth/handler.ts:52 | WARNING | security | Missing rate limit | Add rate limiter middleware |
|
||||
|
||||
### Skeptic: APPROVED
|
||||
| Location | Severity | Category | Description | Fix |
|
||||
|----------|----------|----------|-------------|-----|
|
||||
| src/auth/handler.ts:30 | INFO | design | Consider caching validated tokens | Add TTL cache for token validation |
|
||||
|
||||
### Sage: APPROVED
|
||||
| Location | Severity | Category | Description | Fix |
|
||||
|----------|----------|----------|-------------|-----|
|
||||
| tests/auth.test.ts:15 | WARNING | testing | Test names don't describe behavior | Rename to "should reject expired tokens" |
|
||||
|
||||
### Trickster: REJECTED
|
||||
| Location | Severity | Category | Description | Fix |
|
||||
|----------|----------|----------|-------------|-----|
|
||||
| src/auth/handler.ts:48 | CRITICAL | reliability | Empty string bypasses validation | Add `if (!token || token.trim() === '')` guard |
|
||||
|
||||
### Deduplication
|
||||
If two reviewers raise the same issue (same file + same category), merge:
|
||||
| Guardian + Skeptic | CRITICAL | security | Input not sanitized (src/api.ts:30) | Add validation |
|
||||
|
||||
Use the higher severity. Don't double-count in the verdict.
|
||||
|
||||
### Verdict: REJECTED — 1 critical finding
|
||||
→ Build cycle feedback (see orchestration skill) and feed to Plan phase
|
||||
| src/auth.ts:52 | WARNING | security | Missing rate limit | Add rate limiter |
|
||||
### Verdict: APPROVED — 0 critical, 1 warning
|
||||
```
|
||||
|
||||
## Why Structured Findings Matter
|
||||
## Timeout Handling
|
||||
|
||||
The standardized format enables:
|
||||
- **Cross-cycle tracking:** Same category + location = same issue. Can detect resolution or regression.
|
||||
- **Feedback routing:** Security/design findings → Creator. Quality/testing findings → Maker.
|
||||
- **Shadow detection:** CRITICAL:WARNING ratios, finding counts, and category distributions are measurable.
|
||||
- **Metrics:** Severity counts feed into the orchestration summary.
|
||||
Each reviewer has a **5-minute timeout**. On timeout: emit `agent.complete` with `"error": true`, log WARNING, treat as no findings, proceed.
|
||||
|
||||
**Exception:** Guardian timeout is blocking — abort Check phase and report to user.
|
||||
|
||||
@@ -186,7 +186,7 @@ When regenerating:
|
||||
|
||||
## Per-Agent Attention Filters
|
||||
|
||||
Not every agent needs the full bundle. The bridge defines attention filters that control which sections each archetype receives. This extends the base attention filters from `archeflow:attention-filters`.
|
||||
Not every agent needs the full bundle. The bridge defines attention filters that control which sections each archetype receives. This extends the base attention filters from `archeflow:check-phase`.
|
||||
|
||||
| Archetype | Bundle sections injected | Rationale |
|
||||
|-----------|------------------------|-----------|
|
||||
|
||||
@@ -165,3 +165,29 @@ Before your final commit, verify:
|
||||
- [ ] Every logical step has its own commit
|
||||
- [ ] Output summary is complete and accurate
|
||||
- [ ] Branch name follows convention
|
||||
|
||||
## Test-First Gate
|
||||
|
||||
Before the Maker's output is accepted, the orchestrator validates that tests were included.
|
||||
|
||||
### Validation Logic
|
||||
|
||||
Read `do-maker-files.txt`. Check if any file path matches common test patterns:
|
||||
- `*test*`, `*spec*`, `*.test.*`, `*.spec.*`, `*_test.*`, `*_spec.*`
|
||||
- Files in directories named `test/`, `tests/`, `__tests__/`, `spec/`
|
||||
|
||||
For writing domain projects, this gate is skipped.
|
||||
|
||||
### Outcomes
|
||||
|
||||
| Result | Action |
|
||||
|--------|--------|
|
||||
| Test files found | Pass — proceed to Check phase |
|
||||
| No test files, code domain | **Warn** — emit WARNING event, note in do-maker.md |
|
||||
| No test files + Creator specified tests | **Block** — re-run Maker with test instruction (1 retry) |
|
||||
| Writing domain | Skip gate entirely |
|
||||
|
||||
The block case triggers a targeted re-run with prompt:
|
||||
"The proposal specified these test cases: <test strategy section>. No test files
|
||||
were found in your changes. Add the specified tests before finishing."
|
||||
This is one retry within the Do phase, not a full PDCA cycle.
|
||||
|
||||
@@ -219,6 +219,32 @@ The helper script reads this config if it exists. All values have sensible defau
|
||||
|
||||
---
|
||||
|
||||
## Post-Merge Rollback
|
||||
|
||||
After merging, the run skill validates the merge by running the project's test suite. If tests fail, the merge is automatically reverted.
|
||||
|
||||
### Script
|
||||
|
||||
```bash
|
||||
./lib/archeflow-rollback.sh <run_id> [--test-cmd <cmd>]
|
||||
```
|
||||
|
||||
**Behavior:**
|
||||
1. Reads `test_command` from `.archeflow/config.yaml` (or uses `--test-cmd` override)
|
||||
2. Runs the test suite with a 5-minute timeout
|
||||
3. If tests pass: exits 0 (merge is good)
|
||||
4. If tests fail: runs `git revert --no-edit HEAD`, emits a `decision` event, exits 1
|
||||
5. Verifies HEAD is an ArcheFlow merge commit before reverting (warning if not, proceeds anyway)
|
||||
|
||||
**Integration with run skill:** Called in section 4c (All Approved) after `archeflow-git.sh merge`. If it returns non-zero, the orchestrator cycles back with "integration test failure" feedback or reports to the user if max cycles are reached.
|
||||
|
||||
**Configuration:** Set `test_command` in `.archeflow/config.yaml`:
|
||||
```yaml
|
||||
test_command: "npm test" # or "pytest", "cargo test", etc.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Safety Rules
|
||||
|
||||
These rules are inherited from `archeflow:orchestration` and reinforced here:
|
||||
|
||||
@@ -215,6 +215,59 @@ Moves the lesson to `archive.jsonl` regardless of frequency.
|
||||
| Before agent spawn (run start) | Inject relevant lessons | `archeflow-memory.sh inject <domain> <archetype>` |
|
||||
| User command | Add/list/forget lessons | `archeflow-memory.sh add/list/forget` |
|
||||
|
||||
## Audit Trail
|
||||
|
||||
Track which lessons are injected into each run and whether they were effective.
|
||||
|
||||
### Storage
|
||||
|
||||
```
|
||||
.archeflow/memory/audit.jsonl # Append-only audit log
|
||||
```
|
||||
|
||||
### Injection Audit Record
|
||||
|
||||
When `--audit <run_id>` is passed to the `inject` command, an audit record is written:
|
||||
|
||||
```jsonl
|
||||
{"ts":"2026-04-04T10:00:00Z","run_id":"2026-04-04-auth-fix","domain":"code","archetype":"","lessons_injected":["m-001","m-003"],"lesson_count":2}
|
||||
```
|
||||
|
||||
Usage:
|
||||
```bash
|
||||
./lib/archeflow-memory.sh inject "$DOMAIN" "" --audit "$RUN_ID"
|
||||
```
|
||||
|
||||
### Effectiveness Check
|
||||
|
||||
After a run completes, check whether injected lessons prevented issues:
|
||||
|
||||
```bash
|
||||
./lib/archeflow-memory.sh audit-check <run_id>
|
||||
```
|
||||
|
||||
This command:
|
||||
1. Reads `audit.jsonl` for lessons injected in the given run
|
||||
2. Reads the run's event file for `review.verdict` events
|
||||
3. For each injected lesson, checks keyword overlap between the lesson's description and review findings
|
||||
4. **No matching finding** = `helpful` (the lesson likely prevented the issue)
|
||||
5. **Matching finding** = `ineffective` (the issue repeated despite the lesson being injected)
|
||||
6. Appends effectiveness results to `audit.jsonl`
|
||||
|
||||
### Effectiveness Over Time
|
||||
|
||||
By querying `audit.jsonl` for effectiveness records, you can measure:
|
||||
- Which lessons consistently prevent issues (high `helpful` count)
|
||||
- Which lessons are not working (high `ineffective` count — consider rewording or removing)
|
||||
- Overall memory system ROI (ratio of helpful to ineffective across all runs)
|
||||
|
||||
```bash
|
||||
# Count effectiveness per lesson
|
||||
jq -r 'select(.type == "effectiveness_check") | [.lesson_id, .effectiveness] | @tsv' .archeflow/memory/audit.jsonl | sort | uniq -c
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Design Principles
|
||||
|
||||
1. **Append-only storage.** `lessons.jsonl` is append-only during writes; decay rewrites the file in place but preserves all data (archived lessons move to `archive.jsonl`).
|
||||
|
||||
@@ -7,6 +7,58 @@ description: Use when executing a multi-agent orchestration — spawning archety
|
||||
|
||||
This skill guides you through running a full ArcheFlow orchestration using Claude Code's native Agent tool and git worktrees.
|
||||
|
||||
## Strategy Selection
|
||||
|
||||
A **strategy** defines the shape of an orchestration run — which phases execute, in what order, and when to iterate. A **workflow** (fast/standard/thorough) controls the depth within a strategy.
|
||||
|
||||
### Available Strategies
|
||||
|
||||
| Strategy | Flow | When to Use |
|
||||
|----------|------|-------------|
|
||||
| `pdca` | Plan -> Do -> Check -> Act (cyclic) | Refactors, thorough reviews, multi-concern tasks |
|
||||
| `pipeline` | Plan -> Implement -> Spec-Review -> Quality-Review -> Verify (linear) | Bug fixes, fast patches, single-concern tasks |
|
||||
| `auto` | Selected by task analysis | Default — let ArcheFlow decide |
|
||||
|
||||
### Strategy Interface
|
||||
|
||||
Every strategy defines:
|
||||
|
||||
- **Phases** — ordered list of execution stages
|
||||
- **Agent mapping** — which archetypes run in each phase
|
||||
- **Transition rules** — conditions for moving between phases
|
||||
- **Iteration model** — cyclic (PDCA) or linear (pipeline)
|
||||
- **Exit conditions** — when the run terminates
|
||||
|
||||
### PDCA Strategy
|
||||
|
||||
The existing orchestration flow (Steps 0-4 below). Cyclic — the Act phase can feed back to Plan for another iteration. Best for tasks requiring multiple review perspectives and iterative refinement.
|
||||
|
||||
### Pipeline Strategy
|
||||
|
||||
Linear flow with no cycle-back. Faster for well-understood tasks where one pass is sufficient.
|
||||
|
||||
| Phase | Agent | Purpose |
|
||||
|-------|-------|---------|
|
||||
| Plan | Creator | Design proposal |
|
||||
| Implement | Maker | Build in worktree |
|
||||
| Spec-Review | Guardian, then Skeptic | Security + assumption check (sequential) |
|
||||
| Quality-Review | Sage | Code quality review |
|
||||
| Verify | (automated) | Run tests, apply targeted fix if CRITICAL |
|
||||
|
||||
No cycle-back — WARNINGs are logged but do not block. CRITICALs in Verify trigger a single targeted fix attempt by the Maker, not a full cycle.
|
||||
|
||||
### Auto-Selection Rules
|
||||
|
||||
When `strategy: auto` (default):
|
||||
|
||||
- Task contains "fix", "bug", "patch", "hotfix" → `pipeline`
|
||||
- Task contains "refactor", "redesign", "review" → `pdca`
|
||||
- Workflow is `thorough` → `pdca` (unless a task keyword rule above already matched)
|
||||
- Workflow is `fast` → `pipeline`
|
||||
- Otherwise → `pdca`
|
||||
|
||||
---
|
||||
|
||||
## Step 0: Choose a Workflow
|
||||
|
||||
If `.archeflow/teams/<name>.yaml` exists, the user can reference a team preset: `"Use the backend team"`. Load the preset's phase config instead of built-in defaults. See `archeflow:custom-archetypes` skill for preset format.
|
||||
@@ -89,6 +141,12 @@ Events are optional — if the events dir doesn't exist, skip logging. Never let
|
||||
|
||||
---
|
||||
|
||||
## Model Configuration
|
||||
|
||||
Model assignment per archetype and workflow is configured in `.archeflow/config.yaml` under the `models:` section. The `archeflow:run` skill (section 0c) handles resolution with fallback chain: per-workflow per-archetype > per-workflow default > per-archetype > global default. When spawning agents manually, read the config to select the appropriate model.
|
||||
|
||||
---
|
||||
|
||||
## Step 1: Plan Phase
|
||||
|
||||
Spawn agents sequentially — Creator needs Explorer's findings.
|
||||
@@ -188,6 +246,8 @@ Agent(
|
||||
|
||||
Spawn Guardian **first**. After Guardian completes, check adaptation rule A2 (fast-path). If A2 triggers (0 CRITICAL, 0 WARNING, non-escalated workflow, and not the first cycle of a `thorough` workflow), skip remaining reviewers and proceed to Act. Otherwise, spawn remaining reviewers **in parallel**.
|
||||
|
||||
**Reviewer spawning protocol:** The canonical sequence (Guardian first, A2 evaluation, parallel spawning, timeout handling) is defined in `archeflow:check-phase` under "Reviewer Spawning Protocol". Follow that protocol for the exact spawning order, context per reviewer, and timeout rules.
|
||||
|
||||
### Guardian (always runs first)
|
||||
|
||||
**Context to include:** Maker's git diff, proposal risk section only.
|
||||
@@ -344,12 +404,18 @@ Parse each reviewer's output into the standardized format:
|
||||
|
||||
Not all findings go to the same agent:
|
||||
|
||||
| Finding source | Routes to | Rationale |
|
||||
|----------------|-----------|-----------|
|
||||
| Guardian (security, breaking-change) | **Creator** | Design must change |
|
||||
| Skeptic (design, scalability) | **Creator** | Assumptions need revision |
|
||||
| Sage (quality, consistency) | **Maker** | Implementation refinement |
|
||||
| Trickster (reliability, testing) | **Creator** if design flaw, **Maker** if test gap | Depends on root cause |
|
||||
| Source | Category | Routes to | Reason |
|
||||
|--------|----------|-----------|--------|
|
||||
| Guardian | security, breaking-change | **Creator** | Design must change |
|
||||
| Guardian | reliability, dependency | **Creator** | Architectural decision needed |
|
||||
| Skeptic | design, scalability | **Creator** | Assumptions need revision |
|
||||
| Sage | quality, consistency | **Maker** | Implementation refinement |
|
||||
| Sage | testing | **Maker** | Test gap, not design flaw |
|
||||
| Trickster | reliability (design flaw) | **Creator** | Needs redesign |
|
||||
| Trickster | reliability (test gap) | **Maker** | Needs more tests |
|
||||
| Trickster | testing | **Maker** | Edge case not covered |
|
||||
|
||||
**Disambiguation rule:** When in doubt: if the fix requires changing the approach, route to Creator. If it requires changing the code within the existing approach, route to Maker.
|
||||
|
||||
### 3. Track Resolution
|
||||
|
||||
|
||||
@@ -117,3 +117,59 @@ When the Creator receives structured feedback from a prior cycle, the proposal m
|
||||
- **Disputed:** Disagrees with the finding. Must provide evidence or reasoning.
|
||||
|
||||
CRITICAL findings cannot be deferred or disputed — they must be fixed or the proposal will be rejected again.
|
||||
|
||||
## Task Granularity
|
||||
|
||||
Each change item in the Creator's proposal must be a **2-5 minute task** — specific enough that the Maker can implement it without interpretation.
|
||||
|
||||
### Requirements per Change Item
|
||||
|
||||
Every item in the `### Changes` section must include:
|
||||
|
||||
1. **Exact file path** — `src/auth/handler.ts`, not "the auth module"
|
||||
2. **What to change** — a code block showing the target state or transformation
|
||||
3. **How to verify** — a command or check that confirms correctness
|
||||
|
||||
### Good Example
|
||||
|
||||
```markdown
|
||||
1. **`src/auth/handler.ts:48`** — Add input length validation before token processing
|
||||
```typescript
|
||||
if (!token || token.trim().length === 0) {
|
||||
throw new ValidationError('Token must not be empty');
|
||||
}
|
||||
```
|
||||
**Verify:** `npm test -- --grep "empty token"` passes
|
||||
```
|
||||
|
||||
### Bad Example
|
||||
|
||||
```markdown
|
||||
1. **Auth module** — Fix the validation logic
|
||||
```
|
||||
|
||||
This is too vague. Which file? Which function? What does "fix" mean? The Maker will guess.
|
||||
|
||||
### Granularity Check
|
||||
|
||||
- If a single change item would take **>5 minutes**, split it into smaller items
|
||||
- If a non-trivial task has **<2 change items**, it is under-specified — the Creator missed something
|
||||
- Each item should touch **1-2 files** at most. Cross-cutting changes need separate items per file.
|
||||
|
||||
---
|
||||
|
||||
## Explorer Skip Conditions
|
||||
|
||||
Not every task needs Explorer research. Use this decision table:
|
||||
|
||||
| Condition | Skip Explorer? | Reason |
|
||||
|-----------|---------------|--------|
|
||||
| Task names specific files (1-2) and change is clear | **Yes** | Context is already known |
|
||||
| Bug fix with stack trace or error message | **Yes** | Root cause is locatable without research |
|
||||
| High confidence + small scope (single function/class) | **Yes** | Creator can mini-reflect instead |
|
||||
| Task contains "investigate", "research", "explore" | **No** | Explicit research request |
|
||||
| Task affects >3 files or unknown scope | **No** | Need dependency mapping |
|
||||
| Unfamiliar area of codebase (no recent commits by team) | **No** | Need pattern discovery |
|
||||
| Security-sensitive change (auth, crypto, input handling) | **No** | Need risk surface mapping |
|
||||
|
||||
When Explorer is skipped, Creator MUST include the **Mini-Reflect** section in its proposal to compensate for missing research context.
|
||||
|
||||
@@ -89,12 +89,12 @@ Only show if the user explicitly asks or if `progress.dag_on_complete: true` in
|
||||
When ArcheFlow activates at session start (via the `using-archeflow` skill), show ONE line:
|
||||
|
||||
```
|
||||
archeflow v0.3.0 · 24 skills · writing domain detected
|
||||
archeflow v0.7.0 · 24 skills · writing domain detected
|
||||
```
|
||||
|
||||
Or for code projects:
|
||||
```
|
||||
archeflow v0.3.0 · 24 skills · code domain
|
||||
archeflow v0.7.0 · 24 skills · code domain
|
||||
```
|
||||
|
||||
If ArcheFlow decides NOT to activate (simple task, single file):
|
||||
|
||||
146
skills/review/SKILL.md
Normal file
146
skills/review/SKILL.md
Normal file
@@ -0,0 +1,146 @@
|
||||
---
|
||||
name: review
|
||||
description: |
|
||||
Review-only mode. Run Guardian + optional reviewers on an existing diff or branch,
|
||||
without any Plan/Do orchestration. The highest-ROI mode for catching design-level bugs.
|
||||
<example>User: "af-review"</example>
|
||||
<example>User: "Review the last commit"</example>
|
||||
<example>User: "af-review --reviewers guardian,skeptic"</example>
|
||||
---
|
||||
|
||||
# ArcheFlow Review Mode
|
||||
|
||||
Run reviewers on existing code changes without orchestrating implementation.
|
||||
This is the most cost-effective mode — it delivers Guardian's error-path analysis
|
||||
without the Maker overhead.
|
||||
|
||||
## When to Use
|
||||
|
||||
- After you've implemented something and want a quality check
|
||||
- On a PR or branch before merging
|
||||
- When the sprint runner flags a task as DONE_WITH_CONCERNS
|
||||
- As a pre-commit quality gate for complex changes
|
||||
|
||||
## Invocation
|
||||
|
||||
```
|
||||
af-review # Review uncommitted changes
|
||||
af-review --branch feat/batch-api # Review branch diff against main
|
||||
af-review --commit HEAD~3..HEAD # Review last 3 commits
|
||||
af-review --reviewers guardian,skeptic,sage # Choose reviewers (default: guardian)
|
||||
af-review --evidence # Enable evidence-gating (stricter)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Execution
|
||||
|
||||
### Step 1: Get the Diff
|
||||
|
||||
Use `lib/archeflow-review.sh` to extract the diff and stats:
|
||||
|
||||
```bash
|
||||
# Uncommitted changes (default)
|
||||
DIFF=$(bash lib/archeflow-review.sh)
|
||||
|
||||
# Branch diff against main
|
||||
DIFF=$(bash lib/archeflow-review.sh --branch feat/batch-api)
|
||||
|
||||
# Commit range
|
||||
DIFF=$(bash lib/archeflow-review.sh --commit HEAD~3..HEAD)
|
||||
|
||||
# Override base branch
|
||||
DIFF=$(bash lib/archeflow-review.sh --branch feat/x --base develop)
|
||||
|
||||
# Stats only (no diff output)
|
||||
bash lib/archeflow-review.sh --stat-only
|
||||
```
|
||||
|
||||
The script prints the diff to stdout and stats to stderr. It exits 1 if the diff
|
||||
is empty (nothing to review). For large diffs (>500 lines), it warns on stderr.
|
||||
|
||||
### Step 2: Spawn Reviewers
|
||||
|
||||
Default: Guardian only (fastest, highest ROI).
|
||||
With `--reviewers`: spawn requested reviewers in parallel.
|
||||
|
||||
**Guardian** (always first):
|
||||
```
|
||||
Agent(
|
||||
description: "Guardian: review changes for <project>",
|
||||
prompt: "You are the GUARDIAN archetype — security and risk reviewer.
|
||||
|
||||
Review this diff for: security vulnerabilities, error handling gaps,
|
||||
data loss scenarios, race conditions, and breaking changes.
|
||||
|
||||
For each finding: cite specific code (file:line), state what you tested
|
||||
or observed, state what the correct behavior should be.
|
||||
|
||||
Diff:
|
||||
<DIFF>
|
||||
|
||||
STATUS: DONE | DONE_WITH_CONCERNS | NEEDS_CONTEXT | BLOCKED",
|
||||
subagent_type: "code-reviewer"
|
||||
)
|
||||
```
|
||||
|
||||
**Skeptic** (if requested):
|
||||
- Focus: hidden assumptions, edge cases, scalability
|
||||
- Context: diff + any design docs
|
||||
|
||||
**Sage** (if requested):
|
||||
- Focus: code quality, test coverage, maintainability
|
||||
- Context: diff + surrounding code
|
||||
|
||||
**Trickster** (if requested):
|
||||
- Focus: adversarial inputs, failure injection, chaos testing
|
||||
- Context: diff only
|
||||
|
||||
### Step 3: Collect and Report
|
||||
|
||||
Parse each reviewer's output. Show findings:
|
||||
|
||||
```
|
||||
── af-review: <project> ───────────────────────
|
||||
Reviewers: guardian, skeptic
|
||||
|
||||
🛡️ Guardian: 2 findings (1 CRITICAL, 1 WARNING)
|
||||
  [CRITICAL] Timeout marks variant as done — loses batch state (fanout.py:552)
|
||||
  [WARNING] No JSON error handling on corrupted state (batch.py:310)
|
||||
|
||||
🤔 Skeptic: 1 finding (1 INFO)
|
||||
  [INFO] hash() non-deterministic across processes (fanout.py:524)
|
||||
|
||||
Total: 3 findings (1 CRITICAL, 1 WARNING, 1 INFO)
|
||||
────────────────────────────────────────────────
|
||||
```
|
||||
|
||||
### Step 4: Evidence Gate (if --evidence)
|
||||
|
||||
When `--evidence` is active, apply the evidence requirements from `archeflow:check-phase`:
|
||||
- Scan findings for banned phrases ("might be", "could potentially", etc.)
|
||||
- Check for evidence markers (exit codes, line numbers, reproduction steps)
|
||||
- Downgrade unsupported findings to INFO
|
||||
|
||||
---
|
||||
|
||||
## Integration with Sprint Runner
|
||||
|
||||
The sprint runner can invoke `af-review` automatically:
|
||||
|
||||
| Sprint trigger | Review action |
|
||||
|----------------|--------------|
|
||||
| Task marked DONE_WITH_CONCERNS | Run Guardian on the agent's changes |
|
||||
| Task is L/XL estimate | Run Guardian + Skeptic after completion |
|
||||
| Task involves security keywords | Run Guardian automatically |
|
||||
| User requests | Run specified reviewers |
|
||||
|
||||
---
|
||||
|
||||
## Cost
|
||||
|
||||
Review-only is 60-80% cheaper than full PDCA:
|
||||
- No Explorer research (~30% of PDCA cost)
|
||||
- No Creator planning (~20% of PDCA cost)
|
||||
- No Maker implementation (already done)
|
||||
- Only reviewer token costs remain
|
||||
@@ -63,6 +63,165 @@ After emitting `run.start`, record `SEQ_RUN_START=1`.
|
||||
|
||||
If `--start-from` is specified, verify that the required prior artifacts exist in `.archeflow/artifacts/${RUN_ID}/` before skipping phases. If missing, abort with an error.
|
||||
|
||||
#### 0a. Strategy Resolution
|
||||
|
||||
Determine the execution strategy before proceeding. Strategy controls the overall flow shape (cyclic vs linear).
|
||||
|
||||
```bash
|
||||
# Read strategy from config or CLI flag
CONFIG=".archeflow/config.yaml"
STRATEGY=$(grep '^strategy:' "$CONFIG" 2>/dev/null | sed 's/strategy:[[:space:]]*//' | tr -d '"' | head -1)
|
||||
STRATEGY="${STRATEGY:-auto}"
|
||||
|
||||
# CLI override: --strategy pdca|pipeline
|
||||
# (parsed from invocation args, overrides config)
|
||||
|
||||
# Auto-select logic
|
||||
if [[ "$STRATEGY" == "auto" ]]; then
|
||||
TASK_LOWER=$(echo "$TASK" | tr '[:upper:]' '[:lower:]')
|
||||
if echo "$TASK_LOWER" | grep -qE '(fix|bug|patch|hotfix)'; then
|
||||
STRATEGY="pipeline"
|
||||
elif echo "$TASK_LOWER" | grep -qE '(refactor|redesign|review)'; then
|
||||
STRATEGY="pdca"
|
||||
elif [[ "$WORKFLOW" == "fast" ]]; then
|
||||
STRATEGY="pipeline"
|
||||
elif [[ "$WORKFLOW" == "thorough" ]]; then
|
||||
STRATEGY="pdca"
|
||||
else
|
||||
STRATEGY="pdca"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "Strategy: $STRATEGY"
|
||||
```
|
||||
|
||||
**Strategy dispatch:** If `STRATEGY=pdca`, execute Steps 1-5 below (existing PDCA flow). If `STRATEGY=pipeline`, skip to the "Pipeline Strategy Execution" section at the end of this skill.
|
||||
|
||||
#### 0b. Lib Script Validation
|
||||
|
||||
Verify that all required library scripts exist and are executable before proceeding. Fail fast if any dependency is missing.
|
||||
|
||||
```bash
|
||||
# Required lib scripts
|
||||
REQUIRED_LIBS=(
|
||||
"archeflow-event.sh"
|
||||
"archeflow-memory.sh"
|
||||
"archeflow-git.sh"
|
||||
"archeflow-rollback.sh"
|
||||
"archeflow-report.sh"
|
||||
"archeflow-progress.sh"
|
||||
)
|
||||
|
||||
MISSING=()
|
||||
for lib in "${REQUIRED_LIBS[@]}"; do
|
||||
if [[ ! -x "./lib/$lib" ]]; then
|
||||
MISSING+=("$lib")
|
||||
fi
|
||||
done
|
||||
|
||||
if [[ ${#MISSING[@]} -gt 0 ]]; then
|
||||
echo "ERROR: Missing or non-executable lib scripts:" >&2
|
||||
for m in "${MISSING[@]}"; do
|
||||
echo " - lib/$m" >&2
|
||||
done
|
||||
echo "Ensure ArcheFlow is installed correctly. See README for setup." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check jq availability (required for event processing and memory)
|
||||
if ! command -v jq &>/dev/null; then
|
||||
echo "ERROR: jq is required but not found in PATH." >&2
|
||||
echo "Install with: apt install jq / brew install jq" >&2
|
||||
exit 1
|
||||
fi
|
||||
```
|
||||
|
||||
#### 0c. Memory Injection
|
||||
|
||||
Load cross-run memory lessons and inject into agent prompts. Use `--audit` to track which lessons were injected for this run:
|
||||
|
||||
```bash
|
||||
# Load cross-run memory for this domain (with audit trail)
|
||||
MEMORY_LESSONS=$(./lib/archeflow-memory.sh inject "$DOMAIN" "" --audit "$RUN_ID")
|
||||
|
||||
# Inject into Explorer/Creator prompts if non-empty
|
||||
if [[ -n "$MEMORY_LESSONS" ]]; then
|
||||
EXPLORER_PROMPT="${EXPLORER_PROMPT}
|
||||
|
||||
${MEMORY_LESSONS}"
|
||||
CREATOR_PROMPT="${CREATOR_PROMPT}
|
||||
|
||||
${MEMORY_LESSONS}"
|
||||
fi
|
||||
```
|
||||
|
||||
#### 0d. Model Configuration
|
||||
|
||||
Read model assignment from `.archeflow/config.yaml` and resolve the model for each archetype based on the current workflow. Per-workflow overrides take precedence over per-archetype overrides, which take precedence over the default.
|
||||
|
||||
```bash
|
||||
CONFIG=".archeflow/config.yaml"
|
||||
|
||||
# Read default model
|
||||
DEFAULT_MODEL=$(grep -A1 '^models:' "$CONFIG" 2>/dev/null | grep 'default:' | sed 's/.*default:\s*//' | tr -d '"' | head -1)
|
||||
DEFAULT_MODEL="${DEFAULT_MODEL:-sonnet}"
|
||||
|
||||
# Resolve model for a given archetype and workflow
|
||||
# Usage: resolve_model <archetype> <workflow>
|
||||
resolve_model() {
|
||||
local arch="$1" wf="$2" model=""
|
||||
|
||||
# Check per-workflow per-archetype override
|
||||
model=$(sed -n "/workflows:/,\$p" "$CONFIG" 2>/dev/null \
|
||||
| sed -n "/${wf}:/,/^ [a-z]/p" \
|
||||
| grep -A1 "archetypes:" | grep "${arch}:" \
|
||||
| sed "s/.*${arch}:\s*//" | tr -d '"' | head -1)
|
||||
[[ -n "$model" ]] && echo "$model" && return
|
||||
|
||||
# Check per-workflow default
|
||||
model=$(sed -n "/workflows:/,\$p" "$CONFIG" 2>/dev/null \
|
||||
| sed -n "/${wf}:/,/^ [a-z]/p" \
|
||||
| grep 'default:' | sed 's/.*default:\s*//' | tr -d '"' | head -1)
|
||||
[[ -n "$model" ]] && echo "$model" && return
|
||||
|
||||
# Check per-archetype override
|
||||
model=$(sed -n "/^ archetypes:/,/^ [a-z]/p" "$CONFIG" 2>/dev/null \
|
||||
| grep "${arch}:" | sed "s/.*${arch}:\s*//" | tr -d '"' | head -1)
|
||||
[[ -n "$model" ]] && echo "$model" && return
|
||||
|
||||
# Fall back to default
|
||||
echo "$DEFAULT_MODEL"
|
||||
}
|
||||
|
||||
# Example: EXPLORER_MODEL=$(resolve_model explorer "$WORKFLOW")
|
||||
```
|
||||
|
||||
Use `resolve_model` when spawning each agent to pass the correct model. The resolved model can be included in the `agent.start` event data for cost tracking.
|
||||
|
||||
---
|
||||
|
||||
### Status Token Protocol
|
||||
|
||||
Every agent ends its output with a `STATUS:` line. The orchestrator parses this to decide the next action.
|
||||
|
||||
**Parsing:**
|
||||
|
||||
```bash
|
||||
STATUS=$(tail -20 "$AGENT_OUTPUT" | grep -oE 'STATUS: (DONE|DONE_WITH_CONCERNS|NEEDS_CONTEXT|BLOCKED)' | head -1)
|
||||
STATUS="${STATUS#STATUS: }"
|
||||
if [[ -z "$STATUS" ]]; then STATUS="DONE"; fi
|
||||
```
|
||||
|
||||
**Status to action mapping:**
|
||||
|
||||
| Status | Action |
|
||||
|--------|--------|
|
||||
| `DONE` | Proceed to next phase or agent |
|
||||
| `DONE_WITH_CONCERNS` | Log concerns in event data, proceed |
|
||||
| `NEEDS_CONTEXT` | Pause run, request missing information from user |
|
||||
| `BLOCKED` | Abort phase, report blocker to user |
|
||||
|
||||
Include the parsed status in the `agent.complete` event data: `"status":"<STATUS>"`.
|
||||
|
||||
---
|
||||
|
||||
### 1. Plan Phase
|
||||
@@ -126,17 +285,83 @@ After Creator returns:
|
||||
|
||||
#### 1c. Confidence Gate (Adaptation Rule A3)
|
||||
|
||||
Read Creator's confidence scores from `plan-creator.md`. Apply A3 per `archeflow:orchestration`:
|
||||
- Task understanding < 0.5 → **Pause**, ask user
|
||||
- Solution completeness < 0.5 → **Upgrade** to standard, spawn Explorer
|
||||
- Risk coverage < 0.5 → **Spawn mini-Explorer** for risky area (parallel, 5 min max)
|
||||
**Parsing instructions:**
|
||||
|
||||
Read `plan-creator.md`, locate the `### Confidence` table. Extract scores for each axis as floats:
|
||||
|
||||
```bash
|
||||
CONF_FILE=".archeflow/artifacts/${RUN_ID}/plan-creator.md"
|
||||
|
||||
# Extract confidence scores (expects format: "| Task understanding | 0.8 |")
|
||||
TASK_UNDERSTANDING=$(grep -i "task understanding" "$CONF_FILE" | grep -oE '[0-9]+\.[0-9]+' | head -1)
|
||||
SOLUTION_COMPLETENESS=$(grep -i "solution completeness" "$CONF_FILE" | grep -oE '[0-9]+\.[0-9]+' | head -1)
|
||||
RISK_COVERAGE=$(grep -i "risk coverage" "$CONF_FILE" | grep -oE '[0-9]+\.[0-9]+' | head -1)
|
||||
|
||||
# Fallback: if unparseable, emit warning and default to 0.0 (triggers gate, not bypasses it)
|
||||
if [[ -z "$TASK_UNDERSTANDING" || -z "$SOLUTION_COMPLETENESS" || -z "$RISK_COVERAGE" ]]; then
|
||||
echo "WARNING: Could not parse confidence scores from plan-creator.md" >&2
|
||||
./lib/archeflow-event.sh "$RUN_ID" decision plan "" \
|
||||
'{"what":"confidence_parse_failure","chosen":"warn","rationale":"one or more scores unparseable"}' "$SEQ_CREATOR_COMPLETE"
|
||||
fi
|
||||
TASK_UNDERSTANDING="${TASK_UNDERSTANDING:-0.0}"
|
||||
SOLUTION_COMPLETENESS="${SOLUTION_COMPLETENESS:-0.0}"
|
||||
RISK_COVERAGE="${RISK_COVERAGE:-0.0}"
|
||||
```
|
||||
|
||||
**Pause branch** (Task understanding < 0.5):
|
||||
|
||||
The Creator does not sufficiently understand the task. Do not spawn Maker.
|
||||
|
||||
1. Emit decision event with `"chosen":"pause"`
|
||||
2. Display message to user: "Creator rated task understanding at <score>. Clarification needed before proceeding."
|
||||
3. Block until the user provides clarification
|
||||
4. Re-run Creator with the clarification appended to the task description
|
||||
|
||||
If A3 triggers, emit a `decision` event:
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" decision plan "" \
  '{"what":"confidence_gate","chosen":"pause","rationale":"task_understanding scored '"$TASK_UNDERSTANDING"'"}' "$SEQ_CREATOR_COMPLETE"
|
||||
```
|
||||
|
||||
**Upgrade branch** (Solution completeness < 0.5):
|
||||
|
||||
The Creator's proposal is incomplete — more research is needed.
|
||||
|
||||
1. If fast workflow: upgrade to standard, spawn Explorer, then re-run Creator with Explorer output
|
||||
2. If already standard/thorough: re-run Explorer with a focused prompt targeting the incomplete areas
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" decision plan "" \
|
||||
'{"what":"confidence_gate","chosen":"upgrade","rationale":"solution_completeness scored '"$SOLUTION_COMPLETENESS"'"}' "$SEQ_CREATOR_COMPLETE"
|
||||
|
||||
# If fast → standard upgrade:
|
||||
WORKFLOW="standard"
|
||||
# Spawn Explorer, then re-run Creator with Explorer findings
|
||||
```
|
||||
|
||||
**Mini-Explorer branch** (Risk coverage < 0.5):
|
||||
|
||||
The Creator identified risks but lacks confidence in their assessment. Spawn a focused Explorer to investigate.
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "Mini-Explorer: investigate risk area for <task>",
|
||||
prompt: "You are the EXPLORER archetype. The Creator rated risk coverage at <score>.
|
||||
Identified risks: <risks from plan-creator.md>
|
||||
Research ONLY the risky areas. Answer: Is the risk real? What mitigations exist? What tests/guards would help?
|
||||
Limit: focused output only.",
|
||||
subagent_type: "Explore"
|
||||
)
|
||||
```
|
||||
|
||||
Save output to `.archeflow/artifacts/${RUN_ID}/plan-mini-explorer.md`. The Maker receives both `plan-creator.md` and `plan-mini-explorer.md` as context.
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" decision plan "" \
|
||||
'{"what":"confidence_gate","chosen":"mini_explorer","rationale":"risk_coverage scored '"$RISK_COVERAGE"'"}' "$SEQ_CREATOR_COMPLETE"
|
||||
```
|
||||
|
||||
**Note:** The mini-Explorer runs in parallel with Do phase preparation (5 min max). The Maker can proceed once both `plan-creator.md` and `plan-mini-explorer.md` are available.
|
||||
|
||||
#### 1d. Phase Transition: Plan to Do
|
||||
|
||||
```bash
|
||||
@@ -184,6 +409,28 @@ After Maker returns:
|
||||
|
||||
**Critical:** Verify the Maker committed its changes before proceeding. If uncommitted changes exist, instruct the Maker to commit.
|
||||
|
||||
#### 2a-ii. Test-First Validation
|
||||
|
||||
After Maker completes, check `do-maker-files.txt` for test files:
|
||||
```bash
|
||||
TEST_FILES=$(grep -iE '([/_.-](test|spec)[/_.-]|\.(test|spec)\.|_(test|spec)\.|/tests?/|/__tests__/|/specs?/)' ".archeflow/artifacts/${RUN_ID}/do-maker-files.txt" || true)
|
||||
```
|
||||
|
||||
If `TEST_FILES` is empty and domain is not `writing`:
|
||||
1. Check if `plan-creator.md` contains a `### Test Strategy` section
|
||||
2. If yes: re-run Maker with targeted test instruction (one retry within Do phase)
|
||||
3. If no test strategy specified: emit WARNING event and proceed
|
||||
|
||||
```bash
|
||||
./lib/archeflow-event.sh "$RUN_ID" decision do "" \
|
||||
'{"what":"test_first_gate","chosen":"<pass|warn|retry>","rationale":"<reason>"}' "$SEQ_MAKER_COMPLETE"
|
||||
```
|
||||
|
||||
The re-run prompt for the retry case:
|
||||
> "The proposal specified these test cases: <test strategy section>. No test files were found in your changes. Add the specified tests before finishing."
|
||||
|
||||
This is one retry within the Do phase, not a full PDCA cycle. If the retry also produces no tests, emit WARNING and proceed to Check.
|
||||
|
||||
#### 2b. Phase Transition: Do to Check
|
||||
|
||||
```bash
|
||||
@@ -263,6 +510,61 @@ Spawn all applicable reviewers in parallel (multiple Agent calls in one message)
|
||||
|
||||
After each returns, emit `review.verdict` and save artifact.
|
||||
|
||||
#### 3c-ii. Evidence Validation
|
||||
|
||||
After all reviewers complete, scan CRITICAL/WARNING findings for two conditions:
|
||||
1. **Banned phrases** — hedged language without evidence
|
||||
2. **Missing evidence** — no command output, code citation, or reproduction steps
|
||||
|
||||
Downgrade unsupported findings to INFO before proceeding to Act.
|
||||
|
||||
```bash
|
||||
BANNED_PHRASES=("might be" "could potentially" "appears to" "seems like" "may not")
|
||||
EVIDENCE_MARKERS=("exit" "output" "line [0-9]" ":[0-9]" "returned" "FAIL" "PASS" "assert")
|
||||
|
||||
for artifact in .archeflow/artifacts/${RUN_ID}/check-*.md; do
|
||||
REVIEWER=$(basename "$artifact" .md | sed 's/check-//')
|
||||
|
||||
# Read findings table rows (CRITICAL and WARNING only)
|
||||
grep -E '\| (CRITICAL|WARNING) \|' "$artifact" 2>/dev/null | while IFS= read -r line; do
|
||||
SEVERITY=$(echo "$line" | grep -oE '(CRITICAL|WARNING)' | head -1)
|
||||
DOWNGRADE_REASON=""
|
||||
|
||||
# Check 1: banned phrases
|
||||
for phrase in "${BANNED_PHRASES[@]}"; do
|
||||
if echo "$line" | grep -qi "$phrase"; then
|
||||
DOWNGRADE_REASON="banned phrase: $phrase"
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
# Check 2: no evidence markers (only if not already flagged)
|
||||
if [[ -z "$DOWNGRADE_REASON" ]]; then
|
||||
HAS_EVIDENCE=false
|
||||
for marker in "${EVIDENCE_MARKERS[@]}"; do
|
||||
if echo "$line" | grep -qiE "$marker"; then
|
||||
HAS_EVIDENCE=true
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [[ "$HAS_EVIDENCE" == "false" ]]; then
|
||||
DOWNGRADE_REASON="no evidence cited"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ -n "$DOWNGRADE_REASON" ]]; then
|
||||
echo "EVIDENCE DOWNGRADE: $REVIEWER $SEVERITY finding — $DOWNGRADE_REASON"
|
||||
./lib/archeflow-event.sh "$RUN_ID" decision check "" \
|
||||
'{"what":"evidence_downgrade","from":"'"$SEVERITY"'","to":"INFO","reviewer":"'"$REVIEWER"'","reason":"'"$DOWNGRADE_REASON"'"}'
|
||||
# Note: the orchestrator tracks downgraded findings separately —
|
||||
# do not modify the artifact file (avoids sed corruption on table rows)
|
||||
fi
|
||||
done
|
||||
done
|
||||
```
|
||||
|
||||
**Important:** Downgraded findings are tracked in events, NOT by modifying artifact files. The Act phase reads the decision events to know which findings were downgraded and excludes them from CRITICAL tallies.
|
||||
|
||||
#### 3d. Phase Transition: Check to Act
|
||||
|
||||
Collect all verdict seq numbers for the parent array.
|
||||
@@ -301,12 +603,46 @@ If all reviewers approved (and completion criteria met, if defined):
|
||||
'{"cycle":<N>,"max_cycles":<M>,"exit_condition":"all_approved","met":true,"next_action":"complete"}' "$SEQ_CHECK_TO_ACT"
|
||||
```
|
||||
|
||||
2. **Pre-merge hook check:**
|
||||
```bash
|
||||
# Read hooks config if it exists
|
||||
if [[ -f ".archeflow/hooks.yaml" ]]; then
|
||||
PRE_MERGE_HOOKS=$(grep -A5 "pre-merge:" .archeflow/hooks.yaml || true)
|
||||
if [[ -n "$PRE_MERGE_HOOKS" ]]; then
|
||||
echo "Running pre-merge hooks..."
|
||||
# Execute hooks; abort merge if fail_action: abort
|
||||
# Hook execution is project-specific — see .archeflow/hooks.yaml
|
||||
fi
|
||||
fi
|
||||
```
|
||||
|
||||
3. **Merge the Maker's worktree branch:**
|
||||
```bash
|
||||
./lib/archeflow-git.sh merge "$RUN_ID" --no-ff
|
||||
```
|
||||
|
||||
4. **Post-merge test validation** (using the auto-rollback script):
|
||||
```bash
|
||||
# Run tests and auto-revert if they fail
|
||||
if ! ./lib/archeflow-rollback.sh "$RUN_ID"; then
|
||||
# Rollback script already reverted HEAD and emitted decision event
|
||||
# If cycles remain, cycle back with integration test failure feedback
|
||||
if [[ "$CYCLE" -lt "$MAX_CYCLES" ]]; then
|
||||
echo "Cycling back with integration test failure feedback..."
|
||||
# Build act-feedback.md with "integration test failure on main" as top finding
|
||||
# Continue to step 4d (Issues Found)
|
||||
else
|
||||
echo "Max cycles reached. Reporting failure to user."
|
||||
# Continue to step 4e (Max Cycles Reached)
|
||||
fi
|
||||
fi
|
||||
```
|
||||
|
||||
5. **Clean up worktree:**
|
||||
```bash
|
||||
./lib/archeflow-git.sh cleanup "$RUN_ID"
|
||||
```
|
||||
|
||||
6. Proceed to Completion (step 5)
|
||||
|
||||
#### 4d. Branch: Issues Found (cycles remaining)
|
||||
@@ -360,6 +696,14 @@ If `CYCLE >= MAX_CYCLES` and issues remain:
|
||||
./lib/archeflow-event.sh "$RUN_ID" run.complete act "" \
|
||||
'{"status":"completed","cycles":<N>,"agents_total":<count>,"fixes_total":<count>,"shadows":0,"artifacts":[<list>]}'
|
||||
|
||||
# Check for regressions from previously fixed findings
|
||||
if ./lib/archeflow-memory.sh regression-check ".archeflow/events/${RUN_ID}.jsonl"; then
|
||||
echo "No regressions detected."
|
||||
else
|
||||
echo "WARNING: Regressions detected — previously fixed findings have reappeared."
|
||||
echo "Review the regression output above and consider addressing them."
|
||||
fi
|
||||
|
||||
# Generate report
|
||||
./lib/archeflow-report.sh .archeflow/events/${RUN_ID}.jsonl
|
||||
|
||||
@@ -458,3 +802,89 @@ Run ID: <run_id> | Workflow: <standard> | Cycle: 1/<max>
|
||||
Artifacts: .archeflow/artifacts/<run_id>/
|
||||
Report: .archeflow/events/<run_id>.jsonl
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Pipeline Strategy Execution
|
||||
|
||||
When `STRATEGY=pipeline`, execute this linear flow instead of the PDCA cycle above.
|
||||
|
||||
### Pipeline Phases
|
||||
|
||||
```
|
||||
Plan -> Implement -> Spec-Review -> Quality-Review -> Verify
|
||||
```
|
||||
|
||||
No cycle-back. Each phase runs once.
|
||||
|
||||
### 1. Plan
|
||||
|
||||
Spawn Creator only (no Explorer). Use fast-workflow Creator prompt with Mini-Reflect.
|
||||
|
||||
Save output to `.archeflow/artifacts/${RUN_ID}/plan-creator.md`.
|
||||
|
||||
### 2. Implement
|
||||
|
||||
Spawn Maker in isolated worktree with Creator's proposal.
|
||||
|
||||
Save output to `.archeflow/artifacts/${RUN_ID}/do-maker.md`.
|
||||
|
||||
### 3. Spec-Review
|
||||
|
||||
Run Guardian and Skeptic **sequentially** (Guardian first, then Skeptic only if Guardian has findings).
|
||||
|
||||
- Guardian receives: Maker's git diff + proposal risk section
|
||||
- Skeptic receives: Creator's proposal (assumptions focus)
|
||||
|
||||
Save to `check-guardian.md` and `check-skeptic.md`.
|
||||
|
||||
### 4. Quality-Review
|
||||
|
||||
Spawn Sage with proposal + diff + implementation summary.
|
||||
|
||||
Save to `check-sage.md`.
|
||||
|
||||
### 5. Verify
|
||||
|
||||
Run the project's test suite. If tests pass and no CRITICAL findings exist:
|
||||
|
||||
1. Merge the Maker's branch
|
||||
2. Emit `run.complete`
|
||||
|
||||
If CRITICAL findings exist:
|
||||
|
||||
1. **Do NOT merge yet** — the branch remains separate
|
||||
2. Spawn Maker for a **single targeted fix** — provide only the CRITICAL findings as context
|
||||
3. Re-run the reviewer(s) that raised the CRITICAL finding(s) on just the fixed files
|
||||
4. Re-run test suite
|
||||
5. If tests pass and re-review approves: merge
|
||||
6. If still failing after this one fix attempt: **abort** — do NOT merge, report to user with the branch name for manual resolution
|
||||
|
||||
```bash
|
||||
# Pipeline verify: explicit merge guard
|
||||
if [[ "$VERIFY_PASS" == "true" ]]; then
|
||||
./lib/archeflow-git.sh merge "$RUN_ID" --no-ff
|
||||
./lib/archeflow-rollback.sh "$RUN_ID" # post-merge test validation
|
||||
else
|
||||
echo "Pipeline aborted: CRITICAL findings not resolved after 1 fix attempt."
|
||||
echo "Branch: archeflow/$RUN_ID (not merged)"
|
||||
# Emit run.complete with status: aborted
|
||||
fi
|
||||
```
|
||||
|
||||
WARNINGs are logged in the run event but do not block the merge.
|
||||
|
||||
### Pipeline Progress Display
|
||||
|
||||
```
|
||||
━━━ ArcheFlow Pipeline: <task> ━━━━━━━━━━━━━━━━
|
||||
Run ID: <run_id> | Strategy: pipeline
|
||||
|
||||
[Plan] Creator designing... -> done (20s)
|
||||
[Implement] Maker building... -> done (60s, 3 files)
|
||||
[Spec] Guardian reviewing... -> APPROVED
|
||||
[Quality] Sage reviewing... -> APPROVED (1 WARNING)
|
||||
[Verify] Tests passing... -> merged to main
|
||||
|
||||
━━━ Complete: 2m 15s ━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
```
|
||||
|
||||
302
skills/sprint/SKILL.md
Normal file
302
skills/sprint/SKILL.md
Normal file
@@ -0,0 +1,302 @@
|
||||
---
|
||||
name: sprint
|
||||
description: |
|
||||
Workspace sprint runner. Reads queue.json, spawns parallel agent teams across projects,
|
||||
manages lifecycle (commit, push, next task), tracks progress. The main operational mode
|
||||
for ArcheFlow in multi-project workspaces.
|
||||
<example>User: "af-sprint"</example>
|
||||
<example>User: "Run the sprint"</example>
|
||||
<example>User: "af-sprint --slots 5 --dry-run"</example>
|
||||
---
|
||||
|
||||
# Workspace Sprint Runner
|
||||
|
||||
Read the task queue, spawn parallel agents across projects, collect results, commit+push,
|
||||
spawn next batch. Repeat until the queue is drained or budget is exhausted.
|
||||
|
||||
## When to Use
|
||||
|
||||
This is the **primary operational mode** for ArcheFlow in multi-project workspaces.
|
||||
Use it when the user says "run the sprint", "work the queue", "go autonomous", or
|
||||
invokes `af-sprint`.
|
||||
|
||||
Do NOT use `archeflow:run` for individual tasks within a sprint — the sprint runner
|
||||
handles task dispatch internally, using `archeflow:run` only when a task warrants
|
||||
full PDCA orchestration.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- `docs/orchestra/queue.json` — task queue (managed by `./scripts/ws`)
|
||||
- `./scripts/ws` — workspace CLI for queue operations
|
||||
- Each project is a separate git repo under the workspace root
|
||||
|
||||
## Invocation
|
||||
|
||||
```
|
||||
af-sprint # Run sprint with defaults (4 slots, AUTONOM mode)
|
||||
af-sprint --slots 5 # Max 5 parallel agents
|
||||
af-sprint --dry-run # Show what would run, don't execute
|
||||
af-sprint --priority P0,P1 # Only process P0 and P1 items
|
||||
af-sprint --project writing.colette # Only process items for this project
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Execution Protocol
|
||||
|
||||
### Step 0: Orient
|
||||
|
||||
```bash
|
||||
# Load queue and workspace state
|
||||
QUEUE=$(cat docs/orchestra/queue.json)
|
||||
MODE=$(echo "$QUEUE" | jq -r '.mode')
|
||||
```
|
||||
|
||||
Check mode:
|
||||
- `AUTONOM` → proceed without asking
|
||||
- `ATTENDED` → show plan, wait for user approval before each batch
|
||||
- `PAUSED` → report status only, do not start tasks
|
||||
|
||||
Show one-line status:
|
||||
```
|
||||
sprint: AUTONOM · 7 pending (1×P0, 1×P2, 5×P3) · 4 slots
|
||||
```
|
||||
|
||||
### Step 1: Select Batch
|
||||
|
||||
Pick tasks for the next batch. Rules:
|
||||
|
||||
1. **Priority cascade**: P0 first, then P1, then P2. Never start P3 unless user explicitly includes it.
|
||||
2. **Dependency check**: Skip tasks whose `depends_on` items aren't all `completed`.
|
||||
3. **One agent per project**: Never run two tasks on the same project simultaneously.
|
||||
4. **Cost-aware concurrency**:
|
||||
- Estimate task cost from `estimate` field: S=cheap, M=moderate, L=expensive, XL=very expensive
|
||||
- **Expensive tasks** (L, XL): max 2 concurrent
|
||||
- **Cheap tasks** (S, M): fill remaining slots
|
||||
- Target mix: 1-2 expensive + 2-3 cheap = 4-5 total
|
||||
5. **Slot limit**: Never exceed `--slots` (default 4).
|
||||
|
||||
```python
|
||||
# Pseudocode for batch selection
|
||||
batch = []
|
||||
used_projects = set()
|
||||
expensive_count = 0
|
||||
|
||||
for priority in ["P0", "P1", "P2"]:
|
||||
for task in queue_items(priority, status="pending"):
|
||||
if len(batch) >= MAX_SLOTS:
|
||||
break
|
||||
if task.project in used_projects:
|
||||
continue # One agent per project
|
||||
if not deps_satisfied(task):
|
||||
continue
|
||||
if task.estimate in ("L", "XL"):
|
||||
if expensive_count >= 2:
|
||||
continue
|
||||
expensive_count += 1
|
||||
batch.append(task)
|
||||
used_projects.add(task.project)
|
||||
```
|
||||
|
||||
### Step 2: Assess and Dispatch
|
||||
|
||||
For each task in the batch, decide the execution strategy:
|
||||
|
||||
| Signal | Strategy | What happens |
|
||||
|--------|----------|-------------|
|
||||
| Estimate S, clear scope | **Direct** | Spawn Agent() with task description, no orchestration |
|
||||
| Estimate M, multi-file | **Direct+** | Spawn Agent() with task + "read code first, run tests after" |
|
||||
| Estimate L/XL, code | **Feature-dev style** | Agent explores → implements → self-reviews (see below) |
|
||||
| Estimate L/XL, writing | **PDCA** | Use af-run with writing domain archetypes |
|
||||
| Task contains "validate", "test", "lint", "check" | **Direct** | Cheap analytical task, no orchestration |
|
||||
| Task contains "review", "audit", "security" | **Review** | Spawn Guardian + relevant reviewers only |
|
||||
|
||||
### L/XL Code Task Template (feature-dev style)
|
||||
|
||||
For complex code tasks, give the agent a structured process instead of PDCA:
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "<project>: <task-short>",
|
||||
prompt: "You are working on project <project> at <path>.
|
||||
Task: <task description>
|
||||
|
||||
Follow this process:
|
||||
1. EXPLORE: Read CLAUDE.md, docs/status.md, and the relevant source files.
|
||||
Understand existing patterns before writing anything.
|
||||
2. PLAN: Identify 2-3 files to change. Write a brief plan (what, where, why).
|
||||
If ambiguous, list your assumptions.
|
||||
3. IMPLEMENT: Make the changes. Follow existing code patterns strictly.
|
||||
4. TEST: Run the project's test suite. Fix any failures.
|
||||
5. SELF-REVIEW: Before committing, re-read your diff. Check:
|
||||
- Error handling: what happens when this fails?
|
||||
- Protocol compliance: am I using the right function signatures?
|
||||
- Tests: did I test the important paths?
|
||||
6. COMMIT + PUSH: Conventional commits, signed, pushed.
|
||||
|
||||
<standard rules>
|
||||
|
||||
STATUS: DONE | DONE_WITH_CONCERNS | NEEDS_CONTEXT | BLOCKED"
|
||||
)
|
||||
```
|
||||
|
||||
This gives the agent feature-dev's structured exploration without the multi-agent overhead.
|
||||
For writing/research L/XL tasks, use af-run instead — archetypes add value where linters don't exist.
|
||||
|
||||
**Agent spawn template:**
|
||||
|
||||
For each task in the batch, spawn an Agent in the SAME message (parallel dispatch):
|
||||
|
||||
```
|
||||
Agent(
|
||||
description: "<project>: <task-short>",
|
||||
prompt: "You are working on project <project> at <path>.
|
||||
Task: <task description>
|
||||
<notes if any>
|
||||
|
||||
Rules:
|
||||
- Read the project's CLAUDE.md first
|
||||
- Commit with: git -c user.signingkey=/home/c/.ssh/id_ed25519_dev.pub commit
|
||||
- NO Co-Authored-By trailers
|
||||
- Conventional commits
|
||||
- Push when done: GIT_SSH_COMMAND='ssh -i /home/c/.ssh/id_ed25519_dev -o IdentitiesOnly=yes' git push origin main
|
||||
- Run tests if the project has them
|
||||
- Report: what you did, what changed, any blockers
|
||||
|
||||
STATUS: DONE | DONE_WITH_CONCERNS | NEEDS_CONTEXT | BLOCKED",
|
||||
subagent_type: "general-purpose",
|
||||
isolation: "worktree" # Only for L/XL tasks; S/M tasks run directly
|
||||
)
|
||||
```
|
||||
|
||||
**CRITICAL: Spawn all batch agents in a SINGLE message.** This enables parallel execution.
|
||||
Do not spawn them sequentially.
|
||||
|
||||
### Step 3: Mark Running
|
||||
|
||||
After spawning, update the queue:
|
||||
|
||||
```bash
|
||||
# For each spawned task
|
||||
./scripts/ws start <task-id> # or manually update queue.json status to "running"
|
||||
```
|
||||
|
||||
If `./scripts/ws start` doesn't exist, update queue.json directly:
|
||||
```python
|
||||
task["status"] = "running"
|
||||
# Write back to docs/orchestra/queue.json
|
||||
```
|
||||
|
||||
### Step 4: Collect Results
|
||||
|
||||
As agents complete, process their results:
|
||||
|
||||
1. **Parse status token** from agent output (last line: `STATUS: DONE|...`)
|
||||
2. **Based on status**:
|
||||
- `DONE` → mark completed, note result
|
||||
- `DONE_WITH_CONCERNS` → mark completed, log concerns for user review
|
||||
- `NEEDS_CONTEXT` → mark pending, add concern to notes, skip for now
|
||||
- `BLOCKED` → mark failed, add blocker to notes
|
||||
3. **Update queue**:
|
||||
```bash
|
||||
./scripts/ws done <task-id> -r "<summary of what was done>"
|
||||
# or
|
||||
./scripts/ws fail <task-id> -r "<reason>"
|
||||
```
|
||||
|
||||
### Step 5: Report and Loop
|
||||
|
||||
After batch completes, show sprint status:
|
||||
|
||||
```
|
||||
── Sprint Batch 1 ──────────────────────────────
|
||||
✓ writing.colette fanout run done (45s)
|
||||
✓ book.3sets validation done (30s)
|
||||
△ book.sos meta-book concept needs_context (missing outline)
|
||||
✓ tool.archeflow af-review mode done (60s)
|
||||
|
||||
Queue: 3 completed, 1 blocked, 3 remaining
|
||||
Next batch: 2 items ready
|
||||
────────────────────────────────────────────────
|
||||
```
|
||||
|
||||
Then **immediately select and dispatch the next batch** (Step 1). Don't wait for user input in AUTONOM mode.
|
||||
|
||||
### Step 6: Sprint Complete
|
||||
|
||||
When no more tasks are schedulable (all done, blocked, or P3-only):
|
||||
|
||||
1. Update `docs/control-center.md` Handoff section
|
||||
2. Run `./scripts/ws log --summary "<sprint summary>"` if available
|
||||
3. Show final sprint report:
|
||||
|
||||
```
|
||||
── Sprint Complete ─────────────────────────────
|
||||
Duration: 12 min
|
||||
Tasks: 5 completed, 1 blocked, 1 remaining (P3)
|
||||
Projects touched: 4
|
||||
Commits: 7
|
||||
────────────────────────────────────────────────
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Mode Behavior
|
||||
|
||||
### AUTONOM
|
||||
- Dispatch immediately, no user confirmation
|
||||
- Commit + push after each agent completes
|
||||
- Only pause for BLOCKED tasks or budget exhaustion
|
||||
- Report between batches (one-line status)
|
||||
|
||||
### ATTENDED
|
||||
- Show the selected batch before dispatching
|
||||
- Wait for user to approve: "Proceed with this batch? [y/n]"
|
||||
- After each batch, show results and ask: "Continue to next batch? [y/n/edit]"
|
||||
- "edit" lets the user reprioritize before next batch
|
||||
|
||||
### PAUSED
|
||||
- Show queue status only
|
||||
- Do not dispatch any agents
|
||||
- Useful for reviewing state between sessions
|
||||
|
||||
---
|
||||
|
||||
## When to Use ArcheFlow Orchestration Within Sprint
|
||||
|
||||
Most sprint tasks should be **direct agent dispatch** (no PDCA/pipeline overhead).
|
||||
Only escalate to full orchestration when:
|
||||
|
||||
| Signal | Action |
|
||||
|--------|--------|
|
||||
| Task is S/M, clear scope, single project | Direct dispatch |
|
||||
| Task is L/XL | Use pipeline or PDCA strategy |
|
||||
| Task mentions "security", "auth", "encryption" | Add Guardian review |
|
||||
| Task is a review/audit | Spawn reviewers only (af-review mode) |
|
||||
| Task failed in a previous sprint | Escalate to PDCA with Explorer |
|
||||
|
||||
The sprint runner's job is **throughput**, not perfection. Ship fast, fix forward.
|
||||
|
||||
---
|
||||
|
||||
## Integration with Existing Tools
|
||||
|
||||
| Tool | How sprint uses it |
|
||||
|------|-------------------|
|
||||
| `./scripts/ws next` | Get next schedulable task |
|
||||
| `./scripts/ws done <id>` | Mark task completed |
|
||||
| `./scripts/ws fail <id>` | Mark task failed |
|
||||
| `./scripts/ws orient` | Initial workspace overview |
|
||||
| `./scripts/ws validate` | Pre-flight queue validation |
|
||||
| `git` per project | Commit + push after each agent |
|
||||
| `archeflow:run` | Only for L/XL tasks needing PDCA |
|
||||
|
||||
---
|
||||
|
||||
## Error Recovery
|
||||
|
||||
- **Agent crashes mid-task**: Mark task as `failed`, add error to notes, continue with next batch
|
||||
- **Git push fails**: Log the error, do NOT retry. User will handle push conflicts manually.
|
||||
- **Queue file corrupted**: Run `./scripts/ws validate`. If invalid, stop sprint and report.
|
||||
- **Budget exceeded**: Stop sprint, report remaining tasks and estimated cost.
|
||||
- **All tasks blocked**: Report dependency graph, suggest which blockers to resolve first.
|
||||
@@ -11,22 +11,33 @@ Multi-agent orchestration using archetypal roles and PDCA quality cycles.
|
||||
|
||||
On activation, print ONE line:
|
||||
```
|
||||
archeflow v0.3.0 · 25 skills · <domain> domain
|
||||
archeflow v0.7.0 · 25 skills · <domain> domain
|
||||
```
|
||||
Where `<domain>` is auto-detected: `writing` if `colette.yaml` exists, `research` if paper/thesis files exist, `code` otherwise. Then proceed silently — no further announcement unless `archeflow:run` is invoked.
|
||||
|
||||
During runs, follow the `archeflow:presence` skill for output format: show outcomes not mechanics, one line per phase, value at the end.
|
||||
|
||||
## IMPORTANT: When to Activate
|
||||
## IMPORTANT: When to Use What
|
||||
|
||||
You MUST use ArcheFlow orchestration (load `archeflow:orchestration` skill and follow its steps) for any task that matches:
|
||||
### Use `/af-sprint` (primary mode) when:
|
||||
- User says "run the sprint", "work the queue", "go autonomous"
|
||||
- Multiple tasks are pending across projects
|
||||
- The workspace queue (`docs/orchestra/queue.json`) has pending items
|
||||
|
||||
- **New features** -- any feature touching 2+ files
|
||||
- **Refactoring** -- structural changes across modules
|
||||
- **Security-sensitive changes** -- auth, encryption, input handling, API keys
|
||||
- **Bug fixes with unclear root cause** -- use Explorer to investigate first
|
||||
- **Code review requests** -- spawn Guardian + relevant reviewers
|
||||
- **Multi-file changes** -- anything beyond a single-file edit
|
||||
### Use `/af-review` when:
|
||||
- User wants to review code before merging
|
||||
- A diff, branch, or commit range needs quality check
|
||||
- Security-sensitive changes need Guardian analysis
|
||||
|
||||
### Use `/af-run` (deep orchestration) when:
|
||||
- **Writing/research tasks** -- archetypes add value where linters don't exist
|
||||
- **Security-sensitive code changes** -- auth, encryption, API keys
|
||||
- **Complex multi-module refactors** with unclear approach
|
||||
|
||||
### Do NOT use ArcheFlow for:
|
||||
- **Single-feature code development** -- use the `feature-dev` plugin or work directly
|
||||
- **Simple fixes** -- just do them
|
||||
- **Questions, exploration, reading** -- no code changes needed
|
||||
|
||||
Choose the workflow based on risk:
|
||||
|
||||
@@ -146,7 +157,6 @@ Read `.archeflow/session-log.md` and show the last 5 orchestration summaries in
|
||||
|
||||
### Quality and Safety
|
||||
- **archeflow:shadow-detection** -- Quantitative dysfunction detection and correction
|
||||
- **archeflow:attention-filters** -- Context optimization per archetype
|
||||
- **archeflow:convergence** -- Detects convergence, stalling, and oscillation in multi-cycle runs
|
||||
- **archeflow:artifact-routing** -- Inter-phase artifact protocol for naming, storage, and routing
|
||||
|
||||
|
||||
@@ -25,6 +25,10 @@ ArcheFlow's PDCA cycles spiral upward through iterations — each cycle incorpor
|
||||
│ Plan (design) ← Cycle 1 (initial)
|
||||
```
|
||||
|
||||
## Strategy vs Workflow
|
||||
|
||||
A **strategy** defines the execution shape: PDCA is cyclic (Plan-Do-Check-Act with feedback loops), pipeline is linear (Plan-Implement-Review-Verify, no cycle-back). A **workflow** defines the depth: fast uses fewer agents and cycles, thorough uses more. Strategy and workflow are orthogonal — you can run a `fast` workflow with either strategy, though `thorough` always uses PDCA because linear flows cannot iterate on findings.
|
||||
|
||||
## Built-in Workflows
|
||||
|
||||
### `fast` — Single Turn
|
||||
|
||||
Reference in New Issue
Block a user