From 6854e858a4e32b60bf042eda9a76cd6dc00acf4a Mon Sep 17 00:00:00 2001 From: Christian Nennemann Date: Sat, 4 Apr 2026 09:35:55 +0200 Subject: [PATCH] fix: address v0.7.0 review findings - Auto-select: fast workflow now maps to pipeline strategy (was falling through to pdca) - Evidence validation: check for missing evidence markers, not just banned phrases - Remove sed-based artifact mutation (avoids table row corruption), track downgrades in events only - Pipeline verify: explicit merge guard prevents merging before tests/re-review pass --- skills/run/SKILL.md | 70 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/skills/run/SKILL.md b/skills/run/SKILL.md index 3bb724a..8d5ec45 100644 --- a/skills/run/SKILL.md +++ b/skills/run/SKILL.md @@ -82,6 +82,8 @@ if [[ "$STRATEGY" == "auto" ]]; then STRATEGY="pipeline" elif echo "$TASK_LOWER" | grep -qE '(refactor|redesign|review)'; then STRATEGY="pdca" + elif [[ "$WORKFLOW" == "fast" ]]; then + STRATEGY="pipeline" elif [[ "$WORKFLOW" == "thorough" ]]; then STRATEGY="pdca" else @@ -510,30 +512,59 @@ After each returns, emit `review.verdict` and save artifact. #### 3c-ii. Evidence Validation -After all reviewers complete, scan their outputs for banned phrases in CRITICAL/WARNING findings. Downgrade unsupported findings before proceeding to Act. +After all reviewers complete, scan CRITICAL/WARNING findings for two conditions: +1. **Banned phrases** — hedged language without evidence +2. **Missing evidence** — no command output, code citation, or reproduction steps + +Downgrade unsupported findings to INFO before proceeding to Act. ```bash BANNED_PHRASES=("might be" "could potentially" "appears to" "seems like" "may not") +EVIDENCE_MARKERS=("exit" "output" "line [0-9]" ":[0-9]" "returned" "FAIL" "PASS" "assert") for artifact in .archeflow/artifacts/${RUN_ID}/check-*.md; do REVIEWER=$(basename "$artifact" .md | sed 's/check-//') - while IFS= read -r line; do + + # Read findings table rows (CRITICAL and WARNING only) + grep -E '\| (CRITICAL|WARNING) \|' "$artifact" 2>/dev/null | while IFS= read -r line; do SEVERITY=$(echo "$line" | grep -oE '(CRITICAL|WARNING)' | head -1) - [[ -z "$SEVERITY" ]] && continue + DOWNGRADE_REASON="" + + # Check 1: banned phrases for phrase in "${BANNED_PHRASES[@]}"; do if echo "$line" | grep -qi "$phrase"; then - echo "EVIDENCE DOWNGRADE: $REVIEWER finding uses '$phrase' — downgrading to INFO" - ./lib/archeflow-event.sh "$RUN_ID" decision check "" \ - '{"what":"evidence_downgrade","from":"'"$SEVERITY"'","to":"INFO","reviewer":"'"$REVIEWER"'","reason":"banned phrase: '"$phrase"'"}' - # Replace severity in artifact - sed -i "s/$line/$(echo "$line" | sed "s/$SEVERITY/INFO/")/" "$artifact" + DOWNGRADE_REASON="banned phrase: $phrase" break fi done - done < "$artifact" + + # Check 2: no evidence markers (only if not already flagged) + if [[ -z "$DOWNGRADE_REASON" ]]; then + HAS_EVIDENCE=false + for marker in "${EVIDENCE_MARKERS[@]}"; do + if echo "$line" | grep -qiE "$marker"; then + HAS_EVIDENCE=true + break + fi + done + if [[ "$HAS_EVIDENCE" == "false" ]]; then + DOWNGRADE_REASON="no evidence cited" + fi + fi + + if [[ -n "$DOWNGRADE_REASON" ]]; then + echo "EVIDENCE DOWNGRADE: $REVIEWER $SEVERITY finding — $DOWNGRADE_REASON" + ./lib/archeflow-event.sh "$RUN_ID" decision check "" \ + '{"what":"evidence_downgrade","from":"'"$SEVERITY"'","to":"INFO","reviewer":"'"$REVIEWER"'","reason":"'"$DOWNGRADE_REASON"'"}' + # Note: the orchestrator tracks downgraded findings separately — + # do not modify the artifact file (avoids sed corruption on table rows) + fi + done done ``` +**Important:** Downgraded findings are tracked in events, NOT by modifying artifact files. The Act phase reads the decision events to know which findings were downgraded and excludes them from CRITICAL tallies. + #### 3d. Phase Transition: Check to Act Collect all verdict seq numbers for the parent array. @@ -822,9 +853,24 @@ Run the project's test suite. If tests pass and no CRITICAL findings exist: If CRITICAL findings exist: -1. Spawn Maker for a **single targeted fix** — provide only the CRITICAL findings as context -2. Re-run tests -3. If still failing, report to user (do not cycle back) +1. **Do NOT merge yet** — the branch remains separate +2. Spawn Maker for a **single targeted fix** — provide only the CRITICAL findings as context +3. Re-run the reviewer(s) that raised the CRITICAL finding(s) on just the fixed files +4. Re-run test suite +5. If tests pass and re-review approves: merge +6. If still failing after this one fix attempt: **abort** — do NOT merge, report to user with the branch name for manual resolution + +```bash +# Pipeline verify: explicit merge guard +if [[ "$VERIFY_PASS" == "true" ]]; then + ./lib/archeflow-git.sh merge "$RUN_ID" --no-ff + ./lib/archeflow-rollback.sh "$RUN_ID" # post-merge test validation +else + echo "Pipeline aborted: CRITICAL findings not resolved after 1 fix attempt." + echo "Branch: archeflow/$RUN_ID (not merged)" + # Emit run.complete with status: aborted +fi +``` WARNINGs are logged in the run event but do not block the merge.