claude-archeflow-plugin/lib/archeflow-replay.sh

#!/usr/bin/env bash
# archeflow-replay.sh — Inspect recorded runs: decision timeline and weighted what-if replay.
#
# Usage:
#   archeflow-replay.sh timeline <run_id>
#   archeflow-replay.sh whatif <run_id> [--weights arch=w,arch2=w2] [--threshold 0.5] [--json]
#   archeflow-replay.sh compare <run_id> [--weights ...] [--threshold ...] [--json]
#
# Events file: .archeflow/events/<run_id>.jsonl (relative to current working directory)
#
# whatif / compare:
#   - Loads check-phase review.verdict events (last verdict per archetype).
#   - Original gate (strict): BLOCK if any reviewer is not approved.
#   - Replay gate (weighted): BLOCK if sum(weight * strict) / sum(weight) >= threshold,
#     where strict=1 for non-approved verdicts, else 0. Default weight per archetype is 1.0.
#
# Requires: jq

# Strict mode: stop on failing commands, unset variables and failed pipeline stages.
set -euo pipefail

# A sub-command and a run id are both mandatory; without them print the usage
# text to stderr and fail.
if (( $# < 2 )); then
  printf '%s\n' \
    "Usage: $0 {timeline|whatif|compare} <run_id> [options]" \
    "" \
    "  timeline <run_id>              Decision timeline (decision.point + review.verdict)" \
    "  whatif <run_id> [--weights k=v,...] [--threshold 0.5] [--json]" \
    "  compare <run_id>  (timeline + whatif summary)" >&2
  exit 1
fi

# Capture the two positionals; whatever remains in "$@" is forwarded to the
# selected sub-command as its options.
COMMAND=$1
RUN_ID=$2
shift 2

# Every sub-command shells out to jq, so refuse to start without it.
command -v jq >/dev/null 2>&1 || {
  printf '%s\n' "Error: jq is required." >&2
  exit 1
}

# JSONL event log of the requested run, relative to the current working directory.
EVENT_FILE=".archeflow/events/${RUN_ID}.jsonl"

# resolve_event_file
# Verifies that the event log named by EVENT_FILE exists as a regular file.
# Globals: EVENT_FILE (read).
# Returns 0 when the file is present; otherwise reports the missing path on
# stderr and terminates the shell with status 1.
resolve_event_file() {
  if [[ -f "$EVENT_FILE" ]]; then
    return 0
  fi
  printf '%s\n' "Error: event file not found: $EVENT_FILE" >&2
  exit 1
}

# cmd_timeline
# Prints a Markdown-style timeline of the run to stdout: first every
# decision.point event, then every review.verdict event whose phase is "check".
# Each section falls back to a "(none ...)" placeholder when it has no events.
# Globals: RUN_ID, EVENT_FILE (read).
cmd_timeline() {
  resolve_event_file

  # jq programs that render one bullet line per matching event.
  local decision_row='select(.type == "decision.point")
    | "- \(.ts)  [\(.phase)] \(.data.archetype // .agent // "?")  \(.data.decision)  conf=\(.data.confidence // "n/a")  input=\(.data.input // "")"'
  local verdict_row='select(.type == "review.verdict" and .phase == "check")
    | "- \(.ts)  \(.data.archetype // .agent // "?")  verdict=\(.data.verdict)  findings=\((.data.findings // []) | length)"'

  printf '%s\n\n' "## Decision timeline — run_id=${RUN_ID}"

  # --- decision.point section -------------------------------------------
  local decision_total
  decision_total=$(jq -s 'map(select(.type == "decision.point")) | length' "$EVENT_FILE")
  if [[ "$decision_total" -le 0 ]]; then
    printf '%s\n' \
      "### decision.point" \
      "(none — emit with ./lib/archeflow-decision.sh during the run)"
  else
    printf '%s\n' "### decision.point (${decision_total})"
    jq -r "$decision_row" "$EVENT_FILE"
  fi
  printf '\n'

  # --- review.verdict section (check phase only) ------------------------
  printf '%s\n' "### review.verdict (check phase)"
  # The probe is silenced on purpose: any jq failure is treated like "no verdicts".
  if ! jq -e -s 'map(select(.type == "review.verdict" and .phase == "check")) | length > 0' \
    "$EVENT_FILE" >/dev/null 2>&1; then
    printf '%s\n' "(none)"
  else
    jq -r "$verdict_row" "$EVENT_FILE"
  fi
  printf '\n'
}

# parse_weights_to_json <spec>
# Converts a comma-separated weight spec such as "Guardian=2, sage=0.5" into a
# JSON object ({"guardian":2,"sage":0.5}) printed on stdout.
#
# Arguments:
#   $1 - weight spec; empty or missing yields "{}".
# Behaviour:
#   - Keys are trimmed and lower-cased; values are trimmed.
#   - Empty entries (e.g. from "a=1,,b=2") are skipped.
#   - When a key is repeated the last entry wins.
# Exits 1 with a message on stderr when an entry has no '=', has an empty key,
# or has a value that is not a plain number.
parse_weights_to_json() {
  local raw="${1:-}"
  local obj='{}'
  # Numeric forms only; keeps booleans, null, strings and arrays out of the
  # weights object, where they would break or silently skew the replay maths.
  local num_re='^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
  local -a pairs=()
  local pair k v

  if [[ -z "$raw" ]]; then
    echo '{}'
    return
  fi

  IFS=',' read -ra pairs <<< "$raw"
  for pair in ${pairs[@]+"${pairs[@]}"}; do
    [[ -z "$pair" ]] && continue

    k="${pair%%=*}"
    v="${pair#*=}"
    # Trim surrounding whitespace with parameter expansion; unlike xargs this
    # never chokes on quotes or backslashes in the input.
    k="${k#"${k%%[![:space:]]*}"}"
    k="${k%"${k##*[![:space:]]}"}"
    v="${v#"${v%%[![:space:]]*}"}"
    v="${v%"${v##*[![:space:]]}"}"
    k=$(printf '%s' "$k" | tr '[:upper:]' '[:lower:]')

    # Test the raw entry for '=' directly: comparing the normalised key with
    # the raw entry misses inputs like "Guardian" or " guardian".
    if [[ "$pair" != *=* || -z "$k" || ! "$v" =~ $num_re ]]; then
      echo "Error: invalid weight entry (use arch=1.5): $pair" >&2
      exit 1
    fi

    obj=$(jq --arg k "$k" --argjson v "$v" '. + {($k): $v}' <<< "$obj") || {
      echo "Error: could not encode weight entry: $pair" >&2
      exit 1
    }
  done

  printf '%s\n' "$obj"
}

# cmd_whatif [--weights arch=w,...] [--threshold N] [--json]
#
# Re-evaluates the run's check-phase review.verdict events (last verdict per
# archetype, ordered by .seq) under a weighted gate and reports the result
# next to the original strict "any veto" gate.
#
# Options:
#   --weights k=v,...  Per-archetype weights, parsed by parse_weights_to_json;
#                      archetypes without an entry weigh 1.0.
#   --threshold N      Replay gate BLOCKs when the weighted strictness is
#                      >= N (default 0.5).
#   --json             Print the raw JSON result instead of the Markdown report.
#
# Globals:  RUN_ID, EVENT_FILE (read).
# Outputs:  report on stdout. A run without check-phase verdicts produces an
#           object with an "error" field (rendered as an "Error: ..." line).
# Exits:    1 on an unknown option, an option that lacks its value, or a
#           threshold that is not a number.
cmd_whatif() {
  local weights_str=""
  local threshold="0.5"
  local json_out="false"
  local num_re='^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --weights)
        # Without this guard a trailing --weights dies with "unbound variable".
        if [[ $# -lt 2 ]]; then
          echo "Error: --weights requires a value (e.g. --weights guardian=2,sage=0.5)" >&2
          exit 1
        fi
        weights_str="$2"
        shift 2
        ;;
      --threshold)
        if [[ $# -lt 2 ]]; then
          echo "Error: --threshold requires a value (e.g. --threshold 0.5)" >&2
          exit 1
        fi
        # The value is injected with --argjson; reject anything non-numeric up
        # front so a JSON string or boolean cannot silently distort the gate.
        if [[ ! "$2" =~ $num_re ]]; then
          echo "Error: --threshold must be a number: $2" >&2
          exit 1
        fi
        threshold="$2"
        shift 2
        ;;
      --json)
        json_out="true"
        shift
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done

  resolve_event_file
  local weights_json
  weights_json="$(parse_weights_to_json "$weights_str")"

  # Single jq pass: pick the last check-phase verdict per (lower-cased)
  # archetype, score each as strict=0 (approved/approve) or 1 (anything else,
  # including a missing verdict), then derive both gate outcomes.
  local result
  result=$(jq -s --argjson weights "$weights_json" --argjson thr "$threshold" --arg run_id "$RUN_ID" '
    def strict($v):
      if $v == null then 1
      else ($v | ascii_downcase) as $lv
      | if ($lv == "approved" or $lv == "approve") then 0 else 1 end
      end;

    def norm_key: ascii_downcase;

    ([.[] | select(.type == "review.verdict" and .phase == "check")]
      | sort_by(.seq)
      | reduce .[] as $e ({}; . + { (($e.data.archetype // $e.agent // "unknown") | norm_key): $e })
    ) as $last |

    ($last | keys) as $keys |
    if ($keys | length) == 0 then
      {
        run_id: $run_id,
        error: "no check-phase review.verdict events; nothing to simulate"
      }
    else
      [ $keys[] as $k | $last[$k] as $ev |
        ($weights[($k | norm_key)] // 1.0) as $w
        | strict($ev.data.verdict) as $s
        | {
            archetype: ($ev.data.archetype // $ev.agent // $k),
            verdict: ($ev.data.verdict // "unknown"),
            weight: $w,
            strict: $s,
            weighted_contrib: ($w * $s)
          }
      ] as $rows |
      ($rows | map(.weighted_contrib) | add) as $num |
      ($rows | map(.weight) | add) as $den |
      (if $den > 0 then ($num / $den) else 0 end) as $ratio |
      (if ($rows | map(.strict) | max) == 1 then "BLOCK" else "SHIP" end) as $strict_out |
      (if $ratio >= $thr then "BLOCK" else "SHIP" end) as $replay_out |
      {
        run_id: $run_id,
        threshold: $thr,
        weights_used: $weights,
        strict_any_veto: {
          outcome: $strict_out,
          description: "BLOCK if any reviewer verdict is not approved"
        },
        weighted_replay: {
          weighted_strictness: ($ratio * 1000 | round / 1000),
          outcome: $replay_out,
          description: ("BLOCK if weighted strictness >= " + ($thr | tostring))
        },
        reviewers: $rows
      }
    end
  ' "$EVENT_FILE")

  if [[ "$json_out" == "true" ]]; then
    echo "$result"
  else
    # Render the JSON result as a Markdown report.
    echo "$result" | jq -r '
      if .error then "Error: \(.error)" else
        "# What-if replay — run_id=\(.run_id)\n",
        "",
        "## Outcomes",
        "| Model | Result |",
        "|-------|--------|",
        "| Original (any non-approve → BLOCK) | \(.strict_any_veto.outcome) |",
        "| Weighted replay (threshold=\(.threshold)) | \(.weighted_replay.outcome) |",
        "",
        "## Weighted strictness",
        "\(.weighted_replay.weighted_strictness)  (0 = all approved, 1 = all blocking)",
        "",
        "## Per reviewer",
        "| Archetype | Verdict | Weight | Strict | w×strict |",
        "|-----------|---------|--------|--------|----------|",
        (.reviewers[] | "| \(.archetype) | \(.verdict) | \(.weight) | \(.strict) | \(.weighted_contrib) |"),
        "",
        (if (.weights_used | length) > 0 then
          "## Custom weights applied\n" + (.weights_used | to_entries | map("- \(.key): \(.value)") | join("\n")) + "\n"
        else empty end)
      end
    '
  fi
}

# cmd_compare [whatif options...]
# Prints the decision timeline, a separating blank line, and then the what-if
# replay; every argument is handed to cmd_whatif unchanged.
cmd_compare() {
  cmd_timeline
  printf '\n'
  cmd_whatif "$@"
}

# Dispatch to the requested sub-command. timeline takes no options; whatif and
# compare receive the remaining command-line arguments.
if [[ "$COMMAND" == "timeline" ]]; then
  cmd_timeline
elif [[ "$COMMAND" == "whatif" ]]; then
  cmd_whatif "$@"
elif [[ "$COMMAND" == "compare" ]]; then
  cmd_compare "$@"
else
  echo "Unknown command: $COMMAND" >&2
  exit 1
fi