New crates:
- quicproquo-bot: Bot SDK with polling API + JSON pipe mode
- quicproquo-kt: Key Transparency Merkle log (RFC 9162 subset)
- quicproquo-plugin-api: no_std C-compatible plugin vtable API
- quicproquo-gen: scaffolding tool (qpq-gen plugin/bot/rpc/hook)

Server features:
- ServerHooks trait wired into all RPC handlers (enqueue, fetch, auth, channel, registration) with plugin rejection support
- Dynamic plugin loader (libloading) with --plugin-dir config
- Delivery proof canary tokens (Ed25519 server signatures on enqueue)
- Key Transparency Merkle log with inclusion proofs on resolveUser

Core library:
- Safety numbers (60-digit HMAC-SHA256 key verification codes)
- Verifiable transcript archive (CBOR + ChaCha20-Poly1305 + hash chain)
- Delivery proof verification utility
- Criterion benchmarks (hybrid KEM, MLS, identity, sealed sender, padding)

Client:
- /verify REPL command for out-of-band key verification
- Full-screen TUI via Ratatui (feature-gated --features tui)
- qpq export / qpq export-verify CLI subcommands
- KT inclusion proof verification on user resolution

Also: ROADMAP Phase 9 added, bot SDK docs, server hooks docs, crate-responsibilities updated, example plugins (rate_limit, logging).
1157 lines
52 KiB
Python
Executable File
1157 lines
52 KiB
Python
Executable File
|
|
#!/usr/bin/env python3
"""
quicproquo AI Team
==================

A multi-agent Claude team specialised for the quicproquo Rust workspace.
Agents cover development, security, testing, documentation, and infrastructure.

Usage:
    python scripts/ai_team.py "<task>"                  # orchestrator
    python scripts/ai_team.py --agent <name> "<task>"   # single agent
    python scripts/ai_team.py --sprint <name>           # parallel sprint
    python scripts/ai_team.py --parallel \\
        "rust-server-dev: Fix unwrap() in server" \\
        "security-auditor: Audit quicproquo-core"       # ad-hoc parallel
    python scripts/ai_team.py --pipeline                # full production readiness pipeline
    python scripts/ai_team.py --list-agents
    python scripts/ai_team.py --list-sprints

Requires:
    pip install claude-agent-sdk

The ANTHROPIC_API_KEY environment variable must be set.

Team specification: docs/AGENT-TEAM.md
"""
|
|
|
|
import argparse
|
|
import asyncio
|
|
import sys
|
|
import os
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Import the Claude Agent SDK, failing fast with an actionable message when
# the dependency is missing (it is a third-party package, not stdlib).
try:
    from claude_agent_sdk import (
        query,
        ClaudeAgentOptions,
        AgentDefinition,
        ResultMessage,
        SystemMessage,
        AssistantMessage,
        TextBlock,
        CLINotFoundError,
        CLIConnectionError,
    )
except ImportError:
    # Diagnostics belong on stderr so any stdout the caller pipes or
    # captures stays clean; previously these went to stdout.
    print("ERROR: claude-agent-sdk not found.", file=sys.stderr)
    print("Install with: pip install claude-agent-sdk", file=sys.stderr)
    sys.exit(1)
|
|
|
|
# ── Project root ───────────────────────────────────────────────────────────────
# This script lives in scripts/, so the repository root is two levels up.
_SCRIPT_PATH = Path(__file__)
PROJECT_ROOT = str(_SCRIPT_PATH.parent.parent.resolve())
|
|
|
|
# ── Shared project context injected into every agent's system prompt ───────────
# Single triple-quoted literal; .strip() removes the leading/trailing newlines
# so it embeds cleanly at the top of each agent's f-string prompt below.
# NOTE(review): tree-diagram column alignment reconstructed — confirm spacing
# against the original file.
PROJECT_CONTEXT = """
## Project: quicproquo

A production-grade end-to-end encrypted group messenger written in Rust.

### Transport stack
TCP → Noise_XX (snow) → ChaCha20-Poly1305 encrypted channel → Cap'n Proto RPC

### Workspace layout
```
quicproquo/
├── Cargo.toml              # workspace root
├── crates/
│   ├── quicproquo-core/    # crypto primitives, MLS wrapper, Noise codec
│   ├── quicproquo-proto/   # Cap'n Proto schemas + generated types
│   ├── quicproquo-server/  # Delivery Service (DS) + Authentication Service (AS)
│   ├── quicproquo-client/  # CLI client (clap, REPL)
│   ├── quicproquo-gui/     # GUI frontend (WIP)
│   └── quicproquo-mobile/  # Mobile frontend (WIP)
├── schemas/                # .capnp schema files (canonical source)
├── docker/ + docker-compose.yml
├── docs/
├── scripts/
├── ROADMAP.md              # phased milestone plan
└── master-prompt.md        # full architecture reference
```

### Non-negotiable engineering standards
- Production-ready only — no stubs, todo!(), unimplemented!(), or placeholder logic.
- YAGNI / KISS / DRY.
- Spec-first: doc comments before implementation.
- Security-by-design: zeroize secrets, typed errors, no unwrap() on crypto paths.
- Conventional commits: feat:, fix:, chore:, docs:, test:, refactor:.
- No Co-authored-by trailers. GPG-signed commits only.

### Key dependencies (pinned majors)
openmls 0.5, openmls_rust_crypto 0.2, ml-kem 0.2, x25519-dalek 2, ed25519-dalek 2,
snow 0.9, chacha20poly1305 0.10, capnp 0.19, capnp-rpc 0.19, tokio 1,
tokio-util 0.7, dashmap 5, rusqlite (SQLite), tracing 0.1, anyhow 1, thiserror 1, clap 4.

Always read ROADMAP.md and master-prompt.md before making architectural decisions.
""".strip()
|
|
|
|
|
|
# ── Agent definitions ──────────────────────────────────────────────────────────
# Registry of specialised agents, keyed by the name used on the CLI
# (--agent <name>) and in sprint task specs. Every prompt embeds the shared
# PROJECT_CONTEXT. Analysis-only agents (architect, auditor, tracker) are
# given read-only tools; developer agents additionally get Edit/Write/Bash.

AGENTS: dict[str, AgentDefinition] = {

    # Design/review only — deliberately has no Edit/Write/Bash tools.
    "rust-architect": AgentDefinition(
        description=(
            "Senior Rust architect for quicproquo. Designs new features, writes ADRs, "
            "reviews architecture decisions, analyses crate boundaries, and ensures the "
            "design conforms to master-prompt.md. Does NOT write implementation code."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Rust Architect** for quicproquo.

Responsibilities:
- Read ROADMAP.md and master-prompt.md to understand the current milestone and constraints.
- Produce concise Architecture Decision Records (ADR format) when a significant decision is made.
- Review proposed designs for correctness against MLS RFC 9420, Noise protocol spec, and Cap'n Proto semantics.
- Identify crate-boundary violations (e.g. I/O in quicproquo-proto, crypto in quicproquo-server).
- Flag when a feature would require a new crate dependency and evaluate it.
- Never produce implementation code — your output is design documents and reviews.

Output format:
1. One-sentence summary of the architectural concern.
2. ADR (if applicable): Context → Decision → Consequences.
3. Concrete list of action items for the development agents.
""",
        tools=["Read", "Glob", "Grep"],
    ),

    # Owns the quicproquo-core crate (crypto primitives and codecs).
    "rust-core-dev": AgentDefinition(
        description=(
            "Implements quicproquo-core: Noise_XX handshake, Cap'n Proto frame codec, "
            "MLS group state machine, hybrid PQ KEM (X25519 + ML-KEM-768), key types "
            "with zeroize-on-drop, and all crypto primitives."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Core Developer** for quicproquo, responsible for the `quicproquo-core` crate.

Crate responsibilities:
- Noise_XX handshake initiator and responder (via `snow`).
- Length-prefixed Cap'n Proto frame codec (Tokio Encoder/Decoder traits).
- MLS group state machine wrapper around `openmls`.
- Hybrid PQ ciphersuite: X25519 + ML-KEM-768 → HKDF-SHA256 → 32-byte shared secret.
- Key generation, zeroize-on-drop key types.
- OPAQUE password auth helper types.

Before any edit:
1. Read the relevant source file(s) in full.
2. Check ROADMAP.md for the current milestone scope.
3. Confirm no new dependencies are needed or justify additions.

After any edit: run `cargo check -p quicproquo-core` to verify compilation.

Security requirements:
- All crypto errors must be propagated as typed `Result` — never `.unwrap()`.
- Key material structs must derive `Zeroize` and `ZeroizeOnDrop`.
- No secret bytes in log output.
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),

    # Owns the quicproquo-server crate (AS + DS services).
    "rust-server-dev": AgentDefinition(
        description=(
            "Implements quicproquo-server: TCP listener, Noise handshake per connection, "
            "Cap'n Proto RPC server for the Authentication Service (AS) and Delivery "
            "Service (DS), fan-out router, per-group message log, SQLite persistence."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Server Developer** for quicproquo, responsible for the `quicproquo-server` crate.

Crate responsibilities:
- Tokio TCP listener; one task per connection.
- Noise_XX responder using quicproquo-core.
- Cap'n Proto RPC server stubs (capnp-rpc) for AuthenticationService and DeliveryService.
- Authentication Service: KeyPackage store (DashMap → SQLite at M6).
- Delivery Service: fan-out router, per-group append-only message log.
- Structured logging via `tracing`.

Before any edit:
1. Read the relevant source file(s) in full.
2. Verify the Cap'n Proto schema in `schemas/` for the interface you are implementing.
3. Check ROADMAP.md for what is in scope.

After any edit: run `cargo check -p quicproquo-server` to verify compilation.

Security requirements:
- No `.unwrap()` on any lock or I/O operation in production paths.
- Auth tokens validated before any privileged operation.
- `QPQ_PRODUCTION=true` check: reject weak/default tokens on startup.
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),

    # Owns the quicproquo-client crate (CLI + REPL).
    "rust-client-dev": AgentDefinition(
        description=(
            "Implements quicproquo-client: CLI (clap), interactive REPL, Noise handshake, "
            "Cap'n Proto RPC client stubs, OPAQUE login/register, encrypted local state "
            "(SQLCipher + Argon2id), conversation and session management."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Client Developer** for quicproquo, responsible for the `quicproquo-client` crate.

Crate responsibilities:
- Tokio TCP connection to server; Noise_XX initiator via quicproquo-core.
- Cap'n Proto RPC client stubs.
- OPAQUE password-authenticated key exchange (register + login).
- CLI interface (clap) with subcommands and an interactive REPL.
- Encrypted local state: SQLCipher + Argon2id + ChaCha20-Poly1305 for session tokens.
- Conversation management, background polling, message history.

Before any edit:
1. Read the relevant source file(s) in full.
2. Understand existing command handlers in `commands.rs` and state management in `state.rs`.
3. Check ROADMAP.md for the current milestone scope.

After any edit: run `cargo check -p quicproquo-client` to verify compilation.

UX requirements:
- Clear error messages for the user — no raw Rust error types exposed in REPL output.
- REPL prompt must show current context (server, active conversation).
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),

    # Read-only auditor: produces finding reports, never edits files.
    "security-auditor": AgentDefinition(
        description=(
            "Security-focused auditor for quicproquo. Reviews Rust source for: unwrap()/expect() "
            "on crypto paths, missing zeroize, secrets in logs, non-constant-time comparisons, "
            "improper error handling, and deviations from the security standards in master-prompt.md. "
            "Produces a prioritised finding report — does NOT edit files."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Security Auditor** for quicproquo.

Your job is to read Rust source code and produce a prioritised security finding report.

Audit checklist:
1. `.unwrap()` / `.expect()` in non-test code on crypto or I/O operations.
2. Key material types missing `Zeroize` / `ZeroizeOnDrop`.
3. Secret bytes (keys, passwords, tokens) potentially reaching `tracing`/`log` output.
4. Non-constant-time comparisons on authentication tags or tokens.
5. `panic!` / `unreachable!` in production paths.
6. `unsafe` blocks without documented safety invariants.
7. Missing `#[cfg(not(test))]` guards around debug-only logic.
8. Deviations from the engineering standards in master-prompt.md.
9. Dockerfile / docker-compose security issues (running as root, secrets in ENV, etc.).

Output format (Markdown):
## Security Audit Report

### Critical
- [file:line] Description. Remediation: ...

### High
- ...

### Medium
- ...

### Low / Informational
- ...

Do NOT edit any files. Findings only.
""",
        tools=["Read", "Glob", "Grep"],
    ),

    # Writes and runs tests; may edit test files but not production code
    # without instruction (enforced in the prompt, not the tool list).
    "test-engineer": AgentDefinition(
        description=(
            "Writes and runs tests for quicproquo. Adds unit tests, integration tests, "
            "and property-based tests. Runs `cargo test` and interprets failures. "
            "Knows the milestone-by-milestone test requirements from ROADMAP.md."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Test Engineer** for quicproquo.

Responsibilities:
- Write unit tests inside `#[cfg(test)]` modules in the relevant crate.
- Write integration tests in `crates/<crate>/tests/`.
- Run `cargo test --workspace` and interpret failures.
- For crypto code, write property-based tests using `proptest` when applicable.
- Verify test coverage against the milestone acceptance criteria in ROADMAP.md.

Test naming convention: `test_<what>_<expected_outcome>` (snake_case).

After writing tests, run them with Bash and report:
- Which tests pass / fail.
- Root cause of any failure.
- Suggested fix (but do not edit non-test files without instruction).
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),

    # Owns infrastructure: Docker, CI/CD, release automation.
    "devops-engineer": AgentDefinition(
        description=(
            "Infrastructure and deployment engineer for quicproquo. Owns Docker, CI/CD "
            "(GitHub Actions), deployment configs, cross-compilation, monitoring setup, "
            "release automation, and binary size optimisation. Edits docker/, .github/, "
            "docker-compose.yml, and infrastructure scripts."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **DevOps Engineer** for quicproquo.

You own: `docker/`, `.github/`, `docker-compose.yml`, deployment configs, CI pipelines.

Responsibilities:
- Docker image builds: multi-stage, minimal final image, non-root user, security hardening.
- GitHub Actions CI: build matrix, test parallelism, caching, artifact publishing.
- Release automation: cargo-release workflow, CHANGELOG generation, version tagging.
- Cross-compilation: musl static builds for x86_64, armv7, aarch64 (OpenWrt targets).
- Monitoring: Prometheus metrics endpoint stub, health check endpoint.
- Infrastructure-as-code: docker-compose for dev/staging, systemd unit files.

Before any edit:
1. Read the target file in full.
2. Check ROADMAP.md Phase 1.3, 1.4, 2.3 for infrastructure items.
3. Test Docker builds with `docker build -f docker/Dockerfile .`

Quality gates:
- Docker image builds successfully.
- CI pipeline syntax is valid (check with `act --dryrun` if available).
- No secrets in Dockerfile ARG/ENV, no running as root in final stage.
- `.gitignore` covers all sensitive file patterns (*.der, *.pem, *.db, *.bin, *.ks).
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),

    # Owns user/operator/developer documentation and the mdBook site.
    "docs-engineer": AgentDefinition(
        description=(
            "Technical writer for quicproquo. Writes and maintains user guides, operator "
            "documentation, API references, architecture docs, SECURITY.md, CONTRIBUTING.md, "
            "and the mdBook site in docs/. Ensures all public APIs have doc comments. "
            "Edits docs/, README.md, and inline doc comments only."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Documentation Engineer** for quicproquo.

You own: `docs/`, `README.md`, `CONTRIBUTING.md`, `SECURITY.md`, and inline `///` doc
comments on public API items.

Documentation tiers (in priority order):
1. **User docs** — Getting started, installation, REPL commands, configuration reference.
2. **Operator docs** — Deployment guide (Docker, systemd), certificate setup, backup/restore,
   monitoring, operational runbook, troubleshooting.
3. **Developer docs** — Architecture overview, crate responsibilities, contribution guide,
   coding standards, testing guide, PR review checklist.
4. **Protocol docs** — Wire format reference, Cap'n Proto schema docs, MLS integration,
   Noise transport spec, federation protocol.
5. **Security docs** — Threat model, trust boundaries, key lifecycle, responsible disclosure
   policy, audit report summaries.

Before any edit:
1. Read the target file and any related source code to ensure accuracy.
2. Check the mdBook structure in `docs/book.toml` and `docs/src/SUMMARY.md`.
3. Verify code examples compile (`cargo test --doc` for inline examples).

Quality gates:
- `mdbook build docs/` succeeds without warnings.
- All internal links resolve (no broken cross-references).
- No stale information — verify claims against current source code.
- Spelling and grammar are correct.

Style:
- Write for an audience of experienced developers who may not know Rust.
- Use active voice, present tense.
- Include code examples where they clarify usage.
- Cross-reference related docs sections with relative links.
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),

    # Read-only status reporter: maps ROADMAP.md items to code evidence.
    "roadmap-tracker": AgentDefinition(
        description=(
            "Reads ROADMAP.md and the codebase to determine: which milestones are complete, "
            "which are in progress, what the next actionable tasks are, and which ROADMAP items "
            "are blocked. Produces a concise status report — does NOT edit files."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Roadmap Tracker** for quicproquo.

Your job is to read ROADMAP.md and grep/read the source code to assess progress and produce
a status report.

Steps:
1. Read ROADMAP.md in full.
2. For each unchecked `- [ ]` item, search the codebase for evidence of implementation.
3. Identify blockers (e.g. a later item depending on an incomplete earlier item).
4. Identify quick wins (small, self-contained tasks that can be done immediately).

Output format (Markdown):
## Roadmap Status Report

### Completed
- Phase X, item Y: ...

### In Progress
- Phase X, item Y: partial — what exists vs what's missing.

### Next Actionable Tasks (prioritised)
1. ...
2. ...

### Blockers
- ...

Do NOT edit any files. Analysis only.
""",
        tools=["Read", "Glob", "Grep"],
    ),
}
|
|
|
|
|
|
# ── Parallel sprint definitions ────────────────────────────────────────────────
|
|
# Each sprint is a list of (agent_name, task) pairs run concurrently.
|
|
# Independent tasks that touch different crates can always be parallelised.
|
|
# Tasks that depend on each other (e.g. audit after code changes) should be
|
|
# run as separate sprints.
|
|
|
|
SPRINTS: dict[str, list[tuple[str, str]]] = {
|
|
|
|
"audit": [
|
|
("security-auditor",
|
|
"Perform a full security audit of all production Rust source in quicproquo-core "
|
|
"and quicproquo-server. Check every file for: .unwrap()/.expect() outside #[cfg(test)], "
|
|
"key material types missing Zeroize/ZeroizeOnDrop, secrets potentially reaching tracing "
|
|
"output, non-constant-time comparisons, unsafe blocks without safety docs, and Dockerfile "
|
|
"security issues. Produce a prioritised finding report in Markdown."),
|
|
("roadmap-tracker",
|
|
"Read ROADMAP.md and the full codebase. Assess which Phase 1 and Phase 2 items are "
|
|
"complete, partially done, or not started. For each incomplete item search the source "
|
|
"for relevant code. Produce a concise status report with prioritised next actions."),
|
|
],
|
|
|
|
"phase1-hardening": [
|
|
("rust-server-dev",
|
|
"Fix Phase 1.1: eliminate all .unwrap() and .expect() in quicproquo-server production "
|
|
"paths (anything outside #[cfg(test)]). Read every .rs file in crates/quicproquo-server/src/. "
|
|
"Replace each .unwrap() with proper ? propagation or map_err. Replace .expect() with "
|
|
"a typed error or explicit match. Run `cargo check -p quicproquo-server` after each file. "
|
|
"Also check Phase 1.2 (QPQ_PRODUCTION=true startup validation) and implement if missing."),
|
|
("rust-client-dev",
|
|
"Fix Phase 1.1: eliminate all .unwrap() and .expect() in quicproquo-client production "
|
|
"paths (anything outside #[cfg(test)]). Read every .rs file in crates/quicproquo-client/src/. "
|
|
"Replace each .unwrap() with proper ? propagation or map_err. Replace .expect() with "
|
|
"a typed error or explicit match. Run `cargo check -p quicproquo-client` after each file. "
|
|
"Pay special attention to AUTH_CONTEXT.read().expect() and any Mutex::lock().unwrap() calls."),
|
|
("rust-core-dev",
|
|
"Fix Phase 1.1: check quicproquo-core for any .unwrap()/.expect() in non-test code. "
|
|
"Read all files in crates/quicproquo-core/src/. Replace any found instances with typed "
|
|
"Result propagation. Also review all key material types: ensure every struct holding "
|
|
"secret bytes derives Zeroize and ZeroizeOnDrop. Run `cargo check -p quicproquo-core`."),
|
|
],
|
|
|
|
"phase2-tests": [
|
|
("test-engineer",
|
|
"Implement Phase 2.1 E2E test coverage for auth failure scenarios. Add to "
|
|
"crates/quicproquo-client/tests/e2e.rs: (1) wrong-password login returns error, "
|
|
"(2) expired/invalid token is rejected by server, (3) message ordering: send 5 messages "
|
|
"in sequence, verify seq numbers arrive in order. Read the existing e2e.rs first to "
|
|
"match the test harness pattern (spawn_test_server, AUTH_LOCK). Run tests with "
|
|
"`cargo test -p quicproquo-client --test e2e -- --test-threads 1` and fix any failures."),
|
|
("test-engineer",
|
|
"Implement Phase 2.2 unit tests for untested paths. Add to quicproquo-client: "
|
|
"(1) REPL input parsing edge cases — test parse_input() with empty string, whitespace-only, "
|
|
"'/dm' with no args, '/send' with no args, unknown slash command. "
|
|
"(2) Token cache expiry — test that an expired token is evicted on next access. "
|
|
"Read repl.rs and token_cache.rs first to understand the APIs. "
|
|
"Run `cargo test -p quicproquo-client` and fix any failures."),
|
|
],
|
|
|
|
"phase1-infra": [
|
|
("rust-server-dev",
|
|
"Fix Phase 1.3 and 1.4. "
|
|
"1.3 — Check .gitignore at project root. Add missing entries: data/, *.der, *.pem, "
|
|
"*.db, *.bin, *.ks, qpq-state.*, target/. Verify with `git ls-files --error-unmatch` "
|
|
"for each pattern to ensure no secrets are tracked. "
|
|
"1.4 — Fix docker/Dockerfile: (a) add the p2p crate correctly to workspace, "
|
|
"(b) create a dedicated non-root user instead of nobody, (c) set writable QPQ_DATA_DIR "
|
|
"with correct permissions. Test with `docker build -f docker/Dockerfile .`"),
|
|
("rust-architect",
|
|
"Design the TLS certificate lifecycle for Phase 1.5. Read crates/quicproquo-server/src/tls.rs "
|
|
"and config.rs in full. Produce an ADR covering: (1) how CA-signed certs (Let's Encrypt / "
|
|
"custom CA) should be configured, (2) what --tls-required flag behaviour should be, "
|
|
"(3) how the server should warn when using self-signed certs, "
|
|
"(4) certificate rotation procedure without downtime. "
|
|
"Output: ADR + concrete action items for rust-server-dev."),
|
|
],
|
|
|
|
"status": [
|
|
("roadmap-tracker",
|
|
"Full roadmap status report. Read ROADMAP.md completely. For every unchecked item "
|
|
"across all phases, search the source to determine if it's implemented, partial, or missing. "
|
|
"Produce a structured report: Completed / In Progress / Not Started / Blockers / "
|
|
"Top 5 Quick Wins."),
|
|
("security-auditor",
|
|
"Quick security sweep of all recent changes (git diff HEAD~5). Read the modified files "
|
|
"in full. Focus on: any new .unwrap()/.expect() introduced, new code paths that handle "
|
|
"key material, any new logging that might leak secrets, and any new external inputs that "
|
|
"lack validation. Produce a concise finding report."),
|
|
],
|
|
|
|
# ── Documentation sprints ─────────────────────────────────────────────────
|
|
|
|
"docs-foundation": [
|
|
("docs-engineer",
|
|
"Create a root-level SECURITY.md file for quicproquo. Include: "
|
|
"(1) Responsible disclosure policy — where to report vulnerabilities (email, PGP key if available). "
|
|
"(2) Scope — what's covered (server, client, core crypto, protocol). "
|
|
"(3) Response timeline — acknowledge within 48h, triage within 7 days, fix within 30 days for critical. "
|
|
"(4) Security contact — project maintainer contact info. "
|
|
"(5) Out-of-scope — social engineering, DoS against test instances, etc. "
|
|
"Read existing docs/SECURITY-AUDIT.md for context on known security posture. "
|
|
"Keep it concise and professional. Follow the format used by major open-source crypto projects."),
|
|
("docs-engineer",
|
|
"Create a root-level CONTRIBUTING.md file for quicproquo. Read the existing guidance in "
|
|
"docs/src/contributing/coding-standards.md and docs/src/contributing/testing.md first. "
|
|
"Include: (1) Development setup (Rust toolchain, Cap'n Proto compiler, SQLCipher). "
|
|
"(2) Building the project (cargo build --workspace, feature flags). "
|
|
"(3) Running tests (cargo test --workspace, E2E with --test-threads 1). "
|
|
"(4) PR process (branch naming, conventional commits, review checklist). "
|
|
"(5) Coding standards summary (link to full docs). "
|
|
"(6) Security requirements for contributions (no unwrap on crypto, zeroize, etc). "
|
|
"Keep it actionable — a new contributor should be able to submit a PR after reading this."),
|
|
("docs-engineer",
|
|
"Write a comprehensive operator deployment guide at docs/src/getting-started/deployment.md. "
|
|
"Read the existing docs/src/getting-started/ pages and docker/Dockerfile first. "
|
|
"Cover: (1) Docker deployment (docker-compose, volume mounts, env vars). "
|
|
"(2) Bare-metal deployment (systemd unit file example, user/group setup). "
|
|
"(3) TLS certificate setup (self-signed for dev, Let's Encrypt for prod). "
|
|
"(4) Configuration reference (all QPQ_* environment variables). "
|
|
"(5) Backup and restore (SQLite/SQLCipher database, key material). "
|
|
"(6) Monitoring (structured log output, health checks). "
|
|
"(7) Troubleshooting common issues. "
|
|
"Update docs/src/SUMMARY.md to include the new page if needed."),
|
|
("docs-engineer",
|
|
"Audit all existing docs/src/ pages for accuracy against the current codebase. "
|
|
"Read each .md file in docs/src/ and cross-reference claims against actual source code. "
|
|
"Fix: (1) Stale API references (function names, struct names that changed). "
|
|
"(2) Broken internal links between docs pages. "
|
|
"(3) Outdated architecture descriptions (e.g. references to MessagePack, old ALPN strings). "
|
|
"(4) Missing entries in docs/src/SUMMARY.md for pages that exist. "
|
|
"Produce a list of all changes made and any issues you couldn't fix."),
|
|
],
|
|
|
|
"docs-api": [
|
|
("docs-engineer",
|
|
"Ensure every public API item in quicproquo-core has a doc comment (/// or //!). "
|
|
"Read crates/quicproquo-core/src/lib.rs to find all pub exports. For each pub fn, "
|
|
"pub struct, pub enum, and pub trait: check if it has a doc comment. If missing, "
|
|
"read the implementation to understand what it does, then add a concise doc comment "
|
|
"with: one-line summary, parameters, return value, errors, and a short example where "
|
|
"appropriate. Run `cargo doc -p quicproquo-core --no-deps` to verify."),
|
|
("docs-engineer",
|
|
"Document all Cap'n Proto schemas in schemas/. For each .capnp file (auth.capnp, "
|
|
"delivery.capnp, federation.capnp, node.capnp): read the schema and the Rust "
|
|
"implementation that uses it. Write or update docs/src/wire-format/ pages with: "
|
|
"(1) Purpose of each interface. (2) Method signatures with parameter semantics. "
|
|
"(3) Error conditions. (4) Example message flows (e.g. auth flow, message send flow). "
|
|
"Ensure docs/src/wire-format/overview.md links to all sub-pages."),
|
|
],
|
|
|
|
# ── Infrastructure sprints ────────────────────────────────────────────────
|
|
|
|
"infra-hardening": [
|
|
("devops-engineer",
|
|
"Fix the Dockerfile at docker/Dockerfile for production readiness. Read it first. "
|
|
"Changes needed: (1) Create a dedicated non-root user 'qpq' (not nobody) with a "
|
|
"specific UID/GID. (2) Set QPQ_DATA_DIR=/var/lib/qpq with correct ownership. "
|
|
"(3) Handle the excluded p2p crate correctly in workspace build. "
|
|
"(4) Add HEALTHCHECK instruction. (5) Use specific base image tags (not :latest). "
|
|
"(6) Ensure COPY commands don't pull in .git, target/, logs/, or test data. "
|
|
"Test with: docker build -f docker/Dockerfile ."),
|
|
("devops-engineer",
|
|
"Harden .gitignore at project root. Read the current .gitignore first. Add missing "
|
|
"patterns: data/, *.der, *.pem, *.db, *.db-shm, *.db-wal, *.bin, *.ks, "
|
|
"qpq-state.*, logs/ai_team/, .env, .env.*, *.key. "
|
|
"Verify no sensitive files are already tracked: run git ls-files for each pattern. "
|
|
"If any are tracked, report them (do NOT remove from git without confirmation)."),
|
|
("devops-engineer",
|
|
"Enhance CI pipeline at .github/workflows/ci.yml. Read it first. Add or verify: "
|
|
"(1) cargo fmt check passes. (2) cargo clippy --workspace -- -D warnings. "
|
|
"(3) cargo test --workspace (with --test-threads 1 for E2E). "
|
|
"(4) cargo deny check runs on every PR. (5) cargo audit as blocking check. "
|
|
"(6) Docker build validation job (docker build -f docker/Dockerfile .). "
|
|
"(7) Rust cache action for faster builds. (8) Matrix for stable + nightly Rust. "
|
|
"Also check .github/CODEOWNERS is correctly configured for crypto paths."),
|
|
],
|
|
|
|
# ── Security sprints ──────────────────────────────────────────────────────
|
|
|
|
"security-full": [
|
|
("security-auditor",
|
|
"Perform a FULL security audit of the entire quicproquo codebase. Read every .rs file "
|
|
"in crates/quicproquo-core/src/, crates/quicproquo-server/src/, and "
|
|
"crates/quicproquo-client/src/. Check every file for ALL of: "
|
|
"(1) .unwrap()/.expect() outside #[cfg(test)] on crypto, I/O, lock, or parse operations. "
|
|
"(2) Key material types missing Zeroize/ZeroizeOnDrop. "
|
|
"(3) Secret bytes (keys, passwords, tokens, nonces) potentially reaching tracing/log/println. "
|
|
"(4) Non-constant-time comparisons on auth tags, tokens, MACs, or passwords. "
|
|
"(5) panic!/unreachable! in production paths. "
|
|
"(6) unsafe blocks without // SAFETY: documentation. "
|
|
"(7) Missing input validation on RPC boundaries (data from network). "
|
|
"(8) Race conditions in shared state (DashMap, Mutex, RwLock). "
|
|
"(9) Replay attack vectors in message delivery. "
|
|
"(10) Timing side channels in OPAQUE or token validation. "
|
|
"Produce a prioritised finding report: Critical > High > Medium > Low > Informational. "
|
|
"Each finding must include: file:line, description, attack scenario, remediation."),
|
|
("security-auditor",
|
|
"Audit infrastructure security. Read docker/Dockerfile, docker-compose.yml, "
|
|
".github/workflows/ci.yml, and all files in scripts/. Check: "
|
|
"(1) Dockerfile: running as root? secrets in ENV/ARG? base image pinned? "
|
|
"(2) docker-compose: volumes expose host paths? ports exposed unnecessarily? "
|
|
"(3) CI: secrets handled correctly? artifact permissions? supply chain attacks? "
|
|
"(4) Scripts: command injection? path traversal? unsafe eval? "
|
|
"(5) Dependencies: check deny.toml config, look for unmaintained/yanked crates. "
|
|
"Produce a separate infrastructure security report."),
|
|
("security-auditor",
|
|
"Review the threat model at docs/src/cryptography/threat-model.md against the current "
|
|
"implementation. Read the threat model doc, then verify each claim: "
|
|
"(1) Are the stated trust boundaries correctly implemented in code? "
|
|
"(2) Does the OPAQUE flow match the documented auth model? "
|
|
"(3) Is the Noise_XX handshake configured as documented (XX pattern, not IK/KK)? "
|
|
"(4) Does the MLS integration follow RFC 9420 as claimed? "
|
|
"(5) Is the hybrid KEM combiner implemented as documented (HKDF-SHA256 with correct info string)? "
|
|
"(6) Are there attack vectors NOT covered by the threat model? "
|
|
"Produce a threat model gap analysis report."),
|
|
],
|
|
|
|
"security-review": [
|
|
("security-auditor",
|
|
"Post-change security review. Read all modified files from the most recent work. "
|
|
"Focus on: any new .unwrap()/.expect() introduced, new code paths handling key material, "
|
|
"new logging that might leak secrets, new external inputs lacking validation, and "
|
|
"any new unsafe blocks. Compare against the engineering standards in master-prompt.md. "
|
|
"Produce a concise pass/fail report with findings."),
|
|
("roadmap-tracker",
|
|
"Quick progress check after recent changes. Read ROADMAP.md and check which Phase 1 "
|
|
"and Phase 2 items have been completed by the recent work. Update the status report "
|
|
"with: items newly completed, items still in progress, next priorities."),
|
|
],
|
|
|
|
# ── Release preparation ───────────────────────────────────────────────────
|
|
|
|
"release-prep": [
|
|
("devops-engineer",
|
|
"Prepare release infrastructure. Read Cargo.toml (workspace root) and all crate "
|
|
"Cargo.toml files. (1) Verify version numbers are consistent across all crates. "
|
|
"(2) Create or update CHANGELOG.md at project root — read git log for recent commits "
|
|
"and categorise by: Added, Changed, Fixed, Security. Follow keepachangelog.com format. "
|
|
"(3) Verify docker/Dockerfile builds successfully with release profile. "
|
|
"(4) Check that cargo package -p quicproquo-server would succeed (dry run). "
|
|
"(5) Verify .github/workflows/ci.yml has a release/tag-triggered job if applicable."),
|
|
("docs-engineer",
|
|
"Final documentation review for release readiness. "
|
|
"(1) Verify README.md: feature matrix matches actual implementation, quick start "
|
|
"instructions work, crate layout is accurate, all badges are correct. "
|
|
"(2) Verify docs/src/getting-started/ pages are up to date. "
|
|
"(3) Check that SECURITY.md and CONTRIBUTING.md exist and are accurate. "
|
|
"(4) Run mdbook build docs/ and verify no warnings. "
|
|
"(5) Produce a docs readiness report: pass/fail with specific issues found."),
|
|
("roadmap-tracker",
|
|
"Final pre-release status report. Read ROADMAP.md completely. Classify every item as: "
|
|
"Complete (implemented + tested), Deferred (not blocking release), or Blocking (must fix "
|
|
"before release). Focus on Phase 1 (Production Hardening) — all items must be Complete "
|
|
"or have documented mitigations. Produce a release readiness assessment."),
|
|
],
|
|
}
|
|
|
|
# ── Production readiness pipeline ─────────────────────────────────────────────
|
|
# Ordered list of sprints that form the full production readiness path.
|
|
# Each sprint must pass its quality gate before the next begins.
|
|
# Sprints within a step run in parallel; steps run sequentially.
|
|
|
|
# Ordered (sprint-name, description) steps forming the full production
# readiness path.  Sprint names must be keys of SPRINTS.  Sprints within a
# step run their agents in parallel; the steps themselves run sequentially
# because each quality gate must pass before the next step begins.
PIPELINE: list[tuple[str, str]] = [
    ("status", "Baseline: assess current state and recent security posture"),
    ("audit", "Deep dive: full security audit + detailed roadmap analysis"),
    ("phase1-hardening", "Code: eliminate crash paths across all crates (parallel by crate)"),
    ("phase1-infra", "Infra: fix Dockerfile, .gitignore, design TLS lifecycle"),
    ("infra-hardening", "Infra: CI hardening, Docker production config, .gitignore completion"),
    ("phase2-tests", "Tests: E2E coverage, unit tests for untested paths"),
    ("docs-foundation", "Docs: SECURITY.md, CONTRIBUTING.md, deployment guide, accuracy audit"),
    ("docs-api", "Docs: public API doc comments, Cap'n Proto schema documentation"),
    ("security-full", "Security: comprehensive audit of all code + infra + threat model"),
    ("security-review", "Gate: post-change security review + progress check"),
    ("release-prep", "Release: changelog, version consistency, final docs review"),
]
|
|
|
|
|
|
# ── Orchestrator system prompt ─────────────────────────────────────────────────
|
|
|
|
ORCHESTRATOR_PROMPT = f"""{PROJECT_CONTEXT}
|
|
|
|
You are the **Orchestrator** for the quicproquo AI development team.
|
|
|
|
Your team of specialist subagents:
|
|
|
|
| Agent | Role | Edits? |
|
|
|-------|------|--------|
|
|
| rust-architect | Architecture design, ADRs, design reviews | No |
|
|
| rust-core-dev | quicproquo-core: crypto, MLS, Noise codec | Yes |
|
|
| rust-server-dev | quicproquo-server: AS, DS, RPC server | Yes |
|
|
| rust-client-dev | quicproquo-client: CLI, REPL, local state | Yes |
|
|
| security-auditor | Security review: code, infra, threat model | No |
|
|
| test-engineer | Unit, integration, E2E tests | Yes (tests) |
|
|
| devops-engineer | Docker, CI/CD, deployment, monitoring | Yes (infra) |
|
|
| docs-engineer | User/operator/developer/protocol docs | Yes (docs) |
|
|
| roadmap-tracker | Roadmap progress assessment | No |
|
|
|
|
Parallelisation rules:
|
|
- Agents that own DIFFERENT crates or concern areas can run in parallel.
|
|
- rust-core-dev, rust-server-dev, rust-client-dev ALWAYS run in parallel (different crates).
|
|
- security-auditor runs AFTER code-changing agents complete (reads their output).
|
|
- test-engineer runs AFTER code-changing agents complete (tests their changes).
|
|
- docs-engineer and devops-engineer can run in parallel with each other and with dev agents.
|
|
- roadmap-tracker can run in parallel with anything (read-only).
|
|
|
|
Workflow:
|
|
1. Read the task carefully.
|
|
2. Decide which agent(s) are needed. For multi-step tasks, sequence them logically.
|
|
3. Maximise parallelism: launch agents that touch different files simultaneously.
|
|
4. Call each required agent with a precise, scoped prompt.
|
|
5. Synthesise the agents' outputs into a final report or code deliverable.
|
|
6. Always end with: "Next suggested task: ..." based on the ROADMAP.
|
|
|
|
Rules:
|
|
- Read master-prompt.md and ROADMAP.md before delegating significant tasks.
|
|
- Do NOT delegate everything to one agent — split by crate/concern.
|
|
- If a task touches security, always invoke security-auditor AFTER code changes.
|
|
- If a task adds/modifies functionality, always invoke test-engineer LAST.
|
|
- docs-engineer and devops-engineer work independently — launch them in parallel.
|
|
- Keep your synthesis concise — prefer structured output (headers, bullet lists).
|
|
"""
|
|
|
|
|
|
# ── Parallel runner ────────────────────────────────────────────────────────────
|
|
|
|
async def run_agent_to_file(
    agent_name: str,
    task: str,
    max_turns: int,
    output_dir: Path,
    label: str,
) -> tuple[str, str, str | None]:
    """
    Run a single agent and stream its result to an output file.

    Args:
        agent_name: Key into the AGENTS registry.
        task: Prompt text sent to the agent.
        max_turns: Maximum agentic turns allowed for this run.
        output_dir: Directory receiving the per-agent markdown log.
        label: Stable file-name stem for this run (e.g. "01_rust-core-dev").

    Returns (agent_name, label, result_text_or_None).
    `result_text` is None if the agent produced no ResultMessage.
    """
    output_file = output_dir / f"{label}.md"
    result_text: str | None = None

    agent = AGENTS[agent_name]
    options = ClaudeAgentOptions(
        cwd=PROJECT_ROOT,
        allowed_tools=agent.tools or ["Read", "Glob", "Grep"],
        system_prompt=agent.prompt,
        max_turns=max_turns,
        permission_mode="acceptEdits",
        setting_sources=["project"],
    )

    # Explicit UTF-8: agent output (and the emoji/box-drawing characters this
    # script uses elsewhere) is not representable in some platform-default
    # encodings (e.g. cp1252 on Windows), which would raise UnicodeEncodeError.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"# Agent: {agent_name}\n\n")
        f.write(f"**Task:** {task}\n\n")
        f.write(f"**Started:** {datetime.now().isoformat()}\n\n---\n\n")

        async for message in query(prompt=task, options=options):
            if isinstance(message, ResultMessage):
                result_text = message.result
                f.write(f"## Result\n\n{result_text}\n")

        f.write(f"\n**Finished:** {datetime.now().isoformat()}\n")

    return agent_name, label, result_text
|
|
|
|
|
|
async def run_parallel(
    agent_tasks: list[tuple[str, str]],
    max_turns: int,
    verbose: bool,
    sprint_name: str = "custom",
) -> None:
    """
    Launch all (agent, task) pairs concurrently and print a summary when done.

    Args:
        agent_tasks: (agent_name, task_prompt) pairs to run in parallel.
        max_turns: Maximum agentic turns allowed per agent.
        verbose: Kept for signature parity with the sequential runners;
            per-agent output goes to log files, so it is currently unused here.
        sprint_name: Label used for the timestamped log directory.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = Path(PROJECT_ROOT) / "logs" / "ai_team" / f"{sprint_name}_{timestamp}"
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"\n{'═' * 70}")
    print(f" quicproquo AI Team — Parallel Sprint: {sprint_name}")
    print(f" Agents: {len(agent_tasks)} | Max turns each: {max_turns}")
    print(f" Logs: {output_dir}/")
    print(f"{'═' * 70}\n")

    # Preview each (agent, task) pair before launching.
    for i, (agent, task) in enumerate(agent_tasks, 1):
        print(f" [{i}] {agent}")
        print(f" {task[:80]}{'…' if len(task) > 80 else ''}")
        print()

    # Build coroutines with stable labels for output files.
    coros = [
        run_agent_to_file(agent, task, max_turns, output_dir, f"{i:02d}_{agent}")
        for i, (agent, task) in enumerate(agent_tasks, 1)
    ]

    print(f" Starting {len(coros)} agents in parallel…\n")
    # return_exceptions=True so one failing agent does not cancel its siblings.
    results = await asyncio.gather(*coros, return_exceptions=True)

    print(f"\n{'─' * 70}")
    print(" SPRINT RESULTS")
    print(f"{'─' * 70}")

    success = 0
    for result in results:
        if isinstance(result, Exception):
            print(f"\n ❌ ERROR: {result}")
        else:
            agent_name, label, text = result
            if text is not None:
                success += 1
                print(f"\n ✅ {agent_name} ({label}.md)")
                # Show first 300 chars of result as a preview.
                preview = text.strip()[:300]
                for line in preview.splitlines():
                    print(f" {line}")
                if len(text.strip()) > 300:
                    print(" …")
            else:
                print(f"\n ⚠️ {agent_name}: no result produced")

    print(f"\n {success}/{len(agent_tasks)} agents completed successfully.")
    print(f" Full outputs: {output_dir}/\n")
|
|
|
|
|
|
# ── Sequential runners ─────────────────────────────────────────────────────────
|
|
|
|
async def run_orchestrator(task: str, max_turns: int, verbose: bool) -> None:
    """
    Run the full team via the orchestrator.

    Args:
        task: Natural-language task for the orchestrator to decompose.
        max_turns: Maximum agentic turns for the orchestrator session.
        verbose: When True, also stream assistant text and the session id;
            otherwise only the final ResultMessage is printed.
    """
    print(f"\n{'═' * 70}")
    print(" quicproquo AI Team — Orchestrator")  # plain string: no placeholders
    print(f" Task: {task[:72]}{'…' if len(task) > 72 else ''}")
    print(f"{'═' * 70}\n")

    options = ClaudeAgentOptions(
        cwd=PROJECT_ROOT,
        allowed_tools=["Read", "Glob", "Grep", "Agent"],
        system_prompt=ORCHESTRATOR_PROMPT,
        agents=AGENTS,  # subagents the orchestrator may delegate to
        max_turns=max_turns,
        permission_mode="acceptEdits",
        setting_sources=["project"],
    )

    async for message in query(prompt=task, options=options):
        if isinstance(message, ResultMessage):
            print("\n" + "─" * 70)
            print("RESULT")
            print("─" * 70)
            print(message.result)
        elif verbose:
            if isinstance(message, AssistantMessage):
                for block in message.content:
                    if isinstance(block, TextBlock) and block.text.strip():
                        print(block.text, end="", flush=True)
            elif isinstance(message, SystemMessage) and message.subtype == "init":
                print(f"[Session: {message.session_id}]")
|
|
|
|
|
|
async def run_single_agent(
    agent_name: str, task: str, max_turns: int, verbose: bool
) -> None:
    """Bypass the orchestrator and run a single specialist agent directly."""
    spec = AGENTS[agent_name]
    banner = "═" * 70
    task_preview = task[:72] + ("…" if len(task) > 72 else "")
    print(f"\n{banner}")
    print(f" quicproquo AI Team — {agent_name}")
    print(f" Task: {task_preview}")
    print(f"{banner}\n")

    options = ClaudeAgentOptions(
        cwd=PROJECT_ROOT,
        system_prompt=spec.prompt,
        allowed_tools=spec.tools or ["Read", "Glob", "Grep"],
        max_turns=max_turns,
        permission_mode="acceptEdits",
        setting_sources=["project"],
    )

    async for message in query(prompt=task, options=options):
        # The final ResultMessage is always printed; everything else only
        # streams when --verbose is set.
        if isinstance(message, ResultMessage):
            rule = "─" * 70
            print("\n" + rule)
            print("RESULT")
            print(rule)
            print(message.result)
            continue
        if not verbose or not isinstance(message, AssistantMessage):
            continue
        for block in message.content:
            if isinstance(block, TextBlock) and block.text.strip():
                print(block.text, end="", flush=True)
|
|
|
|
|
|
# ── CLI ────────────────────────────────────────────────────────────────────────
|
|
|
|
def build_parser() -> argparse.ArgumentParser:
    """Build and return the command-line argument parser for ai_team."""
    p = argparse.ArgumentParser(
        prog="ai_team",
        description="quicproquo multi-agent Claude team",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    p.add_argument(
        "task", nargs="?", help="Task description for the orchestrator",
    )
    p.add_argument(
        "--agent", "-a",
        default=None,
        choices=list(AGENTS.keys()),
        help="Bypass orchestrator and send task directly to a specific agent",
    )
    p.add_argument(
        "--sprint", "-s",
        default=None,
        choices=list(SPRINTS.keys()),
        metavar="SPRINT",
        help="Run a predefined parallel sprint (see --list-sprints)",
    )
    p.add_argument(
        "--parallel", "-p",
        default=None,
        nargs="+",
        metavar="AGENT:TASK",
        help=(
            'Ad-hoc parallel run. Each argument is "agent-name: task description". '
            'Example: --parallel "rust-server-dev: Fix unwrap() in server" '
            '"security-auditor: Audit core crate"'
        ),
    )
    p.add_argument(
        "--list-agents", "-l",
        action="store_true",
        help="List available agents and exit",
    )
    p.add_argument(
        "--list-sprints",
        action="store_true",
        help="List predefined sprints and exit",
    )
    p.add_argument(
        "--pipeline",
        action="store_true",
        help="Run the full production readiness pipeline (all sprints in dependency order)",
    )
    p.add_argument(
        "--pipeline-from",
        default=None,
        metavar="SPRINT",
        help="Start the pipeline from a specific sprint (skip earlier steps)",
    )
    p.add_argument(
        "--max-turns",
        type=int,
        default=60,
        help="Maximum agentic turns per agent (default: 60)",
    )
    p.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Print all message types (not just results)",
    )
    return p
|
|
|
|
|
|
def list_agents() -> None:
    """Print every registered agent with its word-wrapped description."""
    import textwrap  # local import: only needed for this help command

    print("Available agents:\n")
    for name, defn in AGENTS.items():
        print(f" {name}")
        # textwrap keeps words intact; the previous fixed-width slicing
        # (desc[i:i+72]) split words arbitrarily at the 72-char boundary.
        wrapped = textwrap.fill(
            defn.description,
            width=72,
            initial_indent=" ",
            subsequent_indent=" ",
        )
        print(f"{wrapped}\n")
|
|
|
|
|
|
def list_sprints() -> None:
    """Print the predefined sprints, then the production readiness pipeline."""
    print("Predefined sprints:\n")
    for sprint, entries in SPRINTS.items():
        print(f" {sprint} ({len(entries)} agents in parallel)")
        for member, job in entries:
            truncated = len(job) > 60
            preview = job[:60] + ("…" if truncated else "")
            print(f" [{member}] {preview}")
        print()

    print("Production readiness pipeline (--pipeline):\n")
    for step, (name, desc) in enumerate(PIPELINE, 1):
        agent_count = len(SPRINTS[name])
        print(f" {step:2d}. {name:<20s} {agent_count} agent(s) — {desc}")
    print()
|
|
|
|
|
|
def parse_parallel_args(args: list[str]) -> list[tuple[str, str]]:
    """
    Parse --parallel arguments of the form "agent-name: task description".
    The colon after the agent name is required.

    Exits the process with status 1 on a malformed argument, an unknown
    agent name, or an empty task.
    """
    known_agents = set(AGENTS.keys())
    parsed: list[tuple[str, str]] = []
    for spec in args:
        if ":" not in spec:
            print(f"ERROR: --parallel argument missing colon separator: {spec!r}")
            print(" Expected format: \"agent-name: task description\"")
            sys.exit(1)
        name, _, description = spec.partition(":")
        name, description = name.strip(), description.strip()
        if name not in known_agents:
            print(f"ERROR: unknown agent {name!r}. Valid: {', '.join(sorted(known_agents))}")
            sys.exit(1)
        if not description:
            print(f"ERROR: empty task for agent {name!r}")
            sys.exit(1)
        parsed.append((name, description))
    return parsed
|
|
|
|
|
|
# ── Pipeline runner ────────────────────────────────────────────────────────────
|
|
|
|
async def run_pipeline(
    max_turns: int,
    verbose: bool,
    start_from: str | None = None,
) -> None:
    """
    Run the full production readiness pipeline: all sprints in dependency order.

    Each sprint runs its agents in parallel. Sprints run sequentially because
    later sprints depend on earlier ones (e.g. security-review after code changes).

    Args:
        max_turns: Maximum agentic turns per agent within each sprint.
        verbose: Forwarded to run_parallel.
        start_from: If set, skip all sprints before that one.
    """
    pipeline = list(PIPELINE)
    if start_from:
        names = [name for name, _ in PIPELINE]
        if start_from not in names:
            print(f"ERROR: unknown sprint {start_from!r} in pipeline.")
            print(f" Valid: {', '.join(names)}")
            sys.exit(1)
        idx = names.index(start_from)
        pipeline = pipeline[idx:]
        print(f"\n Skipping {idx} sprint(s), starting from: {start_from}\n")

    total = len(pipeline)
    print(f"\n{'=' * 70}")
    print(" quicproquo AI Team — Production Readiness Pipeline")  # plain string: no placeholders
    print(f" Steps: {total} | Max turns per agent: {max_turns}")
    print(f"{'=' * 70}")
    for i, (name, desc) in enumerate(pipeline, 1):
        count = len(SPRINTS[name])
        print(f" {i:2d}. [{name}] {count} agent(s) — {desc}")
    print(f"{'=' * 70}\n")

    for step, (sprint_name, description) in enumerate(pipeline, 1):
        print(f"\n{'#' * 70}")
        print(f" PIPELINE STEP {step}/{total}: {sprint_name}")
        print(f" {description}")
        print(f"{'#' * 70}\n")

        agent_tasks = SPRINTS[sprint_name]
        await run_parallel(
            agent_tasks, max_turns, verbose, sprint_name=sprint_name
        )

        if step < total:
            print(f"\n Step {step}/{total} complete. Proceeding to next step...\n")

    print(f"\n{'=' * 70}")
    print(f" PIPELINE COMPLETE — {total} steps executed")
    print(" Review outputs in: logs/ai_team/")  # plain string: no placeholders
    print(f"{'=' * 70}\n")
|
|
|
|
|
|
# ── Entry point ────────────────────────────────────────────────────────────────
|
|
|
|
async def main() -> None:
    """CLI entry point: dispatch to list / pipeline / sprint / parallel /
    single-agent / orchestrator modes based on parsed arguments."""
    parser = build_parser()
    args = parser.parse_args()

    # Informational modes need no API key and return immediately.
    if args.list_agents:
        list_agents()
        return
    if args.list_sprints:
        list_sprints()
        return

    if not os.environ.get("ANTHROPIC_API_KEY"):
        print("ERROR: ANTHROPIC_API_KEY environment variable is not set.")
        sys.exit(1)

    try:
        if args.pipeline or args.pipeline_from:
            await run_pipeline(
                args.max_turns, args.verbose, start_from=args.pipeline_from
            )
        elif args.sprint:
            await run_parallel(
                SPRINTS[args.sprint], args.max_turns, args.verbose,
                sprint_name=args.sprint,
            )
        elif args.parallel:
            await run_parallel(
                parse_parallel_args(args.parallel), args.max_turns, args.verbose,
                sprint_name="custom",
            )
        elif args.agent:
            if not args.task:
                print("ERROR: --agent requires a task argument.")
                sys.exit(1)
            await run_single_agent(
                args.agent, args.task, args.max_turns, args.verbose
            )
        elif args.task:
            await run_orchestrator(args.task, args.max_turns, args.verbose)
        else:
            # No mode selected at all: show usage and fail.
            parser.print_help()
            sys.exit(1)
    except CLINotFoundError:
        print(
            "\nERROR: Claude Code CLI not found.\n"
            "Install with: pip install claude-agent-sdk"
        )
        sys.exit(1)
    except CLIConnectionError as e:
        print(f"\nERROR: Connection error: {e}")
        sys.exit(1)
    except KeyboardInterrupt:
        print("\n\nInterrupted.")
        sys.exit(0)
|
|
|
|
|
|
# Script entry point: asyncio.run drives the async CLI dispatcher above.
if __name__ == "__main__":
    asyncio.run(main())
|