#!/usr/bin/env python3
"""
quicproquo AI Team
==================

A multi-agent Claude team specialised for the quicproquo Rust workspace.
Agents cover development, security, testing, documentation, and infrastructure.

Usage:
    python scripts/ai_team.py "<task>"            # orchestrator
    python scripts/ai_team.py --agent <name> "<task>"   # single agent
    python scripts/ai_team.py --sprint <name>     # parallel sprint
    python scripts/ai_team.py --parallel \\
        "rust-server-dev: Fix unwrap() in server" \\
        "security-auditor: Audit quicproquo-core" # ad-hoc parallel
    python scripts/ai_team.py --pipeline          # full production readiness pipeline
    python scripts/ai_team.py --list-agents
    python scripts/ai_team.py --list-sprints

Requires:
    pip install claude-agent-sdk

The ANTHROPIC_API_KEY environment variable must be set.

Team specification: docs/AGENT-TEAM.md
"""
import argparse
import asyncio
import sys
import os
from datetime import datetime
from pathlib import Path

# The SDK is a hard requirement: fail fast with an actionable message rather
# than a bare ImportError traceback.
# NOTE(review): SystemMessage/AssistantMessage/TextBlock/CLINotFoundError/
# CLIConnectionError are not used in the portion of the file visible here —
# presumably used further down; confirm before removing.
try:
    from claude_agent_sdk import (
        query,
        ClaudeAgentOptions,
        AgentDefinition,
        ResultMessage,
        SystemMessage,
        AssistantMessage,
        TextBlock,
        CLINotFoundError,
        CLIConnectionError,
    )
except ImportError:
    print("ERROR: claude-agent-sdk not found.")
    print("Install with: pip install claude-agent-sdk")
    sys.exit(1)

# ── Project root ───────────────────────────────────────────────────────────────
# This script lives in scripts/, so the repository root is one level up.
PROJECT_ROOT = str(Path(__file__).parent.parent.resolve())

# ── Shared project context injected into every agent's system prompt ───────────
PROJECT_CONTEXT = """
## Project: quicproquo

A production-grade end-to-end encrypted group messenger written in Rust.

### Transport stack
TCP → Noise_XX (snow) → ChaCha20-Poly1305 encrypted channel → Cap'n Proto RPC

### Workspace layout
```
quicproquo/
├── Cargo.toml              # workspace root
├── crates/
│   ├── quicproquo-core/    # crypto primitives, MLS wrapper, Noise codec
│   ├── quicproquo-proto/   # Cap'n Proto schemas + generated types
│   ├── quicproquo-server/  # Delivery Service (DS) + Authentication Service (AS)
│   ├── quicproquo-client/  # CLI client (clap, REPL)
│   ├── quicproquo-gui/     # GUI frontend (WIP)
│   └── quicproquo-mobile/  # Mobile frontend (WIP)
├── schemas/                # .capnp schema files (canonical source)
├── docker/ + docker-compose.yml
├── docs/
├── scripts/
├── ROADMAP.md              # phased milestone plan
└── master-prompt.md        # full architecture reference
```

### Non-negotiable engineering standards
- Production-ready only — no stubs, todo!(), unimplemented!(), or placeholder logic.
- YAGNI / KISS / DRY.
- Spec-first: doc comments before implementation.
- Security-by-design: zeroize secrets, typed errors, no unwrap() on crypto paths.
- Conventional commits: feat:, fix:, chore:, docs:, test:, refactor:.
- No Co-authored-by trailers. GPG-signed commits only.

### Key dependencies (pinned majors)
openmls 0.5, openmls_rust_crypto 0.2, ml-kem 0.2, x25519-dalek 2, ed25519-dalek 2,
snow 0.9, chacha20poly1305 0.10, capnp 0.19, capnp-rpc 0.19, tokio 1, tokio-util 0.7,
dashmap 5, rusqlite (SQLite), tracing 0.1, anyhow 1, thiserror 1, clap 4.

Always read ROADMAP.md and master-prompt.md before making architectural decisions.
""".strip()

# ── Agent definitions ──────────────────────────────────────────────────────────
# Each agent gets the shared PROJECT_CONTEXT prepended to its role-specific
# prompt. `tools` restricts what the agent may do: read-only agents
# (architect, auditor, tracker) get no Edit/Write/Bash.
AGENTS: dict[str, AgentDefinition] = {
    "rust-architect": AgentDefinition(
        description=(
            "Senior Rust architect for quicproquo. Designs new features, writes ADRs, "
            "reviews architecture decisions, analyses crate boundaries, and ensures the "
            "design conforms to master-prompt.md. Does NOT write implementation code."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Rust Architect** for quicproquo.

Responsibilities:
- Read ROADMAP.md and master-prompt.md to understand the current milestone and constraints.
- Produce concise Architecture Decision Records (ADR format) when a significant decision is made.
- Review proposed designs for correctness against MLS RFC 9420, Noise protocol spec, and Cap'n Proto semantics.
- Identify crate-boundary violations (e.g. I/O in quicproquo-proto, crypto in quicproquo-server).
- Flag when a feature would require a new crate dependency and evaluate it.
- Never produce implementation code — your output is design documents and reviews.

Output format:
1. One-sentence summary of the architectural concern.
2. ADR (if applicable): Context → Decision → Consequences.
3. Concrete list of action items for the development agents.
""",
        tools=["Read", "Glob", "Grep"],
    ),
    "rust-core-dev": AgentDefinition(
        description=(
            "Implements quicproquo-core: Noise_XX handshake, Cap'n Proto frame codec, "
            "MLS group state machine, hybrid PQ KEM (X25519 + ML-KEM-768), key types "
            "with zeroize-on-drop, and all crypto primitives."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Core Developer** for quicproquo, responsible for the `quicproquo-core` crate.

Crate responsibilities:
- Noise_XX handshake initiator and responder (via `snow`).
- Length-prefixed Cap'n Proto frame codec (Tokio Encoder/Decoder traits).
- MLS group state machine wrapper around `openmls`.
- Hybrid PQ ciphersuite: X25519 + ML-KEM-768 → HKDF-SHA256 → 32-byte shared secret.
- Key generation, zeroize-on-drop key types.
- OPAQUE password auth helper types.

Before any edit:
1. Read the relevant source file(s) in full.
2. Check ROADMAP.md for the current milestone scope.
3. Confirm no new dependencies are needed or justify additions.

After any edit: run `cargo check -p quicproquo-core` to verify compilation.

Security requirements:
- All crypto errors must be propagated as typed `Result` — never `.unwrap()`.
- Key material structs must derive `Zeroize` and `ZeroizeOnDrop`.
- No secret bytes in log output.
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),
    "rust-server-dev": AgentDefinition(
        description=(
            "Implements quicproquo-server: TCP listener, Noise handshake per connection, "
            "Cap'n Proto RPC server for the Authentication Service (AS) and Delivery "
            "Service (DS), fan-out router, per-group message log, SQLite persistence."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Server Developer** for quicproquo, responsible for the `quicproquo-server` crate.

Crate responsibilities:
- Tokio TCP listener; one task per connection.
- Noise_XX responder using quicproquo-core.
- Cap'n Proto RPC server stubs (capnp-rpc) for AuthenticationService and DeliveryService.
- Authentication Service: KeyPackage store (DashMap → SQLite at M6).
- Delivery Service: fan-out router, per-group append-only message log.
- Structured logging via `tracing`.

Before any edit:
1. Read the relevant source file(s) in full.
2. Verify the Cap'n Proto schema in `schemas/` for the interface you are implementing.
3. Check ROADMAP.md for what is in scope.

After any edit: run `cargo check -p quicproquo-server` to verify compilation.

Security requirements:
- No `.unwrap()` on any lock or I/O operation in production paths.
- Auth tokens validated before any privileged operation.
- `QPQ_PRODUCTION=true` check: reject weak/default tokens on startup.
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),
    "rust-client-dev": AgentDefinition(
        description=(
            "Implements quicproquo-client: CLI (clap), interactive REPL, Noise handshake, "
            "Cap'n Proto RPC client stubs, OPAQUE login/register, encrypted local state "
            "(SQLCipher + Argon2id), conversation and session management."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Client Developer** for quicproquo, responsible for the `quicproquo-client` crate.

Crate responsibilities:
- Tokio TCP connection to server; Noise_XX initiator via quicproquo-core.
- Cap'n Proto RPC client stubs.
- OPAQUE password-authenticated key exchange (register + login).
- CLI interface (clap) with subcommands and an interactive REPL.
- Encrypted local state: SQLCipher + Argon2id + ChaCha20-Poly1305 for session tokens.
- Conversation management, background polling, message history.

Before any edit:
1. Read the relevant source file(s) in full.
2. Understand existing command handlers in `commands.rs` and state management in `state.rs`.
3. Check ROADMAP.md for the current milestone scope.

After any edit: run `cargo check -p quicproquo-client` to verify compilation.

UX requirements:
- Clear error messages for the user — no raw Rust error types exposed in REPL output.
- REPL prompt must show current context (server, active conversation).
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),
    "security-auditor": AgentDefinition(
        description=(
            "Security-focused auditor for quicproquo. Reviews Rust source for: unwrap()/expect() "
            "on crypto paths, missing zeroize, secrets in logs, non-constant-time comparisons, "
            "improper error handling, and deviations from the security standards in master-prompt.md. "
            "Produces a prioritised finding report — does NOT edit files."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Security Auditor** for quicproquo. Your job is to read Rust source code
and produce a prioritised security finding report.

Audit checklist:
1. `.unwrap()` / `.expect()` in non-test code on crypto or I/O operations.
2. Key material types missing `Zeroize` / `ZeroizeOnDrop`.
3. Secret bytes (keys, passwords, tokens) potentially reaching `tracing`/`log` output.
4. Non-constant-time comparisons on authentication tags or tokens.
5. `panic!` / `unreachable!` in production paths.
6. `unsafe` blocks without documented safety invariants.
7. Missing `#[cfg(not(test))]` guards around debug-only logic.
8. Deviations from the engineering standards in master-prompt.md.
9. Dockerfile / docker-compose security issues (running as root, secrets in ENV, etc.).

Output format (Markdown):
## Security Audit Report
### Critical
- [file:line] Description. Remediation: ...
### High
- ...
### Medium
- ...
### Low / Informational
- ...

Do NOT edit any files. Findings only.
""",
        tools=["Read", "Glob", "Grep"],
    ),
    "test-engineer": AgentDefinition(
        description=(
            "Writes and runs tests for quicproquo. Adds unit tests, integration tests, "
            "and property-based tests. Runs `cargo test` and interprets failures. "
            "Knows the milestone-by-milestone test requirements from ROADMAP.md."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Test Engineer** for quicproquo.

Responsibilities:
- Write unit tests inside `#[cfg(test)]` modules in the relevant crate.
- Write integration tests in `crates//tests/`.
- Run `cargo test --workspace` and interpret failures.
- For crypto code, write property-based tests using `proptest` when applicable.
- Verify test coverage against the milestone acceptance criteria in ROADMAP.md.

Test naming convention: `test__` (snake_case).

After writing tests, run them with Bash and report:
- Which tests pass / fail.
- Root cause of any failure.
- Suggested fix (but do not edit non-test files without instruction).
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),
    "devops-engineer": AgentDefinition(
        description=(
            "Infrastructure and deployment engineer for quicproquo. Owns Docker, CI/CD "
            "(GitHub Actions), deployment configs, cross-compilation, monitoring setup, "
            "release automation, and binary size optimisation. Edits docker/, .github/, "
            "docker-compose.yml, and infrastructure scripts."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **DevOps Engineer** for quicproquo.

You own: `docker/`, `.github/`, `docker-compose.yml`, deployment configs, CI pipelines.

Responsibilities:
- Docker image builds: multi-stage, minimal final image, non-root user, security hardening.
- GitHub Actions CI: build matrix, test parallelism, caching, artifact publishing.
- Release automation: cargo-release workflow, CHANGELOG generation, version tagging.
- Cross-compilation: musl static builds for x86_64, armv7, aarch64 (OpenWrt targets).
- Monitoring: Prometheus metrics endpoint stub, health check endpoint.
- Infrastructure-as-code: docker-compose for dev/staging, systemd unit files.

Before any edit:
1. Read the target file in full.
2. Check ROADMAP.md Phase 1.3, 1.4, 2.3 for infrastructure items.
3. Test Docker builds with `docker build -f docker/Dockerfile .`

Quality gates:
- Docker image builds successfully.
- CI pipeline syntax is valid (check with `act --dryrun` if available).
- No secrets in Dockerfile ARG/ENV, no running as root in final stage.
- `.gitignore` covers all sensitive file patterns (*.der, *.pem, *.db, *.bin, *.ks).
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),
    "docs-engineer": AgentDefinition(
        description=(
            "Technical writer for quicproquo. Writes and maintains user guides, operator "
            "documentation, API references, architecture docs, SECURITY.md, CONTRIBUTING.md, "
            "and the mdBook site in docs/. Ensures all public APIs have doc comments. "
            "Edits docs/, README.md, and inline doc comments only."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Documentation Engineer** for quicproquo.

You own: `docs/`, `README.md`, `CONTRIBUTING.md`, `SECURITY.md`, and inline `///` doc comments
on public API items.

Documentation tiers (in priority order):
1. **User docs** — Getting started, installation, REPL commands, configuration reference.
2. **Operator docs** — Deployment guide (Docker, systemd), certificate setup, backup/restore,
   monitoring, operational runbook, troubleshooting.
3. **Developer docs** — Architecture overview, crate responsibilities, contribution guide,
   coding standards, testing guide, PR review checklist.
4. **Protocol docs** — Wire format reference, Cap'n Proto schema docs, MLS integration,
   Noise transport spec, federation protocol.
5. **Security docs** — Threat model, trust boundaries, key lifecycle, responsible disclosure
   policy, audit report summaries.

Before any edit:
1. Read the target file and any related source code to ensure accuracy.
2. Check the mdBook structure in `docs/book.toml` and `docs/src/SUMMARY.md`.
3. Verify code examples compile (`cargo test --doc` for inline examples).

Quality gates:
- `mdbook build docs/` succeeds without warnings.
- All internal links resolve (no broken cross-references).
- No stale information — verify claims against current source code.
- Spelling and grammar are correct.

Style:
- Write for an audience of experienced developers who may not know Rust.
- Use active voice, present tense.
- Include code examples where they clarify usage.
- Cross-reference related docs sections with relative links.
""",
        tools=["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
    ),
    "roadmap-tracker": AgentDefinition(
        description=(
            "Reads ROADMAP.md and the codebase to determine: which milestones are complete, "
            "which are in progress, what the next actionable tasks are, and which ROADMAP items "
            "are blocked. Produces a concise status report — does NOT edit files."
        ),
        prompt=f"""{PROJECT_CONTEXT}

You are the **Roadmap Tracker** for quicproquo. Your job is to read ROADMAP.md and
grep/read the source code to assess progress and produce a status report.

Steps:
1. Read ROADMAP.md in full.
2. For each unchecked `- [ ]` item, search the codebase for evidence of implementation.
3. Identify blockers (e.g. a later item depending on an incomplete earlier item).
4. Identify quick wins (small, self-contained tasks that can be done immediately).

Output format (Markdown):
## Roadmap Status Report
### Completed
- Phase X, item Y: ...
### In Progress
- Phase X, item Y: partial — what exists vs what's missing.
### Next Actionable Tasks (prioritised)
1. ...
2. ...
### Blockers
- ...

Do NOT edit any files. Analysis only.
""",
        tools=["Read", "Glob", "Grep"],
    ),
}

# ── Parallel sprint definitions ────────────────────────────────────────────────
# Each sprint is a list of (agent_name, task) pairs run concurrently.
# Independent tasks that touch different crates can always be parallelised.
# Tasks that depend on each other (e.g. audit after code changes) should be
# run as separate sprints.
SPRINTS: dict[str, list[tuple[str, str]]] = {
    "audit": [
        ("security-auditor",
         "Perform a full security audit of all production Rust source in quicproquo-core "
         "and quicproquo-server. Check every file for: .unwrap()/.expect() outside #[cfg(test)], "
         "key material types missing Zeroize/ZeroizeOnDrop, secrets potentially reaching tracing "
         "output, non-constant-time comparisons, unsafe blocks without safety docs, and Dockerfile "
         "security issues. Produce a prioritised finding report in Markdown."),
        ("roadmap-tracker",
         "Read ROADMAP.md and the full codebase. Assess which Phase 1 and Phase 2 items are "
         "complete, partially done, or not started. For each incomplete item search the source "
         "for relevant code. Produce a concise status report with prioritised next actions."),
    ],
    "phase1-hardening": [
        ("rust-server-dev",
         "Fix Phase 1.1: eliminate all .unwrap() and .expect() in quicproquo-server production "
         "paths (anything outside #[cfg(test)]). Read every .rs file in crates/quicproquo-server/src/. "
         "Replace each .unwrap() with proper ? propagation or map_err. Replace .expect() with "
         "a typed error or explicit match. Run `cargo check -p quicproquo-server` after each file. "
         "Also check Phase 1.2 (QPQ_PRODUCTION=true startup validation) and implement if missing."),
        ("rust-client-dev",
         "Fix Phase 1.1: eliminate all .unwrap() and .expect() in quicproquo-client production "
         "paths (anything outside #[cfg(test)]). Read every .rs file in crates/quicproquo-client/src/. "
         "Replace each .unwrap() with proper ? propagation or map_err. Replace .expect() with "
         "a typed error or explicit match. Run `cargo check -p quicproquo-client` after each file. "
         "Pay special attention to AUTH_CONTEXT.read().expect() and any Mutex::lock().unwrap() calls."),
        ("rust-core-dev",
         "Fix Phase 1.1: check quicproquo-core for any .unwrap()/.expect() in non-test code. "
         "Read all files in crates/quicproquo-core/src/. Replace any found instances with typed "
         "Result propagation. Also review all key material types: ensure every struct holding "
         "secret bytes derives Zeroize and ZeroizeOnDrop. Run `cargo check -p quicproquo-core`."),
    ],
    "phase2-tests": [
        ("test-engineer",
         "Implement Phase 2.1 E2E test coverage for auth failure scenarios. Add to "
         "crates/quicproquo-client/tests/e2e.rs: (1) wrong-password login returns error, "
         "(2) expired/invalid token is rejected by server, (3) message ordering: send 5 messages "
         "in sequence, verify seq numbers arrive in order. Read the existing e2e.rs first to "
         "match the test harness pattern (spawn_test_server, AUTH_LOCK). Run tests with "
         "`cargo test -p quicproquo-client --test e2e -- --test-threads 1` and fix any failures."),
        ("test-engineer",
         "Implement Phase 2.2 unit tests for untested paths. Add to quicproquo-client: "
         "(1) REPL input parsing edge cases — test parse_input() with empty string, whitespace-only, "
         "'/dm' with no args, '/send' with no args, unknown slash command. "
         "(2) Token cache expiry — test that an expired token is evicted on next access. "
         "Read repl.rs and token_cache.rs first to understand the APIs. "
         "Run `cargo test -p quicproquo-client` and fix any failures."),
    ],
    "phase1-infra": [
        ("rust-server-dev",
         "Fix Phase 1.3 and 1.4. "
         "1.3 — Check .gitignore at project root. Add missing entries: data/, *.der, *.pem, "
         "*.db, *.bin, *.ks, qpq-state.*, target/. Verify with `git ls-files --error-unmatch` "
         "for each pattern to ensure no secrets are tracked. "
         "1.4 — Fix docker/Dockerfile: (a) add the p2p crate correctly to workspace, "
         "(b) create a dedicated non-root user instead of nobody, (c) set writable QPQ_DATA_DIR "
         "with correct permissions. Test with `docker build -f docker/Dockerfile .`"),
        ("rust-architect",
         "Design the TLS certificate lifecycle for Phase 1.5. Read crates/quicproquo-server/src/tls.rs "
         "and config.rs in full. Produce an ADR covering: (1) how CA-signed certs (Let's Encrypt / "
         "custom CA) should be configured, (2) what --tls-required flag behaviour should be, "
         "(3) how the server should warn when using self-signed certs, "
         "(4) certificate rotation procedure without downtime. "
         "Output: ADR + concrete action items for rust-server-dev."),
    ],
    "status": [
        ("roadmap-tracker",
         "Full roadmap status report. Read ROADMAP.md completely. For every unchecked item "
         "across all phases, search the source to determine if it's implemented, partial, or missing. "
         "Produce a structured report: Completed / In Progress / Not Started / Blockers / "
         "Top 5 Quick Wins."),
        ("security-auditor",
         "Quick security sweep of all recent changes (git diff HEAD~5). Read the modified files "
         "in full. Focus on: any new .unwrap()/.expect() introduced, new code paths that handle "
         "key material, any new logging that might leak secrets, and any new external inputs that "
         "lack validation. Produce a concise finding report."),
    ],
    # ── Documentation sprints ─────────────────────────────────────────────────
    "docs-foundation": [
        ("docs-engineer",
         "Create a root-level SECURITY.md file for quicproquo. Include: "
         "(1) Responsible disclosure policy — where to report vulnerabilities (email, PGP key if available). "
         "(2) Scope — what's covered (server, client, core crypto, protocol). "
         "(3) Response timeline — acknowledge within 48h, triage within 7 days, fix within 30 days for critical. "
         "(4) Security contact — project maintainer contact info. "
         "(5) Out-of-scope — social engineering, DoS against test instances, etc. "
         "Read existing docs/SECURITY-AUDIT.md for context on known security posture. "
         "Keep it concise and professional. Follow the format used by major open-source crypto projects."),
        ("docs-engineer",
         "Create a root-level CONTRIBUTING.md file for quicproquo. Read the existing guidance in "
         "docs/src/contributing/coding-standards.md and docs/src/contributing/testing.md first. "
         "Include: (1) Development setup (Rust toolchain, Cap'n Proto compiler, SQLCipher). "
         "(2) Building the project (cargo build --workspace, feature flags). "
         "(3) Running tests (cargo test --workspace, E2E with --test-threads 1). "
         "(4) PR process (branch naming, conventional commits, review checklist). "
         "(5) Coding standards summary (link to full docs). "
         "(6) Security requirements for contributions (no unwrap on crypto, zeroize, etc). "
         "Keep it actionable — a new contributor should be able to submit a PR after reading this."),
        ("docs-engineer",
         "Write a comprehensive operator deployment guide at docs/src/getting-started/deployment.md. "
         "Read the existing docs/src/getting-started/ pages and docker/Dockerfile first. "
         "Cover: (1) Docker deployment (docker-compose, volume mounts, env vars). "
         "(2) Bare-metal deployment (systemd unit file example, user/group setup). "
         "(3) TLS certificate setup (self-signed for dev, Let's Encrypt for prod). "
         "(4) Configuration reference (all QPQ_* environment variables). "
         "(5) Backup and restore (SQLite/SQLCipher database, key material). "
         "(6) Monitoring (structured log output, health checks). "
         "(7) Troubleshooting common issues. "
         "Update docs/src/SUMMARY.md to include the new page if needed."),
        ("docs-engineer",
         "Audit all existing docs/src/ pages for accuracy against the current codebase. "
         "Read each .md file in docs/src/ and cross-reference claims against actual source code. "
         "Fix: (1) Stale API references (function names, struct names that changed). "
         "(2) Broken internal links between docs pages. "
         "(3) Outdated architecture descriptions (e.g. references to MessagePack, old ALPN strings). "
         "(4) Missing entries in docs/src/SUMMARY.md for pages that exist. "
         "Produce a list of all changes made and any issues you couldn't fix."),
    ],
    "docs-api": [
        ("docs-engineer",
         "Ensure every public API item in quicproquo-core has a doc comment (/// or //!). "
         "Read crates/quicproquo-core/src/lib.rs to find all pub exports. For each pub fn, "
         "pub struct, pub enum, and pub trait: check if it has a doc comment. If missing, "
         "read the implementation to understand what it does, then add a concise doc comment "
         "with: one-line summary, parameters, return value, errors, and a short example where "
         "appropriate. Run `cargo doc -p quicproquo-core --no-deps` to verify."),
        ("docs-engineer",
         "Document all Cap'n Proto schemas in schemas/. For each .capnp file (auth.capnp, "
         "delivery.capnp, federation.capnp, node.capnp): read the schema and the Rust "
         "implementation that uses it. Write or update docs/src/wire-format/ pages with: "
         "(1) Purpose of each interface. (2) Method signatures with parameter semantics. "
         "(3) Error conditions. (4) Example message flows (e.g. auth flow, message send flow). "
         "Ensure docs/src/wire-format/overview.md links to all sub-pages."),
    ],
    # ── Infrastructure sprints ────────────────────────────────────────────────
    "infra-hardening": [
        ("devops-engineer",
         "Fix the Dockerfile at docker/Dockerfile for production readiness. Read it first. "
         "Changes needed: (1) Create a dedicated non-root user 'qpq' (not nobody) with a "
         "specific UID/GID. (2) Set QPQ_DATA_DIR=/var/lib/qpq with correct ownership. "
         "(3) Handle the excluded p2p crate correctly in workspace build. "
         "(4) Add HEALTHCHECK instruction. (5) Use specific base image tags (not :latest). "
         "(6) Ensure COPY commands don't pull in .git, target/, logs/, or test data. "
         "Test with: docker build -f docker/Dockerfile ."),
        ("devops-engineer",
         "Harden .gitignore at project root. Read the current .gitignore first. Add missing "
         "patterns: data/, *.der, *.pem, *.db, *.db-shm, *.db-wal, *.bin, *.ks, "
         "qpq-state.*, logs/ai_team/, .env, .env.*, *.key. "
         "Verify no sensitive files are already tracked: run git ls-files for each pattern. "
         "If any are tracked, report them (do NOT remove from git without confirmation)."),
        ("devops-engineer",
         "Enhance CI pipeline at .github/workflows/ci.yml. Read it first. Add or verify: "
         "(1) cargo fmt check passes. (2) cargo clippy --workspace -- -D warnings. "
         "(3) cargo test --workspace (with --test-threads 1 for E2E). "
         "(4) cargo deny check runs on every PR. (5) cargo audit as blocking check. "
         "(6) Docker build validation job (docker build -f docker/Dockerfile .). "
         "(7) Rust cache action for faster builds. (8) Matrix for stable + nightly Rust. "
         "Also check .github/CODEOWNERS is correctly configured for crypto paths."),
    ],
    # ── Security sprints ──────────────────────────────────────────────────────
    "security-full": [
        ("security-auditor",
         "Perform a FULL security audit of the entire quicproquo codebase. Read every .rs file "
         "in crates/quicproquo-core/src/, crates/quicproquo-server/src/, and "
         "crates/quicproquo-client/src/. Check every file for ALL of: "
         "(1) .unwrap()/.expect() outside #[cfg(test)] on crypto, I/O, lock, or parse operations. "
         "(2) Key material types missing Zeroize/ZeroizeOnDrop. "
         "(3) Secret bytes (keys, passwords, tokens, nonces) potentially reaching tracing/log/println. "
         "(4) Non-constant-time comparisons on auth tags, tokens, MACs, or passwords. "
         "(5) panic!/unreachable! in production paths. "
         "(6) unsafe blocks without // SAFETY: documentation. "
         "(7) Missing input validation on RPC boundaries (data from network). "
         "(8) Race conditions in shared state (DashMap, Mutex, RwLock). "
         "(9) Replay attack vectors in message delivery. "
         "(10) Timing side channels in OPAQUE or token validation. "
         "Produce a prioritised finding report: Critical > High > Medium > Low > Informational. "
         "Each finding must include: file:line, description, attack scenario, remediation."),
        ("security-auditor",
         "Audit infrastructure security. Read docker/Dockerfile, docker-compose.yml, "
         ".github/workflows/ci.yml, and all files in scripts/. Check: "
         "(1) Dockerfile: running as root? secrets in ENV/ARG? base image pinned? "
         "(2) docker-compose: volumes expose host paths? ports exposed unnecessarily? "
         "(3) CI: secrets handled correctly? artifact permissions? supply chain attacks? "
         "(4) Scripts: command injection? path traversal? unsafe eval? "
         "(5) Dependencies: check deny.toml config, look for unmaintained/yanked crates. "
         "Produce a separate infrastructure security report."),
        ("security-auditor",
         "Review the threat model at docs/src/cryptography/threat-model.md against the current "
         "implementation. Read the threat model doc, then verify each claim: "
         "(1) Are the stated trust boundaries correctly implemented in code? "
         "(2) Does the OPAQUE flow match the documented auth model? "
         "(3) Is the Noise_XX handshake configured as documented (XX pattern, not IK/KK)? "
         "(4) Does the MLS integration follow RFC 9420 as claimed? "
         "(5) Is the hybrid KEM combiner implemented as documented (HKDF-SHA256 with correct info string)? "
         "(6) Are there attack vectors NOT covered by the threat model? "
         "Produce a threat model gap analysis report."),
    ],
    "security-review": [
        ("security-auditor",
         "Post-change security review. Read all modified files from the most recent work. "
         "Focus on: any new .unwrap()/.expect() introduced, new code paths handling key material, "
         "new logging that might leak secrets, new external inputs lacking validation, and "
         "any new unsafe blocks. Compare against the engineering standards in master-prompt.md. "
         "Produce a concise pass/fail report with findings."),
        ("roadmap-tracker",
         "Quick progress check after recent changes. Read ROADMAP.md and check which Phase 1 "
         "and Phase 2 items have been completed by the recent work. Update the status report "
         "with: items newly completed, items still in progress, next priorities."),
    ],
    # ── Release preparation ───────────────────────────────────────────────────
    "release-prep": [
        ("devops-engineer",
         "Prepare release infrastructure. Read Cargo.toml (workspace root) and all crate "
         "Cargo.toml files. (1) Verify version numbers are consistent across all crates. "
         "(2) Create or update CHANGELOG.md at project root — read git log for recent commits "
         "and categorise by: Added, Changed, Fixed, Security. Follow keepachangelog.com format. "
         "(3) Verify docker/Dockerfile builds successfully with release profile. "
         "(4) Check that cargo package -p quicproquo-server would succeed (dry run). "
         "(5) Verify .github/workflows/ci.yml has a release/tag-triggered job if applicable."),
        ("docs-engineer",
         "Final documentation review for release readiness. "
         "(1) Verify README.md: feature matrix matches actual implementation, quick start "
         "instructions work, crate layout is accurate, all badges are correct. "
         "(2) Verify docs/src/getting-started/ pages are up to date. "
         "(3) Check that SECURITY.md and CONTRIBUTING.md exist and are accurate. "
         "(4) Run mdbook build docs/ and verify no warnings. "
         "(5) Produce a docs readiness report: pass/fail with specific issues found."),
        ("roadmap-tracker",
         "Final pre-release status report. Read ROADMAP.md completely. Classify every item as: "
         "Complete (implemented + tested), Deferred (not blocking release), or Blocking (must fix "
         "before release). Focus on Phase 1 (Production Hardening) — all items must be Complete "
         "or have documented mitigations. Produce a release readiness assessment."),
    ],
}

# ── Production readiness pipeline ─────────────────────────────────────────────
# Ordered list of sprints that form the full production readiness path.
# Each sprint must pass its quality gate before the next begins.
# Sprints within a step run in parallel; steps run sequentially.
# Each entry is (sprint_name, human-readable description of the step).
PIPELINE: list[tuple[str, str]] = [
    ("status", "Baseline: assess current state and recent security posture"),
    ("audit", "Deep dive: full security audit + detailed roadmap analysis"),
    ("phase1-hardening", "Code: eliminate crash paths across all crates (parallel by crate)"),
    ("phase1-infra", "Infra: fix Dockerfile, .gitignore, design TLS lifecycle"),
    ("infra-hardening", "Infra: CI hardening, Docker production config, .gitignore completion"),
    ("phase2-tests", "Tests: E2E coverage, unit tests for untested paths"),
    ("docs-foundation", "Docs: SECURITY.md, CONTRIBUTING.md, deployment guide, accuracy audit"),
    ("docs-api", "Docs: public API doc comments, Cap'n Proto schema documentation"),
    ("security-full", "Security: comprehensive audit of all code + infra + threat model"),
    ("security-review", "Gate: post-change security review + progress check"),
    ("release-prep", "Release: changelog, version consistency, final docs review"),
]

# ── Orchestrator system prompt ─────────────────────────────────────────────────
ORCHESTRATOR_PROMPT = f"""{PROJECT_CONTEXT}

You are the **Orchestrator** for the quicproquo AI development team.

Your team of specialist subagents:

| Agent | Role | Edits? |
|-------|------|--------|
| rust-architect | Architecture design, ADRs, design reviews | No |
| rust-core-dev | quicproquo-core: crypto, MLS, Noise codec | Yes |
| rust-server-dev | quicproquo-server: AS, DS, RPC server | Yes |
| rust-client-dev | quicproquo-client: CLI, REPL, local state | Yes |
| security-auditor | Security review: code, infra, threat model | No |
| test-engineer | Unit, integration, E2E tests | Yes (tests) |
| devops-engineer | Docker, CI/CD, deployment, monitoring | Yes (infra) |
| docs-engineer | User/operator/developer/protocol docs | Yes (docs) |
| roadmap-tracker | Roadmap progress assessment | No |

Parallelisation rules:
- Agents that own DIFFERENT crates or concern areas can run in parallel.
- rust-core-dev, rust-server-dev, rust-client-dev ALWAYS run in parallel (different crates).
- security-auditor runs AFTER code-changing agents complete (reads their output).
- test-engineer runs AFTER code-changing agents complete (tests their changes).
- docs-engineer and devops-engineer can run in parallel with each other and with dev agents.
- roadmap-tracker can run in parallel with anything (read-only).

Workflow:
1. Read the task carefully.
2. Decide which agent(s) are needed. For multi-step tasks, sequence them logically.
3. Maximise parallelism: launch agents that touch different files simultaneously.
4. Call each required agent with a precise, scoped prompt.
5. Synthesise the agents' outputs into a final report or code deliverable.
6. Always end with: "Next suggested task: ..." based on the ROADMAP.

Rules:
- Read master-prompt.md and ROADMAP.md before delegating significant tasks.
- Do NOT delegate everything to one agent — split by crate/concern.
- If a task touches security, always invoke security-auditor AFTER code changes.
- If a task adds/modifies functionality, always invoke test-engineer LAST.
- docs-engineer and devops-engineer work independently — launch them in parallel.
- Keep your synthesis concise — prefer structured output (headers, bullet lists).
"""

# ── Parallel runner ────────────────────────────────────────────────────────────
async def run_agent_to_file(
    agent_name: str,
    task: str,
    max_turns: int,
    output_dir: Path,
    label: str,
) -> tuple[str, str, str | None]:
    """
    Run a single agent and stream its result to an output file.

    Looks up `agent_name` in AGENTS, runs `task` through the SDK `query()` with
    the agent's own system prompt and tool set, and writes a Markdown transcript
    header plus the final result to `output_dir/<label>.md`.

    Returns (agent_name, label, result_text_or_None).
    `result_text` is None if the agent produced no ResultMessage.
    """
    output_file = output_dir / f"{label}.md"
    result_text: str | None = None
    agent = AGENTS[agent_name]
    options = ClaudeAgentOptions(
        cwd=PROJECT_ROOT,
        # Fall back to read-only tools if the definition specifies none.
        allowed_tools=agent.tools or ["Read", "Glob", "Grep"],
        system_prompt=agent.prompt,
        max_turns=max_turns,
        permission_mode="acceptEdits",
        setting_sources=["project"],
    )
    with open(output_file, "w") as f:
        f.write(f"# Agent: {agent_name}\n\n")
        f.write(f"**Task:** {task}\n\n")
        f.write(f"**Started:** {datetime.now().isoformat()}\n\n---\n\n")
        # Only the final ResultMessage is persisted; intermediate messages
        # (assistant turns, tool calls) are not written to the log file.
        async for message in query(prompt=task, options=options):
            if isinstance(message, ResultMessage):
                result_text = message.result
                f.write(f"## Result\n\n{result_text}\n")
        # Written even when no ResultMessage arrived, so the log always
        # records when the run ended.
        f.write(f"\n**Finished:** {datetime.now().isoformat()}\n")
    return agent_name, label, result_text


async def run_parallel(
    agent_tasks: list[tuple[str, str]],
    max_turns: int,
    verbose: bool,
    sprint_name: str = "custom",
) -> None:
    """Launch all (agent, task) pairs concurrently and print a summary when done."""
    # One timestamped log directory per sprint run under logs/ai_team/.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = Path(PROJECT_ROOT) / "logs" / "ai_team" / f"{sprint_name}_{timestamp}"
    output_dir.mkdir(parents=True, exist_ok=True)
    print(f"\n{'═' * 70}")
    print(f"  quicproquo AI Team — Parallel Sprint: {sprint_name}")
    print(f"  Agents: {len(agent_tasks)} | Max turns each: {max_turns}")
    print(f"  Logs: {output_dir}/")
    print(f"{'═' * 70}\n")
    for i, (agent, task) in enumerate(agent_tasks, 1):
        # Labels are 1-based and zero-padded so log files sort in launch order.
        label = f"{i:02d}_{agent}"
        print(f"  [{i}] {agent}")
        # Truncate long task descriptions to keep the banner readable.
        print(f"      {task[:80]}{'…' if len(task) > 80 else ''}")
        print()
    # Build coroutines with stable labels for output files.
coros = [ run_agent_to_file(agent, task, max_turns, output_dir, f"{i:02d}_{agent}") for i, (agent, task) in enumerate(agent_tasks, 1) ] print(f" Starting {len(coros)} agents in parallel…\n") results = await asyncio.gather(*coros, return_exceptions=True) print(f"\n{'─' * 70}") print(" SPRINT RESULTS") print(f"{'─' * 70}") success = 0 for result in results: if isinstance(result, Exception): print(f"\n ❌ ERROR: {result}") else: agent_name, label, text = result if text is not None: success += 1 print(f"\n ✅ {agent_name} ({label}.md)") # Show first 300 chars of result as a preview. preview = text.strip()[:300] for line in preview.splitlines(): print(f" {line}") if len(text.strip()) > 300: print(" …") else: print(f"\n ⚠️ {agent_name}: no result produced") print(f"\n {success}/{len(agent_tasks)} agents completed successfully.") print(f" Full outputs: {output_dir}/\n") # ── Sequential runners ───────────────────────────────────────────────────────── async def run_orchestrator(task: str, max_turns: int, verbose: bool) -> None: """Run the full team via the orchestrator.""" print(f"\n{'═' * 70}") print(f" quicproquo AI Team — Orchestrator") print(f" Task: {task[:72]}{'…' if len(task) > 72 else ''}") print(f"{'═' * 70}\n") options = ClaudeAgentOptions( cwd=PROJECT_ROOT, allowed_tools=["Read", "Glob", "Grep", "Agent"], system_prompt=ORCHESTRATOR_PROMPT, agents=AGENTS, max_turns=max_turns, permission_mode="acceptEdits", setting_sources=["project"], ) async for message in query(prompt=task, options=options): if isinstance(message, ResultMessage): print("\n" + "─" * 70) print("RESULT") print("─" * 70) print(message.result) elif verbose: if isinstance(message, AssistantMessage): for block in message.content: if isinstance(block, TextBlock) and block.text.strip(): print(block.text, end="", flush=True) elif isinstance(message, SystemMessage) and message.subtype == "init": print(f"[Session: {message.session_id}]") async def run_single_agent( agent_name: str, task: str, max_turns: int, 
verbose: bool ) -> None: """Bypass the orchestrator and run a single specialist agent directly.""" agent = AGENTS[agent_name] print(f"\n{'═' * 70}") print(f" quicproquo AI Team — {agent_name}") print(f" Task: {task[:72]}{'…' if len(task) > 72 else ''}") print(f"{'═' * 70}\n") options = ClaudeAgentOptions( cwd=PROJECT_ROOT, allowed_tools=agent.tools or ["Read", "Glob", "Grep"], system_prompt=agent.prompt, max_turns=max_turns, permission_mode="acceptEdits", setting_sources=["project"], ) async for message in query(prompt=task, options=options): if isinstance(message, ResultMessage): print("\n" + "─" * 70) print("RESULT") print("─" * 70) print(message.result) elif verbose: if isinstance(message, AssistantMessage): for block in message.content: if isinstance(block, TextBlock) and block.text.strip(): print(block.text, end="", flush=True) # ── CLI ──────────────────────────────────────────────────────────────────────── def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( prog="ai_team", description="quicproquo multi-agent Claude team", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, ) parser.add_argument( "task", nargs="?", help="Task description for the orchestrator", ) parser.add_argument( "--agent", "-a", choices=list(AGENTS.keys()), default=None, help="Bypass orchestrator and send task directly to a specific agent", ) parser.add_argument( "--sprint", "-s", choices=list(SPRINTS.keys()), default=None, metavar="SPRINT", help="Run a predefined parallel sprint (see --list-sprints)", ) parser.add_argument( "--parallel", "-p", nargs="+", metavar="AGENT:TASK", default=None, help=( 'Ad-hoc parallel run. Each argument is "agent-name: task description". 
' 'Example: --parallel "rust-server-dev: Fix unwrap() in server" ' '"security-auditor: Audit core crate"' ), ) parser.add_argument( "--list-agents", "-l", action="store_true", help="List available agents and exit", ) parser.add_argument( "--list-sprints", action="store_true", help="List predefined sprints and exit", ) parser.add_argument( "--pipeline", action="store_true", help="Run the full production readiness pipeline (all sprints in dependency order)", ) parser.add_argument( "--pipeline-from", metavar="SPRINT", default=None, help="Start the pipeline from a specific sprint (skip earlier steps)", ) parser.add_argument( "--max-turns", type=int, default=60, help="Maximum agentic turns per agent (default: 60)", ) parser.add_argument( "--verbose", "-v", action="store_true", help="Print all message types (not just results)", ) return parser def list_agents() -> None: print("Available agents:\n") for name, defn in AGENTS.items(): print(f" {name}") desc = defn.description wrapped = "\n ".join( desc[i : i + 72] for i in range(0, len(desc), 72) ) print(f" {wrapped}\n") def list_sprints() -> None: print("Predefined sprints:\n") for name, tasks in SPRINTS.items(): print(f" {name} ({len(tasks)} agents in parallel)") for agent, task in tasks: preview = task[:60] + ("…" if len(task) > 60 else "") print(f" [{agent}] {preview}") print() print("Production readiness pipeline (--pipeline):\n") for i, (sprint_name, description) in enumerate(PIPELINE, 1): count = len(SPRINTS[sprint_name]) print(f" {i:2d}. {sprint_name:<20s} {count} agent(s) — {description}") print() def parse_parallel_args(args: list[str]) -> list[tuple[str, str]]: """ Parse --parallel arguments of the form "agent-name: task description". The colon after the agent name is required. 
""" pairs: list[tuple[str, str]] = [] valid = set(AGENTS.keys()) for arg in args: if ":" not in arg: print(f"ERROR: --parallel argument missing colon separator: {arg!r}") print(" Expected format: \"agent-name: task description\"") sys.exit(1) agent, _, task = arg.partition(":") agent = agent.strip() task = task.strip() if agent not in valid: print(f"ERROR: unknown agent {agent!r}. Valid: {', '.join(sorted(valid))}") sys.exit(1) if not task: print(f"ERROR: empty task for agent {agent!r}") sys.exit(1) pairs.append((agent, task)) return pairs # ── Pipeline runner ──────────────────────────────────────────────────────────── async def run_pipeline( max_turns: int, verbose: bool, start_from: str | None = None, ) -> None: """ Run the full production readiness pipeline: all sprints in dependency order. Each sprint runs its agents in parallel. Sprints run sequentially because later sprints depend on earlier ones (e.g. security-review after code changes). If start_from is set, skip all sprints before that one. """ pipeline = list(PIPELINE) if start_from: names = [name for name, _ in PIPELINE] if start_from not in names: print(f"ERROR: unknown sprint {start_from!r} in pipeline.") print(f" Valid: {', '.join(names)}") sys.exit(1) idx = names.index(start_from) pipeline = pipeline[idx:] print(f"\n Skipping {idx} sprint(s), starting from: {start_from}\n") total = len(pipeline) print(f"\n{'=' * 70}") print(f" quicproquo AI Team — Production Readiness Pipeline") print(f" Steps: {total} | Max turns per agent: {max_turns}") print(f"{'=' * 70}") for i, (name, desc) in enumerate(pipeline, 1): count = len(SPRINTS[name]) print(f" {i:2d}. 
[{name}] {count} agent(s) — {desc}") print(f"{'=' * 70}\n") for step, (sprint_name, description) in enumerate(pipeline, 1): print(f"\n{'#' * 70}") print(f" PIPELINE STEP {step}/{total}: {sprint_name}") print(f" {description}") print(f"{'#' * 70}\n") agent_tasks = SPRINTS[sprint_name] await run_parallel( agent_tasks, max_turns, verbose, sprint_name=sprint_name ) if step < total: print(f"\n Step {step}/{total} complete. Proceeding to next step...\n") print(f"\n{'=' * 70}") print(f" PIPELINE COMPLETE — {total} steps executed") print(f" Review outputs in: logs/ai_team/") print(f"{'=' * 70}\n") # ── Entry point ──────────────────────────────────────────────────────────────── async def main() -> None: parser = build_parser() args = parser.parse_args() if args.list_agents: list_agents() return if args.list_sprints: list_sprints() return if not os.environ.get("ANTHROPIC_API_KEY"): print("ERROR: ANTHROPIC_API_KEY environment variable is not set.") sys.exit(1) try: if args.pipeline or args.pipeline_from: await run_pipeline( args.max_turns, args.verbose, start_from=args.pipeline_from ) elif args.sprint: agent_tasks = SPRINTS[args.sprint] await run_parallel( agent_tasks, args.max_turns, args.verbose, sprint_name=args.sprint ) elif args.parallel: agent_tasks = parse_parallel_args(args.parallel) await run_parallel( agent_tasks, args.max_turns, args.verbose, sprint_name="custom" ) elif args.agent: if not args.task: print("ERROR: --agent requires a task argument.") sys.exit(1) await run_single_agent( args.agent, args.task, args.max_turns, args.verbose ) elif args.task: await run_orchestrator(args.task, args.max_turns, args.verbose) else: parser.print_help() sys.exit(1) except CLINotFoundError: print( "\nERROR: Claude Code CLI not found.\n" "Install with: pip install claude-agent-sdk" ) sys.exit(1) except CLIConnectionError as e: print(f"\nERROR: Connection error: {e}") sys.exit(1) except KeyboardInterrupt: print("\n\nInterrupted.") sys.exit(0) if __name__ == "__main__": 
asyncio.run(main())