Files
quicproquo/docker-compose.prod.yml
Christian Nennemann 91c5495ab7 docs: add operational runbook, Grafana dashboard, and production docker-compose
Add comprehensive operational documentation:
- docs/operations/backup-restore.md: SQLCipher, file backend, blob backup/restore
- docs/operations/key-rotation.md: auth token, TLS, federation, DB key, OPAQUE rotation
- docs/operations/incident-response.md: playbook for common incidents
- docs/operations/scaling-guide.md: resource sizing, scaling triggers, capacity planning
- docs/operations/monitoring.md: Prometheus metrics, alert rules, log monitoring
- docs/operations/dashboards/qpq-overview.json: Grafana dashboard template
- docs/operations/prometheus.yml + alerts: Prometheus scrape and alert config
- docs/operations/grafana-provisioning/: auto-provisioning for datasources and dashboards
- docker-compose.prod.yml: production stack (server + Prometheus + Grafana)
- .env.example: documented environment variable template
2026-03-04 20:30:57 +01:00

114 lines
3.6 KiB
YAML

# Production Docker Compose for quicproquo
#
# Usage:
#   1. Copy .env.example to .env and fill in secrets
#   2. Place the TLS certificate and key in ./certs/ as
#      server-cert.der and server-key.der
#   3. docker compose -f docker-compose.prod.yml up -d
#
# Prerequisites:
#   - TLS certificate and key in DER format (no auto-generation in production)
#   - Strong auth token (>= 16 characters), supplied as QPQ_AUTH_TOKEN
#   - Database encryption key, supplied as QPQ_DB_KEY
# Single user-defined bridge network; every service in this file attaches to it.
networks:
  qpq:
    driver: bridge
# Named volumes holding each service's persistent state.
# `{}` means "no extra options"; it is written explicitly instead of a bare key.
volumes:
  qpq-data: {}         # mounted at /var/lib/quicproquo by the server
  prometheus-data: {}  # mounted at /prometheus by Prometheus
  grafana-data: {}     # mounted at /var/lib/grafana by Grafana
services:
# ── quicproquo server ────────────────────────────────────────────────────────
  # Built from the local Dockerfile. Publishes the QUIC port (UDP) and the
  # WebSocket bridge port; the metrics port (9090) is NOT published to the
  # host and is only reachable from other containers on the `qpq` network.
  server:
    build:
      context: .
      dockerfile: docker/Dockerfile
    restart: unless-stopped
    ports:
      - "${QPQ_LISTEN_PORT:-7000}:7000/udp"  # QUIC
      # WebSocket bridge (optional). The host port is published even when
      # QPQ_WS_LISTEN is left empty.
      - "${QPQ_WS_PORT:-9000}:9000"
    environment:
      RUST_LOG: info
      QPQ_PRODUCTION: "true"
      QPQ_LISTEN: "0.0.0.0:7000"
      QPQ_DATA_DIR: /var/lib/quicproquo
      # Paths inside the read-only ./certs bind mount declared below.
      QPQ_TLS_CERT: /var/lib/quicproquo/certs/server-cert.der
      QPQ_TLS_KEY: /var/lib/quicproquo/certs/server-key.der
      # Required secrets: `:?` makes `docker compose` abort with the given
      # message when the variable is unset or empty, instead of silently
      # passing an empty string to the server.
      QPQ_AUTH_TOKEN: "${QPQ_AUTH_TOKEN:?set QPQ_AUTH_TOKEN in .env}"
      QPQ_STORE_BACKEND: sql
      QPQ_DB_PATH: /var/lib/quicproquo/qpq.db
      QPQ_DB_KEY: "${QPQ_DB_KEY:?set QPQ_DB_KEY in .env}"
      QPQ_METRICS_LISTEN: "0.0.0.0:9090"
      QPQ_METRICS_ENABLED: "true"
      # Optional toggles; the value after `:-` is used when the variable is
      # unset or empty.
      QPQ_SEALED_SENDER: "${QPQ_SEALED_SENDER:-false}"
      QPQ_REDACT_LOGS: "${QPQ_REDACT_LOGS:-true}"
      # NOTE(review): an empty value presumably leaves the WebSocket bridge
      # disabled; confirm against the server's configuration handling.
      QPQ_WS_LISTEN: "${QPQ_WS_LISTEN:-}"
    volumes:
      - qpq-data:/var/lib/quicproquo
      # Host certificates are mounted read-only on top of the data volume.
      - ./certs:/var/lib/quicproquo/certs:ro
    networks:
      - qpq
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 4G
        reservations:
          cpus: '2'
          memory: 1G
    ulimits:
      nofile:
        soft: 65536
        hard: 65536
    healthcheck:
      # NOTE(review): this only checks that the certificate file is present
      # in the container; it does not probe whether the server is actually
      # accepting connections.
      test: ["CMD", "test", "-f", "/var/lib/quicproquo/certs/server-cert.der"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    logging:
      # Rotate container logs: at most 5 files of 50 MB each.
      driver: json-file
      options:
        max-size: "50m"
        max-file: "5"
# ── Prometheus ───────────────────────────────────────────────────────────────
  # Metrics store. The web UI/API is published on the host loopback interface
  # only (127.0.0.1:9091 -> container port 9090).
  prometheus:
    # The tag can be pinned through PROMETHEUS_TAG for reproducible
    # deployments; when the variable is unset the previous behaviour
    # (`latest`) is kept.
    image: "prom/prometheus:${PROMETHEUS_TAG:-latest}"
    restart: unless-stopped
    ports:
      - "127.0.0.1:9091:9090"
    volumes:
      - prometheus-data:/prometheus
      # Scrape and alert configuration are mounted read-only from the repo.
      - ./docs/operations/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./docs/operations/prometheus-alerts.yml:/etc/prometheus/alerts.yml:ro
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      # Enables the HTTP reload/quit endpoints. They are reachable by any
      # container on the `qpq` network as well as from the host loopback.
      - '--web.enable-lifecycle'
    networks:
      - qpq
    depends_on:
      # Short form: only orders start-up, does not wait for a healthy server.
      - server
    logging:
      # Same rotation policy as the server service, so container logs
      # cannot grow without bound.
      driver: json-file
      options:
        max-size: "50m"
        max-file: "5"
# ── Grafana ──────────────────────────────────────────────────────────────────
  # Dashboards. The UI is published on the host loopback interface only
  # (127.0.0.1:3000).
  grafana:
    # The tag can be pinned through GRAFANA_TAG for reproducible deployments;
    # when the variable is unset the previous behaviour (`latest`) is kept.
    image: "grafana/grafana:${GRAFANA_TAG:-latest}"
    restart: unless-stopped
    ports:
      - "127.0.0.1:3000:3000"
    environment:
      # Required: `:?` aborts `docker compose` when the variable is unset or
      # empty, so a production stack can no longer come up with a well-known
      # fallback admin password.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?set GRAFANA_ADMIN_PASSWORD in .env}"
      GF_USERS_ALLOW_SIGN_UP: "false"
    volumes:
      - grafana-data:/var/lib/grafana
      # Dashboard JSON and provisioning config are mounted read-only from
      # the repo.
      - ./docs/operations/dashboards:/var/lib/grafana/dashboards:ro
      - ./docs/operations/grafana-provisioning:/etc/grafana/provisioning:ro
    networks:
      - qpq
    depends_on:
      # Short form: only orders start-up after the prometheus service.
      - prometheus
    logging:
      # Same rotation policy as the server service, so container logs
      # cannot grow without bound.
      driver: json-file
      options:
        max-size: "50m"
        max-file: "5"