Add comprehensive operational documentation: - docs/operations/backup-restore.md: SQLCipher, file backend, blob backup/restore - docs/operations/key-rotation.md: auth token, TLS, federation, DB key, OPAQUE rotation - docs/operations/incident-response.md: playbook for common incidents - docs/operations/scaling-guide.md: resource sizing, scaling triggers, capacity planning - docs/operations/monitoring.md: Prometheus metrics, alert rules, log monitoring - docs/operations/dashboards/qpq-overview.json: Grafana dashboard template - docs/operations/prometheus.yml + alerts: Prometheus scrape and alert config - docs/operations/grafana-provisioning/: auto-provisioning for datasources and dashboards - docker-compose.prod.yml: production stack (server + Prometheus + Grafana) - .env.example: documented environment variable template
114 lines
3.6 KiB
YAML
114 lines
3.6 KiB
YAML
# Production Docker Compose for quicproquo
|
|
#
|
|
# Usage:
|
|
# 1. Copy .env.example to .env and fill in secrets
|
|
# 2. Place the TLS certificate and key in ./certs/ as server-cert.der and server-key.der
|
|
# 3. docker compose -f docker-compose.prod.yml up -d
|
|
#
|
|
# Prerequisites:
|
|
# - TLS certificate and key in DER format (no auto-generation in production)
|
|
# - Strong auth token (>= 16 characters)
|
|
# - Database encryption key
|
|
|
|
# Single bridge network that the server, prometheus, and grafana services join.
networks:
  qpq:
    driver: bridge
|
|
|
|
# Named volumes with default settings; each one backs a service data directory.
volumes:
  # Mounted at /var/lib/quicproquo in the server service.
  qpq-data: {}
  # Mounted at /prometheus in the prometheus service.
  prometheus-data: {}
  # Mounted at /var/lib/grafana in the grafana service.
  grafana-data: {}
|
|
|
|
# Production stack: the quicproquo server plus Prometheus and Grafana.
# All three services are attached to the `qpq` network.
services:
  # ── quicproquo server ────────────────────────────────────────────────────────
  server:
    build:
      context: .
      dockerfile: docker/Dockerfile
    restart: unless-stopped
    ports:
      - "${QPQ_LISTEN_PORT:-7000}:7000/udp"  # QUIC
      # NOTE(review): this port is published even when QPQ_WS_LISTEN is left
      # empty; presumably the bridge is then disabled — confirm, and drop the
      # mapping if the WebSocket bridge is not used.
      - "${QPQ_WS_PORT:-9000}:9000"  # WebSocket bridge (optional)
    environment:
      RUST_LOG: info
      QPQ_PRODUCTION: "true"
      QPQ_LISTEN: "0.0.0.0:7000"
      QPQ_DATA_DIR: /var/lib/quicproquo
      # Certificate and key are read from the read-only ./certs bind mount.
      QPQ_TLS_CERT: /var/lib/quicproquo/certs/server-cert.der
      QPQ_TLS_KEY: /var/lib/quicproquo/certs/server-key.der
      # Required secrets: `${VAR:?message}` makes Compose abort with the
      # message when the variable is unset or empty, instead of silently
      # passing an empty string to the container.
      QPQ_AUTH_TOKEN: "${QPQ_AUTH_TOKEN:?QPQ_AUTH_TOKEN must be set (see .env.example)}"
      QPQ_STORE_BACKEND: sql
      QPQ_DB_PATH: /var/lib/quicproquo/qpq.db
      QPQ_DB_KEY: "${QPQ_DB_KEY:?QPQ_DB_KEY must be set (see .env.example)}"
      # Port 9090 is not published to the host, so the metrics listener is
      # reachable only from other containers on the qpq network.
      QPQ_METRICS_LISTEN: "0.0.0.0:9090"
      QPQ_METRICS_ENABLED: "true"
      QPQ_SEALED_SENDER: "${QPQ_SEALED_SENDER:-false}"
      QPQ_REDACT_LOGS: "${QPQ_REDACT_LOGS:-true}"
      QPQ_WS_LISTEN: "${QPQ_WS_LISTEN:-}"
    volumes:
      - qpq-data:/var/lib/quicproquo
      - ./certs:/var/lib/quicproquo/certs:ro
    networks:
      - qpq
    deploy:
      resources:
        limits:
          cpus: '4'
          memory: 4G
        reservations:
          cpus: '2'
          memory: 1G
    ulimits:
      nofile:
        soft: 65536
        hard: 65536
    # NOTE(review): this only verifies that the certificate file is mounted;
    # it does not prove the server process is running or accepting
    # connections. Replace with a real liveness probe if the image offers one.
    healthcheck:
      test: ["CMD", "test", "-f", "/var/lib/quicproquo/certs/server-cert.der"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 10s
    # Cap container log growth: at most 5 files of 50 MB each.
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "5"

  # ── Prometheus ───────────────────────────────────────────────────────────────
  prometheus:
    # Defaults to `latest` for backward compatibility; set
    # PROMETHEUS_IMAGE_TAG in .env to pin a release for reproducible deploys.
    image: "prom/prometheus:${PROMETHEUS_IMAGE_TAG:-latest}"
    restart: unless-stopped
    ports:
      # Published on the host loopback interface only.
      - "127.0.0.1:9091:9090"
    volumes:
      - prometheus-data:/prometheus
      - ./docs/operations/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./docs/operations/prometheus-alerts.yml:/etc/prometheus/alerts.yml:ro
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      # Enables the HTTP reload/quit endpoints; keep the port loopback-only.
      - '--web.enable-lifecycle'
    networks:
      - qpq
    depends_on:
      - server

  # ── Grafana ──────────────────────────────────────────────────────────────────
  grafana:
    # Defaults to `latest` for backward compatibility; set GRAFANA_IMAGE_TAG
    # in .env to pin a release for reproducible deploys.
    image: "grafana/grafana:${GRAFANA_IMAGE_TAG:-latest}"
    restart: unless-stopped
    ports:
      # Published on the host loopback interface only.
      - "127.0.0.1:3000:3000"
    environment:
      # No fallback password: Compose aborts when GRAFANA_ADMIN_PASSWORD is
      # unset or empty rather than starting with a well-known default.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set (see .env.example)}"
      GF_USERS_ALLOW_SIGN_UP: "false"
    volumes:
      - grafana-data:/var/lib/grafana
      - ./docs/operations/dashboards:/var/lib/grafana/dashboards:ro
      - ./docs/operations/grafana-provisioning:/etc/grafana/provisioning:ro
    networks:
      - qpq
    depends_on:
      - prometheus
|