From d1ddef4cea3df34f14bd062a31549fa5a7264641 Mon Sep 17 00:00:00 2001 From: Christian Nennemann Date: Sun, 22 Feb 2026 06:31:16 +0100 Subject: [PATCH] Add channel-aware delivery and update roadmap --- M3_STATUS.md | 14 ++- README.md | 99 +++++++++++++-------- ROADMAP_WBS.md | 57 ++++++++++++ crates/quicnprotochat-client/src/main.rs | 22 ++--- crates/quicnprotochat-server/src/main.rs | 74 +++++++++++++-- crates/quicnprotochat-server/src/storage.rs | 72 ++++++++++++--- design/DM_CHANNELS.md | 38 ++++++++ design/TECHNOLOGY_SUGGESTIONS.md | 57 ++++++++++++ docker-compose.yml | 8 +- schemas/delivery.capnp | 9 +- schemas/node.capnp | 9 +- 11 files changed, 374 insertions(+), 85 deletions(-) create mode 100644 ROADMAP_WBS.md create mode 100644 design/DM_CHANNELS.md create mode 100644 design/TECHNOLOGY_SUGGESTIONS.md diff --git a/M3_STATUS.md b/M3_STATUS.md index d8a3824..2741b08 100644 --- a/M3_STATUS.md +++ b/M3_STATUS.md @@ -8,7 +8,7 @@ ## What is M3? M3 adds: -1. **Delivery Service (DS)** — store-and-forward relay for MLS messages (Cap'n Proto RPC on port 7001) +1. **Delivery Service (DS)** — store-and-forward relay for MLS messages (Cap'n Proto RPC on the unified NodeService endpoint) 2. **MLS Group Lifecycle** — `GroupMember` struct: create group, add member (Welcome), join group, send/receive encrypted application messages --- @@ -47,14 +47,10 @@ Exposes `pub mod delivery_capnp`. Exposes `pub use group::GroupMember`. ### `quicnprotochat-server/src/main.rs` ✅ -Two listeners on one `LocalSet`: -- Port 7000 (AS): `AuthServiceImpl` — unchanged from M2 -- Port 7001 (DS): `DeliveryServiceImpl` — new; uses `DashMap, VecDeque>>` keyed by Ed25519 public key - -New CLI flag: `--ds-listen` (default `0.0.0.0:7001`, env `QUICNPROTOCHAT_DS_LISTEN`). +Unified NodeService listener (Auth + Delivery) on one QUIC/TLS endpoint; uses `DashMap, VecDeque>>` keyed by Ed25519 public key. 
### `quicnprotochat-client/src/main.rs` ✅ -Added `demo-group` subcommand to exercise the full Alice↔Bob MLS flow against live AS (7000) and DS (7001): uploads both KeyPackages, delivers Welcome via DS, and exchanges application messages. +Added `demo-group` subcommand to exercise the full Alice↔Bob MLS flow against live NodeService (4201): uploads both KeyPackages, delivers Welcome, and exchanges application messages. ### `quicnprotochat-client/tests` ✅ `cargo test -p quicnprotochat-client --tests` passes, including the MLS round-trip integration test. @@ -69,8 +65,8 @@ Open question (future work): if we need persistent groups instead of ephemeral d ## Key Design Decisions -### DS Port (7001) vs same port -The server uses **two separate listeners** (7000 for AS, 7001 for DS) because capnp-rpc supports only one bootstrap capability per connection. No new schema was needed. +### DS Port (single endpoint) +The server now exposes a **single NodeService** endpoint (default 4201) that combines Authentication and Delivery over one capnp-rpc bootstrap capability. ### GroupMember lifecycle (CRITICAL) The `OpenMlsRustCrypto` backend holds the HPKE init private key **in memory**. The **same `GroupMember` instance** must be used from `generate_key_package()` through `join_group()`. Do NOT create a new GroupMember between these calls. diff --git a/README.md b/README.md index 3ff3dba..6aa04bd 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # quicnprotochat -> End-to-end encrypted group messaging over **Noise_XX + MLS** (RFC 9420), written in Rust. +> End-to-end encrypted group messaging over **QUIC + TLS 1.3 + MLS** (RFC 9420), written in Rust. -Every byte on the wire is double-protected: the outer **Noise_XX** channel -authenticates both sides and provides forward secrecy for the transport, while -the inner **MLS** layer provides post-compromise security and ratcheted group -key agreement across any number of participants. 
Messages are framed with -**Cap'n Proto**, keeping serialisation zero-copy and schema-versioned. +Every byte on the wire is protected by a QUIC transport secured with TLS 1.3 +(`quinn` + `rustls`). The inner **MLS** layer provides post-compromise security +and ratcheted group key agreement across any number of participants. Messages +are framed with **Cap'n Proto**, keeping serialisation zero-copy and +schema-versioned. --- @@ -18,16 +18,14 @@ key agreement across any number of participants. Messages are framed with ├─────────────────────────────────────────────┤ │ Cap'n Proto RPC │ <- typed, schema-versioned framing ├─────────────────────────────────────────────┤ -│ Noise_XX (X25519 · ChaChaPoly · SHA-256) │ <- mutual auth + transport secrecy -├─────────────────────────────────────────────┤ -│ TCP │ +│ QUIC + TLS 1.3 (quinn/rustls) │ <- server auth + transport secrecy └─────────────────────────────────────────────┘ ``` | Property | Mechanism | |---|---| -| Transport confidentiality | Noise_XX (ChaCha20-Poly1305) | -| Transport authentication | Noise_XX static X25519 keys | +| Transport confidentiality | TLS 1.3 over QUIC (rustls) | +| Transport authentication | TLS 1.3 server cert (self-signed by default) | | Group key agreement | MLS `MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519` | | Post-compromise security | MLS epoch ratchet | | Identity | Ed25519 (MLS credential + leaf node signature) | @@ -40,12 +38,11 @@ key agreement across any number of participants. 
Messages are framed with ``` quicnprotochat/ ├── crates/ -│ ├── quicnprotochat-core/ # Crypto primitives, Noise transport, MLS group state machine +│ ├── quicnprotochat-core/ # Crypto primitives, QUIC/TLS client helpers, MLS group state machine │ │ ├── src/codec.rs # LengthPrefixedCodec — Tokio Encoder + Decoder -│ │ ├── src/keypair.rs # NoiseKeypair — X25519 static key, zeroize-on-drop +│ │ ├── src/keypair.rs # Transport key helpers (X25519, zeroize-on-drop) │ │ ├── src/identity.rs # IdentityKeypair — Ed25519 identity + MLS Signer │ │ ├── src/keypackage.rs# generate_key_package — standalone KeyPackage helper -│ │ ├── src/noise.rs # handshake_initiator / handshake_responder / NoiseTransport │ │ └── src/group.rs # GroupMember — full MLS group lifecycle │ │ │ ├── quicnprotochat-proto/ # Cap'n Proto schemas + generated types + serde helpers @@ -64,9 +61,10 @@ quicnprotochat/ ## Services -### Authentication Service (AS) — port 7000 +### Node Service (Auth + Delivery) — port 4201 -Stores single-use MLS KeyPackages so peers can add each other to groups. +Single QUIC + TLS 1.3 endpoint exposing Cap'n Proto `NodeService` that combines +Authentication (KeyPackage upload/fetch) and Delivery (enqueue/fetch) operations. ``` uploadKeyPackage(identityKey: Data, package: Data) -> (fingerprint: Data) @@ -76,9 +74,7 @@ fetchKeyPackage(identityKey: Data) -> (package: Data) Packages are indexed by the raw Ed25519 public key (32 bytes) and consumed exactly once on fetch, matching the MLS single-use KeyPackage requirement. -### Delivery Service (DS) — port 7001 - -A simple store-and-forward relay for MLS messages. The DS never inspects +A simple store-and-forward relay for MLS messages. The server never inspects payloads — it routes opaque blobs by recipient public key. 
``` @@ -144,14 +140,23 @@ cargo test --workspace ## Running -**Start the server** (AS on :7000, DS on :7001): +**Start the server** (NodeService on :4201): ```bash cargo run -p quicnprotochat-server -# or with custom ports: -cargo run -p quicnprotochat-server -- --listen 0.0.0.0:7000 --ds-listen 0.0.0.0:7001 +# or with a custom port: +cargo run -p quicnprotochat-server -- --listen 0.0.0.0:4201 ``` +Current TLS defaults (development): self-signed cert/key written to `data/` if +missing. Override via CLI flags or env vars: + +| Purpose | Flag | Env var | Default | +|---|---|---|---| +| Listen address | `--listen` | `QUICNPROTOCHAT_LISTEN` | `0.0.0.0:4201` | +| TLS cert (DER) | `--tls-cert` | `QUICNPROTOCHAT_TLS_CERT` | `data/server-cert.der` | +| TLS key (DER) | `--tls-key` | `QUICNPROTOCHAT_TLS_KEY` | `data/server-key.der` | + **Client commands:** ```bash @@ -167,21 +172,21 @@ cargo run -p quicnprotochat-client -- fetch-key <64-hex-char identity key> # Run an end-to-end Alice↔Bob demo against live AS + DS cargo run -p quicnprotochat-client -- demo-group \ - --server 127.0.0.1:7000 \ - --ds-server 127.0.0.1:7001 + --server 127.0.0.1:4201 \ + --ds-server 127.0.0.1:4201 # Persistent group CLI (stateful) -cargo run -p quicnprotochat-client -- register-state --state state.bin --server 127.0.0.1:7000 +cargo run -p quicnprotochat-client -- register-state --state state.bin --server 127.0.0.1:4201 cargo run -p quicnprotochat-client -- create-group --state state.bin --group-id my-group -cargo run -p quicnprotochat-client -- invite --state state.bin --peer-key --server 127.0.0.1:7000 --ds-server 127.0.0.1:7001 -cargo run -p quicnprotochat-client -- join --state state.bin --ds-server 127.0.0.1:7001 -cargo run -p quicnprotochat-client -- send --state state.bin --peer-key --msg "hello" --ds-server 127.0.0.1:7001 -cargo run -p quicnprotochat-client -- recv --state state.bin --ds-server 127.0.0.1:7001 +cargo run -p quicnprotochat-client -- invite --state state.bin --peer-key 
--server 127.0.0.1:4201 --ds-server 127.0.0.1:4201 +cargo run -p quicnprotochat-client -- join --state state.bin --ds-server 127.0.0.1:4201 +cargo run -p quicnprotochat-client -- send --state state.bin --peer-key --msg "hello" --ds-server 127.0.0.1:4201 +cargo run -p quicnprotochat-client -- recv --state state.bin --ds-server 127.0.0.1:4201 ``` -Server address defaults to `127.0.0.1:7000`; override with `--server` or -`QUICNPROTOCHAT_SERVER`. Delivery Service defaults to `127.0.0.1:7001`; override with -`--ds-server` or `QUICNPROTOCHAT_DS_SERVER`. +Server address defaults to `127.0.0.1:4201`; override with `--server` or +`QUICNPROTOCHAT_SERVER`. The same endpoint serves both Authentication and +Delivery. State file notes: the persisted state stores your identity and MLS group state after you have joined. If you generate a KeyPackage (`register-state`) and then @@ -194,24 +199,44 @@ key is not retained; run join in the same session you register. | # | Name | Status | What it adds | |---|------|--------|--------------| -| M1 | Noise transport | ✅ | Noise_XX handshake, length-prefixed framing, Ping/Pong | +| M1 | QUIC/TLS transport | ✅ | QUIC + TLS 1.3 endpoint, length-prefixed framing, Ping/Pong | | M2 | Authentication Service | ✅ | Ed25519 identity, KeyPackage generation, AS upload/fetch | | M3 | Delivery Service + MLS groups | ✅ | DS relay, `GroupMember` create/join/add/send/recv | | M4 | Group CLI subcommands | 🔜 | Persistent CLI (`create-group`, `invite`, `join`, `send`, `recv`); demo-group already available | | M5 | Multi-party groups | 🔜 | N > 2 members, Commit fan-out, Proposal handling | | M6 | Persistence | 🔜 | SQLite key store, durable group state | -| M7 | Post-quantum | 🔜 | ML-KEM-768 hybrid in Noise layer | +| M7 | Post-quantum | 🔜 | PQ hybrid for MLS/HPKE | + +--- + +## Production hardening roadmap (high level) + +1) **Transport & identity**: ACME/Let’s Encrypt, pinned identities, TLS policy + hardening, server identity via CA. 
+2) **Persistence**: Move AS/DS and MLS state to Postgres; encrypted at rest; + retention/TTL and migrations. +3) **AuthZ & accounts**: User/device accounts (OIDC/passwordless), device + binding, revocation/recovery; bind MLS credentials to issued identities. +4) **Delivery semantics**: Message IDs, idempotent enqueue/fetch, ordering per + conversation, backpressure/retries; attachment pipeline via encrypted + object storage. +5) **Observability & ops**: Structured logs with correlation IDs; Prometheus + metrics; tracing; alerting + SLOs; audit logs for auth/key events. +6) **Client resilience**: Reconnect/resume, offline queue, multi-device key + handling; key verification UX (QR/safety numbers); recovery flows. +7) **Security & compliance**: Dependency audits, fuzzing, SAST/DAST, pentest; + SBOM/signed releases; PII minimization and retention controls. --- ## Security notes - This is a **proof-of-concept**. It has not been audited. -- The server Noise keypair is **ephemeral** — regenerated on every restart. - Clients perform no server key pinning in the current milestone. +- The server uses a self-signed TLS cert by default; clients trust it via a + local DER file. No pinning or CA-based identity is enforced yet. - MLS credentials use `CredentialType::Basic` (public key only). A real deployment would bind credentials to a certificate authority. -- The Delivery Service does no authentication of the `recipientKey` field — +- The Delivery operation does no authentication of the `recipientKey` field — anyone can enqueue for any recipient. Access control is a future milestone. --- diff --git a/ROADMAP_WBS.md b/ROADMAP_WBS.md new file mode 100644 index 0000000..6d50866 --- /dev/null +++ b/ROADMAP_WBS.md @@ -0,0 +1,57 @@ +# Production Readiness Work Breakdown + +## Feature Scope (must-have) +- Identity and Auth: account/device model, signup/login, short-lived tokens + refresh, device binding/revocation, rate limits, audit events. 
+- Key and MLS Lifecycle: keypackage create/rotate/expire, add/remove member, epoch advance, replay/downgrade protection, external commits, keystore encryption at rest. +- Transport and Delivery: QUIC/TLS endpoint on 4201, health/readiness, ordering and dedup policy, idempotent delivery IDs, backpressure, resumable sessions, payload size caps. +- Private 1:1 Channels: first-class DM abstraction (channel IDs), authz on enqueue/fetch, per-channel history/retention policy, same MLS encryption with pairwise groups, spam/rate controls. +- Storage and Persistence: durable queues and keypackages, migrations and schema versioning, integrity checksums, backup/restore playbook. +- Observability and Ops: structured logs with correlation IDs, metrics (auth latency, handshake success, delivery lag, queue depth), traces across auth→delivery→storage, alerting/SLO dashboards. +- Client Resilience and UX: offline queue with retry/jitter, reconnect/resume, state persistence, basic key verification surface, compatibility handling for server upgrades. +- Compatibility and Protocols: Cap'n Proto schema versioning rules, golden-wire fixtures, N-1 client/server matrix tests, ciphersuite allowlist. + +## Security Plan (by design) +- Governance: CODEOWNERS on crypto/proto/auth paths; required review; cargo-audit/deny + SBOM in CI; threat model maintained per release. +- Transport Policy: TLS 1.3 strict ciphers, mTLS option, pinned server identity, downgrade detection; QUIC rate limits/connection caps. +- MLS Policy: enforce lifetime/usage on keypackages, replay/downgrade checks, epoch monotonicity, credential validation. +- Input Validation: strict length/type checks on all RPC inputs; reject oversize or malformed payloads; explicit error mapping with no panics on untrusted data. +- Secrets: config via env/secret manager only; no secrets in repo/images; rotation hooks; memory zeroize where feasible. 
+- Abuse/DoS Controls: per-IP/account rate limits, request/body size caps, cheap pre-auth drops, bounded queues/backpressure. +- Data Protection: encryption at rest for keystore/state; backups with integrity verification; deletion/retention policies. +- Logging Safety: redaction of secrets/PII; correlation IDs; audit log for auth/device/key events; access-controlled log sinks. +- Testing: unit/prop tests for codecs/crypto/state machines; integration tests for auth/storage; e2e security cases (tamper/replay/downgrade/expiry); fuzzing targets for parsers; periodic pentest. + +## Work Breakdown (phased) +1) Baselines and Governance +- CODEOWNERS + review gates; fmt/clippy/test and cargo-audit/deny in CI; SBOM generation; threat model + release criteria (SLOs, ciphersuites, compat policy). + +2) Protocols and Core Hardening +- Cap'n Proto versioning rules + compat tests + golden-wire fixtures. +- Enforce ciphersuite allowlist; downgrade/replay guards; keypackage lifetime/expiry; keystore encryption; structured error taxonomy. +- Wire guardrails: TLS 1.3 only; MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519 only; schema version tags on all Cap'n Proto messages; reject unknown versions; golden captures for auth/envelope/delivery; N-1 compatibility tests. + +3) Auth/Device and Server Hardening +- Account/device schema and storage; signup/login + token/refresh; device bind/revoke; rate limits and size caps; audit events; health/readiness; graceful shutdown/backpressure. +- AuthZ/RBAC hooks on enqueue/fetch keyed to identity/device; session TTLs; lockout/backoff; audit log on auth/device/key events; per-IP/account limits (50 r/s, 5 MB payload cap, 50 conns/IP). + +4) Delivery Semantics and Client Resilience +- Idempotent delivery IDs, ordering/dedup policy, resumable sessions, offline queue with retry/jitter, state persistence; client/server config for port 4201; telemetry hooks. 
+- First-class 1:1 channels: channel IDs, authz on enqueue/fetch, per-channel retention (7d), keypackage TTL 24h, spam/rate controls, optional history toggle. + +5) E2E Harness and Security Tests +- docker-compose testnet; Rust e2e driver; happy-path flows (register, upload/fetch, create/join/send/recv, resume); negative cases (tamper, replay, downgrade, expired keypackage, oversize, rate limit); compatibility matrix (N-1 clients/servers). + +6) Reliability, Perf, and Operations +- Soak/load tests with thresholds; chaos (loss/latency/reorder); backups/restore drills; staging parity; canary/rollback runbooks; alerting + dashboards. + +## Planning Checklist (before implementation) +- Define release criteria and SLOs: availability, p99 latencies (auth, handshake, enqueue/fetch), error budgets. +- Threat model sign-off: auth/device, transport, MLS lifecycle, storage, abuse/DoS; document mitigations and gaps. +- Protocol policy: allowed ciphersuites, Cap'n Proto versioning rules, backward/forward compatibility guarantees, keypackage lifetime/rotation cadence. +- Identity and auth model: account/device lifecycle, token TTL/refresh, revocation flows, audit requirements. +- Data model decisions: schema for keypackages, delivery queues, audit logs; retention and deletion policy (per-message, per-channel). +- Abuse controls: rate limits (per IP/account/channel), size caps, connection caps, cheap pre-auth drops; defaults and override policy. +- Observability contracts: required metrics/log fields/traces, correlation IDs; dashboards to build; alert thresholds. +- Environments and secrets: how configs are injected (env/secret manager), key rotation plan, no-secrets-in-repo enforcement. +- Testing matrix: target platforms, N-1 compatibility scope, minimum e2e acceptance set, perf thresholds. +- Rollout and ops: staging parity definition, canary/rollback procedure, backup/restore drill cadence, on-call/runbook ownership. 
diff --git a/crates/quicnprotochat-client/src/main.rs b/crates/quicnprotochat-client/src/main.rs index b05d634..e9046ee 100644 --- a/crates/quicnprotochat-client/src/main.rs +++ b/crates/quicnprotochat-client/src/main.rs @@ -12,7 +12,7 @@ //! //! | Env var | CLI flag | Default | //! |-----------------|--------------|---------------------| -//! | `QUICNPROTOCHAT_SERVER`| `--server` | `127.0.0.1:7000` | +//! | `QUICNPROTOCHAT_SERVER`| `--server` | `127.0.0.1:4201` | //! | `RUST_LOG` | — | `warn` | use std::fs; @@ -66,7 +66,7 @@ enum Command { /// Send a Ping to the server and print the round-trip time. Ping { /// Server address (host:port). - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, }, @@ -76,7 +76,7 @@ enum Command { /// Ed25519 identity public key bytes (hex), which peers need to fetch it. Register { /// Server address (host:port). - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, }, @@ -86,7 +86,7 @@ enum Command { /// hex characters (32 bytes). FetchKey { /// Server address (host:port). - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, /// Target peer's Ed25519 identity public key (64 hex chars = 32 bytes). @@ -96,7 +96,7 @@ enum Command { /// Run a full Alice↔Bob MLS round-trip against live AS and DS endpoints. DemoGroup { /// Server address (host:port). - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, }, @@ -111,7 +111,7 @@ enum Command { state: PathBuf, /// Authentication Service address (host:port). 
- #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, }, @@ -126,7 +126,7 @@ enum Command { state: PathBuf, /// Server address (host:port). - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, /// Group identifier (arbitrary bytes, typically a human-readable name). @@ -142,7 +142,7 @@ enum Command { env = "QUICNPROTOCHAT_STATE" )] state: PathBuf, - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, /// Peer identity public key (64 hex chars = 32 bytes). #[arg(long)] @@ -157,7 +157,7 @@ enum Command { env = "QUICNPROTOCHAT_STATE" )] state: PathBuf, - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, }, @@ -169,7 +169,7 @@ enum Command { env = "QUICNPROTOCHAT_STATE" )] state: PathBuf, - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, /// Recipient identity key (hex, 32 bytes -> 64 chars). #[arg(long)] @@ -187,7 +187,7 @@ enum Command { env = "QUICNPROTOCHAT_STATE" )] state: PathBuf, - #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")] + #[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] server: String, /// Wait for up to this many milliseconds if no messages are queued. diff --git a/crates/quicnprotochat-server/src/main.rs b/crates/quicnprotochat-server/src/main.rs index f1ec544..2c1f8aa 100644 --- a/crates/quicnprotochat-server/src/main.rs +++ b/crates/quicnprotochat-server/src/main.rs @@ -8,7 +8,7 @@ //! 
# Architecture //! //! ```text -//! QUIC endpoint (7000) +//! QUIC endpoint (4201) //! └─ TLS 1.3 handshake (self-signed by default) //! └─ capnp-rpc VatNetwork (LocalSet, !Send) //! └─ NodeServiceImpl (KeyPackage + Delivery queues) @@ -22,7 +22,7 @@ //! //! | Env var | CLI flag | Default | //! |---------------------|----------------|-----------------| -//! | `QUICNPROTOCHAT_LISTEN` | `--listen` | `0.0.0.0:7000` | +//! | `QUICNPROTOCHAT_LISTEN` | `--listen` | `0.0.0.0:4201` | //! | `RUST_LOG` | — | `info` | use std::{fs, net::SocketAddr, path::PathBuf, sync::Arc, time::Duration}; @@ -37,6 +37,7 @@ use quinn::{Endpoint, ServerConfig}; use quinn_proto::crypto::rustls::QuicServerConfig; use rcgen::generate_simple_self_signed; use rustls::pki_types::{CertificateDer, PrivateKeyDer}; +use rustls::version::TLS13; use sha2::{Digest, Sha256}; use tokio::sync::Notify; use tokio::time::timeout; @@ -45,6 +46,10 @@ use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt}; mod storage; use storage::{FileBackedStore, StorageError}; +const MAX_PAYLOAD_BYTES: usize = 5 * 1024 * 1024; // 5 MB cap per message +const MAX_KEYPACKAGE_BYTES: usize = 1 * 1024 * 1024; // 1 MB cap per KeyPackage +const CURRENT_WIRE_VERSION: u16 = 1; // allow 0 (legacy) and 1 (current) + // ── CLI ─────────────────────────────────────────────────────────────────────── #[derive(Debug, Parser)] @@ -55,7 +60,7 @@ use storage::{FileBackedStore, StorageError}; )] struct Args { /// QUIC listen address (host:port). - #[arg(long, default_value = "0.0.0.0:7000", env = "QUICNPROTOCHAT_LISTEN")] + #[arg(long, default_value = "0.0.0.0:4201", env = "QUICNPROTOCHAT_LISTEN")] listen: String, /// Directory for persisted server data (KeyPackages + delivery queues). 
@@ -133,6 +138,12 @@ impl node_service::Server for NodeServiceImpl { "package must not be empty".to_string(), )); } + if package.len() > MAX_KEYPACKAGE_BYTES { + return Promise::err(capnp::Error::failed(format!( + "package exceeds max size ({} bytes)", + MAX_KEYPACKAGE_BYTES + ))); + } let fingerprint: Vec = Sha256::digest(&package).to_vec(); if let Err(e) = self @@ -221,6 +232,8 @@ impl node_service::Server for NodeServiceImpl { Ok(v) => v.to_vec(), Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))), }; + let channel_id = p.get_channel_id().unwrap_or_default().to_vec(); + let version = p.get_version(); if recipient_key.len() != 32 { return Promise::err(capnp::Error::failed(format!( @@ -233,10 +246,22 @@ impl node_service::Server for NodeServiceImpl { "payload must not be empty".to_string(), )); } + if payload.len() > MAX_PAYLOAD_BYTES { + return Promise::err(capnp::Error::failed(format!( + "payload exceeds max size ({} bytes)", + MAX_PAYLOAD_BYTES + ))); + } + if version != 0 && version != CURRENT_WIRE_VERSION { + return Promise::err(capnp::Error::failed(format!( + "unsupported wire version {} (expected 0 or {CURRENT_WIRE_VERSION})", + version + ))); + } if let Err(e) = self .store - .enqueue(&recipient_key, payload) + .enqueue(&recipient_key, &channel_id, payload) .map_err(storage_err) { return Promise::err(e); @@ -265,6 +290,17 @@ impl node_service::Server for NodeServiceImpl { }, Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))), }; + let channel_id = params + .get() + .ok() + .and_then(|p| p.get_channel_id().ok()) + .map(|c| c.to_vec()) + .unwrap_or_default(); + let version = params + .get() + .ok() + .map(|p| p.get_version()) + .unwrap_or(0); if recipient_key.len() != 32 { return Promise::err(capnp::Error::failed(format!( @@ -272,8 +308,18 @@ impl node_service::Server for NodeServiceImpl { recipient_key.len() ))); } + if version != 0 && version != CURRENT_WIRE_VERSION { + return Promise::err(capnp::Error::failed(format!( + 
"unsupported wire version {} (expected 0 or {CURRENT_WIRE_VERSION})", + version + ))); + } - let messages = match self.store.fetch(&recipient_key).map_err(storage_err) { + let messages = match self + .store + .fetch(&recipient_key, &channel_id) + .map_err(storage_err) + { Ok(m) => m, Err(e) => return Promise::err(e), }; @@ -306,6 +352,8 @@ impl node_service::Server for NodeServiceImpl { Ok(v) => v.to_vec(), Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))), }; + let channel_id = p.get_channel_id().unwrap_or_default().to_vec(); + let version = p.get_version(); let timeout_ms = p.get_timeout_ms(); if recipient_key.len() != 32 { @@ -314,12 +362,20 @@ impl node_service::Server for NodeServiceImpl { recipient_key.len() ))); } + if version != 0 && version != CURRENT_WIRE_VERSION { + return Promise::err(capnp::Error::failed(format!( + "unsupported wire version {} (expected 0 or {CURRENT_WIRE_VERSION})", + version + ))); + } let store = Arc::clone(&self.store); let waiters = self.waiters.clone(); Promise::from_future(async move { - let messages = store.fetch(&recipient_key).map_err(storage_err)?; + let messages = store + .fetch(&recipient_key, &channel_id) + .map_err(storage_err)?; if messages.is_empty() && timeout_ms > 0 { let waiter = waiters @@ -327,7 +383,9 @@ impl node_service::Server for NodeServiceImpl { .or_insert_with(|| Arc::new(Notify::new())) .clone(); let _ = timeout(Duration::from_millis(timeout_ms), waiter.notified()).await; - let msgs = store.fetch(&recipient_key).map_err(storage_err)?; + let msgs = store + .fetch(&recipient_key, &channel_id) + .map_err(storage_err)?; fill_payloads_wait(&mut results, msgs); return Ok(()); } @@ -467,7 +525,7 @@ fn build_server_config(cert_path: &PathBuf, key_path: &PathBuf) -> anyhow::Resul let cert_chain = vec![CertificateDer::from(cert_bytes)]; let key = PrivateKeyDer::try_from(key_bytes).map_err(|_| anyhow::anyhow!("invalid key"))?; - let mut tls = rustls::ServerConfig::builder() + let mut tls = 
rustls::ServerConfig::builder_with_protocol_versions(&[&TLS13]) .with_no_client_auth() .with_single_cert(cert_chain, key)?; tls.alpn_protocols = vec![b"capnp".to_vec()]; diff --git a/crates/quicnprotochat-server/src/storage.rs b/crates/quicnprotochat-server/src/storage.rs index d8a0643..cd04dd2 100644 --- a/crates/quicnprotochat-server/src/storage.rs +++ b/crates/quicnprotochat-server/src/storage.rs @@ -1,6 +1,7 @@ use std::{ collections::{HashMap, VecDeque}, fs, + hash::{Hash, Hasher}, path::{Path, PathBuf}, sync::Mutex, }; @@ -16,10 +17,28 @@ pub enum StorageError { } #[derive(Serialize, Deserialize, Default)] -struct QueueMap { +struct QueueMapV1 { map: HashMap<Vec<u8>, VecDeque<Vec<u8>>>, } +#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug)] +pub struct ChannelKey { + pub channel_id: Vec<u8>, + pub recipient_key: Vec<u8>, +} + +impl Hash for ChannelKey { + fn hash<H: Hasher>(&self, state: &mut H) { + self.channel_id.hash(state); + self.recipient_key.hash(state); + } +} + +#[derive(Serialize, Deserialize, Default)] +struct QueueMapV2 { + map: HashMap<ChannelKey, VecDeque<Vec<u8>>>, +} + /// File-backed storage for KeyPackages and delivery queues. /// /// Each mutation flushes the entire map to disk. Suitable for MVP-scale loads. 
@@ -27,7 +46,7 @@ pub struct FileBackedStore { kp_path: PathBuf, ds_path: PathBuf, key_packages: Mutex, VecDeque>>>, - deliveries: Mutex, VecDeque>>>, + deliveries: Mutex>>>, } impl FileBackedStore { @@ -69,25 +88,42 @@ impl FileBackedStore { Ok(package) } - pub fn enqueue(&self, recipient_key: &[u8], payload: Vec) -> Result<(), StorageError> { + pub fn enqueue( + &self, + recipient_key: &[u8], + channel_id: &[u8], + payload: Vec, + ) -> Result<(), StorageError> { let mut map = self.deliveries.lock().unwrap(); - map.entry(recipient_key.to_vec()) + let key = ChannelKey { + channel_id: channel_id.to_vec(), + recipient_key: recipient_key.to_vec(), + }; + map.entry(key) .or_default() .push_back(payload); self.flush_map(&self.ds_path, &*map) } - pub fn fetch(&self, recipient_key: &[u8]) -> Result>, StorageError> { + pub fn fetch( + &self, + recipient_key: &[u8], + channel_id: &[u8], + ) -> Result>, StorageError> { let mut map = self.deliveries.lock().unwrap(); + let key = ChannelKey { + channel_id: channel_id.to_vec(), + recipient_key: recipient_key.to_vec(), + }; let messages = map - .get_mut(recipient_key) + .get_mut(&key) .map(|q| q.drain(..).collect()) .unwrap_or_default(); self.flush_map(&self.ds_path, &*map)?; Ok(messages) } - fn load_map(path: &Path) -> Result, VecDeque>>, StorageError> { + fn load_map(path: &Path) -> Result>>, StorageError> { if !path.exists() { return Ok(HashMap::new()); } @@ -95,16 +131,30 @@ impl FileBackedStore { if bytes.is_empty() { return Ok(HashMap::new()); } - let map: QueueMap = bincode::deserialize(&bytes).map_err(|_| StorageError::Serde)?; - Ok(map.map) + // Try v2 format (channel-aware). Fallback to legacy v1. 
+ if let Ok(map) = bincode::deserialize::(&bytes) { + return Ok(map.map); + } + let legacy: QueueMapV1 = bincode::deserialize(&bytes).map_err(|_| StorageError::Serde)?; + let mut upgraded = HashMap::new(); + for (recipient_key, queue) in legacy.map.into_iter() { + upgraded.insert( + ChannelKey { + channel_id: Vec::new(), + recipient_key, + }, + queue, + ); + } + Ok(upgraded) } fn flush_map( &self, path: &Path, - map: &HashMap, VecDeque>>, + map: &HashMap>>, ) -> Result<(), StorageError> { - let payload = QueueMap { map: map.clone() }; + let payload = QueueMapV2 { map: map.clone() }; let bytes = bincode::serialize(&payload).map_err(|_| StorageError::Serde)?; if let Some(parent) = path.parent() { fs::create_dir_all(parent).map_err(|e| StorageError::Io(e.to_string()))?; diff --git a/design/DM_CHANNELS.md b/design/DM_CHANNELS.md new file mode 100644 index 0000000..ba11fc1 --- /dev/null +++ b/design/DM_CHANNELS.md @@ -0,0 +1,38 @@ +# 1:1 Channel Design (MVP) + +## Goals +- First-class 1:1 channels (DMs) atop NodeService. +- Authz on enqueue/fetch per channel, not just recipient key. +- Privacy: MLS-encrypted payloads; metadata limited to channel ID + participants. +- Retention: 7d message retention; keypackages expire after 24h (configurable later). +- Compatibility: additive schema change with version tagging; N-1 clients can interop if they ignore new fields. + +## Schema changes (Cap'n Proto) +- Add `channelId :Data` (UUID/16B) to enqueue/fetch/fetchWait requests. +- Add `version :UInt16` to NodeService messages (reject unknown > current). +- Keep `recipientKey` for routing; server authz uses `(channelId, caller identity)`. + +## AuthZ model +- Channel membership: exactly two identities (A,B). Server stores membership map `{channelId -> {a_key, b_key}}`. +- Enqueue allowed if caller identity ∈ channel members; fetch/fetchWait allowed only for caller identity. +- Rate limits applied per channel and per identity (50 r/s per IP/identity, 5 MB max payload). 
+ +## Storage model +- New table/map: `channels` with `channelId`, `member_keys[2]`, `created_at`. +- Deliveries keyed by `(channelId, recipient_key)`; queues retain per recipient, per channel. +- Messages carry `received_at` timestamp; TTL eviction at fetch time and background sweep. + +## Flows +- Create channel: caller provides peer identity; server generates channelId, stores membership, returns channelId. +- Send: client includes channelId + recipientKey; server authz + size/TTL checks; enqueue. +- Receive: fetch/fetchWait drains messages for `(channelId, caller_key)`; applies TTL, returns non-expired. + +## Backward compatibility +- Old clients without channelId: server treats channelId=nil as legacy mode (current behavior) for interim. +- Version field allows rejecting future schema changes cleanly. + +## Open items +- Persistence backend: extend FileBackedStore or move to proper DB for channels + TTL metadata. +- API surface: add `createChannel(channelMembers)` RPC or reuse auth service. +- Client UX: map peer identity → channelId discovery; cache channelId in state file. +- Auditing: log channel create, authz failures, send/recv events with redaction. diff --git a/design/TECHNOLOGY_SUGGESTIONS.md b/design/TECHNOLOGY_SUGGESTIONS.md new file mode 100644 index 0000000..4926ca7 --- /dev/null +++ b/design/TECHNOLOGY_SUGGESTIONS.md @@ -0,0 +1,57 @@ +# Technology Suggestions for quicnprotochat + +## Transport & Networking + +- **LibP2P or iroh (from n0)** — Decentralized peer discovery, NAT traversal (hole-punching), and relay fallback. Move beyond client-server to a mesh/hybrid topology where peers can communicate directly when possible. +- **WebTransport (HTTP/3)** — Expose QUIC transport to browsers, enabling a web client without WebSocket degradation. +- **Tor / I2P integration** — Onion-routed transport layer for metadata resistance. MLS protects content, but connection metadata still leaks to the server. 
+ +## Storage & Persistence + +- **SQLCipher or libsql (Turso)** — Encrypted-at-rest SQLite for durable group state, key stores, and message history. +- **CRDT-based sync (Automerge / Yrs)** — Conflict-free replicated data types for multi-device state synchronization without a central authority. +- **Object storage (S3-compatible)** — For encrypted file/media attachments with server-side ignorance of content. + +## Cryptography & Privacy + +- **ML-KEM + ML-DSA hybrid** — Hybrid X25519+ML-KEM-768 KEM for MLS init keys. One of the first post-quantum MLS implementations. +- **Private Information Retrieval (PIR)** — Let clients fetch messages/key packages without revealing which recipient they are (SealPIR / SimplePIR). +- **Sealed Sender (Signal-style)** — Encrypt sender identity inside the MLS ciphertext so the server can't see who sent a message to whom. +- **Key Transparency (RFC draft)** — Verifiable log of public keys to detect server-side key substitution attacks. + +## Identity & Authentication + +- **DID (Decentralized Identifiers)** — Self-sovereign `did:key` or `did:web` identifiers. Portable across servers. +- **OPAQUE (aPAKE)** — Password-authenticated key exchange where the server never sees the password. +- **WebAuthn / Passkeys** — Hardware-backed authentication for device binding (YubiKey, Touch ID, etc.). +- **Verifiable Credentials (W3C VC)** — Prove attributes (org membership, role) without revealing full identity. + +## Application Layer + +- **Matrix-style federation** — Let multiple quicnprotochat servers federate for cross-server communication. +- **WASM plugin system** — Sandboxed WASM plugins for bots, bridges, custom message types. +- **Double-ratchet DM layer** — Signal-style double ratchet (X3DH + Axolotl) for efficient 1:1 conversations. + +## Observability & Operations + +- **OpenTelemetry (tracing + metrics)** — OTLP export for distributed tracing, latency histograms, and dashboards. 
+- **Prometheus + Grafana** — Metrics on message throughput, MLS epoch advancement rate, queue depths. +- **Testcontainers-rs** — Docker stack in Rust integration tests for true end-to-end CI. + +## Developer Experience + +- **Tauri or Dioxus** — Native cross-platform GUI client in Rust, sharing core crate. +- **uniffi or diplomat** — FFI bindings from Rust core to Swift/Kotlin for mobile clients. +- **Nix flakes** — Reproducible dev environment bundling capnp, Rust toolchain, and test infra. + +--- + +## Top 5 Priority Implementations + +| Priority | Technology | Why | +|----------|-----------|-----| +| 1 | **Post-quantum hybrid KEM** | `ml-kem` already vendored — finishing this makes the project cutting-edge | +| 2 | **SQLCipher persistence** | Unlocks M6, multi-device, and offline usage | +| 3 | **OPAQUE auth** | Zero-knowledge passwords, massive security uplift for auth layer | +| 4 | **iroh / LibP2P** | NAT traversal + optional P2P mesh makes this deployable without central infra | +| 5 | **Sealed Sender + PIR** | Metadata resistance is the frontier — content encryption is table stakes now | diff --git a/docker-compose.yml b/docker-compose.yml index eb94c94..cd57fde 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,14 +4,14 @@ services: context: . dockerfile: docker/Dockerfile ports: - - "7000:7000" + - "4201:4201/udp" environment: RUST_LOG: "info" - QUICNPROTOCHAT_LISTEN: "0.0.0.0:7000" - # Healthcheck: attempt a TCP connection to port 7000. + QUICNPROTOCHAT_LISTEN: "0.0.0.0:4201" + # Healthcheck: attempt a TCP connection to port 4201. # Uses bash /dev/tcp — available in debian:bookworm-slim without extra packages.
healthcheck: - test: ["CMD", "bash", "-c", "echo '' > /dev/tcp/localhost/7000"] + test: ["CMD", "bash", "-c", "echo '' > /dev/tcp/localhost/4201"] interval: 5s timeout: 3s retries: 10 diff --git a/schemas/delivery.capnp b/schemas/delivery.capnp index 4057cfb..4b0b06b 100644 --- a/schemas/delivery.capnp +++ b/schemas/delivery.capnp @@ -20,16 +20,21 @@ interface DeliveryService { # recipientKey : Ed25519 public key of the intended recipient (exactly 32 bytes). # payload : Opaque byte string — a TLS-encoded MlsMessageOut blob or any # other framed data the application layer wants to deliver. + # channelId : Optional channel identifier (empty for legacy). A 16-byte UUID + # is recommended for 1:1 channels. + # version : Schema/wire version. Must be 0 (legacy) or 1 (this spec). # # The payload is appended to the recipient's FIFO queue. Returns immediately; # the recipient retrieves it via `fetch`. - enqueue @0 (recipientKey :Data, payload :Data) -> (); + enqueue @0 (recipientKey :Data, payload :Data, channelId :Data, version :UInt16) -> (); # Fetch and atomically drain all queued payloads for a given recipient. # # recipientKey : Ed25519 public key of the caller (exactly 32 bytes). + # channelId : Optional channel identifier (empty for legacy). + # version : Schema/wire version. Must be 0 (legacy) or 1 (this spec). # # Returns the complete queue in FIFO order and clears it. Returns an empty # list if there are no pending messages. - fetch @1 (recipientKey :Data) -> (payloads :List(Data)); + fetch @1 (recipientKey :Data, channelId :Data, version :UInt16) -> (payloads :List(Data)); } diff --git a/schemas/node.capnp b/schemas/node.capnp index b7cc90f..a455f91 100644 --- a/schemas/node.capnp +++ b/schemas/node.capnp @@ -16,13 +16,16 @@ interface NodeService { fetchKeyPackage @1 (identityKey :Data) -> (package :Data); # Enqueue an opaque payload for delivery to a recipient. 
- enqueue @2 (recipientKey :Data, payload :Data) -> (); + # channelId : Optional channel identifier (empty for legacy). A 16-byte UUID + # is recommended for 1:1 channels. + # version : Schema/wire version. Must be 0 (legacy) or 1 (this spec). + enqueue @2 (recipientKey :Data, payload :Data, channelId :Data, version :UInt16) -> (); # Fetch and drain all queued payloads for the recipient. - fetch @3 (recipientKey :Data) -> (payloads :List(Data)); + fetch @3 (recipientKey :Data, channelId :Data, version :UInt16) -> (payloads :List(Data)); # Long-poll: wait up to timeoutMs for new payloads, then drain queue. - fetchWait @4 (recipientKey :Data, timeoutMs :UInt64) -> (payloads :List(Data)); + fetchWait @4 (recipientKey :Data, timeoutMs :UInt64, channelId :Data, version :UInt16) -> (payloads :List(Data)); # Health probe for readiness/liveness. health @5 () -> (status :Text);