feat: add post-quantum hybrid KEM + SQLCipher persistence

Feature 1 — Post-Quantum Hybrid KEM (X25519 + ML-KEM-768):
- Create hybrid_kem.rs with keygen, encrypt, decrypt + 11 unit tests
- Wire format: version(1) | x25519_eph_pk(32) | mlkem_ct(1088) | nonce(12) | ct
- Add uploadHybridKey/fetchHybridKey RPCs to node.capnp schema
- Server: hybrid key storage in FileBackedStore + RPC handlers
- Client: hybrid keypair in StoredState, auto-wrap/unwrap in send/recv/invite/join
- demo-group runs full hybrid PQ envelope round-trip

Feature 2 — SQLCipher Persistence:
- Extract Store trait from FileBackedStore API
- Create SqlStore (rusqlite + bundled-sqlcipher) with encrypted-at-rest SQLite
- Schema: key_packages, deliveries, hybrid_keys tables with indexes
- Server CLI: --store-backend=sql, --db-path, --db-key flags
- 5 unit tests for SqlStore (FIFO, round-trip, upsert, channel isolation)

Also includes: client lib.rs refactor, auth config, TOML config file support,
mdBook documentation, and various cleanups by user.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-22 08:07:48 +01:00
parent d1ddef4cea
commit f334ed3d43
81 changed files with 14502 additions and 2289 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
**/*.rs.bk **/*.rs.bk
.vscode/ .vscode/
gitea-mcp.json gitea-mcp.json
docs/book/

210
Cargo.lock generated
View File

@@ -87,6 +87,18 @@ dependencies = [
"subtle", "subtle",
] ]
[[package]]
name = "ahash"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]] [[package]]
name = "aho-corasick" name = "aho-corasick"
version = "1.1.4" version = "1.1.4"
@@ -594,7 +606,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"hashbrown", "hashbrown 0.14.5",
"lock_api", "lock_api",
"once_cell", "once_cell",
"parking_lot_core", "parking_lot_core",
@@ -736,6 +748,12 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]] [[package]]
name = "errno" name = "errno"
version = "0.3.14" version = "0.3.14"
@@ -746,6 +764,18 @@ dependencies = [
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "fallible-streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]] [[package]]
name = "fastbloom" name = "fastbloom"
version = "0.14.1" version = "0.14.1"
@@ -959,6 +989,24 @@ name = "hashbrown"
version = "0.14.5" version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
]
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "hashlink"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ba4ff7128dee98c7dc9794b6a411377e1404dba1c97deb8d1a55297bd25d8af"
dependencies = [
"hashbrown 0.14.5",
]
[[package]] [[package]]
name = "heck" name = "heck"
@@ -1030,6 +1078,25 @@ dependencies = [
"x25519-dalek-ng", "x25519-dalek-ng",
] ]
[[package]]
name = "hybrid-array"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2d35805454dc9f8662a98d6d61886ffe26bd465f5960e0e55345c70d5c0d2a9"
dependencies = [
"typenum",
]
[[package]]
name = "indexmap"
version = "2.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
dependencies = [
"equivalent",
"hashbrown 0.16.1",
]
[[package]] [[package]]
name = "inout" name = "inout"
version = "0.1.4" version = "0.1.4"
@@ -1083,6 +1150,25 @@ dependencies = [
"wasm-bindgen", "wasm-bindgen",
] ]
[[package]]
name = "keccak"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb26cec98cce3a3d96cbb7bced3c4b16e3d13f27ec56dbd62cbc8f39cfb9d653"
dependencies = [
"cpufeatures",
]
[[package]]
name = "kem"
version = "0.3.0-pre.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b8645470337db67b01a7f966decf7d0bafedbae74147d33e641c67a91df239f"
dependencies = [
"rand_core 0.6.4",
"zeroize",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.5.0" version = "1.5.0"
@@ -1101,6 +1187,17 @@ version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
[[package]]
name = "libsqlite3-sys"
version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
[[package]] [[package]]
name = "lock_api" name = "lock_api"
version = "0.4.14" version = "0.4.14"
@@ -1157,6 +1254,18 @@ dependencies = [
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
[[package]]
name = "ml-kem"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de49b3df74c35498c0232031bb7e85f9389f913e2796169c8ab47a53993a18f"
dependencies = [
"hybrid-array",
"kem",
"rand_core 0.6.4",
"sha3",
]
[[package]] [[package]]
name = "nu-ansi-term" name = "nu-ansi-term"
version = "0.50.3" version = "0.50.3"
@@ -1348,6 +1457,12 @@ dependencies = [
"spki", "spki",
] ]
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]] [[package]]
name = "poly1305" name = "poly1305"
version = "0.7.2" version = "0.7.2"
@@ -1461,9 +1576,11 @@ dependencies = [
"bincode", "bincode",
"bytes", "bytes",
"capnp", "capnp",
"chacha20poly1305 0.10.1",
"ed25519-dalek 2.2.0", "ed25519-dalek 2.2.0",
"futures", "futures",
"hkdf", "hkdf",
"ml-kem",
"openmls", "openmls",
"openmls_rust_crypto", "openmls_rust_crypto",
"openmls_traits", "openmls_traits",
@@ -1505,12 +1622,14 @@ dependencies = [
"quinn", "quinn",
"quinn-proto", "quinn-proto",
"rcgen", "rcgen",
"rusqlite",
"rustls", "rustls",
"serde", "serde",
"sha2 0.10.9", "sha2 0.10.9",
"thiserror 1.0.69", "thiserror 1.0.69",
"tokio", "tokio",
"tokio-util", "tokio-util",
"toml",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
] ]
@@ -1770,6 +1889,20 @@ dependencies = [
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
[[package]]
name = "rusqlite"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b838eba278d213a8beaf485bd313fd580ca4505a00d5871caeb1457c55322cae"
dependencies = [
"bitflags",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libsqlite3-sys",
"smallvec",
]
[[package]] [[package]]
name = "rustc-demangle" name = "rustc-demangle"
version = "0.1.27" version = "0.1.27"
@@ -1981,6 +2114,15 @@ dependencies = [
"zmij", "zmij",
] ]
[[package]]
name = "serde_spanned"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "sha2" name = "sha2"
version = "0.9.9" version = "0.9.9"
@@ -2005,6 +2147,16 @@ dependencies = [
"digest 0.10.7", "digest 0.10.7",
] ]
[[package]]
name = "sha3"
version = "0.10.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60"
dependencies = [
"digest 0.10.7",
"keccak",
]
[[package]] [[package]]
name = "sharded-slab" name = "sharded-slab"
version = "0.1.7" version = "0.1.7"
@@ -2298,6 +2450,47 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "toml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"toml_write",
"winnow",
]
[[package]]
name = "toml_write"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]] [[package]]
name = "tracing" name = "tracing"
version = "0.1.44" version = "0.1.44"
@@ -2410,6 +2603,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.5" version = "0.9.5"
@@ -2748,6 +2947,15 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
[[package]]
name = "winnow"
version = "0.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "wit-bindgen" name = "wit-bindgen"
version = "0.51.0" version = "0.51.0"

View File

@@ -22,9 +22,9 @@ tls_codec = { version = "0.3", features = ["derive"] }
ml-kem = { version = "0.2" } ml-kem = { version = "0.2" }
x25519-dalek = { version = "2", features = ["static_secrets"] } x25519-dalek = { version = "2", features = ["static_secrets"] }
ed25519-dalek = { version = "2", features = ["rand_core"] } ed25519-dalek = { version = "2", features = ["rand_core"] }
snow = { version = "0.9", features = ["default-resolver"] }
sha2 = { version = "0.10" } sha2 = { version = "0.10" }
hkdf = { version = "0.12" } hkdf = { version = "0.12" }
chacha20poly1305 = { version = "0.10" }
zeroize = { version = "1", features = ["derive"] } zeroize = { version = "1", features = ["derive"] }
rand = { version = "0.8" } rand = { version = "0.8" }
serde = { version = "1", features = ["derive"] } serde = { version = "1", features = ["derive"] }
@@ -44,6 +44,9 @@ quinn-proto = { version = "0.11" }
rustls = { version = "0.23", default-features = false, features = ["std"] } rustls = { version = "0.23", default-features = false, features = ["std"] }
rcgen = { version = "0.13" } rcgen = { version = "0.13" }
# ── Database ─────────────────────────────────────────────────────────────
rusqlite = { version = "0.31", features = ["bundled-sqlcipher"] }
# ── Server utilities ────────────────────────────────────────────────────────── # ── Server utilities ──────────────────────────────────────────────────────────
dashmap = { version = "5" } dashmap = { version = "5" }
tracing = { version = "0.1" } tracing = { version = "0.1" }

View File

@@ -1,106 +0,0 @@
# M3 Implementation Status
**Last updated:** 2026-02-20
**Branch:** feat/m1-noise-transport (all milestones on this branch so far)
---
## What is M3?
M3 adds:
1. **Delivery Service (DS)** — store-and-forward relay for MLS messages (Cap'n Proto RPC on the unified NodeService endpoint)
2. **MLS Group Lifecycle** — `GroupMember` struct: create group, add member (Welcome), join group, send/receive encrypted application messages
---
## Completed in M3
### `schemas/delivery.capnp` ✅
Simple DS schema: `enqueue(recipientKey, payload)` + `fetch(recipientKey) → List(Data)`.
### `quicnprotochat-proto/build.rs` ✅
Compiles `delivery.capnp` alongside `envelope.capnp` and `auth.capnp`.
### `quicnprotochat-proto/src/lib.rs` ✅
Exposes `pub mod delivery_capnp`.
### `quicnprotochat-core/src/group.rs` ✅ (FULLY FIXED, ALL TESTS PASS)
`GroupMember` struct with methods:
- `new(identity: Arc<IdentityKeypair>) -> Self`
- `generate_key_package() -> Result<Vec<u8>, CoreError>` — TLS-encoded KeyPackage bytes
- `create_group(group_id: &[u8]) -> Result<(), CoreError>`
- `add_member(key_package_bytes: &[u8]) -> Result<(commit_bytes, welcome_bytes), CoreError>`
- `join_group(welcome_bytes: &[u8]) -> Result<(), CoreError>`
- `send_message(plaintext: &[u8]) -> Result<Vec<u8>, CoreError>` — returns TLS-encoded PrivateMessage
- `receive_message(bytes: &[u8]) -> Result<Option<Vec<u8>>, CoreError>` — returns plaintext or None for Commit
- `group_id() -> Option<Vec<u8>>`
- `identity() -> &IdentityKeypair`
**openmls 0.5 API gotchas resolved:**
- `KeyPackage` only has `TlsSerialize`, not `TlsDeserialize` → use `KeyPackageIn::tls_deserialize(...)?.validate(backend.crypto(), ProtocolVersion::Mls10)?`
- `MlsMessageIn::into_welcome()` is `#[cfg(any(feature = "test-utils", test))]` → use `match msg_in.extract() { MlsMessageInBody::Welcome(w) => w, ... }`
- `MlsMessageIn::into_protocol_message()` is similarly feature-gated → use `match msg_in.extract() { MlsMessageInBody::PrivateMessage(m) => ProtocolMessage::PrivateMessage(m), MlsMessageInBody::PublicMessage(m) => ProtocolMessage::PublicMessage(m), ... }`
- `From<MlsMessageIn> for ProtocolMessage` is also feature-gated
- Must use `OpenMlsCryptoProvider` trait in scope for `backend.crypto()`
### `quicnprotochat-core/src/lib.rs` ✅
Exposes `pub use group::GroupMember`.
### `quicnprotochat-server/src/main.rs` ✅
Unified NodeService listener (Auth + Delivery) on one QUIC/TLS endpoint; uses `DashMap<Vec<u8>, VecDeque<Vec<u8>>>` keyed by Ed25519 public key.
### `quicnprotochat-client/src/main.rs` ✅
Added `demo-group` subcommand to exercise the full Alice↔Bob MLS flow against live NodeService (4201): uploads both KeyPackages, delivers Welcome, and exchanges application messages.
### `quicnprotochat-client/tests` ✅
`cargo test -p quicnprotochat-client --tests` passes, including the MLS round-trip integration test.
---
## Notes
Open question (future work): if we need persistent groups instead of ephemeral demo runs, enable openmls `serde` feature and add statefile-backed subcommands (`create-group`, `invite`, `join`, `send`, `recv`). For M3, the demo path is sufficient.
---
## Key Design Decisions
### DS Port (single endpoint)
The server now exposes a **single NodeService** endpoint (default 4201) that combines Authentication and Delivery over one capnp-rpc bootstrap capability.
### GroupMember lifecycle (CRITICAL)
The `OpenMlsRustCrypto` backend holds the HPKE init private key **in memory**. The **same `GroupMember` instance** must be used from `generate_key_package()` through `join_group()`. Do NOT create a new GroupMember between these calls.
### KeyPackage wire format
`GroupMember::generate_key_package()` returns raw TLS-encoded `KeyPackage` bytes (NOT wrapped in `MlsMessageOut`). This is the same format as the standalone `generate_key_package()` function used in M2 tests. The AS stores these raw bytes.
When adding a member, `add_member()` deserializes via `KeyPackageIn::tls_deserialize(...)?.validate(...)`.
---
## Test Results (all passing)
```
test codec::tests::* ... ok (5 tests)
test keypair::tests::* ... ok (3 tests)
test group::tests::two_party_mls_round_trip ... ok
test group::tests::group_id_lifecycle ... ok
```
---
## How to continue tomorrow
```bash
cd /home/c/projects/poc-mes
git log --oneline -5 # see where we are
cargo test -p quicnprotochat-core # verify green
```
Then:
1. Write `crates/quicnprotochat-client/tests/mls_group.rs` (integration test) — highest priority
2. Add group subcommands to `crates/quicnprotochat-client/src/main.rs`
The integration test is the most important piece — it proves the full M3 stack works end-to-end.
For the test, see the pattern in `crates/quicnprotochat-client/tests/auth_service.rs` (M2 test) for how to spin up the server and connect clients.

231
README.md
View File

@@ -8,10 +8,6 @@ and ratcheted group key agreement across any number of participants. Messages
are framed with **Cap'n Proto**, keeping serialisation zero-copy and are framed with **Cap'n Proto**, keeping serialisation zero-copy and
schema-versioned. schema-versioned.
---
## Protocol stack
``` ```
┌─────────────────────────────────────────────┐ ┌─────────────────────────────────────────────┐
│ Application / MLS ciphertext │ <- group key ratchet (RFC 9420) │ Application / MLS ciphertext │ <- group key ratchet (RFC 9420)
@@ -33,165 +29,64 @@ schema-versioned.
--- ---
## Repository layout ## Documentation
``` Full documentation is available as an **mdBook** wiki in [`docs/`](docs/):
quicnprotochat/
├── crates/
│ ├── quicnprotochat-core/ # Crypto primitives, QUIC/TLS client helpers, MLS group state machine
│ │ ├── src/codec.rs # LengthPrefixedCodec — Tokio Encoder + Decoder
│ │ ├── src/keypair.rs # Transport key helpers (X25519, zeroize-on-drop)
│ │ ├── src/identity.rs # IdentityKeypair — Ed25519 identity + MLS Signer
│ │ ├── src/keypackage.rs# generate_key_package — standalone KeyPackage helper
│ │ └── src/group.rs # GroupMember — full MLS group lifecycle
│ │
│ ├── quicnprotochat-proto/ # Cap'n Proto schemas + generated types + serde helpers
│ │ └── schemas/ → # (symlinked to workspace root schemas/)
│ │
│ ├── quicnprotochat-server/ # Authentication Service (AS) + Delivery Service (DS) binary
│ └── quicnprotochat-client/ # CLI client (ping, register, fetch-key, …)
└── schemas/
├── envelope.capnp # Top-level wire envelope (MsgType discriminant + payload)
├── auth.capnp # AuthenticationService RPC (KeyPackage upload / fetch)
└── delivery.capnp # DeliveryService RPC (enqueue / fetch MLS messages)
```
---
## Services
### Node Service (Auth + Delivery) — port 4201
Single QUIC + TLS 1.3 endpoint exposing Cap'n Proto `NodeService` that combines
Authentication (KeyPackage upload/fetch) and Delivery (enqueue/fetch) operations.
```
uploadKeyPackage(identityKey: Data, package: Data) -> (fingerprint: Data)
fetchKeyPackage(identityKey: Data) -> (package: Data)
```
Packages are indexed by the raw Ed25519 public key (32 bytes) and consumed
exactly once on fetch, matching the MLS single-use KeyPackage requirement.
A simple store-and-forward relay for MLS messages. The server never inspects
payloads — it routes opaque blobs by recipient public key.
```
enqueue(recipientKey: Data, payload: Data) -> ()
fetch(recipientKey: Data) -> (payloads: List(Data))
```
`fetch` atomically drains the entire queue in FIFO order.
---
## MLS group lifecycle
```
GroupMember::new(identity)
├─ generate_key_package() → upload bytes to AS
├─ create_group(group_id) → epoch 0, sole member
│ └─ add_member(kp_bytes)→ (commit_bytes, welcome_bytes)
│ ↑ │ │
│ fetched from AS discard send to joiner via DS
└─ join_group(welcome_bytes) → joined; ready to encrypt
├─ send_message(plain) → TLS-encoded PrivateMessage → DS
└─ receive_message(ct) → Some(plaintext) | None (Commit)
```
The `OpenMlsRustCrypto` backend is **persistent across calls** on the same
`GroupMember` instance — it holds the HPKE init private key in its in-memory
key store between `generate_key_package` and `join_group`.
---
## Building
**Prerequisites:**
- Rust (stable, 1.77+)
- `capnp` CLI — the Cap'n Proto schema compiler
```bash ```bash
# Debian / Ubuntu # Install mdBook (once)
apt-get install capnproto cargo install mdbook
# macOS # Build and serve locally
brew install capnp mdbook serve docs
# Open http://localhost:3000
``` ```
**Build everything:** ### Highlights
- **[Architecture Overview](docs/src/architecture/overview.md)** — Two-service model, dual-key design, crate layout
- **[Protocol Deep Dives](docs/src/protocol-layers/overview.md)** — QUIC/TLS, Noise_XX, Cap'n Proto, MLS, Hybrid KEM
- **[Cryptographic Properties](docs/src/cryptography/overview.md)** — Forward secrecy, post-compromise security, PQ readiness, threat model
- **[Design Rationale](docs/src/design-rationale/overview.md)** — Why MLS over Signal/Matrix, ADRs for all key decisions
- **[Wire Format Reference](docs/src/wire-format/overview.md)** — Annotated Cap'n Proto schemas
- **[Getting Started](docs/src/getting-started/prerequisites.md)** — Build, run, demo walkthrough
- **[Roadmap](docs/src/roadmap/milestones.md)** — Milestones, production readiness, future research
---
## Quick start
```bash ```bash
# Prerequisites: Rust 1.77+, capnp CLI
brew install capnp # macOS
# apt-get install capnproto # Debian/Ubuntu
# Build and test
cargo build --workspace cargo build --workspace
```
**Run tests:**
```bash
cargo test --workspace cargo test --workspace
```
--- # Start the server (port 7000 by default)
## Running
**Start the server** (NodeService on :4201):
```bash
cargo run -p quicnprotochat-server cargo run -p quicnprotochat-server
# or with a custom port:
cargo run -p quicnprotochat-server -- --listen 0.0.0.0:4201
```
Current TLS defaults (development): self-signed cert/key written to `data/` if # Or via a config file (TOML)
missing. Override via CLI flags or env vars: cat > quicnprotochat-server.toml <<'EOF'
listen = "0.0.0.0:7000"
data_dir = "data"
tls_cert = "data/server-cert.der"
tls_key = "data/server-key.der"
auth_token = "devtoken"
store_backend = "file" # or "sql"
db_path = "data/quicnprotochat.db"
db_key = ""
EOF
cargo run -p quicnprotochat-server -- --config quicnprotochat-server.toml
| Purpose | Flag | Env var | Default | # Run the Alice/Bob demo
|---|---|---|---|
| Listen address | `--listen` | `QUICNPROTOCHAT_LISTEN` | `0.0.0.0:4201` |
| TLS cert (DER) | `--tls-cert` | `QUICNPROTOCHAT_TLS_CERT` | `data/server-cert.der` |
| TLS key (DER) | `--tls-key` | `QUICNPROTOCHAT_TLS_KEY` | `data/server-key.der` |
**Client commands:**
```bash
# Check connectivity
cargo run -p quicnprotochat-client -- ping
# Generate a fresh identity + KeyPackage, upload to AS
# Prints your identity_key (hex) — share this with peers
cargo run -p quicnprotochat-client -- register
# Fetch a peer's KeyPackage (they must have registered first)
cargo run -p quicnprotochat-client -- fetch-key <64-hex-char identity key>
# Run an end-to-end Alice↔Bob demo against live AS + DS
cargo run -p quicnprotochat-client -- demo-group \ cargo run -p quicnprotochat-client -- demo-group \
--server 127.0.0.1:4201 \ --server 127.0.0.1:7000 --ds-server 127.0.0.1:7000
--ds-server 127.0.0.1:4201
# Persistent group CLI (stateful)
cargo run -p quicnprotochat-client -- register-state --state state.bin --server 127.0.0.1:4201
cargo run -p quicnprotochat-client -- create-group --state state.bin --group-id my-group
cargo run -p quicnprotochat-client -- invite --state state.bin --peer-key <peer hex> --server 127.0.0.1:4201 --ds-server 127.0.0.1:4201
cargo run -p quicnprotochat-client -- join --state state.bin --ds-server 127.0.0.1:4201
cargo run -p quicnprotochat-client -- send --state state.bin --peer-key <peer hex> --msg "hello" --ds-server 127.0.0.1:4201
cargo run -p quicnprotochat-client -- recv --state state.bin --ds-server 127.0.0.1:4201
``` ```
Server address defaults to `127.0.0.1:4201`; override with `--server` or See the [full demo walkthrough](docs/src/getting-started/demo-walkthrough.md) for a step-by-step guide.
`QUICNPROTOCHAT_SERVER`. The same endpoint serves both Authentication and
Delivery.
State file notes: the persisted state stores your identity and MLS group state
after you have joined. If you generate a KeyPackage (`register-state`) and then
restart before consuming the Welcome, the join may fail because the HPKE init
key is not retained; run join in the same session you register.
--- ---
@@ -199,45 +94,21 @@ key is not retained; run join in the same session you register.
| # | Name | Status | What it adds | | # | Name | Status | What it adds |
|---|------|--------|--------------| |---|------|--------|--------------|
| M1 | QUIC/TLS transport | | QUIC + TLS 1.3 endpoint, length-prefixed framing, Ping/Pong | | M1 | QUIC/TLS transport | Done | QUIC + TLS 1.3 endpoint, length-prefixed framing, Ping/Pong |
| M2 | Authentication Service | | Ed25519 identity, KeyPackage generation, AS upload/fetch | | M2 | Authentication Service | Done | Ed25519 identity, KeyPackage generation, AS upload/fetch |
| M3 | Delivery Service + MLS groups | | DS relay, `GroupMember` create/join/add/send/recv | | M3 | Delivery Service + MLS groups | Done | DS relay, `GroupMember` create/join/add/send/recv |
| M4 | Group CLI subcommands | 🔜 | Persistent CLI (`create-group`, `invite`, `join`, `send`, `recv`); demo-group already available | | M4 | Group CLI subcommands | Next | Persistent CLI (`create-group`, `invite`, `join`, `send`, `recv`) |
| M5 | Multi-party groups | 🔜 | N > 2 members, Commit fan-out, Proposal handling | | M5 | Multi-party groups | Planned | N > 2 members, Commit fan-out, Proposal handling |
| M6 | Persistence | 🔜 | SQLite key store, durable group state | | M6 | Persistence | Planned | SQLite key store, durable group state |
| M7 | Post-quantum | 🔜 | PQ hybrid for MLS/HPKE | | M7 | Post-quantum | Planned | PQ hybrid for MLS/HPKE (X25519 + ML-KEM-768) |
---
## Production hardening roadmap (high level)
1) **Transport & identity**: ACME/Let's Encrypt, pinned identities, TLS policy
hardening, server identity via CA.
2) **Persistence**: Move AS/DS and MLS state to Postgres; encrypted at rest;
retention/TTL and migrations.
3) **AuthZ & accounts**: User/device accounts (OIDC/passwordless), device
binding, revocation/recovery; bind MLS credentials to issued identities.
4) **Delivery semantics**: Message IDs, idempotent enqueue/fetch, ordering per
conversation, backpressure/retries; attachment pipeline via encrypted
object storage.
5) **Observability & ops**: Structured logs with correlation IDs; Prometheus
metrics; tracing; alerting + SLOs; audit logs for auth/key events.
6) **Client resilience**: Reconnect/resume, offline queue, multi-device key
handling; key verification UX (QR/safety numbers); recovery flows.
7) **Security & compliance**: Dependency audits, fuzzing, SAST/DAST, pentest;
SBOM/signed releases; PII minimization and retention controls.
--- ---
## Security notes ## Security notes
- This is a **proof-of-concept**. It has not been audited. This is a **proof-of-concept research project**. It has not been audited.
- The server uses a self-signed TLS cert by default; clients trust it via a See the [threat model](docs/src/cryptography/threat-model.md) for a detailed
local DER file. No pinning or CA-based identity is enforced yet. analysis of what is and isn't protected.
- MLS credentials use `CredentialType::Basic` (public key only). A real
deployment would bind credentials to a certificate authority.
- The Delivery operation does no authentication of the `recipientKey` field —
anyone can enqueue for any recipient. Access control is a future milestone.
--- ---

View File

@@ -1,57 +0,0 @@
# Production Readiness Work Breakdown
## Feature Scope (must-have)
- Identity and Auth: account/device model, signup/login, short-lived tokens + refresh, device binding/revocation, rate limits, audit events.
- Key and MLS Lifecycle: keypackage create/rotate/expire, add/remove member, epoch advance, replay/downgrade protection, external commits, keystore encryption at rest.
- Transport and Delivery: QUIC/TLS endpoint on 4201, health/readiness, ordering and dedup policy, idempotent delivery IDs, backpressure, resumable sessions, payload size caps.
- Private 1:1 Channels: first-class DM abstraction (channel IDs), authz on enqueue/fetch, per-channel history/retention policy, same MLS encryption with pairwise groups, spam/rate controls.
- Storage and Persistence: durable queues and keypackages, migrations and schema versioning, integrity checksums, backup/restore playbook.
- Observability and Ops: structured logs with correlation IDs, metrics (auth latency, handshake success, delivery lag, queue depth), traces across auth→delivery→storage, alerting/SLO dashboards.
- Client Resilience and UX: offline queue with retry/jitter, reconnect/resume, state persistence, basic key verification surface, compatibility handling for server upgrades.
- Compatibility and Protocols: Cap'n Proto schema versioning rules, golden-wire fixtures, N-1 client/server matrix tests, ciphersuite allowlist.
## Security Plan (by design)
- Governance: CODEOWNERS on crypto/proto/auth paths; required review; cargo-audit/deny + SBOM in CI; threat model maintained per release.
- Transport Policy: TLS 1.3 strict ciphers, mTLS option, pinned server identity, downgrade detection; QUIC rate limits/connection caps.
- MLS Policy: enforce lifetime/usage on keypackages, replay/downgrade checks, epoch monotonicity, credential validation.
- Input Validation: strict length/type checks on all RPC inputs; reject oversize or malformed payloads; explicit error mapping with no panics on untrusted data.
- Secrets: config via env/secret manager only; no secrets in repo/images; rotation hooks; memory zeroize where feasible.
- Abuse/DoS Controls: per-IP/account rate limits, request/body size caps, cheap pre-auth drops, bounded queues/backpressure.
- Data Protection: encryption at rest for keystore/state; backups with integrity verification; deletion/retention policies.
- Logging Safety: redaction of secrets/PII; correlation IDs; audit log for auth/device/key events; access-controlled log sinks.
- Testing: unit/prop tests for codecs/crypto/state machines; integration tests for auth/storage; e2e security cases (tamper/replay/downgrade/expiry); fuzzing targets for parsers; periodic pentest.
## Work Breakdown (phased)
1) Baselines and Governance
- CODEOWNERS + review gates; fmt/clippy/test and cargo-audit/deny in CI; SBOM generation; threat model + release criteria (SLOs, ciphersuites, compat policy).
2) Protocols and Core Hardening
- Cap'n Proto versioning rules + compat tests + golden-wire fixtures.
- Enforce ciphersuite allowlist; downgrade/replay guards; keypackage lifetime/expiry; keystore encryption; structured error taxonomy.
- Wire guardrails: TLS 1.3 only; MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519 only; schema version tags on all Cap'n Proto messages; reject unknown versions; golden captures for auth/envelope/delivery; N-1 compatibility tests.
3) Auth/Device and Server Hardening
- Account/device schema and storage; signup/login + token/refresh; device bind/revoke; rate limits and size caps; audit events; health/readiness; graceful shutdown/backpressure.
- AuthZ/RBAC hooks on enqueue/fetch keyed to identity/device; session TTLs; lockout/backoff; audit log on auth/device/key events; per-IP/account limits (50 r/s, 5 MB payload cap, 50 conns/IP).
4) Delivery Semantics and Client Resilience
- Idempotent delivery IDs, ordering/dedup policy, resumable sessions, offline queue with retry/jitter, state persistence; client/server config for port 4201; telemetry hooks.
- First-class 1:1 channels: channel IDs, authz on enqueue/fetch, per-channel retention (7d), keypackage TTL 24h, spam/rate controls, optional history toggle.
5) E2E Harness and Security Tests
- docker-compose testnet; Rust e2e driver; happy-path flows (register, upload/fetch, create/join/send/recv, resume); negative cases (tamper, replay, downgrade, expired keypackage, oversize, rate limit); compatibility matrix (N-1 clients/servers).
6) Reliability, Perf, and Operations
- Soak/load tests with thresholds; chaos (loss/latency/reorder); backups/restore drills; staging parity; canary/rollback runbooks; alerting + dashboards.
## Planning Checklist (before implementation)
- Define release criteria and SLOs: availability, p99 latencies (auth, handshake, enqueue/fetch), error budgets.
- Threat model sign-off: auth/device, transport, MLS lifecycle, storage, abuse/DoS; document mitigations and gaps.
- Protocol policy: allowed ciphersuites, Cap'n Proto versioning rules, backward/forward compatibility guarantees, keypackage lifetime/rotation cadence.
- Identity and auth model: account/device lifecycle, token TTL/refresh, revocation flows, audit requirements.
- Data model decisions: schema for keypackages, delivery queues, audit logs; retention and deletion policy (per-message, per-channel).
- Abuse controls: rate limits (per IP/account/channel), size caps, connection caps, cheap pre-auth drops; defaults and override policy.
- Observability contracts: required metrics/log fields/traces, correlation IDs; dashboards to build; alert thresholds.
- Environments and secrets: how configs are injected (env/secret manager), key rotation plan, no-secrets-in-repo enforcement.
- Testing matrix: target platforms, N-1 compatibility scope, minimum e2e acceptance set, perf thresholds.
- Rollout and ops: staging parity definition, canary/rollback procedure, backup/restore drill cadence, on-call/runbook ownership.

View File

@@ -0,0 +1,971 @@
use std::fs;
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::sync::{Arc, OnceLock};
use anyhow::Context;
use capnp_rpc::{rpc_twoparty_capnp::Side, twoparty, RpcSystem};
use serde::{Deserialize, Serialize};
use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
use quinn::{ClientConfig, Endpoint};
use quinn_proto::crypto::rustls::QuicClientConfig;
use rustls::pki_types::CertificateDer;
use rustls::{ClientConfig as RustlsClientConfig, RootCertStore};
use quicnprotochat_core::{
generate_key_package, hybrid_decrypt, hybrid_encrypt, DiskKeyStore, GroupMember,
HybridKeypair, HybridKeypairBytes, HybridPublicKey, IdentityKeypair,
};
use quicnprotochat_proto::node_capnp::{auth, node_service};
// Global auth context initialized once per process.
static AUTH_CONTEXT: OnceLock<ClientAuth> = OnceLock::new();
/// Credentials attached to every authenticated RPC issued by this client.
///
/// `Debug` is implemented by hand (instead of derived) so that the bearer
/// token can never leak into logs or panic messages; only its length is shown.
#[derive(Clone)]
pub struct ClientAuth {
    /// Auth scheme version sent to the server.
    version: u16,
    /// Bearer token bytes (secret).
    access_token: Vec<u8>,
    /// Device identifier bytes; empty when no device id was supplied.
    device_id: Vec<u8>,
}

impl std::fmt::Debug for ClientAuth {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ClientAuth")
            .field("version", &self.version)
            .field(
                "access_token",
                &format_args!("<redacted, {} bytes>", self.access_token.len()),
            )
            .field("device_id", &self.device_id)
            .finish()
    }
}

impl ClientAuth {
    /// Build a client auth context from an access token and an optional device id.
    ///
    /// The version is always set to 1. The token is stored as given; this
    /// constructor does not reject an empty token, so callers that need a
    /// non-empty token must check it themselves. A missing `device_id` is
    /// stored as an empty byte string.
    pub fn from_parts(access_token: String, device_id: Option<String>) -> Self {
        Self {
            version: 1,
            access_token: access_token.into_bytes(),
            device_id: device_id.unwrap_or_default().into_bytes(),
        }
    }
}
/// Install `ctx` as the process-wide auth context.
///
/// Only the first call has any effect: once a context has been stored, later
/// calls leave it untouched and their argument is discarded.
pub fn init_auth(ctx: ClientAuth) {
    // `set` hands the value back as `Err` when the cell is already filled;
    // dropping that result is the "ignore subsequent calls" behaviour.
    drop(AUTH_CONTEXT.set(ctx));
}
// ── Subcommand implementations ───────────────────────────────────────────────
/// Connect to `server`, call the health RPC, and print the status and elapsed time.
///
/// The reported `rtt` covers everything from just before the QUIC/TLS
/// connection is opened until the health response has been decoded, so it
/// includes connection setup, not just the RPC itself.
///
/// # Errors
/// Fails if the connection cannot be established or the health RPC fails or
/// returns a malformed response. A status that is not valid UTF-8 is printed
/// as `invalid` rather than treated as an error.
pub async fn cmd_ping(server: &str, ca_cert: &Path, server_name: &str) -> anyhow::Result<()> {
    // Use the monotonic clock for the measurement: the wall clock can be
    // stepped (NTP, manual changes) while we wait, which would yield a bogus
    // or zero duration.
    let started = std::time::Instant::now();

    let client = connect_node(server, ca_cert, server_name).await?;
    let req = client.health_request();
    let resp = req.send().promise.await.context("health RPC failed")?;
    let status = resp
        .get()
        .context("health: bad response")?
        .get_status()
        .context("health: missing status")?
        .to_str()
        .unwrap_or("invalid");

    let rtt_ms = started.elapsed().as_millis();
    println!("health={status} rtt={rtt_ms}ms");
    Ok(())
}
/// Create a brand-new identity, build a KeyPackage for it, and upload it.
///
/// The fingerprint echoed by the server is compared with the locally
/// produced one; a mismatch aborts with an error. On success the identity
/// public key and fingerprint are printed as hex. Nothing is persisted.
///
/// Must run on a `LocalSet` because capnp-rpc is `!Send`.
pub async fn cmd_register(server: &str, ca_cert: &Path, server_name: &str) -> anyhow::Result<()> {
    let identity = IdentityKeypair::generate();
    let (tls_bytes, fingerprint) =
        generate_key_package(&identity).context("KeyPackage generation failed")?;

    let node_client = connect_node(server, ca_cert, server_name).await?;

    let mut request = node_client.upload_key_package_request();
    {
        let mut params = request.get();
        params.set_identity_key(&identity.public_key_bytes());
        params.set_package(&tls_bytes);
        set_auth(&mut params.reborrow().init_auth());
    }
    let reply = request
        .send()
        .promise
        .await
        .context("upload_key_package RPC failed")?;

    let server_fp = reply
        .get()
        .context("upload_key_package: bad response")?
        .get_fingerprint()
        .context("upload_key_package: missing fingerprint")?
        .to_vec();

    if !(server_fp == fingerprint) {
        anyhow::bail!(
            "fingerprint mismatch: local={} server={}",
            hex::encode(&fingerprint),
            hex::encode(&server_fp),
        );
    }

    let identity_hex = hex::encode(identity.public_key_bytes());
    println!("identity_key : {}", identity_hex);
    println!("fingerprint : {}", hex::encode(&fingerprint));
    println!("KeyPackage uploaded successfully.");
    Ok(())
}
/// Upload the stored identity's KeyPackage to the AS (persists backend state).
pub async fn cmd_register_state(
state_path: &Path,
server: &str,
ca_cert: &Path,
server_name: &str,
) -> anyhow::Result<()> {
let state = load_or_init_state(state_path)?;
let (mut member, hybrid_kp) = state.into_parts(state_path)?;
let tls_bytes = member
.generate_key_package()
.context("KeyPackage generation failed")?;
let fingerprint = sha256(&tls_bytes);
let node_client = connect_node(server, ca_cert, server_name).await?;
let mut req = node_client.upload_key_package_request();
{
let mut p = req.get();
p.set_identity_key(&member.identity().public_key_bytes());
p.set_package(&tls_bytes);
let mut auth = p.reborrow().init_auth();
set_auth(&mut auth);
}
let response = req
.send()
.promise
.await
.context("upload_key_package RPC failed")?;
let server_fp = response
.get()
.context("upload_key_package: bad response")?
.get_fingerprint()
.context("upload_key_package: missing fingerprint")?
.to_vec();
anyhow::ensure!(server_fp == fingerprint, "fingerprint mismatch");
// Upload hybrid public key alongside the KeyPackage.
if let Some(ref hkp) = hybrid_kp {
upload_hybrid_key(
&node_client,
&member.identity().public_key_bytes(),
&hkp.public_key(),
)
.await?;
println!("hybrid_key : uploaded (X25519 + ML-KEM-768)");
}
println!(
"identity_key : {}",
hex::encode(member.identity().public_key_bytes())
);
println!("fingerprint : {}", hex::encode(&fingerprint));
println!("KeyPackage uploaded successfully.");
save_state(state_path, &member, hybrid_kp.as_ref())?;
Ok(())
}
/// Look up a peer's KeyPackage by hex-encoded identity key and print a summary.
///
/// The key must decode to exactly 32 bytes; this is checked before any
/// connection is made. An empty package in the response is reported as
/// "not available" and is not an error. Otherwise the SHA-256 fingerprint and
/// the package length are printed.
///
/// Must run on a `LocalSet` because capnp-rpc is `!Send`.
pub async fn cmd_fetch_key(
    server: &str,
    ca_cert: &Path,
    server_name: &str,
    identity_key_hex: &str,
) -> anyhow::Result<()> {
    use sha2::{Digest, Sha256};

    let identity_key = hex::decode(identity_key_hex)
        .map_err(|e| anyhow::anyhow!(e))
        .context("identity_key must be 64 hex characters (32 bytes)")?;
    if identity_key.len() != 32 {
        anyhow::bail!(
            "identity_key must be exactly 32 bytes, got {}",
            identity_key.len()
        );
    }

    let node_client = connect_node(server, ca_cert, server_name).await?;

    let mut request = node_client.fetch_key_package_request();
    {
        let mut params = request.get();
        params.set_identity_key(&identity_key);
        set_auth(&mut params.reborrow().init_auth());
    }
    let reply = request
        .send()
        .promise
        .await
        .context("fetch_key_package RPC failed")?;
    let package = reply
        .get()
        .context("fetch_key_package: bad response")?
        .get_package()
        .context("fetch_key_package: missing package field")?
        .to_vec();

    if package.is_empty() {
        println!("No KeyPackage available for this identity.");
    } else {
        let fingerprint = Sha256::digest(&package);
        println!("fingerprint : {}", hex::encode(fingerprint));
        println!("package_len : {} bytes", package.len());
        println!("KeyPackage fetched successfully.");
    }
    Ok(())
}
/// Run a complete Alice↔Bob MLS round-trip using the unified server endpoint.
///
/// All payloads are wrapped in post-quantum hybrid envelopes (X25519 + ML-KEM-768).
///
/// Sequence, all within this one process:
/// 1. generate fresh identities, hybrid keypairs and KeyPackages for both parties;
/// 2. open one connection per party and upload KeyPackages + hybrid public keys;
/// 3. Alice fetches Bob's KeyPackage, creates `demo-group`, adds Bob, and
///    enqueues the hybrid-wrapped Welcome;
/// 4. Bob fetches, unwraps and joins;
/// 5. one application message is exchanged in each direction and printed.
///
/// Nothing is persisted to disk by this function itself. Any failing step
/// aborts the demo with a contextual error.
pub async fn cmd_demo_group(server: &str, ca_cert: &Path, server_name: &str) -> anyhow::Result<()> {
// Identities and MLS state must be tied to the same backend instance.
let alice_id = Arc::new(IdentityKeypair::generate());
let bob_id = Arc::new(IdentityKeypair::generate());
// Generate hybrid keypairs for both participants.
let alice_hybrid = HybridKeypair::generate();
let bob_hybrid = HybridKeypair::generate();
let mut alice = GroupMember::new(Arc::clone(&alice_id));
let mut bob = GroupMember::new(Arc::clone(&bob_id));
let alice_kp = alice
.generate_key_package()
.context("Alice KeyPackage generation failed")?;
let bob_kp = bob
.generate_key_package()
.context("Bob KeyPackage generation failed")?;
// Upload both KeyPackages and hybrid public keys to the server.
// Each participant gets its own connection.
let alice_node = connect_node(server, ca_cert, server_name).await?;
let bob_node = connect_node(server, ca_cert, server_name).await?;
upload_key_package(&alice_node, &alice_id.public_key_bytes(), &alice_kp).await?;
upload_key_package(&bob_node, &bob_id.public_key_bytes(), &bob_kp).await?;
upload_hybrid_key(
&alice_node,
&alice_id.public_key_bytes(),
&alice_hybrid.public_key(),
)
.await?;
upload_hybrid_key(
&bob_node,
&bob_id.public_key_bytes(),
&bob_hybrid.public_key(),
)
.await?;
println!("hybrid public keys uploaded for Alice and Bob");
// Alice fetches Bob's KeyPackage and creates the group.
let fetched_bob_kp = fetch_key_package(&alice_node, &bob_id.public_key_bytes()).await?;
anyhow::ensure!(
!fetched_bob_kp.is_empty(),
"AS returned an empty KeyPackage for Bob",
);
alice
.create_group(b"demo-group")
.context("Alice create_group failed")?;
// The commit half of the result is not sent anywhere; only the Welcome is used.
let (_commit, welcome) = alice
.add_member(&fetched_bob_kp)
.context("Alice add_member failed")?;
// The "DS" handles are clones of the same per-party clients used above.
let alice_ds = alice_node.clone();
let bob_ds = bob_node.clone();
// Fetch Bob's hybrid PK and wrap the welcome.
let bob_hybrid_pk = fetch_hybrid_key(&alice_node, &bob_id.public_key_bytes())
.await?
.context("Bob hybrid key not found")?;
let wrapped_welcome =
hybrid_encrypt(&bob_hybrid_pk, &welcome).context("hybrid encrypt welcome")?;
enqueue(&alice_ds, &bob_id.public_key_bytes(), &wrapped_welcome).await?;
// Only the first fetched payload is treated as the Welcome.
let welcome_payloads = fetch_all(&bob_ds, &bob_id.public_key_bytes()).await?;
let raw_welcome = welcome_payloads
.first()
.cloned()
.context("Welcome was not delivered to Bob via DS")?;
// Bob unwraps the hybrid envelope and joins the group.
let welcome_bytes = hybrid_decrypt(&bob_hybrid, &raw_welcome)
.context("Bob: hybrid decrypt welcome failed")?;
bob.join_group(&welcome_bytes)
.context("Bob join_group failed")?;
// Alice → Bob (hybrid-wrapped)
let ct_ab = alice
.send_message(b"hello bob")
.context("Alice send_message failed")?;
let wrapped_ab =
hybrid_encrypt(&bob_hybrid_pk, &ct_ab).context("hybrid encrypt Alice→Bob")?;
enqueue(&alice_ds, &bob_id.public_key_bytes(), &wrapped_ab).await?;
let bob_msgs = fetch_all(&bob_ds, &bob_id.public_key_bytes()).await?;
let raw_ab = bob_msgs
.first()
.context("Bob: missing Alice ciphertext from DS")?;
let inner_ab = hybrid_decrypt(&bob_hybrid, raw_ab).context("Bob: hybrid decrypt failed")?;
// `receive_message` yields an optional plaintext; `None` here is treated as an error.
let ab_plaintext = bob
.receive_message(&inner_ab)?
.context("Bob expected application message from Alice")?;
println!(
"Alice → Bob plaintext: {}",
String::from_utf8_lossy(&ab_plaintext)
);
// Bob → Alice (hybrid-wrapped)
let alice_hybrid_pk = fetch_hybrid_key(&bob_node, &alice_id.public_key_bytes())
.await?
.context("Alice hybrid key not found")?;
let ct_ba = bob
.send_message(b"hello alice")
.context("Bob send_message failed")?;
let wrapped_ba =
hybrid_encrypt(&alice_hybrid_pk, &ct_ba).context("hybrid encrypt Bob→Alice")?;
enqueue(&bob_ds, &alice_id.public_key_bytes(), &wrapped_ba).await?;
let alice_msgs = fetch_all(&alice_ds, &alice_id.public_key_bytes()).await?;
let raw_ba = alice_msgs
.first()
.context("Alice: missing Bob ciphertext from DS")?;
let inner_ba =
hybrid_decrypt(&alice_hybrid, raw_ba).context("Alice: hybrid decrypt failed")?;
let ba_plaintext = alice
.receive_message(&inner_ba)?
.context("Alice expected application message from Bob")?;
println!(
"Bob → Alice plaintext: {}",
String::from_utf8_lossy(&ba_plaintext)
);
println!("demo-group complete (hybrid PQ envelope active)");
Ok(())
}
/// Create a group named `group_id` for the stored identity and save the state.
///
/// The state file is created if it does not exist yet. Fails when the state
/// already holds a group. `_server` is accepted but not used; no network
/// traffic happens here.
pub async fn cmd_create_group(
    state_path: &Path,
    _server: &str,
    group_id: &str,
) -> anyhow::Result<()> {
    let (mut member, hybrid_kp) = load_or_init_state(state_path)?.into_parts(state_path)?;

    if member.group_ref().is_some() {
        anyhow::bail!("group already exists in state");
    }

    let group_id_bytes = group_id.as_bytes();
    member
        .create_group(group_id_bytes)
        .context("create_group failed")?;

    save_state(state_path, &member, hybrid_kp.as_ref())?;
    println!("group created: {group_id}");
    Ok(())
}
/// Invite a peer: fetch their KeyPackage, add them to the group, enqueue the Welcome.
///
/// If the peer has a hybrid public key on the server, the Welcome is wrapped
/// in a post-quantum hybrid envelope (X25519 + ML-KEM-768); otherwise it is
/// sent as-is.
///
/// Ordering is deliberate:
/// * the "is there an active group?" check is purely local, so it runs before
///   any connection is opened or any KeyPackage is requested from the server;
/// * every server lookup happens before the group is modified, so an RPC
///   failure cannot occur between `add_member` and the enqueue of the Welcome
///   that results from it.
///
/// State is written back only after the Welcome has been enqueued.
///
/// # Errors
/// Fails if the state cannot be loaded, `peer_key_hex` is not a 32-byte hex
/// key, there is no active group, any RPC fails, the server returns an empty
/// KeyPackage, or adding the member / encrypting / saving fails.
pub async fn cmd_invite(
    state_path: &Path,
    server: &str,
    ca_cert: &Path,
    server_name: &str,
    peer_key_hex: &str,
) -> anyhow::Result<()> {
    let state = load_existing_state(state_path)?;
    let (mut member, hybrid_kp) = state.into_parts(state_path)?;
    let peer_key = decode_identity_key(peer_key_hex)?;

    // Local precondition first: no point talking to the server without a group.
    anyhow::ensure!(
        member.group_ref().is_some(),
        "no active group; run create-group first"
    );

    let node_client = connect_node(server, ca_cert, server_name).await?;
    let peer_kp = fetch_key_package(&node_client, &peer_key).await?;
    anyhow::ensure!(
        !peer_kp.is_empty(),
        "server returned empty KeyPackage for peer"
    );

    // Resolve the peer's hybrid key before touching the group.
    let peer_hybrid_pk = fetch_hybrid_key(&node_client, &peer_key).await?;

    let (_, welcome) = member.add_member(&peer_kp).context("add_member failed")?;

    // Wrap welcome in hybrid envelope if peer has a hybrid public key.
    let payload = if let Some(ref pk) = peer_hybrid_pk {
        hybrid_encrypt(pk, &welcome).context("hybrid encrypt welcome failed")?
    } else {
        welcome
    };

    enqueue(&node_client, &peer_key, &payload).await?;
    save_state(state_path, &member, hybrid_kp.as_ref())?;

    println!(
        "invited peer (welcome queued{})",
        if peer_hybrid_pk.is_some() { ", hybrid-encrypted" } else { "" }
    );
    Ok(())
}
/// Join a group using the first payload waiting in this identity's queue.
///
/// The payload is passed through `try_hybrid_unwrap`, so both hybrid-wrapped
/// and plain Welcomes are accepted. Fails if the state already has an active
/// group or the queue is empty. Only the first fetched payload is used; any
/// further payloads returned by the same fetch are not processed here.
pub async fn cmd_join(
    state_path: &Path,
    server: &str,
    ca_cert: &Path,
    server_name: &str,
) -> anyhow::Result<()> {
    let (mut member, hybrid_kp) = load_existing_state(state_path)?.into_parts(state_path)?;

    if member.group_ref().is_some() {
        anyhow::bail!("group already active in state");
    }

    let node_client = connect_node(server, ca_cert, server_name).await?;
    let queued = fetch_all(&node_client, &member.identity().public_key_bytes()).await?;
    let raw_welcome = queued
        .into_iter()
        .next()
        .context("no Welcome found in DS for this identity")?;

    // Hybrid envelope if we can open it, otherwise the bytes as received.
    let welcome_bytes = try_hybrid_unwrap(hybrid_kp.as_ref(), &raw_welcome);
    member
        .join_group(&welcome_bytes)
        .context("join_group failed")?;

    save_state(state_path, &member, hybrid_kp.as_ref())?;
    println!("joined group successfully");
    Ok(())
}
/// Encrypt `msg` for the current group and enqueue it for `peer_key_hex`.
///
/// When the server holds a hybrid public key for the peer, the MLS ciphertext
/// is wrapped in a hybrid envelope first; otherwise it is sent unwrapped.
/// State is saved after the payload has been enqueued.
pub async fn cmd_send(
    state_path: &Path,
    server: &str,
    ca_cert: &Path,
    server_name: &str,
    peer_key_hex: &str,
    msg: &str,
) -> anyhow::Result<()> {
    let (mut member, hybrid_kp) = load_existing_state(state_path)?.into_parts(state_path)?;
    let peer_key = decode_identity_key(peer_key_hex)?;
    let node_client = connect_node(server, ca_cert, server_name).await?;

    let ct = member
        .send_message(msg.as_bytes())
        .context("send_message failed")?;

    let peer_hybrid_pk = fetch_hybrid_key(&node_client, &peer_key).await?;
    let payload = match peer_hybrid_pk.as_ref() {
        Some(pk) => hybrid_encrypt(pk, &ct).context("hybrid encrypt failed")?,
        None => ct,
    };

    enqueue(&node_client, &peer_key, &payload).await?;
    save_state(state_path, &member, hybrid_kp.as_ref())?;

    let suffix = if peer_hybrid_pk.is_some() { " (hybrid-encrypted)" } else { "" };
    println!("message sent{}", suffix);
    Ok(())
}
/// Receive and decrypt all pending messages from the server.
///
/// Automatically detects and decrypts hybrid-wrapped payloads.
pub async fn cmd_recv(
state_path: &Path,
server: &str,
ca_cert: &Path,
server_name: &str,
wait_ms: u64,
stream: bool,
) -> anyhow::Result<()> {
let state = load_existing_state(state_path)?;
let (mut member, hybrid_kp) = state.into_parts(state_path)?;
let client = connect_node(server, ca_cert, server_name).await?;
loop {
let payloads = fetch_wait(&client, &member.identity().public_key_bytes(), wait_ms).await?;
if payloads.is_empty() {
if !stream {
println!("no messages");
return Ok(());
}
continue;
}
for (idx, payload) in payloads.iter().enumerate() {
// Try hybrid decryption, fall back to raw MLS payload.
let mls_payload = try_hybrid_unwrap(hybrid_kp.as_ref(), payload);
match member.receive_message(&mls_payload) {
Ok(Some(pt)) => println!("[{idx}] plaintext: {}", String::from_utf8_lossy(&pt)),
Ok(None) => println!("[{idx}] commit applied"),
Err(e) => println!("[{idx}] error: {e}"),
}
}
save_state(state_path, &member, hybrid_kp.as_ref())?;
if !stream {
return Ok(());
}
}
}
// ── Shared helpers ───────────────────────────────────────────────────────────
/// Establish a QUIC/TLS connection and return a `NodeService` client.
///
/// * `server` — socket address of the server; it is parsed as a literal
///   `SocketAddr` (IP and port), host names are not resolved here.
/// * `ca_cert` — path to the certificate used as the only trust root. The file
///   contents are handed to rustls as-is as a DER certificate.
/// * `server_name` — name presented for TLS verification.
///
/// The local UDP endpoint is bound to the unspecified address of the *same
/// address family* as `server`. Binding to IPv4 unconditionally would make
/// every IPv6 server address unreachable.
///
/// One bidirectional stream is opened and used as the capnp two-party
/// transport; the RPC system is driven by a task spawned with `spawn_local`.
///
/// Must be called from within a `LocalSet` because capnp-rpc is `!Send`.
///
/// # Errors
/// Fails on an unparsable address, unreadable or invalid certificate, socket
/// bind failure, or any QUIC connection / stream setup error.
pub async fn connect_node(
    server: &str,
    ca_cert: &Path,
    server_name: &str,
) -> anyhow::Result<node_service::Client> {
    let addr: SocketAddr = server
        .parse()
        .with_context(|| format!("server must be host:port, got {server}"))?;

    let cert_bytes = fs::read(ca_cert).with_context(|| format!("read ca_cert {ca_cert:?}"))?;
    let mut roots = RootCertStore::empty();
    roots
        .add(CertificateDer::from(cert_bytes))
        .context("add root cert")?;
    let tls = RustlsClientConfig::builder()
        .with_root_certificates(roots)
        .with_no_client_auth();
    let crypto = QuicClientConfig::try_from(tls)
        .map_err(|e| anyhow::anyhow!("invalid client TLS config: {e}"))?;

    // Match the server's address family; port 0 lets the OS pick a free port.
    let bind_addr: SocketAddr = if addr.is_ipv6() {
        (std::net::Ipv6Addr::UNSPECIFIED, 0).into()
    } else {
        (std::net::Ipv4Addr::UNSPECIFIED, 0).into()
    };
    let mut endpoint = Endpoint::client(bind_addr)
        .with_context(|| format!("bind local QUIC endpoint on {bind_addr}"))?;
    endpoint.set_default_client_config(ClientConfig::new(Arc::new(crypto)));

    let connection = endpoint
        .connect(addr, server_name)
        .context("quic connect init")?
        .await
        .context("quic connect failed")?;
    let (send, recv) = connection.open_bi().await.context("open bi stream")?;

    let network = twoparty::VatNetwork::new(
        recv.compat(),
        send.compat_write(),
        Side::Client,
        Default::default(),
    );
    let mut rpc_system = RpcSystem::new(Box::new(network), None);
    let client: node_service::Client = rpc_system.bootstrap(Side::Server);
    // The RPC system is `!Send`, hence `spawn_local` rather than `spawn`.
    tokio::task::spawn_local(rpc_system);
    Ok(client)
}
/// Send `package` to the server under `identity_key` and check the echoed fingerprint.
///
/// The server's fingerprint must equal the SHA-256 of `package`; otherwise the
/// call fails with `fingerprint mismatch`.
pub async fn upload_key_package(
    client: &node_service::Client,
    identity_key: &[u8],
    package: &[u8],
) -> anyhow::Result<()> {
    let mut request = client.upload_key_package_request();
    {
        let mut params = request.get();
        params.set_identity_key(identity_key);
        params.set_package(package);
        set_auth(&mut params.reborrow().init_auth());
    }

    let reply = request
        .send()
        .promise
        .await
        .context("upload_key_package RPC failed")?;
    let server_fp = reply
        .get()
        .context("upload_key_package: bad response")?
        .get_fingerprint()
        .context("upload_key_package: missing fingerprint")?
        .to_vec();

    if server_fp != sha256(package) {
        anyhow::bail!("fingerprint mismatch");
    }
    Ok(())
}
/// Request the KeyPackage stored for `identity_key` and return its raw bytes.
///
/// The bytes are returned exactly as the server sent them; an empty vector is
/// passed through to the caller rather than treated as an error.
pub async fn fetch_key_package(
    client: &node_service::Client,
    identity_key: &[u8],
) -> anyhow::Result<Vec<u8>> {
    let mut request = client.fetch_key_package_request();
    {
        let mut params = request.get();
        params.set_identity_key(identity_key);
        set_auth(&mut params.reborrow().init_auth());
    }

    let reply = request
        .send()
        .promise
        .await
        .context("fetch_key_package RPC failed")?;
    let reader = reply.get().context("fetch_key_package: bad response")?;
    let package = reader
        .get_package()
        .context("fetch_key_package: missing package field")?;
    Ok(package.to_vec())
}
/// Queue `payload` on the server for `recipient_key`.
///
/// The request always uses an empty channel id and version 1. The payload is
/// sent unmodified; the response body is not inspected.
pub async fn enqueue(
    client: &node_service::Client,
    recipient_key: &[u8],
    payload: &[u8],
) -> anyhow::Result<()> {
    let mut request = client.enqueue_request();
    {
        let mut params = request.get();
        params.set_recipient_key(recipient_key);
        params.set_payload(payload);
        params.set_channel_id(&[]);
        params.set_version(1);
        set_auth(&mut params.reborrow().init_auth());
    }
    let outcome = request.send().promise.await;
    outcome.context("enqueue RPC failed")?;
    Ok(())
}
/// Fetch every payload currently queued for `recipient_key`.
///
/// Uses an empty channel id and version 1. Payloads are returned in the order
/// the server listed them; the first unreadable entry aborts the call.
pub async fn fetch_all(
    client: &node_service::Client,
    recipient_key: &[u8],
) -> anyhow::Result<Vec<Vec<u8>>> {
    let mut request = client.fetch_request();
    {
        let mut params = request.get();
        params.set_recipient_key(recipient_key);
        params.set_channel_id(&[]);
        params.set_version(1);
        set_auth(&mut params.reborrow().init_auth());
    }

    let reply = request.send().promise.await.context("fetch RPC failed")?;
    let entries = reply
        .get()
        .context("fetch: bad response")?
        .get_payloads()
        .context("fetch: missing payloads")?;

    (0..entries.len())
        .map(|idx| {
            entries
                .get(idx)
                .context("fetch: payload read failed")
                .map(|data| data.to_vec())
        })
        .collect()
}
/// Like `fetch_all`, but passes `timeout_ms` to the server's `fetch_wait` RPC.
///
/// Uses an empty channel id and version 1. How the server interprets the
/// timeout is not visible from this side; the value is forwarded unchanged.
pub async fn fetch_wait(
    client: &node_service::Client,
    recipient_key: &[u8],
    timeout_ms: u64,
) -> anyhow::Result<Vec<Vec<u8>>> {
    let mut request = client.fetch_wait_request();
    {
        let mut params = request.get();
        params.set_recipient_key(recipient_key);
        params.set_timeout_ms(timeout_ms);
        params.set_channel_id(&[]);
        params.set_version(1);
        set_auth(&mut params.reborrow().init_auth());
    }

    let reply = request
        .send()
        .promise
        .await
        .context("fetch_wait RPC failed")?;
    let entries = reply
        .get()
        .context("fetch_wait: bad response")?
        .get_payloads()
        .context("fetch_wait: missing payloads")?;

    (0..entries.len())
        .map(|idx| {
            entries
                .get(idx)
                .context("fetch_wait: payload read failed")
                .map(|data| data.to_vec())
        })
        .collect()
}
/// Publish `hybrid_pk` (X25519 + ML-KEM-768) on the server for `identity_key`.
///
/// The key is sent in its `to_bytes()` encoding. Unlike the other RPC helpers
/// in this file, no auth block is attached to this request.
pub async fn upload_hybrid_key(
    client: &node_service::Client,
    identity_key: &[u8],
    hybrid_pk: &HybridPublicKey,
) -> anyhow::Result<()> {
    let encoded_pk = hybrid_pk.to_bytes();

    let mut request = client.upload_hybrid_key_request();
    {
        let mut params = request.get();
        params.set_identity_key(identity_key);
        params.set_hybrid_public_key(&encoded_pk);
    }

    let outcome = request.send().promise.await;
    outcome.context("upload_hybrid_key RPC failed")?;
    Ok(())
}
/// Look up the hybrid public key the server holds for `identity_key`.
///
/// An empty key field in the response means "none uploaded" and yields
/// `Ok(None)`. Non-empty bytes that fail to parse are an error. No auth block
/// is attached to this request.
pub async fn fetch_hybrid_key(
    client: &node_service::Client,
    identity_key: &[u8],
) -> anyhow::Result<Option<HybridPublicKey>> {
    let mut request = client.fetch_hybrid_key_request();
    request.get().set_identity_key(identity_key);

    let reply = request
        .send()
        .promise
        .await
        .context("fetch_hybrid_key RPC failed")?;
    let pk_bytes = reply
        .get()
        .context("fetch_hybrid_key: bad response")?
        .get_hybrid_public_key()
        .context("fetch_hybrid_key: missing field")?
        .to_vec();

    if pk_bytes.is_empty() {
        Ok(None)
    } else {
        HybridPublicKey::from_bytes(&pk_bytes)
            .context("invalid hybrid public key")
            .map(Some)
    }
}
/// Best-effort removal of a hybrid envelope.
///
/// Returns the decrypted inner bytes when a keypair is available and
/// `hybrid_decrypt` succeeds. In every other case (no keypair, or decryption
/// fails for any reason) the input is returned as an unchanged copy, which
/// callers then treat as a bare MLS payload.
fn try_hybrid_unwrap(hybrid_kp: Option<&HybridKeypair>, payload: &[u8]) -> Vec<u8> {
    hybrid_kp
        .and_then(|kp| hybrid_decrypt(kp, payload).ok())
        .unwrap_or_else(|| payload.to_vec())
}
/// SHA-256 digest of `bytes`, returned as an owned byte vector.
fn sha256(bytes: &[u8]) -> Vec<u8> {
    use sha2::{Digest, Sha256};

    let mut hasher = Sha256::new();
    hasher.update(bytes);
    hasher.finalize().to_vec()
}
fn set_auth(auth: &mut auth::Builder<'_>) {
let ctx = AUTH_CONTEXT
.get()
.expect("init_auth must be called with a non-empty token before RPCs");
auth.set_version(ctx.version);
auth.set_access_token(&ctx.access_token);
auth.set_device_id(&ctx.device_id);
}
/// On-disk client state; read and written with bincode by the helpers in this file.
///
/// Everything in here is secret key material or group state, so the file
/// should be treated as sensitive.
// NOTE(review): bincode is a positional format, so `#[serde(default)]` on the
// last field is presumably not sufficient on its own to read files that were
// written before that field existed — confirm against a real legacy file.
#[derive(Serialize, Deserialize)]
struct StoredState {
/// Seed the identity keypair is re-derived from (`IdentityKeypair::from_seed`).
identity_seed: [u8; 32],
/// bincode-serialized group, or `None` when no group has been created or joined.
group: Option<Vec<u8>>,
/// Post-quantum hybrid keypair (X25519 + ML-KEM-768). `None` for legacy state files.
#[serde(default)]
hybrid_key: Option<HybridKeypairBytes>,
}
impl StoredState {
fn into_parts(self, state_path: &Path) -> anyhow::Result<(GroupMember, Option<HybridKeypair>)> {
let identity = Arc::new(IdentityKeypair::from_seed(self.identity_seed));
let group = self
.group
.map(|bytes| bincode::deserialize(&bytes).context("decode group"))
.transpose()?;
let key_store = DiskKeyStore::persistent(keystore_path(state_path))?;
let member = GroupMember::new_with_state(identity, key_store, group);
let hybrid_kp = self
.hybrid_key
.map(|bytes| HybridKeypair::from_bytes(&bytes).context("decode hybrid key"))
.transpose()?;
Ok((member, hybrid_kp))
}
fn from_parts(
member: &GroupMember,
hybrid_kp: Option<&HybridKeypair>,
) -> anyhow::Result<Self> {
let group = member
.group_ref()
.map(|g| bincode::serialize(g).context("serialize group"))
.transpose()?;
Ok(Self {
identity_seed: member.identity_seed(),
group,
hybrid_key: hybrid_kp.map(|kp| kp.to_bytes()),
})
}
}
/// Load the state at `path`, or create and persist a fresh one if the file is absent.
///
/// * Missing file: a new identity and hybrid keypair are generated, the key
///   store is opened, and the resulting state is written to `path`.
/// * Existing file without a hybrid key: one is generated and the file is
///   rewritten before returning.
fn load_or_init_state(path: &Path) -> anyhow::Result<StoredState> {
    if !path.exists() {
        let identity = IdentityKeypair::generate();
        let hybrid_kp = HybridKeypair::generate();
        let key_store = DiskKeyStore::persistent(keystore_path(path))?;
        let member = GroupMember::new_with_state(Arc::new(identity), key_store, None);

        let fresh = StoredState::from_parts(&member, Some(&hybrid_kp))?;
        write_state(path, &fresh)?;
        return Ok(fresh);
    }

    let mut existing = load_existing_state(path)?;
    if existing.hybrid_key.is_none() {
        // Older state: add a hybrid keypair and persist the upgrade right away.
        existing.hybrid_key = Some(HybridKeypair::generate().to_bytes());
        write_state(path, &existing)?;
    }
    Ok(existing)
}
fn load_existing_state(path: &Path) -> anyhow::Result<StoredState> {
let bytes = std::fs::read(path).with_context(|| format!("read state file {path:?}"))?;
bincode::deserialize(&bytes).context("decode state")
}
/// Snapshot `member` and the optional hybrid keypair and write them to `path`.
fn save_state(
    path: &Path,
    member: &GroupMember,
    hybrid_kp: Option<&HybridKeypair>,
) -> anyhow::Result<()> {
    let snapshot = StoredState::from_parts(member, hybrid_kp)?;
    write_state(path, &snapshot)?;
    Ok(())
}
fn write_state(path: &Path, state: &StoredState) -> anyhow::Result<()> {
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent).with_context(|| format!("create dir {parent:?}"))?;
}
let bytes = bincode::serialize(state).context("encode state")?;
std::fs::write(path, bytes).with_context(|| format!("write state {path:?}"))?;
Ok(())
}
/// Parse a hex string into an identity key, requiring exactly 32 bytes.
fn decode_identity_key(hex_str: &str) -> anyhow::Result<Vec<u8>> {
    let decoded = hex::decode(hex_str)
        .map_err(|e| anyhow::anyhow!(e))
        .context("identity key must be hex")?;
    if decoded.len() != 32 {
        anyhow::bail!("identity key must be 32 bytes");
    }
    Ok(decoded)
}
/// Path of the key store that belongs to `state_path`: the same path with its
/// extension replaced by (or, if it has none, extended with) `ks`.
fn keystore_path(state_path: &Path) -> PathBuf {
    state_path.with_extension("ks")
}
/// Milliseconds since the Unix epoch according to the system clock.
///
/// Yields 0 if the system clock reports a time before the epoch.
fn current_timestamp_ms() -> u64 {
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    let since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed,
        Err(_) => Duration::default(),
    };
    since_epoch.as_millis() as u64
}
// ── Hex encoding helper ─────────────────────────────────────────────────────
//
// We use a tiny inline module rather than adding `hex` as a dependency.
mod hex {
    /// Encode `bytes` as a lower-case hex string (two characters per byte).
    pub fn encode(bytes: impl AsRef<[u8]>) -> String {
        const DIGITS: &[u8; 16] = b"0123456789abcdef";

        let bytes = bytes.as_ref();
        let mut out = String::with_capacity(bytes.len() * 2);
        for &byte in bytes {
            out.push(DIGITS[(byte >> 4) as usize] as char);
            out.push(DIGITS[(byte & 0x0f) as usize] as char);
        }
        out
    }

    /// Decode a hex string (upper or lower case) into bytes.
    ///
    /// Works on the raw bytes of `s` rather than on `str` slices, so input
    /// containing multi-byte UTF-8 characters is rejected with an error
    /// instead of panicking on a char-boundary slice. Only the characters
    /// `0-9`, `a-f` and `A-F` are accepted; in particular a leading `+` is not
    /// treated as a sign.
    ///
    /// # Errors
    /// * `"odd-length hex string"` if the input length is odd;
    /// * `"invalid hex character"` if any character is not a hex digit.
    pub fn decode(s: &str) -> Result<Vec<u8>, &'static str> {
        let raw = s.as_bytes();
        if raw.len() % 2 != 0 {
            return Err("odd-length hex string");
        }
        raw.chunks_exact(2)
            .map(|pair| -> Result<u8, &'static str> {
                Ok((nibble(pair[0])? << 4) | nibble(pair[1])?)
            })
            .collect()
    }

    /// Value of a single ASCII hex digit.
    fn nibble(c: u8) -> Result<u8, &'static str> {
        match c {
            b'0'..=b'9' => Ok(c - b'0'),
            b'a'..=b'f' => Ok(c - b'a' + 10),
            b'A'..=b'F' => Ok(c - b'A' + 10),
            _ => Err("invalid hex character"),
        }
    }
}

View File

@@ -1,38 +1,13 @@
//! quicnprotochat CLI client. //! quicnprotochat CLI client.
//!
//! # Subcommands
//!
//! | Subcommand | Description |
//! |--------------|----------------------------------------------------------|
//! | `ping` | Send a Ping to the server, print RTT |
//! | `register` | Generate a KeyPackage and upload it to the AS |
//! | `fetch-key` | Fetch a peer's KeyPackage from the AS by identity key |
//!
//! # Configuration
//!
//! | Env var | CLI flag | Default |
//! |-----------------|--------------|---------------------|
//! | `QUICNPROTOCHAT_SERVER`| `--server` | `127.0.0.1:4201` |
//! | `RUST_LOG` | — | `warn` |
use std::fs; use std::path::PathBuf;
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::Context;
use capnp_rpc::{rpc_twoparty_capnp::Side, twoparty, RpcSystem};
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use serde::{Deserialize, Serialize};
use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
use quinn::{ClientConfig, Endpoint}; use quicnprotochat_client::{
use quinn_proto::crypto::rustls::QuicClientConfig; cmd_create_group, cmd_demo_group, cmd_fetch_key, cmd_invite, cmd_join, cmd_ping, cmd_recv,
use rustls::pki_types::CertificateDer; cmd_register, cmd_register_state, cmd_send, ClientAuth, init_auth,
use rustls::{ClientConfig as RustlsClientConfig, RootCertStore}; };
use quicnprotochat_core::{generate_key_package, DiskKeyStore, GroupMember, IdentityKeypair};
use quicnprotochat_proto::node_capnp::node_service;
// ── CLI ─────────────────────────────────────────────────────────────────────── // ── CLI ───────────────────────────────────────────────────────────────────────
@@ -57,6 +32,14 @@ struct Args {
)] )]
server_name: String, server_name: String,
/// Bearer token for authenticated requests (version 1, required).
#[arg(long, global = true, env = "QUICNPROTOCHAT_ACCESS_TOKEN", required = true)]
access_token: String,
/// Optional device identifier (UUID bytes encoded as hex or raw string).
#[arg(long, global = true, env = "QUICNPROTOCHAT_DEVICE_ID")]
device_id: Option<String>,
#[command(subcommand)] #[command(subcommand)]
command: Command, command: Command,
} }
@@ -66,7 +49,7 @@ enum Command {
/// Send a Ping to the server and print the round-trip time. /// Send a Ping to the server and print the round-trip time.
Ping { Ping {
/// Server address (host:port). /// Server address (host:port).
#[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")]
server: String, server: String,
}, },
@@ -76,7 +59,7 @@ enum Command {
/// Ed25519 identity public key bytes (hex), which peers need to fetch it. /// Ed25519 identity public key bytes (hex), which peers need to fetch it.
Register { Register {
/// Server address (host:port). /// Server address (host:port).
#[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")]
server: String, server: String,
}, },
@@ -86,7 +69,7 @@ enum Command {
/// hex characters (32 bytes). /// hex characters (32 bytes).
FetchKey { FetchKey {
/// Server address (host:port). /// Server address (host:port).
#[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")]
server: String, server: String,
/// Target peer's Ed25519 identity public key (64 hex chars = 32 bytes). /// Target peer's Ed25519 identity public key (64 hex chars = 32 bytes).
@@ -96,7 +79,7 @@ enum Command {
/// Run a full Alice↔Bob MLS round-trip against live AS and DS endpoints. /// Run a full Alice↔Bob MLS round-trip against live AS and DS endpoints.
DemoGroup { DemoGroup {
/// Server address (host:port). /// Server address (host:port).
#[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")]
server: String, server: String,
}, },
@@ -111,7 +94,7 @@ enum Command {
state: PathBuf, state: PathBuf,
/// Authentication Service address (host:port). /// Authentication Service address (host:port).
#[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")]
server: String, server: String,
}, },
@@ -126,7 +109,7 @@ enum Command {
state: PathBuf, state: PathBuf,
/// Server address (host:port). /// Server address (host:port).
#[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")]
server: String, server: String,
/// Group identifier (arbitrary bytes, typically a human-readable name). /// Group identifier (arbitrary bytes, typically a human-readable name).
@@ -142,7 +125,7 @@ enum Command {
env = "QUICNPROTOCHAT_STATE" env = "QUICNPROTOCHAT_STATE"
)] )]
state: PathBuf, state: PathBuf,
#[arg(long, default_value = "127.0.0.1:4201", env = "QUICNPROTOCHAT_SERVER")] #[arg(long, default_value = "127.0.0.1:7000", env = "QUICNPROTOCHAT_SERVER")]
server: String, server: String,
/// Peer identity public key (64 hex chars = 32 bytes). /// Peer identity public key (64 hex chars = 32 bytes).
#[arg(long)] #[arg(long)]
@@ -213,6 +196,10 @@ async fn main() -> anyhow::Result<()> {
let args = Args::parse(); let args = Args::parse();
// Initialize auth context once for all RPCs.
let auth_ctx = ClientAuth::from_parts(args.access_token.clone(), args.device_id.clone());
init_auth(auth_ctx);
match args.command { match args.command {
Command::Ping { server } => cmd_ping(&server, &args.ca_cert, &args.server_name).await, Command::Ping { server } => cmd_ping(&server, &args.ca_cert, &args.server_name).await,
Command::Register { server } => { Command::Register { server } => {
@@ -322,698 +309,3 @@ async fn main() -> anyhow::Result<()> {
} }
} }
} }
// ── Subcommand implementations ────────────────────────────────────────────────
/// Connect to `server`, call the `health` RPC, and print the elapsed time.
///
/// The reported `rtt` covers everything from just before the connection is
/// opened until the health response has been decoded, so it includes the
/// connection setup performed by `connect_node` as well as the RPC itself.
///
/// The interval is measured with the monotonic [`std::time::Instant`] clock:
/// a wall-clock adjustment (NTP step, manual change) during the call can
/// therefore neither inflate the figure nor clamp it to zero.
///
/// # Errors
///
/// Fails if the connection cannot be established, the RPC fails, or the
/// response is missing its `status` field.
async fn cmd_ping(server: &str, ca_cert: &Path, server_name: &str) -> anyhow::Result<()> {
    let started = std::time::Instant::now();

    let client = connect_node(server, ca_cert, server_name).await?;
    let req = client.health_request();
    let resp = req.send().promise.await.context("health RPC failed")?;

    // A status that is not valid UTF-8 is reported as "invalid" rather than
    // failing the whole command.
    let status = resp
        .get()
        .context("health: bad response")?
        .get_status()
        .context("health: missing status")?
        .to_str()
        .unwrap_or("invalid");

    let rtt_ms = started.elapsed().as_millis();
    println!("health={status} rtt={rtt_ms}ms");
    Ok(())
}
/// Create a brand-new identity, build its KeyPackage, and publish it to the AS.
///
/// The fingerprint echoed by the server is compared with the locally computed
/// one; a mismatch aborts the command. On success the identity key and the
/// fingerprint are printed as hex.
///
/// Must run on a `LocalSet` because capnp-rpc is `!Send`.
async fn cmd_register(server: &str, ca_cert: &Path, server_name: &str) -> anyhow::Result<()> {
    let identity = IdentityKeypair::generate();
    let (tls_bytes, fingerprint) =
        generate_key_package(&identity).context("KeyPackage generation failed")?;

    let node_client = connect_node(server, ca_cert, server_name).await?;

    // Build the upload request.
    let mut upload = node_client.upload_key_package_request();
    {
        let mut params = upload.get();
        params.set_identity_key(&identity.public_key_bytes());
        params.set_package(&tls_bytes);
    }

    let reply = upload
        .send()
        .promise
        .await
        .context("upload_key_package RPC failed")?;
    let server_fp = reply
        .get()
        .context("upload_key_package: bad response")?
        .get_fingerprint()
        .context("upload_key_package: missing fingerprint")?
        .to_vec();

    // The server must echo exactly the fingerprint we computed locally.
    if !(server_fp == fingerprint) {
        anyhow::bail!(
            "fingerprint mismatch: local={} server={}",
            hex::encode(&fingerprint),
            hex::encode(&server_fp),
        );
    }

    println!(
        "identity_key : {}",
        hex::encode(identity.public_key_bytes())
    );
    println!("fingerprint : {}", hex::encode(&fingerprint));
    println!("KeyPackage uploaded successfully.");
    Ok(())
}
/// Publish a KeyPackage for the identity stored at `state_path`.
///
/// The state file is loaded (or created if absent), a KeyPackage is generated
/// from it and uploaded, the server's fingerprint is checked against the
/// local SHA-256 of the package, and finally the state is written back.
async fn cmd_register_state(
    state_path: &Path,
    server: &str,
    ca_cert: &Path,
    server_name: &str,
) -> anyhow::Result<()> {
    let mut member = load_or_init_state(state_path)?.into_member(state_path)?;

    let tls_bytes = member
        .generate_key_package()
        .context("KeyPackage generation failed")?;
    let fingerprint = sha256(&tls_bytes);

    let node_client = connect_node(server, ca_cert, server_name).await?;

    let mut upload = node_client.upload_key_package_request();
    {
        let mut params = upload.get();
        params.set_identity_key(&member.identity().public_key_bytes());
        params.set_package(&tls_bytes);
    }

    let reply = upload
        .send()
        .promise
        .await
        .context("upload_key_package RPC failed")?;
    let server_fp = reply
        .get()
        .context("upload_key_package: bad response")?
        .get_fingerprint()
        .context("upload_key_package: missing fingerprint")?
        .to_vec();

    if server_fp != fingerprint {
        anyhow::bail!("fingerprint mismatch");
    }

    println!(
        "identity_key : {}",
        hex::encode(member.identity().public_key_bytes())
    );
    println!("fingerprint : {}", hex::encode(&fingerprint));
    println!("KeyPackage uploaded successfully.");

    // Persist only after the upload has been verified.
    save_state(state_path, &member)?;
    Ok(())
}
/// Look up the KeyPackage registered for a hex-encoded identity key and print
/// its SHA-256 fingerprint and length.
///
/// An empty package in the response is reported as "no KeyPackage available"
/// and is not an error.
///
/// Must run on a `LocalSet` because capnp-rpc is `!Send`.
async fn cmd_fetch_key(
    server: &str,
    ca_cert: &Path,
    server_name: &str,
    identity_key_hex: &str,
) -> anyhow::Result<()> {
    use sha2::{Digest, Sha256};

    // Validate the key argument before opening a connection.
    let identity_key = hex::decode(identity_key_hex)
        .map_err(|e| anyhow::anyhow!(e))
        .context("identity_key must be 64 hex characters (32 bytes)")?;
    if identity_key.len() != 32 {
        anyhow::bail!(
            "identity_key must be exactly 32 bytes, got {}",
            identity_key.len()
        );
    }

    let node_client = connect_node(server, ca_cert, server_name).await?;

    let mut lookup = node_client.fetch_key_package_request();
    lookup.get().set_identity_key(&identity_key);
    let reply = lookup
        .send()
        .promise
        .await
        .context("fetch_key_package RPC failed")?;

    let package = reply
        .get()
        .context("fetch_key_package: bad response")?
        .get_package()
        .context("fetch_key_package: missing package field")?
        .to_vec();

    if package.is_empty() {
        println!("No KeyPackage available for this identity.");
    } else {
        let fingerprint = Sha256::digest(&package);
        println!("fingerprint : {}", hex::encode(fingerprint));
        println!("package_len : {} bytes", package.len());
        println!("KeyPackage fetched successfully.");
    }

    Ok(())
}
/// Run a complete Alice↔Bob MLS round-trip using the unified server endpoint.
///
/// Steps, in order: generate two fresh identities, upload a KeyPackage for
/// each, let Alice create the group `demo-group` and add Bob, deliver the
/// Welcome to Bob through the server queue, then exchange one application
/// message in each direction and print both decrypted plaintexts.
///
/// Any failing step aborts the demo with a contextual error.
async fn cmd_demo_group(server: &str, ca_cert: &Path, server_name: &str) -> anyhow::Result<()> {
// Identities and MLS state must be tied to the same backend instance.
let alice_id = Arc::new(IdentityKeypair::generate());
let bob_id = Arc::new(IdentityKeypair::generate());
let mut alice = GroupMember::new(Arc::clone(&alice_id));
let mut bob = GroupMember::new(Arc::clone(&bob_id));
let alice_kp = alice
.generate_key_package()
.context("Alice KeyPackage generation failed")?;
let bob_kp = bob
.generate_key_package()
.context("Bob KeyPackage generation failed")?;
// Upload both KeyPackages to the server.
// Each party uses its own connection.
let alice_node = connect_node(server, ca_cert, server_name).await?;
let bob_node = connect_node(server, ca_cert, server_name).await?;
upload_key_package(&alice_node, &alice_id.public_key_bytes(), &alice_kp).await?;
upload_key_package(&bob_node, &bob_id.public_key_bytes(), &bob_kp).await?;
// Alice fetches Bob's KeyPackage and creates the group.
let fetched_bob_kp = fetch_key_package(&alice_node, &bob_id.public_key_bytes()).await?;
anyhow::ensure!(
!fetched_bob_kp.is_empty(),
"AS returned an empty KeyPackage for Bob",
);
alice
.create_group(b"demo-group")
.context("Alice create_group failed")?;
// The commit returned by add_member is not used in this two-party demo;
// only the Welcome is delivered.
let (_commit, welcome) = alice
.add_member(&fetched_bob_kp)
.context("Alice add_member failed")?;
// The same client handles are reused for delivery (enqueue / fetch) calls.
let alice_ds = alice_node.clone();
let bob_ds = bob_node.clone();
enqueue(&alice_ds, &bob_id.public_key_bytes(), &welcome).await?;
let welcome_payloads = fetch_all(&bob_ds, &bob_id.public_key_bytes()).await?;
let welcome_bytes = welcome_payloads
.first()
.cloned()
.context("Welcome was not delivered to Bob via DS")?;
bob.join_group(&welcome_bytes)
.context("Bob join_group failed")?;
// Alice → Bob
let ct_ab = alice
.send_message(b"hello bob")
.context("Alice send_message failed")?;
enqueue(&alice_ds, &bob_id.public_key_bytes(), &ct_ab).await?;
let bob_msgs = fetch_all(&bob_ds, &bob_id.public_key_bytes()).await?;
// receive_message yields an Option; a `None` here (no application payload)
// is treated as a demo failure.
let ab_plaintext = bob
.receive_message(
bob_msgs
.first()
.context("Bob: missing Alice ciphertext from DS")?,
)?
.context("Bob expected application message from Alice")?;
println!(
"Alice → Bob plaintext: {}",
String::from_utf8_lossy(&ab_plaintext)
);
// Bob → Alice
let ct_ba = bob
.send_message(b"hello alice")
.context("Bob send_message failed")?;
enqueue(&bob_ds, &alice_id.public_key_bytes(), &ct_ba).await?;
let alice_msgs = fetch_all(&alice_ds, &alice_id.public_key_bytes()).await?;
let ba_plaintext = alice
.receive_message(
alice_msgs
.first()
.context("Alice: missing Bob ciphertext from DS")?,
)?
.context("Alice expected application message from Bob")?;
println!(
"Bob → Alice plaintext: {}",
String::from_utf8_lossy(&ba_plaintext)
);
println!("demo-group complete ✔");
Ok(())
}
/// Create a group named `group_id` for the identity at `state_path` and save it.
///
/// The state file is created if it does not exist yet. The command refuses to
/// run when the state already holds a group. `_server` is accepted for CLI
/// symmetry but unused: no network traffic is needed to create a group.
async fn cmd_create_group(state_path: &Path, _server: &str, group_id: &str) -> anyhow::Result<()> {
    let mut member = load_or_init_state(state_path)?.into_member(state_path)?;

    if member.group_ref().is_some() {
        anyhow::bail!("group already exists in state");
    }

    member
        .create_group(group_id.as_bytes())
        .context("create_group failed")?;
    save_state(state_path, &member)?;

    println!("group created: {group_id}");
    Ok(())
}
/// Invite a peer: fetch their KeyPackage, add to group, enqueue Welcome.
///
/// * `state_path` — existing client state; must already contain a group.
/// * `peer_key_hex` — the peer's identity public key as 64 hex characters.
///
/// All local preconditions (state loads, key parses, a group is active) are
/// checked *before* any network traffic, so an invocation that cannot succeed
/// never fetches the peer's KeyPackage from the server.
/// NOTE(review): KeyPackages are presumably single-use on the server side, in
/// which case fetching one and then failing locally would waste it — confirm
/// against the server's fetch semantics.
///
/// # Errors
///
/// Fails if the state cannot be loaded, the key is malformed, no group is
/// active, the server returns an empty KeyPackage, or any RPC / MLS step fails.
async fn cmd_invite(
    state_path: &Path,
    server: &str,
    ca_cert: &Path,
    server_name: &str,
    peer_key_hex: &str,
) -> anyhow::Result<()> {
    let state = load_existing_state(state_path)?;
    let mut member = state.into_member(state_path)?;
    let peer_key = decode_identity_key(peer_key_hex)?;

    // Local precondition first: without an active group there is nothing to
    // invite the peer into, so do not contact the server at all.
    let _ = member
        .group_ref()
        .context("no active group; run create-group first")?;

    let node_client = connect_node(server, ca_cert, server_name).await?;
    let peer_kp = fetch_key_package(&node_client, &peer_key).await?;
    anyhow::ensure!(
        !peer_kp.is_empty(),
        "server returned empty KeyPackage for peer"
    );

    // Only the Welcome is delivered; the commit half of the pair is unused here.
    let (_, welcome) = member.add_member(&peer_kp).context("add_member failed")?;
    enqueue(&node_client, &peer_key, &welcome).await?;

    save_state(state_path, &member)?;
    println!("invited peer (welcome queued)");
    Ok(())
}
/// Join a group using the first payload queued on the server for this identity.
///
/// The state must exist and must not already contain a group. The first
/// fetched payload is treated as the Welcome; the updated state is saved once
/// the join succeeds.
async fn cmd_join(
    state_path: &Path,
    server: &str,
    ca_cert: &Path,
    server_name: &str,
) -> anyhow::Result<()> {
    let mut member = load_existing_state(state_path)?.into_member(state_path)?;

    if member.group_ref().is_some() {
        anyhow::bail!("group already active in state");
    }

    let node_client = connect_node(server, ca_cert, server_name).await?;
    let queued = fetch_all(&node_client, &member.identity().public_key_bytes()).await?;

    let welcome_bytes = match queued.first() {
        Some(first) => first.clone(),
        None => anyhow::bail!("no Welcome found in DS for this identity"),
    };

    member
        .join_group(&welcome_bytes)
        .context("join_group failed")?;
    save_state(state_path, &member)?;

    println!("joined group successfully");
    Ok(())
}
/// Encrypt `msg` with the stored group state and queue it for `peer_key_hex`.
///
/// The state is written back after the payload has been enqueued.
async fn cmd_send(
    state_path: &Path,
    server: &str,
    ca_cert: &Path,
    server_name: &str,
    peer_key_hex: &str,
    msg: &str,
) -> anyhow::Result<()> {
    let mut member = load_existing_state(state_path)?.into_member(state_path)?;
    let recipient = decode_identity_key(peer_key_hex)?;

    let node_client = connect_node(server, ca_cert, server_name).await?;

    let ciphertext = member
        .send_message(msg.as_bytes())
        .context("send_message failed")?;
    enqueue(&node_client, &recipient, &ciphertext).await?;

    save_state(state_path, &member)?;
    println!("message sent");
    Ok(())
}
/// Fetch pending payloads for the stored identity, process them, and print the results.
///
/// * `wait_ms` — passed to the server as the long-poll timeout.
/// * `stream` — when `false`, one fetch is performed and the command returns
///   (printing `no messages` if nothing arrived); when `true`, the command
///   keeps polling indefinitely.
///
/// A payload that fails to process is reported on its own line and does not
/// stop the remaining payloads from being handled. State is saved after every
/// non-empty batch.
async fn cmd_recv(
    state_path: &Path,
    server: &str,
    ca_cert: &Path,
    server_name: &str,
    wait_ms: u64,
    stream: bool,
) -> anyhow::Result<()> {
    let mut member = load_existing_state(state_path)?.into_member(state_path)?;
    let client = connect_node(server, ca_cert, server_name).await?;

    loop {
        let batch = fetch_wait(&client, &member.identity().public_key_bytes(), wait_ms).await?;

        if !batch.is_empty() {
            for (idx, payload) in batch.iter().enumerate() {
                match member.receive_message(payload) {
                    Ok(Some(pt)) => {
                        println!("[{idx}] plaintext: {}", String::from_utf8_lossy(&pt))
                    }
                    Ok(None) => println!("[{idx}] commit applied"),
                    Err(e) => println!("[{idx}] error: {e}"),
                }
            }
            save_state(state_path, &member)?;
        } else if !stream {
            println!("no messages");
        }

        // One-shot mode ends after the first poll, whatever it returned.
        if !stream {
            return Ok(());
        }
    }
}
// ── Shared helpers ────────────────────────────────────────────────────────────
/// Establish a QUIC/TLS connection and return a `NodeService` client.
///
/// * `server` — socket address of the node. It is parsed as a
///   [`SocketAddr`], so it must be a literal `ip:port` (IPv4, or bracketed
///   IPv6); host names are not resolved here.
/// * `ca_cert` — path to the certificate trusted as root. The file bytes are
///   handed to `CertificateDer::from` unchanged.
///   NOTE(review): this implies the file must be DER-encoded — confirm.
/// * `server_name` — name passed to the QUIC connect call alongside the address.
///
/// The local UDP endpoint is bound to the unspecified address of the *same
/// family* as `server` (`0.0.0.0:0` or `[::]:0`). Binding an IPv4-only socket
/// unconditionally would make every IPv6 server address unreachable.
///
/// The RPC system is spawned with `spawn_local`; this function must be called
/// from within a `LocalSet` because capnp-rpc is `!Send`.
///
/// # Errors
///
/// Fails if the address does not parse, the certificate cannot be read or
/// added, the endpoint cannot be bound, or the QUIC connection / stream
/// cannot be opened.
async fn connect_node(
    server: &str,
    ca_cert: &Path,
    server_name: &str,
) -> anyhow::Result<node_service::Client> {
    let addr: SocketAddr = server
        .parse()
        .with_context(|| format!("server must be host:port, got {server}"))?;

    let cert_bytes = fs::read(ca_cert).with_context(|| format!("read ca_cert {ca_cert:?}"))?;
    let mut roots = RootCertStore::empty();
    roots
        .add(CertificateDer::from(cert_bytes))
        .context("add root cert")?;

    let tls = RustlsClientConfig::builder()
        .with_root_certificates(roots)
        .with_no_client_auth();
    let crypto = QuicClientConfig::try_from(tls)
        .map_err(|e| anyhow::anyhow!("invalid client TLS config: {e}"))?;

    // Pick the wildcard bind address matching the server's address family;
    // port 0 lets the OS choose an ephemeral port.
    let bind_addr: SocketAddr = if addr.is_ipv6() {
        (std::net::Ipv6Addr::UNSPECIFIED, 0).into()
    } else {
        (std::net::Ipv4Addr::UNSPECIFIED, 0).into()
    };
    let mut endpoint = Endpoint::client(bind_addr)
        .with_context(|| format!("bind client endpoint on {bind_addr}"))?;
    endpoint.set_default_client_config(ClientConfig::new(Arc::new(crypto)));

    let connection = endpoint
        .connect(addr, server_name)
        .context("quic connect init")?
        .await
        .context("quic connect failed")?;
    let (send, recv) = connection.open_bi().await.context("open bi stream")?;

    // A single bidirectional stream carries the two-party capnp-rpc session.
    let network = twoparty::VatNetwork::new(
        recv.compat(),
        send.compat_write(),
        Side::Client,
        Default::default(),
    );
    let mut rpc_system = RpcSystem::new(Box::new(network), None);
    let client: node_service::Client = rpc_system.bootstrap(Side::Server);

    // Drive the RPC system in the background on the current LocalSet.
    tokio::task::spawn_local(rpc_system);
    Ok(client)
}
/// Send `package` to the AS under `identity_key` and check the echoed fingerprint.
///
/// The fingerprint returned by the server must equal the SHA-256 of
/// `package`; otherwise the call fails with `fingerprint mismatch`.
async fn upload_key_package(
    client: &node_service::Client,
    identity_key: &[u8],
    package: &[u8],
) -> anyhow::Result<()> {
    let mut upload = client.upload_key_package_request();
    {
        let mut params = upload.get();
        params.set_identity_key(identity_key);
        params.set_package(package);
    }

    let reply = upload
        .send()
        .promise
        .await
        .context("upload_key_package RPC failed")?;
    let server_fp = reply
        .get()
        .context("upload_key_package: bad response")?
        .get_fingerprint()
        .context("upload_key_package: missing fingerprint")?
        .to_vec();

    if server_fp != sha256(package) {
        anyhow::bail!("fingerprint mismatch");
    }
    Ok(())
}
/// Ask the AS for the KeyPackage registered under `identity_key`.
///
/// Returns the raw package bytes exactly as sent by the server; callers in
/// this file treat an empty vector as "no package available".
async fn fetch_key_package(
    client: &node_service::Client,
    identity_key: &[u8],
) -> anyhow::Result<Vec<u8>> {
    let mut lookup = client.fetch_key_package_request();
    lookup.get().set_identity_key(identity_key);

    let reply = lookup
        .send()
        .promise
        .await
        .context("fetch_key_package RPC failed")?;

    let reader = reply.get().context("fetch_key_package: bad response")?;
    let package = reader
        .get_package()
        .context("fetch_key_package: missing package field")?;
    Ok(package.to_vec())
}
/// Queue `payload` on the DS for delivery to `recipient_key`.
///
/// The payload is passed through unchanged; the response body is ignored and
/// only RPC failure is reported.
async fn enqueue(
    client: &node_service::Client,
    recipient_key: &[u8],
    payload: &[u8],
) -> anyhow::Result<()> {
    let mut request = client.enqueue_request();
    {
        let mut params = request.get();
        params.set_recipient_key(recipient_key);
        params.set_payload(payload);
    }
    request
        .send()
        .promise
        .await
        .context("enqueue RPC failed")?;
    Ok(())
}
/// Retrieve every payload the server returns for `recipient_key`.
///
/// Payloads are copied out of the response in list order.
async fn fetch_all(
    client: &node_service::Client,
    recipient_key: &[u8],
) -> anyhow::Result<Vec<Vec<u8>>> {
    let mut request = client.fetch_request();
    request.get().set_recipient_key(recipient_key);

    let reply = request.send().promise.await.context("fetch RPC failed")?;
    let list = reply
        .get()
        .context("fetch: bad response")?
        .get_payloads()
        .context("fetch: missing payloads")?;

    // Stops at the first entry that cannot be read.
    (0..list.len())
        .map(|i| {
            list.get(i)
                .map(|payload| payload.to_vec())
                .context("fetch: payload read failed")
        })
        .collect()
}
/// Long-poll variant of the fetch call: forwards `timeout_ms` to the server
/// and returns whatever payloads come back for `recipient_key`, in list order.
async fn fetch_wait(
    client: &node_service::Client,
    recipient_key: &[u8],
    timeout_ms: u64,
) -> anyhow::Result<Vec<Vec<u8>>> {
    let mut request = client.fetch_wait_request();
    {
        let mut params = request.get();
        params.set_recipient_key(recipient_key);
        params.set_timeout_ms(timeout_ms);
    }

    let reply = request
        .send()
        .promise
        .await
        .context("fetch_wait RPC failed")?;
    let list = reply
        .get()
        .context("fetch_wait: bad response")?
        .get_payloads()
        .context("fetch_wait: missing payloads")?;

    // Stops at the first entry that cannot be read.
    (0..list.len())
        .map(|i| {
            list.get(i)
                .map(|payload| payload.to_vec())
                .context("fetch_wait: payload read failed")
        })
        .collect()
}
/// Return the SHA-256 digest of `bytes` as an owned 32-byte vector.
fn sha256(bytes: &[u8]) -> Vec<u8> {
    use sha2::{Digest, Sha256};

    let mut hasher = Sha256::new();
    hasher.update(bytes);
    hasher.finalize().to_vec()
}
/// On-disk client state.
///
/// Serialized with bincode, so the field order below is part of the file
/// format and must not be rearranged.
#[derive(Serialize, Deserialize)]
struct StoredState {
    /// Seed from which the identity keypair is rebuilt.
    identity_seed: [u8; 32],
    /// bincode-encoded group state, or `None` when no group is active.
    group: Option<Vec<u8>>,
}

impl StoredState {
    /// Rebuild a live `GroupMember` from this state.
    ///
    /// The key store is opened at the path derived from `state_path` by
    /// `keystore_path`.
    fn into_member(self, state_path: &Path) -> anyhow::Result<GroupMember> {
        let identity = Arc::new(IdentityKeypair::from_seed(self.identity_seed));

        let group = match self.group {
            Some(encoded) => Some(bincode::deserialize(&encoded).context("decode group")?),
            None => None,
        };

        let key_store = DiskKeyStore::persistent(keystore_path(state_path))?;
        Ok(GroupMember::new_with_state(identity, key_store, group))
    }

    /// Capture the persistent parts of `member` (identity seed and group).
    fn from_member(member: &GroupMember) -> anyhow::Result<Self> {
        let group = match member.group_ref() {
            Some(active) => Some(bincode::serialize(active).context("serialize group")?),
            None => None,
        };

        Ok(Self {
            identity_seed: member.identity_seed(),
            group,
        })
    }
}
/// Load the state file at `path`, or create it with a fresh identity if it
/// does not exist yet.
///
/// On first use a new identity is generated, a key store is opened next to
/// the state file, and the initial (group-less) state is written to disk
/// before being returned.
fn load_or_init_state(path: &Path) -> anyhow::Result<StoredState> {
    if path.exists() {
        load_existing_state(path)
    } else {
        let identity = Arc::new(IdentityKeypair::generate());
        let key_store = DiskKeyStore::persistent(keystore_path(path))?;
        let fresh_member = GroupMember::new_with_state(identity, key_store, None);

        let initial = StoredState::from_member(&fresh_member)?;
        write_state(path, &initial)?;
        Ok(initial)
    }
}
/// Read and bincode-decode the state file at `path`.
///
/// Fails if the file cannot be read or its contents do not decode.
fn load_existing_state(path: &Path) -> anyhow::Result<StoredState> {
    let raw = std::fs::read(path).with_context(|| format!("read state file {path:?}"))?;
    let state: StoredState = bincode::deserialize(&raw).context("decode state")?;
    Ok(state)
}
/// Snapshot `member` and write the snapshot to the state file at `path`.
fn save_state(path: &Path, member: &GroupMember) -> anyhow::Result<()> {
    let snapshot = StoredState::from_member(member)?;
    write_state(path, &snapshot)?;
    Ok(())
}
fn write_state(path: &Path, state: &StoredState) -> anyhow::Result<()> {
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent).with_context(|| format!("create dir {parent:?}"))?;
}
let bytes = bincode::serialize(state).context("encode state")?;
std::fs::write(path, bytes).with_context(|| format!("write state {path:?}"))?;
Ok(())
}
/// Parse a hex string into a 32-byte identity key.
///
/// Fails if the string is not valid hex or does not decode to exactly 32 bytes.
fn decode_identity_key(hex_str: &str) -> anyhow::Result<Vec<u8>> {
    let key = hex::decode(hex_str)
        .map_err(|e| anyhow::anyhow!(e).context("identity key must be hex"))?;

    if key.len() != 32 {
        anyhow::bail!("identity key must be 32 bytes");
    }
    Ok(key)
}
/// Derive the key-store path from the state path by swapping the file
/// extension for `ks` (e.g. `client.bin` → `client.ks`).
fn keystore_path(state_path: &Path) -> PathBuf {
    state_path.with_extension("ks")
}
/// Format `bytes` as a lowercase hex string: two digits per byte, no
/// separators, no truncation.
///
/// An empty slice yields an empty string.
fn fmt_hex(bytes: &[u8]) -> String {
    bytes.iter().map(|b| format!("{b:02x}")).collect()
}
/// Milliseconds elapsed since the Unix epoch according to the system clock.
///
/// If the system clock reads earlier than the epoch, `0` is returned.
fn current_timestamp_ms() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    since_epoch.as_millis() as u64
}
// ── Hex encoding helper ───────────────────────────────────────────────────────
//
// We use a tiny inline module rather than adding `hex` as a dependency.
mod hex {
    /// Encode `bytes` as lowercase hex, two digits per byte.
    pub fn encode(bytes: impl AsRef<[u8]>) -> String {
        bytes.as_ref().iter().map(|b| format!("{b:02x}")).collect()
    }

    /// Decode a hex string (upper- or lowercase digits) into bytes.
    ///
    /// Only the characters `0-9`, `a-f` and `A-F` are accepted. The input is
    /// processed byte-wise, so arbitrary (including non-ASCII) input is
    /// rejected with an error instead of panicking on a string-slice
    /// boundary, and sign characters such as `+` are not mistaken for part of
    /// a number.
    ///
    /// # Errors
    ///
    /// * `"odd-length hex string"` — the input length in bytes is odd.
    /// * `"invalid hex character"` — a byte is not a hex digit.
    pub fn decode(s: &str) -> Result<Vec<u8>, &'static str> {
        /// Value of a single hex digit.
        fn nibble(c: u8) -> Result<u8, &'static str> {
            match c {
                b'0'..=b'9' => Ok(c - b'0'),
                b'a'..=b'f' => Ok(c - b'a' + 10),
                b'A'..=b'F' => Ok(c - b'A' + 10),
                _ => Err("invalid hex character"),
            }
        }

        let raw = s.as_bytes();
        if raw.len() % 2 != 0 {
            return Err("odd-length hex string");
        }

        raw.chunks_exact(2)
            .map(|pair| Ok((nibble(pair[0])? << 4) | nibble(pair[1])?))
            .collect()
    }
}

View File

@@ -1,201 +0,0 @@
//! M1 integration test: Noise_XX handshake + Ping/Pong round-trip.
//!
//! Both the server-side and client-side logic run in the same Tokio runtime
//! using `tokio::spawn`. The test verifies:
//!
//! 1. The Noise_XX handshake completes from both sides.
//! 2. A Ping sent by the client arrives as a Ping on the server side.
//! 3. The server's Pong arrives correctly on the client side.
//! 4. Mutual authentication: each peer's observed remote static key matches the
//! other peer's actual public key (the core security property of XX).
use std::sync::Arc;
use tokio::net::TcpListener;
use quicnprotochat_core::{handshake_initiator, handshake_responder, NoiseKeypair};
use quicnprotochat_proto::{MsgType, ParsedEnvelope};
/// Completes a full Noise_XX handshake and Ping/Pong exchange, then verifies
/// mutual authentication by comparing observed vs. actual static public keys.
///
/// The server half runs in a spawned task and hands back the client key it
/// observed; the client half runs inline in the test body.
#[tokio::test]
async fn noise_xx_ping_pong_round_trip() {
let server_keypair = Arc::new(NoiseKeypair::generate());
let client_keypair = NoiseKeypair::generate();
// Bind the listener *before* spawning so the port is ready when the client
// calls connect — no sleep or retry needed.
// Port 0 asks the OS for a free port; the real address is read back below.
let listener = TcpListener::bind("127.0.0.1:0")
.await
.expect("failed to bind test listener");
let server_addr = listener.local_addr().expect("failed to get local addr");
// ── Server task ───────────────────────────────────────────────────────────
//
// Handles exactly one connection: completes the handshake, asserts that it
// receives a Ping, sends a Pong, then returns the client's observed key.
let server_kp = Arc::clone(&server_keypair);
let server_task = tokio::spawn(async move {
let (stream, _peer) = listener.accept().await.expect("server accept failed");
let mut transport = handshake_responder(stream, &server_kp)
.await
.expect("server Noise_XX handshake failed");
let env = transport
.recv_envelope()
.await
.expect("server recv_envelope failed");
match env.msg_type {
MsgType::Ping => {}
_ => panic!("server expected Ping, received a different message type"),
}
transport
.send_envelope(&ParsedEnvelope {
msg_type: MsgType::Pong,
group_id: vec![],
sender_id: vec![],
payload: vec![],
timestamp_ms: 0,
})
.await
.expect("server send_envelope failed");
// Return the client's public key as authenticated by the server.
transport
.remote_static_public_key()
.expect("server: no remote static key after completed XX handshake")
.to_vec()
});
// ── Client side ───────────────────────────────────────────────────────────
let stream = tokio::net::TcpStream::connect(server_addr)
.await
.expect("client connect failed");
let mut transport = handshake_initiator(stream, &client_keypair)
.await
.expect("client Noise_XX handshake failed");
// Capture the server's public key as authenticated by the client.
let server_key_seen_by_client = transport
.remote_static_public_key()
.expect("client: no remote static key after completed XX handshake")
.to_vec();
transport
.send_envelope(&ParsedEnvelope {
msg_type: MsgType::Ping,
group_id: vec![],
sender_id: vec![],
payload: vec![],
timestamp_ms: 1_700_000_000_000,
})
.await
.expect("client send_envelope failed");
// Bound the wait to 5 s so a server-side failure shows up as a test
// failure instead of a hang.
let pong = tokio::time::timeout(std::time::Duration::from_secs(5), transport.recv_envelope())
.await
.expect("timed out waiting for Pong — server task likely panicked")
.expect("client recv_envelope failed");
match pong.msg_type {
MsgType::Pong => {}
_ => panic!("client expected Pong, received a different message type"),
}
// ── Mutual authentication assertions ──────────────────────────────────────
// Joining the task also propagates any panic raised on the server side.
let client_key_seen_by_server = server_task
.await
.expect("server task panicked — see output above");
// The server authenticated the client's static public key correctly.
assert_eq!(
client_key_seen_by_server,
client_keypair.public_bytes().to_vec(),
"server's authenticated view of client key does not match client's actual public key"
);
// The client authenticated the server's static public key correctly.
assert_eq!(
server_key_seen_by_client,
server_keypair.public_bytes().to_vec(),
"client's authenticated view of server key does not match server's actual public key"
);
}
/// A second independent connection on the same server must also succeed,
/// confirming that the server keypair reuse across connections is correct.
///
/// Two clients, each with a freshly generated keypair, connect one after the
/// other; both must complete Ping/Pong and observe the same server key.
#[tokio::test]
async fn two_sequential_connections_both_authenticate() {
let server_keypair = Arc::new(NoiseKeypair::generate());
let listener = TcpListener::bind("127.0.0.1:0").await.expect("bind failed");
let server_addr = listener.local_addr().expect("local_addr failed");
let server_kp = Arc::clone(&server_keypair);
// The accept loop's JoinHandle is dropped, so a panic inside a server task
// is not observed directly; the client-side timeout below is what turns it
// into a test failure.
tokio::spawn(async move {
for _ in 0..2_u8 {
let (stream, _) = listener.accept().await.expect("accept failed");
let kp = Arc::clone(&server_kp);
// Each accepted connection is served in its own task.
tokio::spawn(async move {
let mut t = handshake_responder(stream, &kp)
.await
.expect("server handshake failed");
let env = t.recv_envelope().await.expect("recv failed");
match env.msg_type {
MsgType::Ping => {}
_ => panic!("expected Ping"),
}
t.send_envelope(&ParsedEnvelope {
msg_type: MsgType::Pong,
group_id: vec![],
sender_id: vec![],
payload: vec![],
timestamp_ms: 0,
})
.await
.expect("server send failed");
});
}
});
// Clients run strictly one after the other; each uses a fresh keypair.
for _ in 0..2_u8 {
let kp = NoiseKeypair::generate();
let stream = tokio::net::TcpStream::connect(server_addr)
.await
.expect("connect failed");
let mut t = handshake_initiator(stream, &kp)
.await
.expect("client handshake failed");
t.send_envelope(&ParsedEnvelope {
msg_type: MsgType::Ping,
group_id: vec![],
sender_id: vec![],
payload: vec![],
timestamp_ms: 0,
})
.await
.expect("client send failed");
let pong = tokio::time::timeout(std::time::Duration::from_secs(5), t.recv_envelope())
.await
.expect("timeout")
.expect("recv failed");
match pong.msg_type {
MsgType::Pong => {}
_ => panic!("expected Pong"),
}
// Each client sees the *same* server public key (key reuse across connections).
let seen = t
.remote_static_public_key()
.expect("no remote key")
.to_vec();
assert_eq!(seen, server_keypair.public_bytes().to_vec());
}
}

View File

@@ -2,20 +2,23 @@
name = "quicnprotochat-core" name = "quicnprotochat-core"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
description = "Crypto primitives, TLS/QUIC transport, MLS state machine, and Cap'n Proto frame codec for quicnprotochat." description = "Crypto primitives, MLS state machine, and hybrid post-quantum KEM for quicnprotochat."
license = "MIT" license = "MIT"
[dependencies] [dependencies]
# Crypto — classical # Crypto — classical
x25519-dalek = { workspace = true } x25519-dalek = { workspace = true }
ed25519-dalek = { workspace = true } ed25519-dalek = { workspace = true }
snow = { workspace = true }
sha2 = { workspace = true } sha2 = { workspace = true }
hkdf = { workspace = true } hkdf = { workspace = true }
chacha20poly1305 = { workspace = true }
zeroize = { workspace = true } zeroize = { workspace = true }
rand = { workspace = true } rand = { workspace = true }
# Crypto — MLS (M2); ml-kem added in M5 # Crypto — post-quantum hybrid KEM (M7)
ml-kem = { workspace = true }
# Crypto — MLS (M2)
openmls = { workspace = true } openmls = { workspace = true }
openmls_rust_crypto = { workspace = true } openmls_rust_crypto = { workspace = true }
openmls_traits = { workspace = true } openmls_traits = { workspace = true }
@@ -28,11 +31,8 @@ serde_json = { workspace = true }
capnp = { workspace = true } capnp = { workspace = true }
quicnprotochat-proto = { path = "../quicnprotochat-proto" } quicnprotochat-proto = { path = "../quicnprotochat-proto" }
# Async runtime + codec # Async runtime
tokio = { workspace = true } tokio = { workspace = true }
tokio-util = { workspace = true }
futures = { workspace = true }
bytes = { version = "1" }
# Error handling # Error handling
thiserror = { workspace = true } thiserror = { workspace = true }

View File

@@ -1,203 +0,0 @@
//! Length-prefixed byte frame codec for Tokio's `Framed` adapter.
//!
//! # Wire format
//!
//! ```text
//! ┌──────────────────────────┬──────────────────────────────────────┐
//! │ length (4 bytes, LE u32)│ payload (length bytes) │
//! └──────────────────────────┴──────────────────────────────────────┘
//! ```
//!
//! Little-endian was chosen over big-endian for consistency with Cap'n Proto's
//! own segment table encoding. Both sides of the connection use the same codec.
//!
//! # Usage
//!
//! This codec is transport-agnostic: during the Noise handshake it frames raw
//! Noise handshake messages; after the handshake it frames Noise-encrypted
//! application data. In both cases the payload is opaque bytes from the
//! codec's perspective.
//!
//! # Frame size limit
//!
//! The Noise protocol specifies a maximum message size of 65 535 bytes.
//! Frames larger than [`NOISE_MAX_MSG`] are rejected as protocol violations.
use bytes::{Buf, BufMut, Bytes, BytesMut};
use tokio_util::codec::{Decoder, Encoder};
use crate::error::CodecError;
/// Maximum Noise protocol message size in bytes (per the Noise specification).
///
/// Used by the codec as the upper bound on a frame's payload length, for both
/// encoding and decoding.
pub const NOISE_MAX_MSG: usize = 65_535;
/// Zero-sized codec that frames payloads with a 4-byte little-endian length.
///
/// It carries no state, so it is `Copy` and every instance is
/// interchangeable. It provides both [`Encoder<Bytes>`] and [`Decoder`],
/// which makes it usable with `tokio_util::codec::Framed`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LengthPrefixedCodec;

impl LengthPrefixedCodec {
    /// Create a codec instance.
    pub fn new() -> Self {
        LengthPrefixedCodec
    }
}
impl Encoder<Bytes> for LengthPrefixedCodec {
    type Error = CodecError;

    /// Write one frame to `dst`: the payload length as a 4-byte LE `u32`,
    /// followed by the payload bytes.
    ///
    /// # Errors
    ///
    /// Returns [`CodecError::FrameTooLarge`] when `item` is longer than
    /// [`NOISE_MAX_MSG`]; nothing is written to `dst` in that case.
    fn encode(&mut self, item: Bytes, dst: &mut BytesMut) -> Result<(), Self::Error> {
        let payload_len = item.len();

        if payload_len <= NOISE_MAX_MSG {
            // Header plus payload are reserved in one step.
            dst.reserve(4 + payload_len);
            dst.put_u32_le(payload_len as u32);
            dst.put_slice(&item);
            Ok(())
        } else {
            Err(CodecError::FrameTooLarge {
                len: payload_len,
                max: NOISE_MAX_MSG,
            })
        }
    }
}
impl Decoder for LengthPrefixedCodec {
    type Item = BytesMut;
    type Error = CodecError;

    /// Try to extract one length-prefixed frame from the front of `src`.
    ///
    /// * `Ok(None)` — the header or the payload is still incomplete; `src`
    ///   is left untouched apart from reserving room for the missing bytes.
    /// * `Ok(Some(frame))` — a full frame was available; its header and
    ///   payload are removed from `src` and the payload is returned.
    ///
    /// # Errors
    ///
    /// Returns [`CodecError::FrameTooLarge`] when the announced length is
    /// greater than [`NOISE_MAX_MSG`]. This is an unrecoverable protocol
    /// violation — callers should close the connection.
    fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
        const HEADER_LEN: usize = 4;
        let buffered = src.len();

        // Header not complete yet.
        if buffered < HEADER_LEN {
            src.reserve(HEADER_LEN - buffered);
            return Ok(None);
        }

        // Read the length without consuming it, so an incomplete frame
        // leaves the buffer exactly as it was.
        let mut header = [0u8; HEADER_LEN];
        header.copy_from_slice(&src[..HEADER_LEN]);
        let frame_len = u32::from_le_bytes(header) as usize;

        if frame_len > NOISE_MAX_MSG {
            return Err(CodecError::FrameTooLarge {
                len: frame_len,
                max: NOISE_MAX_MSG,
            });
        }

        let needed = HEADER_LEN + frame_len;
        if buffered < needed {
            // Make room for the remainder of the frame up front.
            src.reserve(needed - buffered);
            return Ok(None);
        }

        // Whole frame present: drop the header, hand out the payload.
        src.advance(HEADER_LEN);
        Ok(Some(src.split_to(frame_len)))
    }
}
// ── Tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
/// Encode `payload` into a fresh buffer and decode it straight back,
/// panicking if either step fails or no complete frame comes out.
fn encode_then_decode(payload: &[u8]) -> BytesMut {
let mut codec = LengthPrefixedCodec::new();
let mut buf = BytesMut::new();
codec
.encode(Bytes::copy_from_slice(payload), &mut buf)
.expect("encode failed");
let decoded = codec.decode(&mut buf).expect("decode error");
decoded.expect("expected a complete frame")
}
/// A zero-length payload is a valid frame.
#[test]
fn round_trip_empty_payload() {
let result = encode_then_decode(&[]);
assert!(result.is_empty());
}
#[test]
fn round_trip_small_payload() {
let payload = b"hello quicnprotochat";
let result = encode_then_decode(payload);
assert_eq!(&result[..], payload);
}
/// The limit itself (`NOISE_MAX_MSG` bytes) must still be accepted.
#[test]
fn round_trip_max_size_payload() {
let payload = vec![0xAB_u8; NOISE_MAX_MSG];
let result = encode_then_decode(&payload);
assert_eq!(&result[..], &payload[..]);
}
/// One byte over the limit is rejected on the encode side.
#[test]
fn oversized_encode_returns_error() {
let mut codec = LengthPrefixedCodec::new();
let mut buf = BytesMut::new();
let oversized = Bytes::from(vec![0u8; NOISE_MAX_MSG + 1]);
let err = codec.encode(oversized, &mut buf).unwrap_err();
assert!(matches!(err, CodecError::FrameTooLarge { .. }));
}
/// An oversized length header is rejected without waiting for the payload.
#[test]
fn oversized_length_field_decode_returns_error() {
let mut codec = LengthPrefixedCodec::new();
let mut buf = BytesMut::new();
// Encode a fake length field that exceeds NOISE_MAX_MSG.
buf.put_u32_le((NOISE_MAX_MSG + 1) as u32);
let err = codec.decode(&mut buf).unwrap_err();
assert!(matches!(err, CodecError::FrameTooLarge { .. }));
}
#[test]
fn partial_payload_returns_none() {
let mut codec = LengthPrefixedCodec::new();
let mut buf = BytesMut::new();
// Length header says 10 bytes but we only provide 5.
buf.put_u32_le(10);
buf.extend_from_slice(&[0u8; 5]);
let result = codec.decode(&mut buf).expect("decode error");
assert!(result.is_none());
}
#[test]
fn partial_header_returns_none() {
let mut codec = LengthPrefixedCodec::new();
// Only 2 bytes of the 4-byte header are available.
let mut buf = BytesMut::from(&[0x00_u8, 0x01][..]);
let result = codec.decode(&mut buf).expect("decode error");
assert!(result.is_none());
}
/// Pins the byte order of the length header on the wire.
#[test]
fn length_field_is_little_endian() {
let payload = b"le-check";
let mut codec = LengthPrefixedCodec::new();
let mut buf = BytesMut::new();
codec
.encode(Bytes::from_static(payload), &mut buf)
.expect("encode failed");
// First 4 bytes are the LE length: 8 in LE is [0x08, 0x00, 0x00, 0x00].
assert_eq!(&buf[..4], &[8, 0, 0, 0]);
}
}

View File

@@ -1,77 +1,21 @@
//! Error types for `quicnprotochat-core`. //! Error types for `quicnprotochat-core`.
//!
//! Two separate error types are used to preserve type-level separation of concerns:
//!
//! - [`CodecError`] — errors from the length-prefixed frame codec (I/O and framing only).
//! `tokio-util` requires the codec error implement `From<io::Error>`.
//!
//! - [`CoreError`] — errors from the Noise handshake and transport layer.
use thiserror::Error; use thiserror::Error;
/// Maximum plaintext bytes per Noise transport frame. /// Errors produced by core cryptographic and MLS operations.
///
/// Noise limits each message to 65 535 bytes. ChaCha20-Poly1305 consumes
/// 16 bytes for the authentication tag, leaving 65 519 bytes for plaintext.
pub const MAX_PLAINTEXT_LEN: usize = 65_519;
// ── Codec errors ──────────────────────────────────────────────────────────────
/// Errors produced by [`LengthPrefixedCodec`](crate::LengthPrefixedCodec).
#[derive(Debug, Error)]
pub enum CodecError {
/// The underlying TCP stream returned an I/O error.
///
/// This variant satisfies the `tokio-util` requirement that codec error
/// types implement `From<std::io::Error>`.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// A frame length field exceeded the Noise protocol maximum (65 535 bytes).
///
/// This is treated as a protocol violation and the connection should be
/// closed rather than retried.
#[error("frame length {len} exceeds maximum {max} bytes")]
FrameTooLarge { len: usize, max: usize },
}
// ── Core errors ───────────────────────────────────────────────────────────────
/// Errors produced by the Noise handshake and [`NoiseTransport`](crate::NoiseTransport).
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum CoreError { pub enum CoreError {
/// The `snow` Noise protocol engine returned an error.
///
/// This covers DH failures, decryption failures, state machine violations,
/// and pattern parse errors.
#[error("Noise protocol error: {0}")]
Noise(#[from] snow::Error),
/// The frame codec reported an I/O or framing error.
#[error("frame codec error: {0}")]
Codec(#[from] CodecError),
/// Cap'n Proto serialisation or deserialisation failed. /// Cap'n Proto serialisation or deserialisation failed.
#[error("Cap'n Proto error: {0}")] #[error("Cap'n Proto error: {0}")]
Capnp(#[from] capnp::Error), Capnp(#[from] capnp::Error),
/// The remote peer closed the connection before the handshake completed.
#[error("peer closed connection during Noise handshake")]
HandshakeIncomplete,
/// The remote peer closed the connection during normal operation.
#[error("peer closed connection")]
ConnectionClosed,
/// The caller attempted to send a plaintext larger than the Noise maximum.
///
/// The limit is [`MAX_PLAINTEXT_LEN`] bytes per frame.
#[error("plaintext {size} B exceeds Noise frame limit of {MAX_PLAINTEXT_LEN} B")]
MessageTooLarge { size: usize },
/// An MLS operation failed. /// An MLS operation failed.
/// ///
/// The inner string is the debug representation of the openmls error. /// The inner string is the debug representation of the openmls error.
#[error("MLS error: {0}")] #[error("MLS error: {0}")]
Mls(String), Mls(String),
/// A hybrid KEM (X25519 + ML-KEM-768) operation failed.
#[error("hybrid KEM error: {0}")]
HybridKem(#[from] crate::hybrid_kem::HybridKemError),
} }

View File

@@ -0,0 +1,452 @@
//! Post-quantum hybrid KEM: X25519 + ML-KEM-768.
//!
//! Wraps MLS payloads in an outer encryption layer using a hybrid key
//! encapsulation mechanism. The X25519 component provides classical
//! ECDH security; the ML-KEM-768 component (FIPS 203) provides
//! post-quantum security.
//!
//! # Wire format
//!
//! ```text
//! version(1) | x25519_eph_pk(32) | mlkem_ct(1088) | aead_nonce(12) | aead_ct(var)
//! ```
//!
//! # Key derivation
//!
//! ```text
//! ikm   = X25519_shared(32) || ML-KEM_shared(32)
//! key   = HKDF-SHA256(salt=[], ikm, info="quicnprotochat-hybrid-v1", L=32)
//! nonce = HKDF-SHA256(salt=[], ikm, info="quicnprotochat-hybrid-nonce-v1", L=12)
//! ```
use chacha20poly1305::{
aead::{Aead, KeyInit},
ChaCha20Poly1305, Key, Nonce,
};
use hkdf::Hkdf;
use ml_kem::{
array::Array,
kem::{Decapsulate, Encapsulate},
EncodedSizeUser, KemCore, MlKem768, MlKem768Params,
};
use rand::rngs::OsRng;
use serde::{Deserialize, Serialize};
use sha2::Sha256;
use x25519_dalek::{EphemeralSecret, PublicKey as X25519Public, StaticSecret};
use zeroize::Zeroizing;
// Re-import the concrete key types from the kem sub-module.
use ml_kem::kem::{DecapsulationKey, EncapsulationKey};
/// Current hybrid envelope version byte.
const HYBRID_VERSION: u8 = 0x01;
/// HKDF info string for domain separation.
const HKDF_INFO: &[u8] = b"quicnprotochat-hybrid-v1";
/// ML-KEM-768 ciphertext size in bytes.
const MLKEM_CT_LEN: usize = 1088;
/// ML-KEM-768 encapsulation key size in bytes.
pub const MLKEM_EK_LEN: usize = 1184;
/// ML-KEM-768 decapsulation key size in bytes.
pub const MLKEM_DK_LEN: usize = 2400;
/// Envelope header: version(1) + x25519 eph pk(32) + mlkem ct(1088) + nonce(12).
const HEADER_LEN: usize = 1 + 32 + MLKEM_CT_LEN + 12;
// ── Error type ──────────────────────────────────────────────────────────────
/// Failures reported by hybrid key parsing, `hybrid_encrypt` and `hybrid_decrypt`.
#[derive(Debug, thiserror::Error)]
pub enum HybridKemError {
/// ML-KEM encapsulation or the ChaCha20-Poly1305 seal step failed in `hybrid_encrypt`.
#[error("AEAD encryption failed")]
EncryptionFailed,
/// ChaCha20-Poly1305 rejected the ciphertext in `hybrid_decrypt`.
#[error("AEAD decryption failed (wrong recipient or tampered)")]
DecryptionFailed,
/// The first envelope byte was not the supported version; carries the byte that was seen.
#[error("unsupported hybrid envelope version: {0}")]
UnsupportedVersion(u8),
/// The input was shorter than required; carries the actual input length.
///
/// NOTE(review): the message prints `HEADER_LEN` as the minimum, but
/// `hybrid_decrypt` actually requires `HEADER_LEN + 16`, and
/// `HybridPublicKey::from_bytes` reuses this variant for public-key blobs
/// shorter than `32 + MLKEM_EK_LEN` — the printed minimum is misleading there.
#[error("envelope too short ({0} bytes, minimum {HEADER_LEN})")]
TooShort(usize),
/// A serialised ML-KEM key could not be converted into the fixed-size array
/// its key type expects. Used for both encapsulation and decapsulation keys.
#[error("invalid ML-KEM encapsulation key")]
InvalidMlKemKey,
/// The ML-KEM ciphertext could not be converted or decapsulated.
#[error("ML-KEM decapsulation failed")]
MlKemDecapsFailed,
}
// ── Keypair types ───────────────────────────────────────────────────────────
/// A hybrid keypair combining X25519 (classical) + ML-KEM-768 (post-quantum).
///
/// Each peer holds one of these. The public portion is distributed so
/// senders can encrypt payloads with post-quantum protection.
///
/// All fields are private: use [`HybridKeypair::public_key`] to obtain the
/// shareable half and [`HybridKeypair::to_bytes`] to persist the whole pair.
pub struct HybridKeypair {
// X25519 static secret; used for the DH step in `hybrid_decrypt`.
x25519_sk: StaticSecret,
// X25519 public key derived from `x25519_sk` at construction time.
x25519_pk: X25519Public,
// ML-KEM-768 decapsulation (private) key; used in `hybrid_decrypt`.
mlkem_dk: DecapsulationKey<MlKem768Params>,
// ML-KEM-768 encapsulation (public) key; exported via `public_key`.
mlkem_ek: EncapsulationKey<MlKem768Params>,
}
/// Serialisable form of a [`HybridKeypair`] for persistence.
///
/// Contains **secret** key material (`x25519_sk`, `mlkem_dk`) in plain byte
/// form. The X25519 public key is not stored; `HybridKeypair::from_bytes`
/// re-derives it from the secret.
///
/// NOTE(review): no zeroize-on-drop is implemented for this type in the code
/// shown here — callers should treat values as sensitive and drop them promptly.
#[derive(Serialize, Deserialize)]
pub struct HybridKeypairBytes {
/// Raw X25519 static secret (32 bytes).
pub x25519_sk: [u8; 32],
/// Encoded ML-KEM-768 decapsulation key (expected length `MLKEM_DK_LEN`).
pub mlkem_dk: Vec<u8>,
/// Encoded ML-KEM-768 encapsulation key (expected length `MLKEM_EK_LEN`).
pub mlkem_ek: Vec<u8>,
}
/// The public portion of a hybrid keypair, sent to peers.
///
/// Senders pass this to `hybrid_encrypt`. It can be flattened to a single
/// blob with `to_bytes` and parsed back with `from_bytes`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct HybridPublicKey {
/// Recipient's X25519 public key (32 bytes).
pub x25519_pk: [u8; 32],
/// Encoded ML-KEM-768 encapsulation key (expected length `MLKEM_EK_LEN`).
pub mlkem_ek: Vec<u8>,
}
impl HybridKeypair {
    /// Create a brand-new keypair, drawing all randomness from the OS CSPRNG.
    ///
    /// The X25519 secret is sampled first, followed by the ML-KEM-768 pair.
    pub fn generate() -> Self {
        let classical_secret = StaticSecret::random_from_rng(OsRng);
        let classical_public = X25519Public::from(&classical_secret);
        let (pq_decaps, pq_encaps) = MlKem768::generate(&mut OsRng);
        Self {
            x25519_sk: classical_secret,
            x25519_pk: classical_public,
            mlkem_dk: pq_decaps,
            mlkem_ek: pq_encaps,
        }
    }

    /// Rebuild a keypair from its persisted byte form.
    ///
    /// The X25519 public key is re-derived from the stored secret. The ML-KEM
    /// keys are decoded from their byte encodings.
    ///
    /// # Errors
    ///
    /// Returns [`HybridKemError::InvalidMlKemKey`] when either ML-KEM byte
    /// string cannot be converted into the array its key type expects.
    pub fn from_bytes(bytes: &HybridKeypairBytes) -> Result<Self, HybridKemError> {
        let classical_secret = StaticSecret::from(bytes.x25519_sk);
        let classical_public = X25519Public::from(&classical_secret);

        // Decapsulation key is validated first, then the encapsulation key.
        let dk_encoded = Array::try_from(bytes.mlkem_dk.as_slice())
            .map_err(|_| HybridKemError::InvalidMlKemKey)?;
        let pq_decaps = DecapsulationKey::<MlKem768Params>::from_bytes(&dk_encoded);

        let ek_encoded = Array::try_from(bytes.mlkem_ek.as_slice())
            .map_err(|_| HybridKemError::InvalidMlKemKey)?;
        let pq_encaps = EncapsulationKey::<MlKem768Params>::from_bytes(&ek_encoded);

        Ok(Self {
            x25519_sk: classical_secret,
            x25519_pk: classical_public,
            mlkem_dk: pq_decaps,
            mlkem_ek: pq_encaps,
        })
    }

    /// Export the full keypair, secrets included, in its persistable form.
    pub fn to_bytes(&self) -> HybridKeypairBytes {
        let x25519_sk = self.x25519_sk.to_bytes();
        let mlkem_dk = self.mlkem_dk.as_bytes().to_vec();
        let mlkem_ek = self.mlkem_ek.as_bytes().to_vec();
        HybridKeypairBytes {
            x25519_sk,
            mlkem_dk,
            mlkem_ek,
        }
    }

    /// Return only the public half, suitable for handing to peers.
    pub fn public_key(&self) -> HybridPublicKey {
        let x25519_pk = self.x25519_pk.to_bytes();
        let mlkem_ek = self.mlkem_ek.as_bytes().to_vec();
        HybridPublicKey {
            x25519_pk,
            mlkem_ek,
        }
    }
}
impl HybridPublicKey {
    /// Flatten the key into one blob: the 32-byte X25519 public key followed
    /// by the ML-KEM encapsulation key bytes.
    pub fn to_bytes(&self) -> Vec<u8> {
        [self.x25519_pk.as_slice(), self.mlkem_ek.as_slice()].concat()
    }

    /// Parse a blob produced by [`HybridPublicKey::to_bytes`].
    ///
    /// Only the first `32 + MLKEM_EK_LEN` bytes are read; any bytes after
    /// that are ignored.
    ///
    /// # Errors
    ///
    /// Returns [`HybridKemError::TooShort`] (carrying the input length) when
    /// fewer than `32 + MLKEM_EK_LEN` bytes are supplied.
    pub fn from_bytes(bytes: &[u8]) -> Result<Self, HybridKemError> {
        match bytes.get(..32 + MLKEM_EK_LEN) {
            None => Err(HybridKemError::TooShort(bytes.len())),
            Some(blob) => {
                let (classical, post_quantum) = blob.split_at(32);
                let mut x25519_pk = [0u8; 32];
                x25519_pk.copy_from_slice(classical);
                Ok(Self {
                    x25519_pk,
                    mlkem_ek: post_quantum.to_vec(),
                })
            }
        }
    }
}
// ── Encrypt / Decrypt ───────────────────────────────────────────────────────
/// Seal `plaintext` for `recipient_pk` with the X25519 + ML-KEM-768 hybrid KEM.
///
/// A fresh ephemeral X25519 key and a fresh ML-KEM encapsulation are produced
/// on every call. Both shared secrets feed `derive_aead_material`, and the
/// payload is sealed with ChaCha20-Poly1305.
///
/// The returned envelope is laid out as
/// `version || x25519_eph_pk || mlkem_ct || nonce || aead_ct`.
///
/// # Errors
///
/// - [`HybridKemError::InvalidMlKemKey`] if the recipient's ML-KEM key bytes
///   cannot be converted into an encapsulation key.
/// - [`HybridKemError::EncryptionFailed`] if encapsulation or the AEAD seal fails.
pub fn hybrid_encrypt(
    recipient_pk: &HybridPublicKey,
    plaintext: &[u8],
) -> Result<Vec<u8>, HybridKemError> {
    // Classical half: ephemeral-static X25519. The public key must be taken
    // before the DH call because `diffie_hellman` consumes the secret.
    let ephemeral_sk = EphemeralSecret::random_from_rng(OsRng);
    let ephemeral_pk_bytes = X25519Public::from(&ephemeral_sk).to_bytes();
    let classical_ss = ephemeral_sk.diffie_hellman(&X25519Public::from(recipient_pk.x25519_pk));

    // Post-quantum half: encapsulate to the recipient's ML-KEM-768 key.
    let ek_encoded = Array::try_from(recipient_pk.mlkem_ek.as_slice())
        .map_err(|_| HybridKemError::InvalidMlKemKey)?;
    let (kem_ct, pq_ss) = EncapsulationKey::<MlKem768Params>::from_bytes(&ek_encoded)
        .encapsulate(&mut OsRng)
        .map_err(|_| HybridKemError::EncryptionFailed)?;

    // Mix both secrets into the AEAD key and nonce, then seal the payload.
    let (key, nonce) = derive_aead_material(classical_ss.as_bytes(), pq_ss.as_slice());
    let sealed = ChaCha20Poly1305::new(&key)
        .encrypt(&nonce, plaintext)
        .map_err(|_| HybridKemError::EncryptionFailed)?;

    // Concatenate the header fields and the ciphertext in wire order.
    let parts: [&[u8]; 5] = [
        &[HYBRID_VERSION],
        &ephemeral_pk_bytes,
        kem_ct.as_slice(),
        nonce.as_slice(),
        sealed.as_slice(),
    ];
    Ok(parts.concat())
}
/// Open a hybrid envelope with the recipient's `keypair`.
///
/// The envelope is split into its fixed-width header fields and the AEAD
/// ciphertext. The key is re-derived from the X25519 and ML-KEM shared
/// secrets. The nonce is taken from the envelope itself; the nonce returned
/// by `derive_aead_material` is discarded.
///
/// # Errors
///
/// - [`HybridKemError::TooShort`] if the envelope is shorter than the header
///   plus a 16-byte AEAD tag (checked before the version byte).
/// - [`HybridKemError::UnsupportedVersion`] if the first byte is not the
///   supported version.
/// - [`HybridKemError::MlKemDecapsFailed`] if the ML-KEM ciphertext cannot be
///   converted or decapsulated.
/// - [`HybridKemError::DecryptionFailed`] if AEAD authentication fails.
pub fn hybrid_decrypt(
    keypair: &HybridKeypair,
    envelope: &[u8],
) -> Result<Vec<u8>, HybridKemError> {
    // 16 = minimum AEAD tag
    if envelope.len() < HEADER_LEN + 16 {
        return Err(HybridKemError::TooShort(envelope.len()));
    }

    let (version, body) = (envelope[0], &envelope[1..]);
    if version != HYBRID_VERSION {
        return Err(HybridKemError::UnsupportedVersion(version));
    }

    // Peel off the fixed-width fields in wire order. The length check above
    // guarantees every split is in bounds.
    let (eph_pk_raw, body) = body.split_at(32);
    let (kem_ct_raw, body) = body.split_at(MLKEM_CT_LEN);
    let (nonce_raw, sealed) = body.split_at(12);

    // Classical half: static-ephemeral X25519.
    let mut eph_pk_bytes = [0u8; 32];
    eph_pk_bytes.copy_from_slice(eph_pk_raw);
    let classical_ss = keypair
        .x25519_sk
        .diffie_hellman(&X25519Public::from(eph_pk_bytes));

    // Post-quantum half: decapsulate the ML-KEM ciphertext.
    let kem_ct = Array::try_from(kem_ct_raw).map_err(|_| HybridKemError::MlKemDecapsFailed)?;
    let pq_ss = keypair
        .mlkem_dk
        .decapsulate(&kem_ct)
        .map_err(|_| HybridKemError::MlKemDecapsFailed)?;

    // Only the key is needed here; the nonce comes from the envelope.
    let (key, _) = derive_aead_material(classical_ss.as_bytes(), pq_ss.as_slice());

    ChaCha20Poly1305::new(&key)
        .decrypt(Nonce::from_slice(nonce_raw), sealed)
        .map_err(|_| HybridKemError::DecryptionFailed)
}
/// Expand the two shared secrets into a ChaCha20-Poly1305 key and nonce.
///
/// The secrets are concatenated (X25519 first, ML-KEM second) and fed to
/// HKDF-SHA256 without a salt. The key and the nonce come from two separate
/// `expand` calls with distinct info strings. The concatenated input and the
/// intermediate key buffer are held in `Zeroizing` wrappers.
fn derive_aead_material(
    x25519_ss: &[u8],
    mlkem_ss: &[u8],
) -> (Key, Nonce) {
    let input_keying_material = Zeroizing::new([x25519_ss, mlkem_ss].concat());
    let hkdf = Hkdf::<Sha256>::new(None, &input_keying_material);

    let mut nonce_out = [0u8; 12];
    let mut key_out = Zeroizing::new([0u8; 32]);
    hkdf.expand(HKDF_INFO, &mut key_out[..])
        .expect("32 bytes is valid HKDF-SHA256 output length");
    hkdf.expand(b"quicnprotochat-hybrid-nonce-v1", &mut nonce_out)
        .expect("12 bytes is valid HKDF-SHA256 output length");

    (*Key::from_slice(&key_out[..]), *Nonce::from_slice(&nonce_out))
}
// ── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a freshly generated keypair plus its public half.
    fn fresh_pair() -> (HybridKeypair, HybridPublicKey) {
        let keypair = HybridKeypair::generate();
        let public = keypair.public_key();
        (keypair, public)
    }

    /// Generated public keys have the documented component sizes.
    #[test]
    fn keygen_produces_valid_public_key() {
        let (_, public) = fresh_pair();
        assert_eq!(public.x25519_pk.len(), 32);
        assert_eq!(public.mlkem_ek.len(), MLKEM_EK_LEN);
    }

    /// A sealed message opens with the matching keypair.
    #[test]
    fn encrypt_decrypt_round_trip() {
        let (keypair, public) = fresh_pair();
        let message = b"hello post-quantum world!";
        let sealed = hybrid_encrypt(&public, message).unwrap();
        assert_eq!(hybrid_decrypt(&keypair, &sealed).unwrap(), message);
    }

    /// A keypair other than the intended recipient's cannot open the envelope.
    #[test]
    fn wrong_key_decryption_fails() {
        let (_, intended_pk) = fresh_pair();
        let (stranger, _) = fresh_pair();
        let sealed = hybrid_encrypt(&intended_pk, b"secret").unwrap();
        assert!(hybrid_decrypt(&stranger, &sealed).is_err());
    }

    /// Flipping a bit in the last envelope byte is caught by the AEAD.
    #[test]
    fn tampered_aead_ciphertext_fails() {
        let (keypair, public) = fresh_pair();
        let mut sealed = hybrid_encrypt(&public, b"payload").unwrap();
        *sealed.last_mut().unwrap() ^= 0x01;
        assert!(matches!(
            hybrid_decrypt(&keypair, &sealed),
            Err(HybridKemError::DecryptionFailed)
        ));
    }

    /// Corrupting the ML-KEM ciphertext makes decryption fail.
    #[test]
    fn tampered_mlkem_ct_fails() {
        let (keypair, public) = fresh_pair();
        let mut sealed = hybrid_encrypt(&public, b"payload").unwrap();
        // The ML-KEM ciphertext region starts at offset 33; byte 40 lies inside it.
        sealed[40] ^= 0xFF;
        assert!(hybrid_decrypt(&keypair, &sealed).is_err());
    }

    /// Corrupting the ephemeral X25519 public key makes decryption fail.
    #[test]
    fn tampered_x25519_eph_pk_fails() {
        let (keypair, public) = fresh_pair();
        let mut sealed = hybrid_encrypt(&public, b"payload").unwrap();
        // The ephemeral public key occupies offsets 1..33; byte 5 lies inside it.
        sealed[5] ^= 0xFF;
        assert!(hybrid_decrypt(&keypair, &sealed).is_err());
    }

    /// An unknown version byte is rejected and reported back to the caller.
    #[test]
    fn unsupported_version_rejected() {
        let (keypair, public) = fresh_pair();
        let mut sealed = hybrid_encrypt(&public, b"payload").unwrap();
        sealed[0] = 0xFF;
        assert!(matches!(
            hybrid_decrypt(&keypair, &sealed),
            Err(HybridKemError::UnsupportedVersion(0xFF))
        ));
    }

    /// A 10-byte input is far below the header size and is rejected as too short.
    #[test]
    fn envelope_too_short_rejected() {
        let keypair = HybridKeypair::generate();
        let stub = [0x01u8; 10];
        assert!(matches!(
            hybrid_decrypt(&keypair, &stub),
            Err(HybridKemError::TooShort(10))
        ));
    }

    /// A keypair restored from its byte form has the same public keys and
    /// can open messages sealed to the original.
    #[test]
    fn keypair_serialisation_round_trip() {
        let original = HybridKeypair::generate();
        let restored = HybridKeypair::from_bytes(&original.to_bytes()).unwrap();

        assert_eq!(original.x25519_pk.to_bytes(), restored.x25519_pk.to_bytes());
        assert_eq!(
            original.public_key().mlkem_ek,
            restored.public_key().mlkem_ek
        );

        // A message sealed to the original public key must open with the restored pair.
        let sealed = hybrid_encrypt(&original.public_key(), b"test").unwrap();
        assert_eq!(hybrid_decrypt(&restored, &sealed).unwrap(), b"test");
    }

    /// The public-key blob encoding round-trips both components.
    #[test]
    fn public_key_serialisation_round_trip() {
        let (_, public) = fresh_pair();
        let restored = HybridPublicKey::from_bytes(&public.to_bytes()).unwrap();
        assert_eq!(public.x25519_pk, restored.x25519_pk);
        assert_eq!(public.mlkem_ek, restored.mlkem_ek);
    }

    /// A 50 KB payload round-trips unchanged.
    #[test]
    fn large_payload_round_trip() {
        let (keypair, public) = fresh_pair();
        let message = vec![0xAB; 50_000]; // 50 KB
        let sealed = hybrid_encrypt(&public, &message).unwrap();
        assert_eq!(hybrid_decrypt(&keypair, &sealed).unwrap(), message);
    }
}

View File

@@ -1,11 +1,8 @@
//! Ed25519 identity keypair for MLS credentials and AS registration. //! Ed25519 identity keypair for MLS credentials and AS registration.
//! //!
//! # Relationship to the Noise keypair //! The [`IdentityKeypair`] is the long-term identity key embedded in MLS
//! //! `BasicCredential`s. It is used for signing MLS messages and as the
//! The X25519 [`NoiseKeypair`](crate::NoiseKeypair) is the transport-layer //! indexing key for the Authentication Service.
//! static key used in the Noise_XX handshake. The Ed25519 [`IdentityKeypair`]
//! is the long-term identity key embedded in MLS `BasicCredential`s. The two
//! keys serve different roles and must not be confused.
//! //!
//! # Zeroize //! # Zeroize
//! //!

View File

@@ -1,121 +0,0 @@
//! Static X25519 keypair for the Noise_XX handshake.
//!
//! # Security properties
//!
//! - The private key is stored as [`x25519_dalek::StaticSecret`], which
//! implements [`ZeroizeOnDrop`](zeroize::ZeroizeOnDrop) — the key material
//! is overwritten with zeros when the `StaticSecret` is dropped.
//!
//! - [`NoiseKeypair::private_bytes`] returns a [`Zeroizing`](zeroize::Zeroizing)
//! wrapper so the caller's copy of the raw bytes is also cleared on drop.
//! Pass it directly to `snow::Builder::local_private_key` and let it fall
//! out of scope immediately after.
//!
//! - The public key is not secret and may be freely cloned or logged.
//!
//! # Persistence
//!
//! `NoiseKeypair` does not implement `Serialize` intentionally. Key persistence
//! to disk is handled at the application layer (M6) with appropriate file
//! permission checks and, optionally, passphrase-based encryption.
use rand::rngs::OsRng;
use x25519_dalek::{PublicKey, StaticSecret};
use zeroize::Zeroizing;
/// A static X25519 keypair used for Noise_XX mutual authentication.
///
/// Generate once per node identity and reuse across connections.
/// The private scalar is zeroized when this value is dropped.
///
/// Both fields are private: read the key material through
/// `private_bytes` / `public_bytes`. `Debug` is implemented by hand so the
/// private half is never printed.
pub struct NoiseKeypair {
/// Private scalar — zeroized on drop via `x25519_dalek`'s `ZeroizeOnDrop` impl.
private: StaticSecret,
/// Corresponding public key — derived from `private` at construction time.
public: PublicKey,
}
impl NoiseKeypair {
    /// Create a new keypair whose secret is sampled from the OS CSPRNG
    /// (`OsRng`). The public key is derived from that secret immediately.
    pub fn generate() -> Self {
        let secret = StaticSecret::random_from_rng(OsRng);
        Self {
            public: PublicKey::from(&secret),
            private: secret,
        }
    }

    /// Copy out the 32 private key bytes inside a [`Zeroizing`] wrapper.
    ///
    /// The wrapper wipes its copy when dropped, so hand it straight to the
    /// consumer and let it go out of scope:
    ///
    /// ```rust,ignore
    /// let private = keypair.private_bytes();
    /// let session = snow::Builder::new(params)
    ///     .local_private_key(&private[..])
    ///     .build_initiator()?;
    /// // `private` is zeroized when it drops.
    /// ```
    pub fn private_bytes(&self) -> Zeroizing<[u8; 32]> {
        Zeroizing::from(self.private.to_bytes())
    }

    /// Copy out the 32 public key bytes.
    ///
    /// This is not secret material and may be logged or transmitted.
    pub fn public_bytes(&self) -> [u8; 32] {
        *self.public.as_bytes()
    }
}
// Hand-written `Debug` so that `{:?}` can never leak the private key.
impl std::fmt::Debug for NoiseKeypair {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only a 4-byte prefix of the public key is shown, as a quick visual
        // identifier. The private half is always rendered as "[redacted]".
        let [b0, b1, b2, b3, ..] = self.public_bytes();
        write!(
            f,
            "NoiseKeypair {{ public: {:02x}{:02x}{:02x}{:02x}…, private: [redacted] }}",
            b0, b1, b2, b3,
        )
    }
}
// ── Tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    /// The stored public key equals the one re-derived from the private bytes.
    #[test]
    fn generated_public_key_matches_private() {
        let keypair = NoiseKeypair::generate();
        let secret = StaticSecret::from(*keypair.private_bytes());
        let expected_public = PublicKey::from(&secret).to_bytes();
        assert_eq!(expected_public, keypair.public_bytes());
    }

    /// Two independently generated keypairs have different public keys.
    #[test]
    fn two_keypairs_differ() {
        let first = NoiseKeypair::generate().public_bytes();
        let second = NoiseKeypair::generate().public_bytes();
        assert_ne!(first, second);
    }

    /// `private_bytes` hands back real (non-zero) key material in its
    /// zeroizing wrapper. Zeroization after drop cannot be observed from a
    /// safe test, so only the live contents are checked.
    #[test]
    fn private_bytes_is_zeroizing() {
        let secret_copy = NoiseKeypair::generate().private_bytes();
        assert!(
            *secret_copy != [0u8; 32],
            "freshly generated private key should not be all zeros"
        );
    }
}

View File

@@ -1,34 +1,32 @@
//! Core cryptographic primitives, Noise_XX transport, MLS group state machine, //! Core cryptographic primitives, MLS group state machine, and hybrid
//! and frame codec for quicnprotochat. //! post-quantum KEM for quicnprotochat.
//! //!
//! # Module layout //! # Module layout
//! //!
//! | Module | Responsibility | //! | Module | Responsibility |
//! |--------------|------------------------------------------------------------------| //! |--------------|------------------------------------------------------------------|
//! | `error` | [`CoreError`] and [`CodecError`] types | //! | `error` | [`CoreError`] type |
//! | `keypair` | [`NoiseKeypair`] — static X25519 key, zeroize-on-drop |
//! | `codec` | [`LengthPrefixedCodec`] — Tokio Encoder + Decoder |
//! | `noise` | [`handshake_initiator`], [`handshake_responder`], [`NoiseTransport`] |
//! | `identity` | [`IdentityKeypair`] — Ed25519 identity key for MLS credentials | //! | `identity` | [`IdentityKeypair`] — Ed25519 identity key for MLS credentials |
//! | `keypackage` | [`generate_key_package`] — standalone KeyPackage generation | //! | `keypackage` | [`generate_key_package`] — standalone KeyPackage generation |
//! | `group` | [`GroupMember`] — MLS group lifecycle (create/join/send/recv) | //! | `group` | [`GroupMember`] — MLS group lifecycle (create/join/send/recv) |
//! | `hybrid_kem` | Hybrid X25519 + ML-KEM-768 key encapsulation |
//! | `keystore` | [`DiskKeyStore`] — OpenMLS key store with optional persistence |
mod codec;
mod error; mod error;
mod group; mod group;
pub mod hybrid_kem;
mod identity; mod identity;
mod keypackage; mod keypackage;
mod keypair;
mod keystore; mod keystore;
mod noise;
// ── Public API ──────────────────────────────────────────────────────────────── // ── Public API ────────────────────────────────────────────────────────────────
pub use codec::{LengthPrefixedCodec, NOISE_MAX_MSG}; pub use error::CoreError;
pub use error::{CodecError, CoreError, MAX_PLAINTEXT_LEN};
pub use group::GroupMember; pub use group::GroupMember;
pub use hybrid_kem::{
hybrid_decrypt, hybrid_encrypt, HybridKeypair, HybridKeypairBytes, HybridKemError,
HybridPublicKey,
};
pub use identity::IdentityKeypair; pub use identity::IdentityKeypair;
pub use keypackage::generate_key_package; pub use keypackage::generate_key_package;
pub use keypair::NoiseKeypair;
pub use keystore::DiskKeyStore; pub use keystore::DiskKeyStore;
pub use noise::{handshake_initiator, handshake_responder, NoiseTransport};

View File

@@ -1,400 +0,0 @@
//! Noise_XX handshake and encrypted transport.
//!
//! # Protocol
//!
//! Pattern: `Noise_XX_25519_ChaChaPoly_BLAKE2s`
//!
//! ```text
//! XX handshake (3 messages):
//! -> e (initiator sends ephemeral public key)
//! <- e, ee, s, es (responder replies; mutual DH + responder static)
//! -> s, se (initiator sends static key; final DH)
//! ```
//!
//! After the handshake both peers have authenticated each other's static X25519
//! keys and negotiated a symmetric session with ChaCha20-Poly1305.
//!
//! # Framing
//!
//! All messages — handshake and application — are carried in length-prefixed
//! frames (see [`LengthPrefixedCodec`](crate::LengthPrefixedCodec)).
//!
//! In the handshake phase the frame payload is the raw Noise handshake bytes
//! produced by `snow`. In the transport phase the frame payload is a
//! Noise-encrypted Cap'n Proto message.
//!
//! # Post-quantum gap (ADR-006)
//!
//! The Noise transport uses classical X25519. PQ-Noise is not yet standardised
//! in `snow`. MLS application data is PQ-protected from M5 onward. The residual
//! risk (metadata exposure via handshake harvest) is accepted for M1–M5.
use bytes::Bytes;
use futures::{SinkExt, StreamExt};
use tokio::{
io::{duplex, AsyncReadExt, AsyncWriteExt, DuplexStream, ReadHalf, WriteHalf},
net::TcpStream,
};
use tokio_util::codec::Framed;
use crate::{
codec::{LengthPrefixedCodec, NOISE_MAX_MSG},
error::{CoreError, MAX_PLAINTEXT_LEN},
keypair::NoiseKeypair,
};
use quicnprotochat_proto::{build_envelope, parse_envelope, ParsedEnvelope};
/// Noise parameters used throughout quicnprotochat.
///
/// `Noise_XX_25519_ChaChaPoly_BLAKE2s` — both parties authenticate each
/// other's static X25519 keys; ChaCha20-Poly1305 for AEAD; BLAKE2s as PRF.
const NOISE_PARAMS: &str = "Noise_XX_25519_ChaChaPoly_BLAKE2s";
/// ChaCha20-Poly1305 authentication tag overhead per Noise message.
const NOISE_TAG_LEN: usize = 16;
// ── Public type ───────────────────────────────────────────────────────────────
/// An authenticated, encrypted Noise transport session.
///
/// Obtained by completing a [`handshake_initiator`] or [`handshake_responder`]
/// call. All subsequent I/O is through [`send_frame`](Self::send_frame) and
/// [`recv_frame`](Self::recv_frame), or the higher-level envelope helpers.
///
/// # Thread safety
///
/// `NoiseTransport` is `Send` but not `Clone` or `Sync`. Use one instance per
/// Tokio task; use message passing to share data across tasks.
///
/// # Debug output
///
/// The hand-written `Debug` impl prints only a short hex prefix of
/// `remote_static`; the session state and socket are omitted.
pub struct NoiseTransport {
/// The TCP stream wrapped in the length-prefix codec.
framed: Framed<TcpStream, LengthPrefixedCodec>,
/// The Noise session in transport mode — encrypts and decrypts frames.
session: snow::TransportState,
/// Remote peer's static X25519 public key, captured from the HandshakeState
/// before `into_transport_mode()` consumes it.
///
/// Stored here explicitly rather than via `TransportState::get_remote_static()`
/// because snow does not guarantee the method survives the mode transition.
///
/// Exposed read-only through `remote_static_public_key`.
remote_static: Option<Vec<u8>>,
}
impl NoiseTransport {
// ── Transport-layer I/O ───────────────────────────────────────────────────
/// Encrypt `plaintext` and send it as a single length-prefixed frame.
///
/// # Errors
///
/// - [`CoreError::MessageTooLarge`] if `plaintext` exceeds
/// [`MAX_PLAINTEXT_LEN`] bytes.
/// - [`CoreError::Noise`] if the Noise session fails to encrypt.
/// - [`CoreError::Codec`] if the underlying TCP write fails.
pub async fn send_frame(&mut self, plaintext: &[u8]) -> Result<(), CoreError> {
// Reject oversized input before touching the Noise session.
if plaintext.len() > MAX_PLAINTEXT_LEN {
return Err(CoreError::MessageTooLarge {
size: plaintext.len(),
});
}
// Allocate exactly the right amount: plaintext + AEAD tag.
let mut ciphertext = vec![0u8; plaintext.len() + NOISE_TAG_LEN];
let len = self
.session
.write_message(plaintext, &mut ciphertext)
.map_err(CoreError::Noise)?;
// Only the first `len` bytes reported by snow are sent on the wire.
self.framed
.send(Bytes::copy_from_slice(&ciphertext[..len]))
.await
.map_err(CoreError::Codec)?;
Ok(())
}
/// Receive the next length-prefixed frame and decrypt it.
///
/// Awaits until a complete frame arrives on the TCP stream.
///
/// # Errors
///
/// - [`CoreError::ConnectionClosed`] if the peer closed the connection.
/// - [`CoreError::Noise`] if decryption or authentication fails.
/// - [`CoreError::Codec`] if the underlying TCP read or framing fails.
pub async fn recv_frame(&mut self) -> Result<Vec<u8>, CoreError> {
// `next()` yielding `None` means the framed stream ended, which is
// reported as `ConnectionClosed`; a codec error is passed through.
let ciphertext = self
.framed
.next()
.await
.ok_or(CoreError::ConnectionClosed)?
.map_err(CoreError::Codec)?;
// Plaintext is always shorter than ciphertext (AEAD tag is stripped).
let mut plaintext = vec![0u8; ciphertext.len()];
let len = self
.session
.read_message(&ciphertext, &mut plaintext)
.map_err(CoreError::Noise)?;
// Trim the scratch buffer down to the decrypted length.
plaintext.truncate(len);
Ok(plaintext)
}
// ── Envelope-level I/O ────────────────────────────────────────────────────
/// Serialise and encrypt a [`ParsedEnvelope`], then send it.
///
/// This is the primary application-level send method. The Cap'n Proto
/// encoding is done by [`quicnprotochat_proto::build_envelope`] before encryption.
///
/// The serialised envelope goes out as one frame via `send_frame`, so it is
/// subject to the same [`MAX_PLAINTEXT_LEN`] limit.
pub async fn send_envelope(&mut self, env: &ParsedEnvelope) -> Result<(), CoreError> {
let bytes = build_envelope(env).map_err(CoreError::Capnp)?;
self.send_frame(&bytes).await
}
/// Receive a frame, decrypt it, and deserialise it as a [`ParsedEnvelope`].
///
/// This is the primary application-level receive method.
pub async fn recv_envelope(&mut self) -> Result<ParsedEnvelope, CoreError> {
let bytes = self.recv_frame().await?;
parse_envelope(&bytes).map_err(CoreError::Capnp)
}
// ── capnp-rpc bridge ─────────────────────────────────────────────────────
/// Consume the transport and return a byte-stream pair suitable for
/// `capnp-rpc`'s `twoparty::VatNetwork`.
///
/// # Why this exists
///
/// `capnp-rpc` expects `AsyncRead + AsyncWrite` byte streams, but
/// `NoiseTransport` is message-based (each call to `send_frame` /
/// `recv_frame` encrypts/decrypts one Noise message). This method bridges
/// the two models by:
///
/// 1. Creating a `tokio::io::duplex` pipe (an in-process byte channel).
/// 2. Spawning a background task that shuttles bytes between the pipe and
/// the Noise framed transport using `tokio::select!`.
///
/// The returned `(ReadHalf, WriteHalf)` are the **application** ends of the
/// pipe; `capnp-rpc` reads from `ReadHalf` and writes to `WriteHalf`. The
/// bridge task owns the **transport** end and the `NoiseTransport`.
///
/// # Framing
///
/// Each Noise frame carries at most [`MAX_PLAINTEXT_LEN`] bytes of
/// plaintext. The bridge uses that as the read buffer size so that one
/// frame is never split across multiple pipe writes.
///
/// # Lifetime
///
/// The bridge task runs until either side of the pipe closes. When the
/// capnp-rpc system drops the pipe halves, the bridge exits cleanly.
///
/// # Error reporting
///
/// Every failure inside the bridge task — Noise error, peer disconnect, or
/// pipe closure — only breaks the loop. No error value is returned or logged
/// by this function.
pub fn into_capnp_io(mut self) -> (ReadHalf<DuplexStream>, WriteHalf<DuplexStream>) {
// Choose a pipe capacity large enough for one max-size Noise frame.
let (app_stream, mut transport_stream) = duplex(MAX_PLAINTEXT_LEN);
// The spawned task takes ownership of `self` and the transport end of the pipe.
tokio::spawn(async move {
let mut buf = vec![0u8; MAX_PLAINTEXT_LEN];
loop {
// Whichever direction becomes ready first is serviced; its branch
// body is awaited to completion before the loop polls again.
tokio::select! {
// Noise → app: receive an encrypted frame and write decrypted
// plaintext into the pipe.
noise_result = self.recv_frame() => {
match noise_result {
Ok(plaintext) => {
if transport_stream.write_all(&plaintext).await.is_err() {
break; // app side closed
}
}
Err(_) => break, // peer closed or Noise error
}
}
// app → Noise: read bytes from the pipe and send as an
// encrypted Noise frame.
read_result = transport_stream.read(&mut buf) => {
match read_result {
Ok(0) | Err(_) => break, // app side closed
Ok(n) => {
if self.send_frame(&buf[..n]).await.is_err() {
break; // peer closed or Noise error
}
}
}
}
}
}
});
tokio::io::split(app_stream)
}
// ── Session metadata ──────────────────────────────────────────────────────
/// Return the remote peer's static X25519 public key (32 bytes), as
/// authenticated during the Noise_XX handshake.
///
/// Returns `None` only in the impossible case where the XX handshake
/// completed without exchanging static keys (a snow implementation bug).
/// In practice this is always `Some` after a successful handshake.
pub fn remote_static_public_key(&self) -> Option<&[u8]> {
self.remote_static.as_deref()
}
}
/// Redacted `Debug` output: only a short hex prefix of the remote static key
/// is printed. The socket and the Noise session state are omitted.
impl std::fmt::Debug for NoiseTransport {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Render at most the first four bytes as hex. Iterating with `take`
        // rather than indexing `k[0]..k[3]` means a key shorter than four
        // bytes can never cause a panic inside a `Debug` call. For the usual
        // 32-byte key the output is unchanged (8 hex characters).
        let remote = self.remote_static.as_deref().map(|key| {
            key.iter()
                .take(4)
                .map(|byte| format!("{:02x}", byte))
                .collect::<String>()
        });
        f.debug_struct("NoiseTransport")
            .field("remote_static", &remote)
            .finish_non_exhaustive()
    }
}
// ── Handshake functions ───────────────────────────────────────────────────────
/// Complete a Noise_XX handshake as the **initiator** over `stream`.
///
/// The initiator sends the first handshake message. After the three-message
/// exchange completes, the function returns an authenticated [`NoiseTransport`]
/// ready for application data.
///
/// The handshake scratch buffer is wrapped in `zeroize::Zeroizing`, so it is
/// wiped on every exit path — including the early returns taken when a Noise
/// or I/O step fails — not only after a successful handshake.
///
/// # Errors
///
/// - [`CoreError::HandshakeIncomplete`] if the peer closes the connection mid-handshake.
/// - [`CoreError::Noise`] if any Noise operation fails (pattern mismatch, bad DH, etc.).
/// - [`CoreError::Codec`] if any TCP I/O fails during the handshake.
pub async fn handshake_initiator(
    stream: TcpStream,
    keypair: &NoiseKeypair,
) -> Result<NoiseTransport, CoreError> {
    let params: snow::params::NoiseParams = NOISE_PARAMS
        .parse()
        .expect("NOISE_PARAMS is a compile-time constant and must parse successfully");

    // Our copy of the private key is dropped as soon as the builder has
    // consumed it; snow keeps its own internal copy for the session.
    let private = keypair.private_bytes();
    let mut session = snow::Builder::new(params)
        .local_private_key(&private[..])
        .build_initiator()
        .map_err(CoreError::Noise)?;
    drop(private);

    let mut framed = Framed::new(stream, LengthPrefixedCodec::new());

    // Scratch space for handshake messages. It is treated as sensitive, so it
    // lives in a `Zeroizing` wrapper that clears it when dropped, whichever
    // way this function returns.
    let mut buf = zeroize::Zeroizing::new(vec![0u8; NOISE_MAX_MSG]);

    // ── Message 1: -> e ──────────────────────────────────────────────────────
    let len = session
        .write_message(&[], &mut buf[..])
        .map_err(CoreError::Noise)?;
    framed
        .send(Bytes::copy_from_slice(&buf[..len]))
        .await
        .map_err(CoreError::Codec)?;

    // ── Message 2: <- e, ee, s, es ───────────────────────────────────────────
    let msg2 = recv_handshake_frame(&mut framed).await?;
    session
        .read_message(&msg2, &mut buf[..])
        .map_err(CoreError::Noise)?;

    // ── Message 3: -> s, se ──────────────────────────────────────────────────
    let len = session
        .write_message(&[], &mut buf[..])
        .map_err(CoreError::Noise)?;
    framed
        .send(Bytes::copy_from_slice(&buf[..len]))
        .await
        .map_err(CoreError::Codec)?;

    // Wipe the scratch buffer now rather than at the end of the function.
    drop(buf);

    // Capture the remote static key from HandshakeState before consuming it.
    let remote_static = session.get_remote_static().map(|k| k.to_vec());
    let transport_session = session.into_transport_mode().map_err(CoreError::Noise)?;

    Ok(NoiseTransport {
        framed,
        session: transport_session,
        remote_static,
    })
}
/// Complete a Noise_XX handshake as the **responder** over `stream`.
///
/// The responder waits for the initiator's first message. After the
/// three-message exchange completes, the function returns an authenticated
/// [`NoiseTransport`] ready for application data.
///
/// The handshake scratch buffer is wrapped in `zeroize::Zeroizing`, so it is
/// wiped on every exit path — including the early returns taken when a Noise
/// or I/O step fails — not only after a successful handshake.
///
/// # Errors
///
/// Same as [`handshake_initiator`].
pub async fn handshake_responder(
    stream: TcpStream,
    keypair: &NoiseKeypair,
) -> Result<NoiseTransport, CoreError> {
    let params: snow::params::NoiseParams = NOISE_PARAMS
        .parse()
        .expect("NOISE_PARAMS is a compile-time constant and must parse successfully");

    // Our copy of the private key is dropped as soon as the builder has
    // consumed it; snow keeps its own internal copy for the session.
    let private = keypair.private_bytes();
    let mut session = snow::Builder::new(params)
        .local_private_key(&private[..])
        .build_responder()
        .map_err(CoreError::Noise)?;
    drop(private);

    let mut framed = Framed::new(stream, LengthPrefixedCodec::new());

    // Scratch space for handshake messages. It is treated as sensitive, so it
    // lives in a `Zeroizing` wrapper that clears it when dropped, whichever
    // way this function returns.
    let mut buf = zeroize::Zeroizing::new(vec![0u8; NOISE_MAX_MSG]);

    // ── Message 1: <- e ──────────────────────────────────────────────────────
    let msg1 = recv_handshake_frame(&mut framed).await?;
    session
        .read_message(&msg1, &mut buf[..])
        .map_err(CoreError::Noise)?;

    // ── Message 2: -> e, ee, s, es ───────────────────────────────────────────
    let len = session
        .write_message(&[], &mut buf[..])
        .map_err(CoreError::Noise)?;
    framed
        .send(Bytes::copy_from_slice(&buf[..len]))
        .await
        .map_err(CoreError::Codec)?;

    // ── Message 3: <- s, se ──────────────────────────────────────────────────
    let msg3 = recv_handshake_frame(&mut framed).await?;
    session
        .read_message(&msg3, &mut buf[..])
        .map_err(CoreError::Noise)?;

    // Wipe the scratch buffer now rather than at the end of the function.
    drop(buf);

    // Capture the remote static key from HandshakeState before consuming it.
    let remote_static = session.get_remote_static().map(|k| k.to_vec());
    let transport_session = session.into_transport_mode().map_err(CoreError::Noise)?;

    Ok(NoiseTransport {
        framed,
        session: transport_session,
        remote_static,
    })
}
// ── Private helpers ───────────────────────────────────────────────────────────
/// Pull the next length-prefixed frame off `framed` during the handshake.
///
/// A stream that ends before yielding a frame is reported as
/// [`CoreError::HandshakeIncomplete`]; a codec/I/O failure is wrapped in
/// [`CoreError::Codec`].
async fn recv_handshake_frame(
    framed: &mut Framed<TcpStream, LengthPrefixedCodec>,
) -> Result<bytes::BytesMut, CoreError> {
    match framed.next().await {
        None => Err(CoreError::HandshakeIncomplete),
        Some(Err(codec_err)) => Err(CoreError::Codec(codec_err)),
        Some(Ok(frame)) => Ok(frame),
    }
}

View File

@@ -32,6 +32,9 @@ quinn-proto = { workspace = true }
rustls = { workspace = true } rustls = { workspace = true }
rcgen = { workspace = true } rcgen = { workspace = true }
# Database
rusqlite = { workspace = true }
# Error handling # Error handling
anyhow = { workspace = true } anyhow = { workspace = true }
thiserror = { workspace = true } thiserror = { workspace = true }
@@ -40,3 +43,4 @@ serde = { workspace = true }
# CLI # CLI
clap = { workspace = true } clap = { workspace = true }
toml = { version = "0.8" }

View File

@@ -25,14 +25,15 @@
//! | `QUICNPROTOCHAT_LISTEN` | `--listen` | `0.0.0.0:4201` | //! | `QUICNPROTOCHAT_LISTEN` | `--listen` | `0.0.0.0:4201` |
//! | `RUST_LOG` | — | `info` | //! | `RUST_LOG` | — | `info` |
use std::{fs, net::SocketAddr, path::PathBuf, sync::Arc, time::Duration}; use std::{fs, net::SocketAddr, path::{Path, PathBuf}, sync::Arc, time::Duration};
use anyhow::Context; use anyhow::Context;
use serde::Deserialize;
use capnp::capability::Promise; use capnp::capability::Promise;
use capnp_rpc::{rpc_twoparty_capnp::Side, twoparty, RpcSystem}; use capnp_rpc::{rpc_twoparty_capnp::Side, twoparty, RpcSystem};
use clap::Parser; use clap::Parser;
use dashmap::DashMap; use dashmap::DashMap;
use quicnprotochat_proto::node_capnp::node_service; use quicnprotochat_proto::node_capnp::{auth, node_service};
use quinn::{Endpoint, ServerConfig}; use quinn::{Endpoint, ServerConfig};
use quinn_proto::crypto::rustls::QuicServerConfig; use quinn_proto::crypto::rustls::QuicServerConfig;
use rcgen::generate_simple_self_signed; use rcgen::generate_simple_self_signed;
@@ -43,12 +44,139 @@ use tokio::sync::Notify;
use tokio::time::timeout; use tokio::time::timeout;
use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt}; use tokio_util::compat::{TokioAsyncReadCompatExt, TokioAsyncWriteCompatExt};
mod sql_store;
mod storage; mod storage;
use storage::{FileBackedStore, StorageError}; use sql_store::SqlStore;
use storage::{FileBackedStore, Store, StorageError};
const MAX_PAYLOAD_BYTES: usize = 5 * 1024 * 1024; // 5 MB cap per message const MAX_PAYLOAD_BYTES: usize = 5 * 1024 * 1024; // 5 MB cap per message
const MAX_KEYPACKAGE_BYTES: usize = 1 * 1024 * 1024; // 1 MB cap per KeyPackage const MAX_KEYPACKAGE_BYTES: usize = 1 * 1024 * 1024; // 1 MB cap per KeyPackage
const CURRENT_WIRE_VERSION: u16 = 1; // allow 0 (legacy) and 1 (current) const CURRENT_WIRE_VERSION: u16 = 1; // legacy disabled; current wire version only
// Built-in defaults. They serve two purposes: they are the clap
// `default_value`s on `Args`, and `merge_config` compares the parsed CLI
// values against them to decide whether a flag was left at its default (in
// which case a value from the TOML config file may take over).

// NOTE(review): the module-level docs table lists `0.0.0.0:4201` as the
// `--listen` default, but this constant is port 7000 — confirm which one is
// intended and update the other.
const DEFAULT_LISTEN: &str = "0.0.0.0:7000";
// Directory used by the file-backed store.
const DEFAULT_DATA_DIR: &str = "data";
// TLS certificate and private key locations.
const DEFAULT_TLS_CERT: &str = "data/server-cert.der";
const DEFAULT_TLS_KEY: &str = "data/server-key.der";
// Storage backend selector; `main` recognises "sql" and treats anything else
// as the file-backed store.
const DEFAULT_STORE_BACKEND: &str = "file";
// Database file opened when the "sql" backend is selected.
const DEFAULT_DB_PATH: &str = "data/quicnprotochat.db";
/// Token policy consulted by `validate_auth`.
#[derive(Clone, Debug)]
struct AuthConfig {
    /// Exact token bytes a request must present, or `None` when no specific
    /// token has been configured.
    required_token: Option<Vec<u8>>,
}

impl AuthConfig {
    /// Build the policy from an optional configured token.
    ///
    /// A missing token and an empty string are treated the same way: no
    /// specific token is required.
    fn new(required_token: Option<String>) -> Self {
        let required_token = match required_token {
            Some(token) if !token.is_empty() => Some(token.into_bytes()),
            _ => None,
        };
        Self { required_token }
    }
}
/// Settings as read from the optional TOML config file.
///
/// Every field is optional: `load_config` returns the all-`None` default when
/// the file does not exist, and `merge_config` only consults a field when the
/// matching CLI value was left at its default (or unset).
#[derive(Debug, Default, Deserialize)]
struct FileConfig {
    // Counterpart of `--listen`.
    listen: Option<String>,
    // Counterpart of `--data-dir`.
    data_dir: Option<String>,
    // Counterpart of `--tls-cert`.
    tls_cert: Option<PathBuf>,
    // Counterpart of `--tls-key`.
    tls_key: Option<PathBuf>,
    // Counterpart of `--auth-token`.
    auth_token: Option<String>,
    // Counterpart of `--store-backend`.
    store_backend: Option<String>,
    // Counterpart of `--db-path`.
    db_path: Option<PathBuf>,
    // Counterpart of `--db-key`.
    db_key: Option<String>,
}
/// Fully resolved server settings produced by `merge_config` from the CLI
/// arguments and the config file; this is what `main` actually uses.
#[derive(Debug)]
struct EffectiveConfig {
    // Listen address as text; `main` parses it into a `SocketAddr`.
    listen: String,
    // Directory handed to the file-backed store.
    data_dir: String,
    // TLS certificate path.
    tls_cert: PathBuf,
    // TLS private key path.
    tls_key: PathBuf,
    // Optional required bearer token (wrapped in `AuthConfig` by `main`).
    auth_token: Option<String>,
    // Backend selector string ("sql" selects the SQL store in `main`).
    store_backend: String,
    // Database file path used by the SQL backend.
    db_path: PathBuf,
    // Database key for the SQL backend; empty means no key is applied.
    db_key: String,
}
/// Read and parse the TOML config file.
///
/// `path` defaults to `quicnprotochat-server.toml` in the working directory
/// when not given. A file that does not exist is not an error — the all-`None`
/// default config is returned instead (this applies to an explicitly supplied
/// path as well). Read and parse failures are returned with the path attached
/// as context.
fn load_config(path: Option<&Path>) -> anyhow::Result<FileConfig> {
    let path = path.map_or_else(
        || PathBuf::from("quicnprotochat-server.toml"),
        |p| p.to_path_buf(),
    );

    if path.exists() {
        let contents = fs::read_to_string(&path)
            .with_context(|| format!("read config file {path:?}"))?;
        toml::from_str(&contents).with_context(|| format!("parse config file {path:?}"))
    } else {
        Ok(FileConfig::default())
    }
}
/// Combine CLI arguments with config-file values into the final settings.
///
/// Precedence per setting: a CLI value that differs from its built-in default
/// wins; otherwise the config-file value is used if present; otherwise the
/// built-in default. Because "unset" is detected by comparing against the
/// default, a flag explicitly set to its default value is indistinguishable
/// from an omitted flag. `auth_token` has no default and simply prefers the
/// CLI value when one is given; `db_key` treats the empty string as "unset".
fn merge_config(args: &Args, file: &FileConfig) -> EffectiveConfig {
    /// Keep the CLI value unless it was left untouched; then prefer the file
    /// value and finally `fallback`.
    fn layered<T: Clone>(
        cli: &T,
        cli_untouched: bool,
        from_file: &Option<T>,
        fallback: impl FnOnce() -> T,
    ) -> T {
        if cli_untouched {
            from_file.clone().unwrap_or_else(fallback)
        } else {
            cli.clone()
        }
    }

    EffectiveConfig {
        listen: layered(
            &args.listen,
            args.listen == DEFAULT_LISTEN,
            &file.listen,
            || DEFAULT_LISTEN.to_string(),
        ),
        data_dir: layered(
            &args.data_dir,
            args.data_dir == DEFAULT_DATA_DIR,
            &file.data_dir,
            || DEFAULT_DATA_DIR.to_string(),
        ),
        tls_cert: layered(
            &args.tls_cert,
            args.tls_cert == PathBuf::from(DEFAULT_TLS_CERT),
            &file.tls_cert,
            || PathBuf::from(DEFAULT_TLS_CERT),
        ),
        tls_key: layered(
            &args.tls_key,
            args.tls_key == PathBuf::from(DEFAULT_TLS_KEY),
            &file.tls_key,
            || PathBuf::from(DEFAULT_TLS_KEY),
        ),
        auth_token: args
            .auth_token
            .clone()
            .or_else(|| file.auth_token.clone()),
        store_backend: layered(
            &args.store_backend,
            args.store_backend == DEFAULT_STORE_BACKEND,
            &file.store_backend,
            || DEFAULT_STORE_BACKEND.to_string(),
        ),
        db_path: layered(
            &args.db_path,
            args.db_path == PathBuf::from(DEFAULT_DB_PATH),
            &file.db_path,
            || PathBuf::from(DEFAULT_DB_PATH),
        ),
        db_key: layered(
            &args.db_key,
            args.db_key.is_empty(),
            &file.db_key,
            || args.db_key.clone(),
        ),
    }
}
// ── CLI ─────────────────────────────────────────────────────────────────────── // ── CLI ───────────────────────────────────────────────────────────────────────
@@ -59,37 +187,50 @@ const CURRENT_WIRE_VERSION: u16 = 1; // allow 0 (legacy) and 1 (current)
version version
)] )]
struct Args { struct Args {
/// Optional path to a TOML config file (fields map to CLI flags).
#[arg(long, env = "QUICNPROTOCHAT_CONFIG")]
config: Option<PathBuf>,
/// QUIC listen address (host:port). /// QUIC listen address (host:port).
#[arg(long, default_value = "0.0.0.0:4201", env = "QUICNPROTOCHAT_LISTEN")] #[arg(long, default_value = DEFAULT_LISTEN, env = "QUICNPROTOCHAT_LISTEN")]
listen: String, listen: String,
/// Directory for persisted server data (KeyPackages + delivery queues). /// Directory for persisted server data (KeyPackages + delivery queues).
#[arg(long, default_value = "data", env = "QUICNPROTOCHAT_DATA_DIR")] #[arg(long, default_value = DEFAULT_DATA_DIR, env = "QUICNPROTOCHAT_DATA_DIR")]
data_dir: String, data_dir: String,
/// TLS certificate path (generated automatically if missing). /// TLS certificate path (generated automatically if missing).
#[arg( #[arg(long, default_value = DEFAULT_TLS_CERT, env = "QUICNPROTOCHAT_TLS_CERT")]
long,
default_value = "data/server-cert.der",
env = "QUICNPROTOCHAT_TLS_CERT"
)]
tls_cert: PathBuf, tls_cert: PathBuf,
/// TLS private key path (generated automatically if missing). /// TLS private key path (generated automatically if missing).
#[arg( #[arg(long, default_value = DEFAULT_TLS_KEY, env = "QUICNPROTOCHAT_TLS_KEY")]
long,
default_value = "data/server-key.der",
env = "QUICNPROTOCHAT_TLS_KEY"
)]
tls_key: PathBuf, tls_key: PathBuf,
/// Required bearer token for auth.version=1 requests. If unset, any non-empty token is accepted.
#[arg(long, env = "QUICNPROTOCHAT_AUTH_TOKEN")]
auth_token: Option<String>,
/// Storage backend: "file" (bincode) or "sql" (SQLCipher-encrypted).
#[arg(long, default_value = DEFAULT_STORE_BACKEND, env = "QUICNPROTOCHAT_STORE_BACKEND")]
store_backend: String,
/// Path to the SQLCipher database file (only used when --store-backend=sql).
#[arg(long, default_value = DEFAULT_DB_PATH, env = "QUICNPROTOCHAT_DB_PATH")]
db_path: PathBuf,
/// SQLCipher encryption key. Empty string disables encryption.
#[arg(long, default_value = "", env = "QUICNPROTOCHAT_DB_KEY")]
db_key: String,
} }
// ── Node service implementation ───────────────────────────────────────────── // ── Node service implementation ─────────────────────────────────────────────
/// Cap'n Proto RPC server implementation for `NodeService` (Auth + Delivery). /// Cap'n Proto RPC server implementation for `NodeService` (Auth + Delivery).
struct NodeServiceImpl { struct NodeServiceImpl {
store: Arc<FileBackedStore>, store: Arc<dyn Store>,
waiters: Arc<DashMap<Vec<u8>, Arc<Notify>>>, waiters: Arc<DashMap<Vec<u8>, Arc<Notify>>>,
auth_cfg: Arc<AuthConfig>,
} }
impl NodeServiceImpl { impl NodeServiceImpl {
@@ -114,6 +255,9 @@ impl node_service::Server for NodeServiceImpl {
let (identity_key, package) = match params { let (identity_key, package) = match params {
Ok(p) => { Ok(p) => {
if let Err(e) = validate_auth(&self.auth_cfg, p.get_auth()) {
return Promise::err(e);
}
let ik = match p.get_identity_key() { let ik = match p.get_identity_key() {
Ok(v) => v.to_vec(), Ok(v) => v.to_vec(),
Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))), Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))),
@@ -177,6 +321,14 @@ impl node_service::Server for NodeServiceImpl {
}, },
Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))), Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))),
}; };
if let Err(e) = params
.get()
.ok()
.map(|p| validate_auth(&self.auth_cfg, p.get_auth()))
.transpose()
{
return Promise::err(e);
}
if identity_key.len() != 32 { if identity_key.len() != 32 {
return Promise::err(capnp::Error::failed(format!( return Promise::err(capnp::Error::failed(format!(
@@ -234,6 +386,9 @@ impl node_service::Server for NodeServiceImpl {
}; };
let channel_id = p.get_channel_id().unwrap_or_default().to_vec(); let channel_id = p.get_channel_id().unwrap_or_default().to_vec();
let version = p.get_version(); let version = p.get_version();
if let Err(e) = validate_auth(&self.auth_cfg, p.get_auth()) {
return Promise::err(e);
}
if recipient_key.len() != 32 { if recipient_key.len() != 32 {
return Promise::err(capnp::Error::failed(format!( return Promise::err(capnp::Error::failed(format!(
@@ -252,9 +407,9 @@ impl node_service::Server for NodeServiceImpl {
MAX_PAYLOAD_BYTES MAX_PAYLOAD_BYTES
))); )));
} }
if version != 0 && version != CURRENT_WIRE_VERSION { if version != CURRENT_WIRE_VERSION {
return Promise::err(capnp::Error::failed(format!( return Promise::err(capnp::Error::failed(format!(
"unsupported wire version {} (expected 0 or {CURRENT_WIRE_VERSION})", "unsupported wire version {} (expected {CURRENT_WIRE_VERSION})",
version version
))); )));
} }
@@ -300,7 +455,15 @@ impl node_service::Server for NodeServiceImpl {
.get() .get()
.ok() .ok()
.map(|p| p.get_version()) .map(|p| p.get_version())
.unwrap_or(0); .unwrap_or(CURRENT_WIRE_VERSION);
if let Err(e) = params
.get()
.ok()
.map(|p| validate_auth(&self.auth_cfg, p.get_auth()))
.transpose()
{
return Promise::err(e);
}
if recipient_key.len() != 32 { if recipient_key.len() != 32 {
return Promise::err(capnp::Error::failed(format!( return Promise::err(capnp::Error::failed(format!(
@@ -308,9 +471,9 @@ impl node_service::Server for NodeServiceImpl {
recipient_key.len() recipient_key.len()
))); )));
} }
if version != 0 && version != CURRENT_WIRE_VERSION { if version != CURRENT_WIRE_VERSION {
return Promise::err(capnp::Error::failed(format!( return Promise::err(capnp::Error::failed(format!(
"unsupported wire version {} (expected 0 or {CURRENT_WIRE_VERSION})", "unsupported wire version {} (expected {CURRENT_WIRE_VERSION})",
version version
))); )));
} }
@@ -355,6 +518,9 @@ impl node_service::Server for NodeServiceImpl {
let channel_id = p.get_channel_id().unwrap_or_default().to_vec(); let channel_id = p.get_channel_id().unwrap_or_default().to_vec();
let version = p.get_version(); let version = p.get_version();
let timeout_ms = p.get_timeout_ms(); let timeout_ms = p.get_timeout_ms();
if let Err(e) = validate_auth(&self.auth_cfg, p.get_auth()) {
return Promise::err(e);
}
if recipient_key.len() != 32 { if recipient_key.len() != 32 {
return Promise::err(capnp::Error::failed(format!( return Promise::err(capnp::Error::failed(format!(
@@ -362,9 +528,9 @@ impl node_service::Server for NodeServiceImpl {
recipient_key.len() recipient_key.len()
))); )));
} }
if version != 0 && version != CURRENT_WIRE_VERSION { if version != CURRENT_WIRE_VERSION {
return Promise::err(capnp::Error::failed(format!( return Promise::err(capnp::Error::failed(format!(
"unsupported wire version {} (expected 0 or {CURRENT_WIRE_VERSION})", "unsupported wire version {} (expected {CURRENT_WIRE_VERSION})",
version version
))); )));
} }
@@ -403,6 +569,103 @@ impl node_service::Server for NodeServiceImpl {
results.get().set_status("ok"); results.get().set_status("ok");
Promise::ok(()) Promise::ok(())
} }
/// Store a hybrid (X25519 + ML-KEM-768) public key for an identity.
///
/// Reads `identityKey` and `hybridPublicKey` from the request, rejects an
/// identity key that is not exactly 32 bytes or a hybrid key that is empty,
/// and hands the key to the configured store. The hybrid key's contents are
/// not otherwise inspected here, and the results struct is left untouched.
///
/// NOTE(review): the other RPC handlers visible in this file call
/// `validate_auth` before doing any work; this one does not. Confirm whether
/// that is intentional and whether the request schema carries an `auth` field.
fn upload_hybrid_key(
    &mut self,
    params: node_service::UploadHybridKeyParams,
    _results: node_service::UploadHybridKeyResults,
) -> Promise<(), capnp::Error> {
    let p = match params.get() {
        Ok(p) => p,
        Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))),
    };
    let identity_key = match p.get_identity_key() {
        Ok(v) => v.to_vec(),
        Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))),
    };
    let hybrid_pk = match p.get_hybrid_public_key() {
        Ok(v) => v.to_vec(),
        Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))),
    };
    // Input validation: fixed-size identity key, non-empty hybrid key.
    if identity_key.len() != 32 {
        return Promise::err(capnp::Error::failed(format!(
            "identityKey must be exactly 32 bytes, got {}",
            identity_key.len()
        )));
    }
    if hybrid_pk.is_empty() {
        return Promise::err(capnp::Error::failed(
            "hybridPublicKey must not be empty".to_string(),
        ));
    }
    // Storage failures are converted to capnp errors by `storage_err`.
    if let Err(e) = self
        .store
        .upload_hybrid_key(&identity_key, hybrid_pk)
        .map_err(storage_err)
    {
        return Promise::err(e);
    }
    // Only the first four bytes of the identity are logged.
    tracing::debug!(
        identity = %fmt_hex(&identity_key[..4]),
        "hybrid public key uploaded"
    );
    Promise::ok(())
}
/// Fetch a peer's hybrid public key.
///
/// Rejects an `identityKey` that is not exactly 32 bytes, then looks the key
/// up in the configured store. A missing key is not an error: the result's
/// `hybridPublicKey` is set to an empty byte string in that case, so callers
/// must treat an empty value as "no key uploaded".
///
/// NOTE(review): the other RPC handlers visible in this file call
/// `validate_auth` before doing any work; this one does not. Confirm whether
/// that is intentional and whether the request schema carries an `auth` field.
fn fetch_hybrid_key(
    &mut self,
    params: node_service::FetchHybridKeyParams,
    mut results: node_service::FetchHybridKeyResults,
) -> Promise<(), capnp::Error> {
    let identity_key = match params.get() {
        Ok(p) => match p.get_identity_key() {
            Ok(v) => v.to_vec(),
            Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))),
        },
        Err(e) => return Promise::err(capnp::Error::failed(format!("{e}"))),
    };
    if identity_key.len() != 32 {
        return Promise::err(capnp::Error::failed(format!(
            "identityKey must be exactly 32 bytes, got {}",
            identity_key.len()
        )));
    }
    // Storage failures are converted to capnp errors by `storage_err`.
    let hybrid_pk = match self
        .store
        .fetch_hybrid_key(&identity_key)
        .map_err(storage_err)
    {
        Ok(p) => p,
        Err(e) => return Promise::err(e),
    };
    match hybrid_pk {
        Some(pk) => {
            tracing::debug!(
                identity = %fmt_hex(&identity_key[..4]),
                "hybrid key fetched"
            );
            results.get().set_hybrid_public_key(&pk);
        }
        None => {
            tracing::debug!(
                identity = %fmt_hex(&identity_key[..4]),
                "no hybrid key for identity"
            );
            // Empty bytes signal "not found" to the caller.
            results.get().set_hybrid_public_key(&[]);
        }
    }
    Promise::ok(())
}
} }
fn fill_payloads_wait(results: &mut node_service::FetchWaitResults, messages: Vec<Vec<u8>>) { fn fill_payloads_wait(results: &mut node_service::FetchWaitResults, messages: Vec<Vec<u8>>) {
@@ -416,6 +679,42 @@ fn storage_err(err: StorageError) -> capnp::Error {
capnp::Error::failed(format!("{err}")) capnp::Error::failed(format!("{err}"))
} }
/// Check the `auth` struct attached to an RPC request.
///
/// A request is accepted when all of the following hold:
/// - the `auth` field itself could be read,
/// - `auth.version` is exactly 1,
/// - `auth.accessToken` is non-empty, and
/// - if `cfg.required_token` is set, the presented token equals it.
///
/// When no required token is configured, any non-empty token is accepted.
/// The comparison against the required token runs in time independent of
/// where the first mismatching byte is, so response timing does not help an
/// attacker guess the token prefix by prefix.
///
/// # Errors
///
/// Returns a `capnp::Error` describing the first failed check.
fn validate_auth(
    cfg: &AuthConfig,
    auth: Result<auth::Reader<'_>, capnp::Error>,
) -> Result<(), capnp::Error> {
    /// Compare two byte strings without short-circuiting on the first
    /// differing byte. Only the lengths may be distinguished by timing.
    fn tokens_match(presented: &[u8], expected: &[u8]) -> bool {
        if presented.len() != expected.len() {
            return false;
        }
        // OR together the XOR of every byte pair; the result is zero only if
        // all bytes were equal, and every byte is always visited.
        presented
            .iter()
            .zip(expected.iter())
            .fold(0u8, |acc, (a, b)| acc | (a ^ b))
            == 0
    }

    let auth = auth?;

    let version = auth.get_version();
    if version != 1 {
        return Err(capnp::Error::failed(format!(
            "unsupported auth version {} (expected 1)",
            version
        )));
    }

    let token = auth
        .get_access_token()
        .map_err(|e| capnp::Error::failed(format!("auth.accessToken: {e}")))?
        .to_vec();

    if token.is_empty() {
        return Err(capnp::Error::failed(
            "auth.version=1 requires non-empty accessToken".to_string(),
        ));
    }

    if let Some(expected) = &cfg.required_token {
        if !tokens_match(&token, expected) {
            return Err(capnp::Error::failed("invalid accessToken".to_string()));
        }
    }

    // Early-development stance: no legacy/no-auth path to avoid maintaining divergent behavior.
    Ok(())
}
// ── Entry point ─────────────────────────────────────────────────────────────── // ── Entry point ───────────────────────────────────────────────────────────────
#[tokio::main] #[tokio::main]
@@ -428,20 +727,42 @@ async fn main() -> anyhow::Result<()> {
.init(); .init();
let args = Args::parse(); let args = Args::parse();
let file_cfg = load_config(args.config.as_deref())?;
let effective = merge_config(&args, &file_cfg);
let listen: SocketAddr = args.listen.parse().context("--listen must be host:port")?; let listen: SocketAddr = effective
.listen
.parse()
.context("--listen must be host:port")?;
let server_config = build_server_config(&args.tls_cert, &args.tls_key) let server_config = build_server_config(&effective.tls_cert, &effective.tls_key)
.context("failed to build TLS/QUIC server config")?; .context("failed to build TLS/QUIC server config")?;
// Shared storage — persisted to disk for restart safety. // Shared storage — persisted to disk for restart safety.
let store = Arc::new(FileBackedStore::open(&args.data_dir)?); let store: Arc<dyn Store> = match effective.store_backend.as_str() {
"sql" => {
if let Some(parent) = effective.db_path.parent() {
std::fs::create_dir_all(parent).context("create db dir")?;
}
tracing::info!(
path = %effective.db_path.display(),
encrypted = !effective.db_key.is_empty(),
"opening SQLCipher store"
);
Arc::new(SqlStore::open(&effective.db_path, &effective.db_key)?)
}
"file" | _ => {
tracing::info!(dir = %effective.data_dir, "opening file-backed store");
Arc::new(FileBackedStore::open(&effective.data_dir)?)
}
};
let auth_cfg = Arc::new(AuthConfig::new(effective.auth_token.clone()));
let waiters: Arc<DashMap<Vec<u8>, Arc<Notify>>> = Arc::new(DashMap::new()); let waiters: Arc<DashMap<Vec<u8>, Arc<Notify>>> = Arc::new(DashMap::new());
let endpoint = Endpoint::server(server_config, listen)?; let endpoint = Endpoint::server(server_config, listen)?;
tracing::info!( tracing::info!(
addr = %args.listen, addr = %effective.listen,
"accepting QUIC connections" "accepting QUIC connections"
); );
@@ -466,8 +787,9 @@ async fn main() -> anyhow::Result<()> {
let store = Arc::clone(&store); let store = Arc::clone(&store);
let waiters = Arc::clone(&waiters); let waiters = Arc::clone(&waiters);
let auth_cfg = Arc::clone(&auth_cfg);
tokio::task::spawn_local(async move { tokio::task::spawn_local(async move {
if let Err(e) = handle_node_connection(connecting, store, waiters).await { if let Err(e) = handle_node_connection(connecting, store, waiters, auth_cfg).await {
tracing::warn!(error = %e, "connection error"); tracing::warn!(error = %e, "connection error");
} }
}); });
@@ -483,8 +805,9 @@ async fn main() -> anyhow::Result<()> {
/// Handle one NodeService connection. /// Handle one NodeService connection.
async fn handle_node_connection( async fn handle_node_connection(
connecting: quinn::Connecting, connecting: quinn::Connecting,
store: Arc<FileBackedStore>, store: Arc<dyn Store>,
waiters: Arc<DashMap<Vec<u8>, Arc<Notify>>>, waiters: Arc<DashMap<Vec<u8>, Arc<Notify>>>,
auth_cfg: Arc<AuthConfig>,
) -> Result<(), anyhow::Error> { ) -> Result<(), anyhow::Error> {
let connection = connecting.await?; let connection = connecting.await?;
@@ -498,7 +821,11 @@ async fn handle_node_connection(
let network = twoparty::VatNetwork::new(reader, writer, Side::Server, Default::default()); let network = twoparty::VatNetwork::new(reader, writer, Side::Server, Default::default());
let service: node_service::Client = capnp_rpc::new_client(NodeServiceImpl { store, waiters }); let service: node_service::Client = capnp_rpc::new_client(NodeServiceImpl {
store,
waiters,
auth_cfg,
});
RpcSystem::new(Box::new(network), Some(service.client)) RpcSystem::new(Box::new(network), Some(service.client))
.await .await

View File

@@ -0,0 +1,315 @@
//! SQLCipher-backed persistent storage.
//!
//! Uses `rusqlite` with `bundled-sqlcipher` for encrypted-at-rest storage.
//! Implements the same [`Store`] trait as [`FileBackedStore`] but with proper
//! ACID transactions and indexed queries.
use std::path::Path;
use std::sync::Mutex;
use rusqlite::{params, Connection};
use crate::storage::{StorageError, Store};
/// SQLCipher-encrypted storage backend.
///
/// All data is stored in a single SQLite database. When a non-empty key is
/// supplied to [`SqlStore::open`], it is applied via `PRAGMA key` at open
/// time; with an empty key no `PRAGMA key` is issued.
pub struct SqlStore {
    // The single database connection; every store operation locks this mutex
    // for its whole duration, so operations on one `SqlStore` are serialized.
    conn: Mutex<Connection>,
}
impl SqlStore {
    /// Open the database at `path`, creating it and its schema if needed.
    ///
    /// A non-empty `key` is applied with `PRAGMA key` before anything else
    /// touches the database; an empty `key` skips that step (handy for
    /// tests). The connection is then switched to WAL journaling with
    /// `synchronous = NORMAL` and foreign keys enabled, and the schema
    /// migration runs.
    ///
    /// Every failure is reported as [`StorageError::Db`].
    pub fn open(path: impl AsRef<Path>, key: &str) -> Result<Self, StorageError> {
        let db = match Connection::open(path) {
            Ok(db) => db,
            Err(e) => return Err(StorageError::Db(e.to_string())),
        };

        let wants_encryption = !key.is_empty();
        if wants_encryption {
            if let Err(e) = db.pragma_update(None, "key", key) {
                return Err(StorageError::Db(format!("PRAGMA key failed: {e}")));
            }
        }

        // Performance pragmas — safe for a single-writer server.
        if let Err(e) = db.execute_batch(
            "PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
PRAGMA foreign_keys = ON;",
        ) {
            return Err(StorageError::Db(e.to_string()));
        }

        let opened = Self {
            conn: Mutex::new(db),
        };
        opened.migrate()?;
        Ok(opened)
    }

    /// Ensure all tables and indexes exist.
    ///
    /// Every statement uses `IF NOT EXISTS`, so running this against an
    /// already-initialised database is a no-op.
    fn migrate(&self) -> Result<(), StorageError> {
        let db = self.conn.lock().unwrap();
        db.execute_batch(
            "CREATE TABLE IF NOT EXISTS key_packages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
identity_key BLOB NOT NULL,
package_data BLOB NOT NULL,
created_at INTEGER DEFAULT (strftime('%s','now'))
);
CREATE TABLE IF NOT EXISTS deliveries (
id INTEGER PRIMARY KEY AUTOINCREMENT,
recipient_key BLOB NOT NULL,
channel_id BLOB NOT NULL DEFAULT X'',
payload BLOB NOT NULL,
created_at INTEGER DEFAULT (strftime('%s','now'))
);
CREATE TABLE IF NOT EXISTS hybrid_keys (
identity_key BLOB PRIMARY KEY,
hybrid_public_key BLOB NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_kp_identity
ON key_packages(identity_key);
CREATE INDEX IF NOT EXISTS idx_del_recipient_channel
ON deliveries(recipient_key, channel_id);",
        )
        .map_err(|e| StorageError::Db(e.to_string()))
    }
}
impl Store for SqlStore {
    /// Append one KeyPackage for `identity_key`. Multiple packages may be
    /// stored per identity; they are handed out oldest-first.
    fn upload_key_package(
        &self,
        identity_key: &[u8],
        package: Vec<u8>,
    ) -> Result<(), StorageError> {
        let conn = self.conn.lock().unwrap();
        conn.execute(
            "INSERT INTO key_packages (identity_key, package_data) VALUES (?1, ?2)",
            params![identity_key, package],
        )
        .map_err(|e| StorageError::Db(e.to_string()))?;
        Ok(())
    }

    /// Remove and return the oldest KeyPackage for `identity_key`, or
    /// `Ok(None)` when none is stored.
    fn fetch_key_package(&self, identity_key: &[u8]) -> Result<Option<Vec<u8>>, StorageError> {
        let conn = self.conn.lock().unwrap();

        // Find the oldest KeyPackage (FIFO), then delete it. The two
        // statements are serialized against other operations on this store by
        // the connection mutex held for the whole call; they are not wrapped
        // in a SQL transaction.
        let mut stmt = conn
            .prepare(
                "SELECT id, package_data FROM key_packages
                 WHERE identity_key = ?1
                 ORDER BY id ASC
                 LIMIT 1",
            )
            .map_err(|e| StorageError::Db(e.to_string()))?;

        let row = stmt
            .query_row(params![identity_key], |row| {
                Ok((row.get::<_, i64>(0)?, row.get::<_, Vec<u8>>(1)?))
            })
            .optional()
            .map_err(|e| StorageError::Db(e.to_string()))?;

        match row {
            Some((id, package)) => {
                conn.execute("DELETE FROM key_packages WHERE id = ?1", params![id])
                    .map_err(|e| StorageError::Db(e.to_string()))?;
                Ok(Some(package))
            }
            None => Ok(None),
        }
    }

    /// Append `payload` to the queue identified by (`recipient_key`,
    /// `channel_id`).
    fn enqueue(
        &self,
        recipient_key: &[u8],
        channel_id: &[u8],
        payload: Vec<u8>,
    ) -> Result<(), StorageError> {
        let conn = self.conn.lock().unwrap();
        conn.execute(
            "INSERT INTO deliveries (recipient_key, channel_id, payload) VALUES (?1, ?2, ?3)",
            params![recipient_key, channel_id, payload],
        )
        .map_err(|e| StorageError::Db(e.to_string()))?;
        Ok(())
    }

    /// Drain the queue identified by (`recipient_key`, `channel_id`),
    /// returning its payloads in insertion order. An empty queue yields an
    /// empty vector. If the delete step fails, an error is returned and the
    /// rows stay queued.
    fn fetch(
        &self,
        recipient_key: &[u8],
        channel_id: &[u8],
    ) -> Result<Vec<Vec<u8>>, StorageError> {
        let conn = self.conn.lock().unwrap();

        let mut stmt = conn
            .prepare(
                "SELECT id, payload FROM deliveries
                 WHERE recipient_key = ?1 AND channel_id = ?2
                 ORDER BY id ASC",
            )
            .map_err(|e| StorageError::Db(e.to_string()))?;

        let rows: Vec<(i64, Vec<u8>)> = stmt
            .query_map(params![recipient_key, channel_id], |row| {
                Ok((row.get(0)?, row.get(1)?))
            })
            .map_err(|e| StorageError::Db(e.to_string()))?
            .collect::<Result<Vec<_>, _>>()
            .map_err(|e| StorageError::Db(e.to_string()))?;

        // Rows are sorted by id, so the last one carries the highest id we
        // read. Deleting "this queue, id <= highest" removes exactly the rows
        // returned: AUTOINCREMENT ids only grow, so anything enqueued later has
        // a larger id and is left alone. Unlike an `IN (?, ?, …)` list this
        // uses a fixed three bound parameters, so a very long queue cannot
        // exceed SQLite's host-parameter limit and become impossible to drain.
        if let Some(highest_id) = rows.last().map(|(id, _)| *id) {
            conn.execute(
                "DELETE FROM deliveries
                 WHERE recipient_key = ?1 AND channel_id = ?2 AND id <= ?3",
                params![recipient_key, channel_id, highest_id],
            )
            .map_err(|e| StorageError::Db(e.to_string()))?;
        }

        Ok(rows.into_iter().map(|(_, payload)| payload).collect())
    }

    /// Store the hybrid public key for `identity_key`, replacing any key
    /// previously stored for the same identity.
    fn upload_hybrid_key(
        &self,
        identity_key: &[u8],
        hybrid_pk: Vec<u8>,
    ) -> Result<(), StorageError> {
        let conn = self.conn.lock().unwrap();
        conn.execute(
            "INSERT OR REPLACE INTO hybrid_keys (identity_key, hybrid_public_key) VALUES (?1, ?2)",
            params![identity_key, hybrid_pk],
        )
        .map_err(|e| StorageError::Db(e.to_string()))?;
        Ok(())
    }

    /// Look up the hybrid public key for `identity_key` without removing it;
    /// `Ok(None)` when no key is stored.
    fn fetch_hybrid_key(&self, identity_key: &[u8]) -> Result<Option<Vec<u8>>, StorageError> {
        let conn = self.conn.lock().unwrap();
        let mut stmt = conn
            .prepare("SELECT hybrid_public_key FROM hybrid_keys WHERE identity_key = ?1")
            .map_err(|e| StorageError::Db(e.to_string()))?;

        stmt.query_row(params![identity_key], |row| row.get(0))
            .optional()
            .map_err(|e| StorageError::Db(e.to_string()))
    }
}
/// Convenience extension for `rusqlite::OptionalExtension`.
trait OptionalExt<T> {
fn optional(self) -> Result<Option<T>, rusqlite::Error>;
}
impl<T> OptionalExt<T> for Result<T, rusqlite::Error> {
fn optional(self) -> Result<Option<T>, rusqlite::Error> {
match self {
Ok(v) => Ok(Some(v)),
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
Err(e) => Err(e),
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh unencrypted in-memory store for a single test.
    fn open_in_memory() -> SqlStore {
        SqlStore::open(":memory:", "").unwrap()
    }

    /// KeyPackages come back oldest-first and each is handed out only once.
    #[test]
    fn key_package_fifo() {
        let db = open_in_memory();

        // Copy the label into a zero-filled 32-byte buffer to match real usage.
        let label = b"alice_identity_key__32bytes_long";
        let mut identity = [0u8; 32];
        identity[..label.len()].copy_from_slice(label);

        for package in [b"kp1".to_vec(), b"kp2".to_vec()] {
            db.upload_key_package(&identity, package).unwrap();
        }

        let first = db.fetch_key_package(&identity).unwrap();
        assert_eq!(first, Some(b"kp1".to_vec()));

        let second = db.fetch_key_package(&identity).unwrap();
        assert_eq!(second, Some(b"kp2".to_vec()));

        let third = db.fetch_key_package(&identity).unwrap();
        assert_eq!(third, None);
    }

    /// Enqueued payloads are returned in order and the queue is then empty.
    #[test]
    fn delivery_round_trip() {
        let db = open_in_memory();
        let recipient = [1u8; 32];
        let channel = b"channel-1";

        db.enqueue(&recipient, channel, b"msg1".to_vec()).unwrap();
        db.enqueue(&recipient, channel, b"msg2".to_vec()).unwrap();

        let delivered = db.fetch(&recipient, channel).unwrap();
        assert_eq!(delivered, vec![b"msg1".to_vec(), b"msg2".to_vec()]);

        // Queue is drained.
        let leftover = db.fetch(&recipient, channel).unwrap();
        assert!(leftover.is_empty());
    }

    /// A stored hybrid key can be read back unchanged.
    #[test]
    fn hybrid_key_round_trip() {
        let db = open_in_memory();
        let identity = [2u8; 32];
        let public_key = b"hybrid_public_key_data".to_vec();

        db.upload_hybrid_key(&identity, public_key.clone()).unwrap();

        let stored = db.fetch_hybrid_key(&identity).unwrap();
        assert_eq!(stored, Some(public_key));
    }

    /// Uploading twice for one identity keeps only the latest key.
    #[test]
    fn hybrid_key_upsert() {
        let db = open_in_memory();
        let identity = [3u8; 32];

        db.upload_hybrid_key(&identity, b"v1".to_vec()).unwrap();
        db.upload_hybrid_key(&identity, b"v2".to_vec()).unwrap();

        let stored = db.fetch_hybrid_key(&identity).unwrap();
        assert_eq!(stored, Some(b"v2".to_vec()));
    }

    /// Messages for the same recipient on different channels do not mix.
    #[test]
    fn separate_channels_isolated() {
        let db = open_in_memory();
        let recipient = [4u8; 32];

        db.enqueue(&recipient, b"ch-a", b"a1".to_vec()).unwrap();
        db.enqueue(&recipient, b"ch-b", b"b1".to_vec()).unwrap();

        let from_a = db.fetch(&recipient, b"ch-a").unwrap();
        assert_eq!(from_a, vec![b"a1".to_vec()]);

        let from_b = db.fetch(&recipient, b"ch-b").unwrap();
        assert_eq!(from_b, vec![b"b1".to_vec()]);
    }
}

View File

@@ -1,7 +1,7 @@
use std::{ use std::{
collections::{HashMap, VecDeque}, collections::{HashMap, VecDeque},
fs, fs,
hash::{Hash, Hasher}, hash::Hash,
path::{Path, PathBuf}, path::{Path, PathBuf},
sync::Mutex, sync::Mutex,
}; };
@@ -14,13 +14,46 @@ pub enum StorageError {
Io(String), Io(String),
#[error("serialization error")] #[error("serialization error")]
Serde, Serde,
#[error("database error: {0}")]
Db(String),
} }
#[derive(Serialize, Deserialize, Default)] // ── Store trait ──────────────────────────────────────────────────────────────
struct QueueMapV1 {
map: HashMap<Vec<u8>, VecDeque<Vec<u8>>>, /// Abstraction over storage backends (file-backed, SQLCipher, etc.).
pub trait Store: Send + Sync {
fn upload_key_package(
&self,
identity_key: &[u8],
package: Vec<u8>,
) -> Result<(), StorageError>;
fn fetch_key_package(&self, identity_key: &[u8]) -> Result<Option<Vec<u8>>, StorageError>;
fn enqueue(
&self,
recipient_key: &[u8],
channel_id: &[u8],
payload: Vec<u8>,
) -> Result<(), StorageError>;
fn fetch(
&self,
recipient_key: &[u8],
channel_id: &[u8],
) -> Result<Vec<Vec<u8>>, StorageError>;
fn upload_hybrid_key(
&self,
identity_key: &[u8],
hybrid_pk: Vec<u8>,
) -> Result<(), StorageError>;
fn fetch_hybrid_key(&self, identity_key: &[u8]) -> Result<Option<Vec<u8>>, StorageError>;
} }
// ── ChannelKey ───────────────────────────────────────────────────────────────
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug)] #[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug)]
pub struct ChannelKey { pub struct ChannelKey {
pub channel_id: Vec<u8>, pub channel_id: Vec<u8>,
@@ -28,12 +61,19 @@ pub struct ChannelKey {
} }
impl Hash for ChannelKey { impl Hash for ChannelKey {
fn hash<H: Hasher>(&self, state: &mut H) { fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.channel_id.hash(state); self.channel_id.hash(state);
self.recipient_key.hash(state); self.recipient_key.hash(state);
} }
} }
// ── FileBackedStore ──────────────────────────────────────────────────────────
#[derive(Serialize, Deserialize, Default)]
struct QueueMapV1 {
map: HashMap<Vec<u8>, VecDeque<Vec<u8>>>,
}
#[derive(Serialize, Deserialize, Default)] #[derive(Serialize, Deserialize, Default)]
struct QueueMapV2 { struct QueueMapV2 {
map: HashMap<ChannelKey, VecDeque<Vec<u8>>>, map: HashMap<ChannelKey, VecDeque<Vec<u8>>>,
@@ -45,8 +85,10 @@ struct QueueMapV2 {
pub struct FileBackedStore { pub struct FileBackedStore {
kp_path: PathBuf, kp_path: PathBuf,
ds_path: PathBuf, ds_path: PathBuf,
hk_path: PathBuf,
key_packages: Mutex<HashMap<Vec<u8>, VecDeque<Vec<u8>>>>, key_packages: Mutex<HashMap<Vec<u8>, VecDeque<Vec<u8>>>>,
deliveries: Mutex<HashMap<ChannelKey, VecDeque<Vec<u8>>>>, deliveries: Mutex<HashMap<ChannelKey, VecDeque<Vec<u8>>>>,
hybrid_keys: Mutex<HashMap<Vec<u8>, Vec<u8>>>,
} }
impl FileBackedStore { impl FileBackedStore {
@@ -57,73 +99,23 @@ impl FileBackedStore {
} }
let kp_path = dir.join("keypackages.bin"); let kp_path = dir.join("keypackages.bin");
let ds_path = dir.join("deliveries.bin"); let ds_path = dir.join("deliveries.bin");
let hk_path = dir.join("hybridkeys.bin");
let key_packages = Mutex::new(Self::load_map(&kp_path)?); let key_packages = Mutex::new(Self::load_kp_map(&kp_path)?);
let deliveries = Mutex::new(Self::load_map(&ds_path)?); let deliveries = Mutex::new(Self::load_delivery_map(&ds_path)?);
let hybrid_keys = Mutex::new(Self::load_hybrid_keys(&hk_path)?);
Ok(Self { Ok(Self {
kp_path, kp_path,
ds_path, ds_path,
hk_path,
key_packages, key_packages,
deliveries, deliveries,
hybrid_keys,
}) })
} }
pub fn upload_key_package( fn load_kp_map(path: &Path) -> Result<HashMap<Vec<u8>, VecDeque<Vec<u8>>>, StorageError> {
&self,
identity_key: &[u8],
package: Vec<u8>,
) -> Result<(), StorageError> {
let mut map = self.key_packages.lock().unwrap();
map.entry(identity_key.to_vec())
.or_default()
.push_back(package);
self.flush_map(&self.kp_path, &*map)
}
pub fn fetch_key_package(&self, identity_key: &[u8]) -> Result<Option<Vec<u8>>, StorageError> {
let mut map = self.key_packages.lock().unwrap();
let package = map.get_mut(identity_key).and_then(|q| q.pop_front());
self.flush_map(&self.kp_path, &*map)?;
Ok(package)
}
pub fn enqueue(
&self,
recipient_key: &[u8],
channel_id: &[u8],
payload: Vec<u8>,
) -> Result<(), StorageError> {
let mut map = self.deliveries.lock().unwrap();
let key = ChannelKey {
channel_id: channel_id.to_vec(),
recipient_key: recipient_key.to_vec(),
};
map.entry(key)
.or_default()
.push_back(payload);
self.flush_map(&self.ds_path, &*map)
}
pub fn fetch(
&self,
recipient_key: &[u8],
channel_id: &[u8],
) -> Result<Vec<Vec<u8>>, StorageError> {
let mut map = self.deliveries.lock().unwrap();
let key = ChannelKey {
channel_id: channel_id.to_vec(),
recipient_key: recipient_key.to_vec(),
};
let messages = map
.get_mut(&key)
.map(|q| q.drain(..).collect())
.unwrap_or_default();
self.flush_map(&self.ds_path, &*map)?;
Ok(messages)
}
fn load_map(path: &Path) -> Result<HashMap<ChannelKey, VecDeque<Vec<u8>>>, StorageError> {
if !path.exists() { if !path.exists() {
return Ok(HashMap::new()); return Ok(HashMap::new());
} }
@@ -131,7 +123,32 @@ impl FileBackedStore {
if bytes.is_empty() { if bytes.is_empty() {
return Ok(HashMap::new()); return Ok(HashMap::new());
} }
// Try v2 format (channel-aware). Fallback to legacy v1. let map: QueueMapV1 = bincode::deserialize(&bytes).map_err(|_| StorageError::Serde)?;
Ok(map.map)
}
/// Persists the key-package map to `path` in the `QueueMapV1` bincode layout.
///
/// The bytes are first written to a sibling file with a `tmp` extension and
/// then renamed over `path`. A write error or a process crash part-way through
/// therefore leaves the previous file untouched instead of a truncated one.
///
/// # Errors
/// Returns `StorageError::Serde` if serialization fails, and `StorageError::Io`
/// if the parent directory cannot be created or the file cannot be written or
/// renamed.
fn flush_kp_map(
    &self,
    path: &Path,
    map: &HashMap<Vec<u8>, VecDeque<Vec<u8>>>,
) -> Result<(), StorageError> {
    let payload = QueueMapV1 { map: map.clone() };
    let bytes = bincode::serialize(&payload).map_err(|_| StorageError::Serde)?;
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent).map_err(|e| StorageError::Io(e.to_string()))?;
    }
    // `fs::write` truncates its target before writing, so write beside the
    // real file and swap it in only once the new contents are complete.
    let tmp_path = path.with_extension("tmp");
    fs::write(&tmp_path, bytes).map_err(|e| StorageError::Io(e.to_string()))?;
    fs::rename(&tmp_path, path).map_err(|e| StorageError::Io(e.to_string()))
}
fn load_delivery_map(path: &Path) -> Result<HashMap<ChannelKey, VecDeque<Vec<u8>>>, StorageError> {
if !path.exists() {
return Ok(HashMap::new());
}
let bytes = fs::read(path).map_err(|e| StorageError::Io(e.to_string()))?;
if bytes.is_empty() {
return Ok(HashMap::new());
}
// Try v2 format (channel-aware). Fallback to legacy v1 for upgrade.
if let Ok(map) = bincode::deserialize::<QueueMapV2>(&bytes) { if let Ok(map) = bincode::deserialize::<QueueMapV2>(&bytes) {
return Ok(map.map); return Ok(map.map);
} }
@@ -149,7 +166,7 @@ impl FileBackedStore {
Ok(upgraded) Ok(upgraded)
} }
fn flush_map( fn flush_delivery_map(
&self, &self,
path: &Path, path: &Path,
map: &HashMap<ChannelKey, VecDeque<Vec<u8>>>, map: &HashMap<ChannelKey, VecDeque<Vec<u8>>>,
@@ -161,4 +178,98 @@ impl FileBackedStore {
} }
fs::write(path, bytes).map_err(|e| StorageError::Io(e.to_string())) fs::write(path, bytes).map_err(|e| StorageError::Io(e.to_string()))
} }
/// Loads the identity-key to hybrid-public-key map from `path`.
///
/// A missing or zero-length file yields an empty map. Any other read failure
/// (for example a permission error) is reported rather than being treated as
/// "no data", so a store that merely cannot be read is not later overwritten
/// with an empty one.
///
/// # Errors
/// Returns `StorageError::Io` if the file exists but cannot be read, and
/// `StorageError::Serde` if its contents do not decode.
fn load_hybrid_keys(path: &Path) -> Result<HashMap<Vec<u8>, Vec<u8>>, StorageError> {
    // Read directly instead of probing with `exists()` first: that avoids the
    // check-then-read race and distinguishes "not found" from other errors.
    let bytes = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(HashMap::new()),
        Err(e) => return Err(StorageError::Io(e.to_string())),
    };
    if bytes.is_empty() {
        return Ok(HashMap::new());
    }
    bincode::deserialize(&bytes).map_err(|_| StorageError::Serde)
}
/// Serializes `map` with bincode and persists it to `path`.
///
/// The bytes are first written to a sibling file with a `tmp` extension and
/// then renamed over `path`. A write error or a process crash part-way through
/// therefore leaves the previous file untouched instead of a truncated one.
///
/// # Errors
/// Returns `StorageError::Serde` if serialization fails, and `StorageError::Io`
/// if the parent directory cannot be created or the file cannot be written or
/// renamed.
fn flush_hybrid_keys(
    &self,
    path: &Path,
    map: &HashMap<Vec<u8>, Vec<u8>>,
) -> Result<(), StorageError> {
    let bytes = bincode::serialize(map).map_err(|_| StorageError::Serde)?;
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent).map_err(|e| StorageError::Io(e.to_string()))?;
    }
    // `fs::write` truncates its target before writing, so write beside the
    // real file and swap it in only once the new contents are complete.
    let tmp_path = path.with_extension("tmp");
    fs::write(&tmp_path, bytes).map_err(|e| StorageError::Io(e.to_string()))?;
    fs::rename(&tmp_path, path).map_err(|e| StorageError::Io(e.to_string()))
}
}
impl Store for FileBackedStore {
/// Appends `package` to the back of the queue kept for `identity_key`, then
/// rewrites the key-package file.
///
/// # Panics
/// Panics if the key-package mutex is poisoned.
fn upload_key_package(
    &self,
    identity_key: &[u8],
    package: Vec<u8>,
) -> Result<(), StorageError> {
    let mut packages = self.key_packages.lock().unwrap();
    // Create the per-identity queue on first upload.
    let queue = packages.entry(identity_key.to_vec()).or_default();
    queue.push_back(package);
    self.flush_kp_map(&self.kp_path, &packages)
}
/// Removes and returns the oldest key package queued for `identity_key`, or
/// `None` when the identity is unknown or its queue is empty.
///
/// The key-package file is rewritten only when a package was actually
/// removed; a miss changes nothing in memory, so it performs no disk I/O and
/// cannot fail because of it.
///
/// # Errors
/// Propagates the error from `flush_kp_map` when persisting the updated map
/// fails.
///
/// # Panics
/// Panics if the key-package mutex is poisoned.
fn fetch_key_package(&self, identity_key: &[u8]) -> Result<Option<Vec<u8>>, StorageError> {
    let mut map = self.key_packages.lock().unwrap();
    let package = map.get_mut(identity_key).and_then(|q| q.pop_front());
    // Only a successful pop mutates the map, so only then is the file stale.
    if package.is_some() {
        self.flush_kp_map(&self.kp_path, &*map)?;
    }
    Ok(package)
}
/// Appends `payload` to the delivery queue identified by the
/// `(channel_id, recipient_key)` pair, then rewrites the deliveries file.
///
/// # Panics
/// Panics if the deliveries mutex is poisoned.
fn enqueue(
    &self,
    recipient_key: &[u8],
    channel_id: &[u8],
    payload: Vec<u8>,
) -> Result<(), StorageError> {
    let queue_key = ChannelKey {
        channel_id: channel_id.to_vec(),
        recipient_key: recipient_key.to_vec(),
    };
    let mut queues = self.deliveries.lock().unwrap();
    // Create the queue on first use, then append at the back.
    let queue = queues.entry(queue_key).or_default();
    queue.push_back(payload);
    self.flush_delivery_map(&self.ds_path, &queues)
}
/// Drains and returns every payload queued for the
/// `(channel_id, recipient_key)` pair, oldest first. Returns an empty vector
/// when no queue exists for the pair or the queue is empty.
///
/// The deliveries file is rewritten only when at least one message was
/// drained, so polling an empty queue performs no disk I/O and cannot fail
/// because of it.
///
/// # Errors
/// Propagates the error from `flush_delivery_map` when persisting the updated
/// map fails.
///
/// # Panics
/// Panics if the deliveries mutex is poisoned.
fn fetch(
    &self,
    recipient_key: &[u8],
    channel_id: &[u8],
) -> Result<Vec<Vec<u8>>, StorageError> {
    let mut map = self.deliveries.lock().unwrap();
    let key = ChannelKey {
        channel_id: channel_id.to_vec(),
        recipient_key: recipient_key.to_vec(),
    };
    let messages: Vec<Vec<u8>> = map
        .get_mut(&key)
        .map(|q| q.drain(..).collect())
        .unwrap_or_default();
    // Nothing drained means nothing changed; skip rewriting the whole file.
    if !messages.is_empty() {
        self.flush_delivery_map(&self.ds_path, &*map)?;
    }
    Ok(messages)
}
/// Stores `hybrid_pk` for `identity_key`, replacing any key already stored
/// for that identity, then rewrites the hybrid-key file.
///
/// # Panics
/// Panics if the hybrid-key mutex is poisoned.
fn upload_hybrid_key(
    &self,
    identity_key: &[u8],
    hybrid_pk: Vec<u8>,
) -> Result<(), StorageError> {
    let mut keys = self.hybrid_keys.lock().unwrap();
    let owner = identity_key.to_vec();
    keys.insert(owner, hybrid_pk);
    self.flush_hybrid_keys(&self.hk_path, &keys)
}
/// Returns a copy of the hybrid public key stored for `identity_key`, or
/// `None` if none is stored. The stored entry is left in place.
///
/// # Panics
/// Panics if the hybrid-key mutex is poisoned.
fn fetch_hybrid_key(&self, identity_key: &[u8]) -> Result<Option<Vec<u8>>, StorageError> {
    let keys = self.hybrid_keys.lock().unwrap();
    let found = keys.get(identity_key).map(|pk| pk.to_vec());
    Ok(found)
}
} }

View File

@@ -1,38 +0,0 @@
# 1:1 Channel Design (MVP)
## Goals
- First-class 1:1 channels (DMs) atop NodeService.
- Authz on enqueue/fetch per channel, not just recipient key.
- Privacy: MLS-encrypted payloads; metadata limited to channel ID + participants.
- Retention: 7d message retention; keypackages expire after 24h (configurable later).
- Compatibility: additive schema change with version tagging; N-1 clients can interop if they ignore new fields.
## Schema changes (Cap'n Proto)
- Add `channelId :Data` (UUID/16B) to enqueue/fetch/fetchWait requests.
- Add `version :UInt16` to NodeService messages (reject unknown > current).
- Keep `recipientKey` for routing; server authz uses `(channelId, caller identity)`.
## AuthZ model
- Channel membership: exactly two identities (A,B). Server stores membership map `{channelId -> {a_key, b_key}}`.
- Enqueue allowed if caller identity ∈ channel members; fetch/fetchWait allowed only on the caller's own queue, i.e. `(channelId, caller identity)`.
- Rate limits applied per channel and per identity (50 r/s per IP/identity, 5 MB max payload).
## Storage model
- New table/map: `channels` with `channelId`, `member_keys[2]`, `created_at`.
- Deliveries keyed by `(channelId, recipient_key)`; queues retain per recipient, per channel.
- Messages carry `received_at` timestamp; TTL eviction at fetch time and background sweep.
## Flows
- Create channel: caller provides peer identity; server generates channelId, stores membership, returns channelId.
- Send: client includes channelId + recipientKey; server authz + size/TTL checks; enqueue.
- Receive: fetch/fetchWait drains messages for `(channelId, caller_key)`; applies TTL, returns non-expired.
## Backward compatibility
- Old clients without channelId: server treats channelId=nil as legacy mode (current behavior) in the interim.
- Version field allows rejecting future schema changes cleanly.
## Open items
- Persistence backend: extend FileBackedStore or move to proper DB for channels + TTL metadata.
- API surface: add `createChannel(channelMembers)` RPC or reuse auth service.
- Client UX: map peer identity → channelId discovery; cache channelId in state file.
- Auditing: log channel create, authz failures, send/recv events with redaction.

View File

@@ -1,57 +0,0 @@
# Technology Suggestions for quicnprotochat
## Transport & Networking
- **LibP2P or iroh (from n0)** — Decentralized peer discovery, NAT traversal (hole-punching), and relay fallback. Move beyond client-server to a mesh/hybrid topology where peers can communicate directly when possible.
- **WebTransport (HTTP/3)** — Expose QUIC transport to browsers, enabling a web client without WebSocket degradation.
- **Tor / I2P integration** — Onion-routed transport layer for metadata resistance. MLS protects content, but connection metadata still leaks to the server.
## Storage & Persistence
- **SQLCipher or libsql (Turso)** — Encrypted-at-rest SQLite for durable group state, key stores, and message history.
- **CRDT-based sync (Automerge / Yrs)** — Conflict-free replicated data types for multi-device state synchronization without a central authority.
- **Object storage (S3-compatible)** — For encrypted file/media attachments with server-side ignorance of content.
## Cryptography & Privacy
- **ML-KEM + ML-DSA hybrid** — Hybrid X25519+ML-KEM-768 KEM for MLS init keys. One of the first post-quantum MLS implementations.
- **Private Information Retrieval (PIR)** — Let clients fetch messages/key packages without revealing which recipient they are (SealPIR / SimplePIR).
- **Sealed Sender (Signal-style)** — Encrypt sender identity inside the MLS ciphertext so the server can't see who sent a message to whom.
- **Key Transparency (RFC draft)** — Verifiable log of public keys to detect server-side key substitution attacks.
## Identity & Authentication
- **DID (Decentralized Identifiers)** — Self-sovereign `did:key` or `did:web` identifiers. Portable across servers.
- **OPAQUE (aPAKE)** — Password-authenticated key exchange where the server never sees the password.
- **WebAuthn / Passkeys** — Hardware-backed authentication for device binding (YubiKey, Touch ID, etc.).
- **Verifiable Credentials (W3C VC)** — Prove attributes (org membership, role) without revealing full identity.
## Application Layer
- **Matrix-style federation** — Let multiple quicnprotochat servers federate for cross-server communication.
- **WASM plugin system** — Sandboxed WASM plugins for bots, bridges, custom message types.
- **Double-ratchet DM layer** — Signal-style double ratchet (X3DH + Axolotl) for efficient 1:1 conversations.
## Observability & Operations
- **OpenTelemetry (tracing + metrics)** — OTLP export for distributed tracing, latency histograms, and dashboards.
- **Prometheus + Grafana** — Metrics on message throughput, MLS epoch advancement rate, queue depths.
- **Testcontainers-rs** — Docker stack in Rust integration tests for true end-to-end CI.
## Developer Experience
- **Tauri or Dioxus** — Native cross-platform GUI client in Rust, sharing core crate.
- **uniffi or diplomat** — FFI bindings from Rust core to Swift/Kotlin for mobile clients.
- **Nix flakes** — Reproducible dev environment bundling capnp, Rust toolchain, and test infra.
---
## Top 5 Priority Implementations
| Priority | Technology | Why |
|----------|-----------|-----|
| 1 | **Post-quantum hybrid KEM** | `ml-kem` already vendored — finishing this makes the project cutting-edge |
| 2 | **SQLCipher persistence** | Unlocks M6, multi-device, and offline usage |
| 3 | **OPAQUE auth** | Zero-knowledge passwords, massive security uplift for auth layer |
| 4 | **iroh / LibP2P** | NAT traversal + optional P2P mesh makes this deployable without central infra |
| 5 | **Sealed Sender + PIR** | Metadata resistance is the frontier — content encryption is table stakes now |

View File

@@ -4,14 +4,14 @@ services:
context: . context: .
dockerfile: docker/Dockerfile dockerfile: docker/Dockerfile
ports: ports:
- "4201:4201" - "7000:7000"
environment: environment:
RUST_LOG: "info" RUST_LOG: "info"
QUICNPROTOCHAT_LISTEN: "0.0.0.0:4201" QUICNPROTOCHAT_LISTEN: "0.0.0.0:7000"
# Healthcheck: attempt a TCP connection to port 4201. # Healthcheck: attempt a TCP connection to port 7000.
# Uses bash /dev/tcp — available in debian:bookworm-slim without extra packages. # Uses bash /dev/tcp — available in debian:bookworm-slim without extra packages.
healthcheck: healthcheck:
test: ["CMD", "bash", "-c", "echo '' > /dev/tcp/localhost/4201"] test: ["CMD", "bash", "-c", "echo '' > /dev/tcp/localhost/7000"]
interval: 5s interval: 5s
timeout: 3s timeout: 3s
retries: 10 retries: 10

19
docs/book.toml Normal file
View File

@@ -0,0 +1,19 @@
[book]
title = "quicnprotochat"
description = "End-to-end encrypted group messaging over QUIC + TLS 1.3 + MLS (RFC 9420)"
authors = ["quicnprotochat contributors"]
language = "en"
src = "src"
[build]
build-dir = "book"
[output.html]
default-theme = "navy"
preferred-dark-theme = "navy"
no-section-label = false
[output.html.search]
enable = true
limit-results = 30
boost-hierarchy = 2

106
docs/src/SUMMARY.md Normal file
View File

@@ -0,0 +1,106 @@
# Summary
[Introduction](introduction.md)
---
# Getting Started
- [Prerequisites](getting-started/prerequisites.md)
- [Building from Source](getting-started/building.md)
- [Running the Server](getting-started/running-the-server.md)
- [Running the Client](getting-started/running-the-client.md)
- [Docker Deployment](getting-started/docker.md)
- [Demo Walkthrough: Alice and Bob](getting-started/demo-walkthrough.md)
---
# Architecture
- [Architecture Overview](architecture/overview.md)
- [Protocol Stack](architecture/protocol-stack.md)
- [Crate Responsibilities](architecture/crate-responsibilities.md)
- [Service Architecture](architecture/service-architecture.md)
- [End-to-End Data Flow](architecture/data-flow.md)
---
# Protocol Deep Dives
- [Protocol Layers Overview](protocol-layers/overview.md)
- [QUIC + TLS 1.3](protocol-layers/quic-tls.md)
- [Noise\_XX Handshake](protocol-layers/noise-xx.md)
- [Cap'n Proto Serialisation and RPC](protocol-layers/capn-proto.md)
- [MLS (RFC 9420)](protocol-layers/mls.md)
- [Hybrid KEM: X25519 + ML-KEM-768](protocol-layers/hybrid-kem.md)
---
# Cryptographic Properties
- [Cryptography Overview](cryptography/overview.md)
- [Ed25519 Identity Keys](cryptography/identity-keys.md)
- [X25519 Transport Keys](cryptography/transport-keys.md)
- [Key Lifecycle and Zeroization](cryptography/key-lifecycle.md)
- [Forward Secrecy](cryptography/forward-secrecy.md)
- [Post-Compromise Security](cryptography/post-compromise-security.md)
- [Post-Quantum Readiness](cryptography/post-quantum-readiness.md)
- [Threat Model](cryptography/threat-model.md)
---
# Wire Format Reference
- [Wire Format Overview](wire-format/overview.md)
- [Envelope Schema](wire-format/envelope-schema.md)
- [Auth Schema](wire-format/auth-schema.md)
- [Delivery Schema](wire-format/delivery-schema.md)
- [NodeService Schema](wire-format/node-service-schema.md)
- [Length-Prefixed Framing Codec](wire-format/framing-codec.md)
---
# Design Rationale
- [Design Decisions Overview](design-rationale/overview.md)
- [Why This Design, Not Signal/Matrix/...](design-rationale/why-not-signal.md)
- [ADR-001: Noise\_XX for Transport Auth](design-rationale/adr-001-noise-xx.md)
- [ADR-002: Cap'n Proto over MessagePack](design-rationale/adr-002-capnproto.md)
- [ADR-003: RPC Inside the Noise Tunnel](design-rationale/adr-003-rpc-inside-noise.md)
- [ADR-004: MLS-Unaware Delivery Service](design-rationale/adr-004-mls-unaware-ds.md)
- [ADR-005: Single-Use KeyPackages](design-rationale/adr-005-single-use-keypackages.md)
- [ADR-006: PQ Gap in Noise Transport](design-rationale/adr-006-pq-gap.md)
---
# Implementation Internals
- [GroupMember Lifecycle](internals/group-member-lifecycle.md)
- [KeyPackage Exchange Flow](internals/keypackage-exchange.md)
- [Delivery Service Internals](internals/delivery-service.md)
- [Authentication Service Internals](internals/authentication-service.md)
- [Storage Backend](internals/storage-backend.md)
---
# Roadmap and Research
- [Milestone Tracker](roadmap/milestones.md)
- [Production Readiness WBS](roadmap/production-readiness.md)
- [Auth, Devices, and Tokens](roadmap/authz-plan.md)
- [1:1 Channel Design](roadmap/dm-channels.md)
- [Future Research Directions](roadmap/future-research.md)
---
# Contributing
- [Coding Standards](contributing/coding-standards.md)
- [Testing Strategy](contributing/testing.md)
---
# Appendix
- [Glossary](appendix/glossary.md)
- [References and Further Reading](appendix/references.md)

View File

@@ -0,0 +1,142 @@
# Glossary
Alphabetical glossary of terms used throughout the quicnprotochat documentation.
Each entry includes a brief definition and, where applicable, a reference to the
relevant specification or documentation page.
---
**AEAD** -- Authenticated Encryption with Associated Data. A symmetric encryption
scheme that provides both confidentiality and integrity. quicnprotochat uses
AES-128-GCM (in the MLS ciphersuite) and ChaCha20-Poly1305 (in the Noise
transport). See [Cryptography Overview](../cryptography/overview.md).
**ALPN** -- Application-Layer Protocol Negotiation. A TLS extension that allows
the client and server to agree on an application protocol during the TLS
handshake. quicnprotochat uses the ALPN token `b"capnp"` to identify Cap'n Proto
RPC connections. See [QUIC + TLS 1.3](../protocol-layers/quic-tls.md).
**AS** -- Authentication Service. The server component that stores and
distributes single-use MLS KeyPackages. Clients upload KeyPackages after identity
generation; peers fetch them to add new members to a group.
See [Architecture Overview](../architecture/overview.md).
**Cap'n Proto** -- A zero-copy serialisation format with a built-in RPC system.
quicnprotochat uses Cap'n Proto for all wire messages and service RPCs. Schemas
live in `schemas/*.capnp` and are compiled to Rust at build time.
See [Cap'n Proto Serialisation and RPC](../protocol-layers/capn-proto.md).
**Commit** -- An MLS message type that advances the group to a new epoch. When a
member sends a Commit (e.g., after adding or removing a member), all group
participants update their key schedule. Commits are the mechanism for both
forward secrecy and post-compromise security.
See [MLS (RFC 9420)](../protocol-layers/mls.md).
**Credential** -- An MLS identity binding that associates a member's signing key
with their identity. quicnprotochat uses `BasicCredential`, which contains the
raw Ed25519 public key bytes. See
[Ed25519 Identity Keys](../cryptography/identity-keys.md).
**DER** -- Distinguished Encoding Rules. A binary encoding format for ASN.1
structures, used for X.509 certificates and TLS certificate chains. The
self-signed TLS certificate generated by quicnprotochat is DER-encoded.
**DS** -- Delivery Service. The server component that provides store-and-forward
relay for opaque MLS payloads. The DS never inspects ciphertext -- it routes
solely by recipient public key and optional channel ID.
See [Architecture Overview](../architecture/overview.md).
**Ed25519** -- Edwards-curve Digital Signature Algorithm on Curve25519. Used for
MLS identity credentials and signing (KeyPackages, Commits, group operations).
quicnprotochat uses the `ed25519-dalek` crate.
See [Ed25519 Identity Keys](../cryptography/identity-keys.md).
**Epoch** -- The version number of an MLS group's key state. Each Commit
advances the epoch by one. Messages encrypted under epoch *n* cannot be
decrypted by members who have advanced to epoch *n+1*, providing forward secrecy.
See [Forward Secrecy](../cryptography/forward-secrecy.md).
**Forward Secrecy (FS)** -- The property that past sessions remain secure even
if long-term keys are later compromised. In MLS, forward secrecy is achieved by
the epoch ratchet: key material from earlier epochs is deleted when the epoch
advances. See [Forward Secrecy](../cryptography/forward-secrecy.md).
**HKDF** -- HMAC-based Key Derivation Function. Used in MLS to derive symmetric
keys from shared secrets. quicnprotochat uses HKDF-SHA256.
**HPKE** -- Hybrid Public Key Encryption. The public-key encryption scheme used
in MLS for key exchange (encrypting to a KeyPackage's init key). Defined in
RFC 9180. In quicnprotochat, HPKE uses DHKEM(X25519, HKDF-SHA256).
See [Hybrid KEM](../protocol-layers/hybrid-kem.md).
**KEM** -- Key Encapsulation Mechanism. A cryptographic primitive that generates
a shared secret and an encapsulated (encrypted) version of that secret. Used in
HPKE and in the hybrid post-quantum construction (X25519 + ML-KEM-768).
**KeyPackage** -- An MLS structure containing a member's public key material
(HPKE init key, signing key, credential) that peers use to add the member to a
group. KeyPackages are single-use per the MLS specification (RFC 9420) -- each
is consumed on fetch. See
[ADR-005: Single-Use KeyPackages](../design-rationale/adr-005-single-use-keypackages.md).
**ML-KEM-768** -- Module-Lattice-based Key Encapsulation Mechanism, security
level 3 (NIST FIPS 203). A post-quantum KEM based on the hardness of the
module learning-with-errors (MLWE) problem. quicnprotochat plans to use ML-KEM-768
in a hybrid construction with X25519 at milestone M7.
See [Post-Quantum Readiness](../cryptography/post-quantum-readiness.md).
**MLS** -- Messaging Layer Security. A protocol for group key agreement defined
in RFC 9420. MLS provides forward secrecy and post-compromise security for
groups of any size through an efficient tree-based key schedule.
See [MLS (RFC 9420)](../protocol-layers/mls.md).
**Noise\_XX** -- A Noise Protocol Framework handshake pattern providing mutual
authentication. Both parties transmit their static public keys during the
handshake (encrypted after the first round-trip). The M1 transport stack uses
Noise\_XX over TCP; the M3+ stack uses QUIC + TLS 1.3 as the primary transport.
See [Noise\_XX Handshake](../protocol-layers/noise-xx.md).
**PCS** -- Post-Compromise Security. The property that a protocol recovers
security after a member's state is compromised. In MLS, once a compromised
member sends an Update or Commit, subsequent epochs are secure again (assuming
the attacker does not maintain persistent access).
See [Post-Compromise Security](../cryptography/post-compromise-security.md).
**PIR** -- Private Information Retrieval. A cryptographic technique that allows
a client to fetch a record from a database without the server learning which
record was requested. Explored as a future enhancement for metadata-hiding
KeyPackage and message fetch.
See [Future Research](../roadmap/future-research.md).
**QUIC** -- A UDP-based, multiplexed, encrypted transport protocol defined in
RFC 9000. QUIC integrates TLS 1.3 for authentication and confidentiality and
provides 0-RTT connection establishment, stream multiplexing, and built-in
congestion control. quicnprotochat uses the `quinn` crate.
See [QUIC + TLS 1.3](../protocol-layers/quic-tls.md).
**Ratchet Tree** -- The binary tree data structure used in MLS for efficient
group key derivation. Each leaf corresponds to a group member; internal nodes
hold derived key material. Updates propagate along the path from a leaf to the
root, giving O(log N) cost for key updates in a group of N members.
**TLS 1.3** -- Transport Layer Security version 1.3, defined in RFC 8446. The
standard for authenticated, encrypted transport. quicnprotochat uses TLS 1.3
exclusively (via `rustls` with `TLS13` cipher suites only) as part of the QUIC
transport. See [QUIC + TLS 1.3](../protocol-layers/quic-tls.md).
**Welcome** -- An MLS message sent to new members when they are added to a
group. The Welcome contains the group state (ratchet tree, group context,
epoch secrets) encrypted under the new member's HPKE init key from their
KeyPackage. See [MLS (RFC 9420)](../protocol-layers/mls.md).
**X25519** -- Elliptic curve Diffie-Hellman key exchange on Curve25519 (using
the Montgomery form). Used for the Noise\_XX handshake (transport
authentication) and as the classical component of DHKEM in MLS.
quicnprotochat uses the `x25519-dalek` crate.
See [X25519 Transport Keys](../cryptography/transport-keys.md).
**Zeroize** -- The practice of securely clearing sensitive data (private keys,
shared secrets) from memory when it is no longer needed. quicnprotochat uses the
`zeroize` crate with the `ZeroizeOnDrop` derive macro to ensure that key material
is overwritten on drop.
See [Key Lifecycle and Zeroization](../cryptography/key-lifecycle.md).

View File

@@ -0,0 +1,121 @@
# References and Further Reading
This page collects the standards, crate documentation, and research papers
referenced throughout the quicnprotochat documentation. Entries are organised by
category.
---
## Standards and RFCs
| Reference | Description |
|-----------|-------------|
| [RFC 9420 -- The Messaging Layer Security (MLS) Protocol](https://datatracker.ietf.org/doc/rfc9420/) | The group key agreement protocol used by quicnprotochat. Defines KeyPackages, Welcome messages, Commits, the ratchet tree, epoch advancement, and the security properties (forward secrecy, post-compromise security). See [MLS (RFC 9420)](../protocol-layers/mls.md). |
| [RFC 9000 -- QUIC: A UDP-Based Multiplexed and Secure Transport](https://datatracker.ietf.org/doc/rfc9000/) | The transport protocol underlying quicnprotochat's primary connection layer. Provides multiplexed streams, 0-RTT connection establishment, and built-in congestion control. See [QUIC + TLS 1.3](../protocol-layers/quic-tls.md). |
| [RFC 9001 -- Using TLS to Secure QUIC](https://datatracker.ietf.org/doc/rfc9001/) | Defines how TLS 1.3 is integrated into QUIC for authentication and key exchange. quicnprotochat uses this via the `quinn` + `rustls` stack. |
| [RFC 8446 -- The Transport Layer Security (TLS) Protocol Version 1.3](https://datatracker.ietf.org/doc/rfc8446/) | The TLS version used exclusively by quicnprotochat (no TLS 1.2 fallback). Provides the handshake, key schedule, and record layer for QUIC transport security. |
| [RFC 9180 -- Hybrid Public Key Encryption (HPKE)](https://datatracker.ietf.org/doc/rfc9180/) | The public-key encryption scheme used internally by MLS for encrypting to KeyPackage init keys. quicnprotochat's MLS ciphersuite uses DHKEM(X25519, HKDF-SHA256) with AES-128-GCM. |
| [NIST FIPS 203 -- Module-Lattice-Based Key-Encapsulation Mechanism Standard (ML-KEM)](https://csrc.nist.gov/pubs/fips/203/final) | The post-quantum KEM standard. quicnprotochat uses ML-KEM-768 in a hybrid construction with X25519 (milestone M7; implemented in the `hybrid_kem` module of `quicnprotochat-core`). See [Post-Quantum Readiness](../cryptography/post-quantum-readiness.md). |
| [Noise Protocol Framework](https://noiseprotocol.org/noise.html) | The framework defining the Noise\_XX handshake pattern used in quicnprotochat's M1 transport stack. Provides mutual authentication and channel encryption. See [Noise\_XX Handshake](../protocol-layers/noise-xx.md). |
| [Cap'n Proto specification](https://capnproto.org/) | The zero-copy serialisation format and RPC system used for all quicnprotochat wire messages and service interfaces. See [Cap'n Proto Serialisation and RPC](../protocol-layers/capn-proto.md). |
| [draft-ietf-tls-hybrid-design -- Hybrid Key Exchange in TLS 1.3](https://datatracker.ietf.org/doc/draft-ietf-tls-hybrid-design/) | The combiner approach used by quicnprotochat's hybrid KEM construction (X25519 shared secret concatenated with ML-KEM-768 shared secret, fed through HKDF). See [Hybrid KEM](../protocol-layers/hybrid-kem.md). |
| [RFC 9807 -- The OPAQUE Augmented Password-Authenticated Key Exchange (aPAKE) Protocol](https://datatracker.ietf.org/doc/rfc9807/) | Augmented (asymmetric) password-authenticated key exchange. Considered for future authentication (see [Future Research](../roadmap/future-research.md)). |
---
## Rust Crate Documentation
| Crate | docs.rs | Role in quicnprotochat |
|-------|---------|----------------------|
| `openmls` | [docs.rs/openmls](https://docs.rs/openmls/) | MLS protocol implementation: group creation, member addition, Welcome processing, application message encryption/decryption. See [MLS (RFC 9420)](../protocol-layers/mls.md). |
| `openmls_rust_crypto` | [docs.rs/openmls_rust_crypto](https://docs.rs/openmls_rust_crypto/) | Pure-Rust cryptographic backend for openmls. Provides the `OpenMlsRustCrypto` provider used by `GroupMember`. |
| `quinn` | [docs.rs/quinn](https://docs.rs/quinn/) | QUIC transport implementation. Provides the `Endpoint`, `Connection`, and stream types for client and server. See [QUIC + TLS 1.3](../protocol-layers/quic-tls.md). |
| `rustls` | [docs.rs/rustls](https://docs.rs/rustls/) | TLS 1.3 implementation used by `quinn`. Configured with `TLS13` cipher suites only and custom certificate verification. |
| `snow` | [docs.rs/snow](https://docs.rs/snow/) | Noise Protocol Framework implementation. Provides the Noise\_XX handshake for the M1 transport stack. See [Noise\_XX Handshake](../protocol-layers/noise-xx.md). |
| `capnp` | [docs.rs/capnp](https://docs.rs/capnp/) | Cap'n Proto serialisation library. Used for building and reading all wire messages. |
| `capnp-rpc` | [docs.rs/capnp-rpc](https://docs.rs/capnp-rpc/) | Cap'n Proto RPC framework. Provides the async RPC system for `NodeService`. Runs inside the QUIC encrypted channel. |
| `capnpc` | [docs.rs/capnpc](https://docs.rs/capnpc/) | Cap'n Proto compiler invoked at build time (`build.rs`) to generate Rust types from `.capnp` schemas. |
| `ml-kem` | [docs.rs/ml-kem](https://docs.rs/ml-kem/) | ML-KEM (NIST FIPS 203) implementation. Vendored in the workspace; provides the ML-KEM-768 component of the hybrid post-quantum KEM (M7) in the `hybrid_kem` module of `quicnprotochat-core`. |
| `ed25519-dalek` | [docs.rs/ed25519-dalek](https://docs.rs/ed25519-dalek/) | Ed25519 signing and verification. Used for MLS identity credentials (`BasicCredential`). See [Ed25519 Identity Keys](../cryptography/identity-keys.md). |
| `x25519-dalek` | [docs.rs/x25519-dalek](https://docs.rs/x25519-dalek/) | X25519 Diffie-Hellman key exchange. Used for Noise\_XX transport authentication. See [X25519 Transport Keys](../cryptography/transport-keys.md). |
| `zeroize` | [docs.rs/zeroize](https://docs.rs/zeroize/) | Secure memory zeroisation. All private key types implement `Zeroize + ZeroizeOnDrop`. See [Key Lifecycle and Zeroization](../cryptography/key-lifecycle.md). |
| `tokio` | [docs.rs/tokio](https://docs.rs/tokio/) | Async runtime. All server and client I/O runs on Tokio. |
| `clap` | [docs.rs/clap](https://docs.rs/clap/) | CLI argument parser for the client and server binaries. |
| `dashmap` | [docs.rs/dashmap](https://docs.rs/dashmap/) | Concurrent hash map. Used for the in-memory AS key store and DS delivery queues (to be replaced by SQLite at M6). |
| `tracing` | [docs.rs/tracing](https://docs.rs/tracing/) | Structured logging framework. Used throughout the server for request logging and diagnostics. |
| `thiserror` | [docs.rs/thiserror](https://docs.rs/thiserror/) | Derive macro for typed error enums in library crates. |
| `anyhow` | [docs.rs/anyhow](https://docs.rs/anyhow/) | Flexible error handling for application crates (server, client). |
---
## Research Papers and Background
### MLS Motivation and Design
**"On Ends-to-Ends Encryption: Asynchronous Group Messaging with Strong Security Guarantees"**
Katriel Cohn-Gordon, Cas Cremers, Luke Garratt, Jon Millican, and Kevin Milner.
*ACM CCS 2018.*
This paper analyses the security properties of group messaging protocols and
motivates the design of MLS. It defines the security goals (forward secrecy,
post-compromise security, asynchronous operation) that MLS formalises into a
protocol. Essential background for understanding why quicnprotochat uses MLS
rather than extending the Signal protocol to groups.
### Signal Protocol
**"The Double Ratchet Algorithm"**
Trevor Perrin and Moxie Marlinspike.
[signal.org/docs/specifications/doubleratchet](https://signal.org/docs/specifications/doubleratchet/)
Defines the double ratchet used in Signal's 1:1 messaging. Relevant as a
potential optimisation for quicnprotochat's 1:1 channels (see
[Future Research: Double-Ratchet DM Layer](../roadmap/future-research.md#double-ratchet-dm-layer))
and as background for understanding how MLS generalises ratcheting to groups.
**"The X3DH Key Agreement Protocol"**
Moxie Marlinspike and Trevor Perrin.
[signal.org/docs/specifications/x3dh](https://signal.org/docs/specifications/x3dh/)
Defines the extended triple Diffie-Hellman key agreement used in Signal's initial
key exchange. MLS KeyPackages serve an analogous role to X3DH's prekeys,
enabling asynchronous group setup.
### Post-Quantum Cryptography
**"CRYSTALS-Kyber: A CCA-Secure Module-Lattice-Based KEM"**
Roberto Avanzi et al.
[NIST PQC Round 3 submission](https://pq-crystals.org/kyber/)
The predecessor to ML-KEM (NIST FIPS 203). CRYSTALS-Kyber was selected by NIST
and standardised as ML-KEM. quicnprotochat uses the `ml-kem` crate which
implements the final FIPS 203 standard.
### Noise Protocol
**"The Noise Protocol Framework"**
Trevor Perrin.
[noiseprotocol.org/noise.html](https://noiseprotocol.org/noise.html)
The specification for the Noise protocol framework, including the XX handshake
pattern used in quicnprotochat's M1 transport stack.
### Metadata Resistance
**"Sealed Sender"**
Signal Blog.
[signal.org/blog/sealed-sender](https://signal.org/blog/sealed-sender/)
Describes Signal's approach to hiding sender identity from the server. Relevant
to quicnprotochat's future research on metadata resistance (see
[Future Research](../roadmap/future-research.md)).
---
## Cross-references
- [Glossary](glossary.md) -- definitions of terms used in these references
- [Protocol Layers Overview](../protocol-layers/overview.md) -- how the protocols layer in quicnprotochat
- [Cryptography Overview](../cryptography/overview.md) -- cryptographic properties and threat model
- [Future Research](../roadmap/future-research.md) -- technologies under consideration
- [Milestones](../roadmap/milestones.md) -- current project status

View File

@@ -0,0 +1,232 @@
# Crate Responsibilities
The quicnprotochat workspace is split into four crates with strict layering
rules. Each crate owns one concern and depends only on the crates below it.
This page documents what each crate provides, what it explicitly avoids, and
how the crates relate to one another.
---
## Dependency Flow Diagram
```text
┌──────────────────────────┐
│ quicnprotochat-client │
│ (CLI, QUIC client, │
│ GroupMember orchestr.) │
└─────────┬───────┬────────┘
│ │
┌───────┘ └────────┐
▼ ▼
┌────────────────────────┐ ┌────────────────────────┐
│ quicnprotochat-core │ │ quicnprotochat-server │
│ (crypto, Noise, │ │ (QUIC listener, │
│ MLS, hybrid KEM) │ │ NodeService RPC, │
│ │ │ storage) │
└──────────┬─────────────┘ └─────────┬──────────────┘
│ │
│ ┌───────────────────┘
▼ ▼
┌────────────────────────┐
│ quicnprotochat-proto │
│ (Cap'n Proto schemas, │
│ codegen, helpers) │
└────────────────────────┘
```
**Arrows point from dependant to dependency.** The proto crate sits at the base
of the dependency graph. The core crate depends on proto for envelope
serialisation. The server and client crates both depend on core and proto.
---
## quicnprotochat-core
**Role:** Pure cryptographic logic and transport primitives. No network I/O
(except for the Noise handshake helpers that take an existing `TcpStream`). No
async runtime dependency beyond what Noise transport needs.
### Modules
| Module | Public API | Description |
|---------------|-----------------------------------------------------------------------------|-------------|
| `keypair` | `NoiseKeypair` | Static X25519 keypair for Noise_XX. `StaticSecret` is `ZeroizeOnDrop`. `private_bytes()` returns `Zeroizing<[u8; 32]>`. |
| `identity` | `IdentityKeypair` | Ed25519 signing keypair for MLS credentials. Seed stored as `Zeroizing<[u8; 32]>`. Implements `openmls_traits::Signer`. |
| `noise` | `handshake_initiator`, `handshake_responder`, `NoiseTransport` | Noise_XX_25519_ChaChaPoly_BLAKE2s handshake over TCP. `NoiseTransport` provides `send_frame`/`recv_frame`, envelope helpers, and `into_capnp_io()` bridge. |
| `codec` | `LengthPrefixedCodec`, `NOISE_MAX_MSG` | Tokio `Encoder<Bytes>` + `Decoder`. 4-byte LE length prefix. Max frame 65,535 bytes. |
| `group` | `GroupMember` | MLS group state machine wrapping `openmls::MlsGroup`. Lifecycle: `new` -> `generate_key_package` -> `create_group` / `join_group` -> `send_message` / `receive_message`. |
| `keypackage` | `generate_key_package` | Standalone KeyPackage generation (returns TLS-encoded bytes + SHA-256 fingerprint). |
| `keystore` | `DiskKeyStore`, `StoreCrypto` | `OpenMlsKeyStore` implementation backed by an in-memory `HashMap` with optional bincode flush to disk. `StoreCrypto` couples `RustCrypto` + `DiskKeyStore` into an `OpenMlsCryptoProvider`. |
| `hybrid_kem` | `HybridKeypair`, `HybridPublicKey`, `hybrid_encrypt`, `hybrid_decrypt` | X25519 + ML-KEM-768 hybrid KEM. HKDF-SHA256 key derivation, ChaCha20-Poly1305 AEAD. Versioned envelope wire format. |
| `error` | `CoreError`, `CodecError`, `MAX_PLAINTEXT_LEN` | Unified error types. `CoreError` covers Noise, Codec, Cap'n Proto, MLS, and hybrid KEM failures. |
### What this crate does NOT do
- No network I/O beyond the Noise helpers (which take a pre-connected `TcpStream`).
- No QUIC or TLS -- that is the server and client crates' concern.
- No async runtime setup (it uses Tokio types internally but does not spawn or
manage a runtime).
- No CLI parsing.
### Key dependencies
`snow`, `x25519-dalek`, `ed25519-dalek`, `openmls`, `openmls_rust_crypto`,
`openmls_traits`, `tls_codec`, `ml-kem`, `chacha20poly1305`, `hkdf`, `sha2`,
`zeroize`, `capnp`, `quicnprotochat-proto`, `tokio`, `tokio-util`, `futures`,
`bytes`, `serde`, `bincode`, `serde_json`, `thiserror`.
---
## quicnprotochat-proto
**Role:** Cap'n Proto schema definitions, compile-time code generation, and
pure-synchronous serialisation helpers. This crate is the single source of truth
for the wire format.
### Contents
| Item | Description |
|---------------------------|-------------|
| `schemas/envelope.capnp` | `Envelope` struct and `MsgType` enum -- top-level wire message for Noise-channel traffic. |
| `schemas/auth.capnp` | `AuthenticationService` interface -- `uploadKeyPackage`, `fetchKeyPackage`. |
| `schemas/delivery.capnp` | `DeliveryService` interface -- `enqueue`, `fetch`. |
| `schemas/node.capnp` | `NodeService` interface (unified AS+DS) -- all RPC methods plus `Auth` struct. |
| `build.rs` | Invokes `capnpc` to generate Rust types from the four `.capnp` files. |
| `lib.rs` | `pub mod envelope_capnp`, `auth_capnp`, `delivery_capnp`, `node_capnp` -- re-exports generated modules. |
| `MsgType` | Re-exported enum from `envelope_capnp::envelope::MsgType`. |
| `ParsedEnvelope` | Owned, `Send + 'static` representation of a decoded `Envelope`. All byte fields are eagerly copied out of the Cap'n Proto reader. |
| `build_envelope` | Serialise a `ParsedEnvelope` to unpacked Cap'n Proto wire bytes. |
| `parse_envelope` | Deserialise wire bytes into a `ParsedEnvelope`. |
| `to_bytes` / `from_bytes` | Low-level Cap'n Proto message <-> byte conversions. |
### What this crate does NOT do
- **No crypto** -- key material never enters this crate.
- **No I/O** -- callers own the transport; this crate only converts bytes to
types and back.
- **No async** -- pure synchronous data-layer code.
### Key dependencies
`capnp` (runtime), `capnpc` (build-time only).
---
## quicnprotochat-server
**Role:** Network-facing server binary. Accepts QUIC + TLS 1.3 connections,
dispatches Cap'n Proto RPC calls to `NodeServiceImpl`, and persists state to
disk via `FileBackedStore`.
### Components
| Component | Description |
|----------------------|-------------|
| `NodeServiceImpl` | Implements `node_service::Server` (Cap'n Proto generated trait). Handles all eight RPC methods: `uploadKeyPackage`, `fetchKeyPackage`, `enqueue`, `fetch`, `fetchWait`, `health`, `uploadHybridKey`, `fetchHybridKey`. |
| `FileBackedStore` | Mutex-guarded `HashMap`s for KeyPackages (keyed by Ed25519 public key), delivery queues (keyed by `ChannelKey = (channelId, recipientKey)`), and hybrid public keys. Each mutation flushes the full map to a bincode file on disk. |
| `DashMap` waiters | `DashMap<Vec<u8>, Arc<Notify>>` -- per-recipient `tokio::sync::Notify` instances for `fetchWait` long-polling. `enqueue` calls `notify_waiters()` after appending. |
| TLS config | Self-signed certificate auto-generated on first run (`rcgen`). TLS 1.3 only, ALPN `capnp`. |
| CLI (`clap`) | `--listen` (default `0.0.0.0:7000`), `--data-dir`, `--tls-cert`, `--tls-key`. |
### Connection lifecycle
```text
QUIC accept
└─ TLS 1.3 handshake (self-signed cert, ALPN "capnp")
└─ accept_bi() -> bidirectional QUIC stream
└─ tokio_util::compat adapters (AsyncRead/AsyncWrite)
└─ capnp-rpc twoparty::VatNetwork (Side::Server)
└─ RpcSystem drives NodeServiceImpl
```
Because `capnp-rpc` uses `Rc<RefCell<>>` internally and is therefore `!Send`,
the entire RPC stack runs on a `tokio::task::LocalSet`. Each incoming connection
is handled by `spawn_local`.
### What this crate does NOT do
- No direct crypto operations (it delegates to `quicnprotochat-core` types
for fingerprinting and storage only).
- No MLS processing -- all payloads are opaque byte strings.
- No Noise transport (QUIC/TLS only).
### Key dependencies
`quicnprotochat-core`, `quicnprotochat-proto`, `quinn`, `quinn-proto`,
`rustls`, `rcgen`, `capnp`, `capnp-rpc`, `tokio`, `tokio-util`, `dashmap`,
`sha2`, `clap`, `tracing`, `anyhow`, `thiserror`, `bincode`, `serde`.
---
## quicnprotochat-client
**Role:** CLI client binary. Connects to the server over QUIC + TLS 1.3,
orchestrates MLS group operations via `GroupMember`, and persists identity and
group state to disk.
### Components
| Component | Description |
|-------------------------|-------------|
| `connect_node` | Establishes a QUIC/TLS connection, opens a bidirectional stream, and bootstraps a `capnp-rpc` `RpcSystem` to obtain a `node_service::Client`. |
| CLI subcommands (`clap`)| `ping`, `register`, `fetch-key`, `demo-group`, `register-state`, `create-group`, `invite`, `join`, `send`, `recv`. |
| `GroupMember` usage | The client creates a `GroupMember` (from `quicnprotochat-core`), calls `generate_key_package` / `create_group` / `add_member` / `join_group` / `send_message` / `receive_message`. |
| State persistence | `StoredState` holds `identity_seed` (32 bytes) and optional serialised `MlsGroup`. A companion `.ks` file stores the `DiskKeyStore` with HPKE init private keys. |
| Auth context | `ClientAuth` bundles an optional bearer token and device ID. Passed to every RPC via the `Auth` struct in `node.capnp`. |
### CLI subcommand summary
| Subcommand | What it does |
|-------------------|--------------|
| `ping` | Call `health()` and print RTT. |
| `register` | Generate a fresh identity + KeyPackage, upload to AS, print identity key. |
| `register-state` | Same as `register` but uses/creates persistent state file. |
| `fetch-key` | Fetch a peer's KeyPackage by hex identity key. |
| `create-group` | Create a new MLS group and save state. |
| `invite` | Fetch peer's KeyPackage, add to group, enqueue Welcome via DS. |
| `join` | Fetch Welcome from DS, join the MLS group. |
| `send` | Encrypt a message with MLS, enqueue via DS. |
| `recv` | Fetch pending payloads from DS, decrypt with MLS. Supports `--stream` for continuous long-polling. |
| `demo-group` | End-to-end Alice+Bob round-trip (ephemeral identities). |
### What this crate does NOT do
- No server-side logic.
- No Noise transport (QUIC/TLS only for server communication).
- No direct crypto beyond calling `GroupMember` and verifying SHA-256
fingerprints.
### Key dependencies
`quicnprotochat-core`, `quicnprotochat-proto`, `quinn`, `quinn-proto`,
`rustls`, `capnp`, `capnp-rpc`, `tokio`, `tokio-util`, `clap`, `sha2`,
`serde`, `bincode`, `anyhow`, `thiserror`, `tracing`.
---
## Layering Rules
1. **proto** depends on nothing in-workspace. It is pure data definition.
2. **core** depends on **proto** (for `ParsedEnvelope` and envelope helpers).
It does not depend on server or client.
3. **server** depends on **core** and **proto**. It does not depend on client.
4. **client** depends on **core** and **proto**. It does not depend on server.
5. **server** and **client** never depend on each other. They communicate
exclusively via the Cap'n Proto RPC wire protocol.
This layering ensures that:
- Crypto code can be tested in isolation (`cargo test -p quicnprotochat-core`).
- Schema changes propagate automatically through codegen.
- The server binary contains no client-side MLS orchestration logic.
- The client binary contains no server-side storage or listener logic.
---
## Further Reading
- [Architecture Overview](overview.md) -- high-level system diagram
- [Service Architecture](service-architecture.md) -- NodeService RPC details
- [Wire Format Overview](../wire-format/overview.md) -- Cap'n Proto schema reference
- [GroupMember Lifecycle](../internals/group-member-lifecycle.md) -- MLS state machine details
- [Storage Backend](../internals/storage-backend.md) -- FileBackedStore internals

View File

@@ -0,0 +1,350 @@
# End-to-End Data Flow
This page traces the three core data flows through the quicnprotochat system:
registration, group creation, and message exchange. Each flow is illustrated
with an ASCII sequence diagram showing control-plane (AS) and data-plane (DS)
traffic.
Throughout these flows the server is **MLS-unaware** -- it stores and forwards
opaque byte blobs without parsing their MLS content.
---
## 1. Registration Flow
Before a client can join any MLS group, it must generate an Ed25519 identity
keypair and upload at least one KeyPackage to the Authentication Service. Peers
fetch these KeyPackages to add the client to groups.
### Sequence Diagram
```text
Client (Alice) NodeService (AS)
────────────── ────────────────
│ │
│ 1. Generate Ed25519 identity keypair │
│ (IdentityKeypair::generate) │
│ │
│ 2. Generate MLS KeyPackage │
│ (GroupMember::generate_key_package) │
│ - Creates HPKE init keypair │
│ - Embeds Ed25519 pk in credential │
│ - Signs leaf node with Ed25519 sk │
│ - TLS-encodes the KeyPackage │
│ │
│ 3. QUIC connect + TLS 1.3 handshake │
│ ────────────────────────────────────────>│
│ │
│ 4. uploadKeyPackage(identityKey, pkg) │
│ ────────────────────────────────────────>│
│ │ 5. Validate:
│ │ - identityKey == 32 bytes
│ │ - package non-empty, <= 1 MB
│ │ - auth version allowed
│ │
│ │ 6. Compute SHA-256(package)
│ │
│ │ 7. Append to per-identity queue:
│ │ keyPackages[identityKey].push(pkg)
│ │
│ │ 8. Flush keypackages.bin to disk
│ │
│ fingerprint (SHA-256) │
│ <────────────────────────────────────────│
│ │
│ 9. Compare local fingerprint with │
│ server-returned fingerprint │
│ (tamper detection) │
│ │
```
### Key Points
- **KeyPackages are single-use** (RFC 9420 requirement). Each `fetchKeyPackage`
call atomically removes and returns one package. The client should upload
multiple KeyPackages if it expects to be added to several groups.
- The `identityKey` used as the AS index is the **raw 32-byte Ed25519 public
key**, not a fingerprint or hash. Peers must know Alice's public key
out-of-band (QR code, directory, etc.) to fetch her KeyPackage.
- The HPKE init private key generated during `generate_key_package` is stored
in the client's `DiskKeyStore`. The **same `GroupMember` instance** (or a
restored instance with the same key store) must later call `join_group` to
decrypt the Welcome message.
- The optional hybrid public key (`uploadHybridKey`) can also be uploaded
during registration for post-quantum envelope encryption.
---
## 2. Group Creation Flow
Alice creates a new MLS group, fetches Bob's KeyPackage from the AS, adds Bob
to the group (producing a Commit and a Welcome), and delivers the Welcome to
Bob via the DS.
### Sequence Diagram
```text
Alice NodeService (AS+DS) Bob
───── ────────────────── ───
│ │ │
│ 1. create_group("my-group") │ │
│ (local MLS operation -- │ │
│ Alice is sole member, │ │
│ epoch 0) │ │
│ │ │
│ 2. fetchKeyPackage(bob_pk) │ │
│ ───────────────────────────────>│ │
│ │ 3. Pop bob's KeyPackage │
│ │ from queue (atomic) │
│ bob_kp bytes │ │
│ <───────────────────────────────│ │
│ │ │
│ 4. add_member(bob_kp) │ │
│ Local MLS operations: │ │
│ a. Deserialise & validate │ │
│ Bob's KeyPackage │ │
│ b. Produce Commit message │ │
│ (adds Bob to ratchet │ │
│ tree, advances epoch) │ │
│ c. Produce Welcome message │ │
│ (encrypted to Bob's │ │
│ HPKE init key, contains │ │
│ group secrets + tree) │ │
│ d. merge_pending_commit() │ │
│ (Alice advances to │ │
│ epoch 1 locally) │ │
│ │ │
│ 5. enqueue(bob_pk, welcome) │ │
│ ───────────────────────────────>│ │
│ │ 6. Append welcome to │
│ │ deliveries[(ch, bob_pk)] │
│ │ │
│ │ 7. Notify bob_pk waiters │
│ │ │
│ │ │
│ │ 8. Bob connects and fetches │
│ │ <─────────────────────────────│
│ │ fetch(bob_pk) │
│ │ │
│ │ 9. Drain bob's queue │
│ │ (returns [welcome]) │
│ │ │
│ │ [welcome_bytes] │
│ │ ─────────────────────────────>│
│ │ │
│ │ │ 10. join_group(welcome)
│ │ │ - Decrypt Welcome with
│ │ │ HPKE init private key
│ │ │ - Extract ratchet tree
│ │ │ from GroupInfo ext
│ │ │ - Initialise MlsGroup
│ │ │ at epoch 1
│ │ │
│ │ │ Bob is now a group member
│ │ │
```
### Key Points
- The **Commit** message is relevant for groups with more than two members. In
the two-party case, Alice is the sole existing member and merges the commit
herself. In a multi-member group, the Commit would be sent to all existing
members via the DS so they can advance their epoch.
- The **Welcome** message is encrypted to Bob's HPKE init public key (carried
in his KeyPackage). Only the `GroupMember` instance that generated that
KeyPackage holds the corresponding private key.
- The `use_ratchet_tree_extension = true` MLS config embeds the full ratchet
tree in the Welcome's `GroupInfo` extension. This means Bob does not need a
separate tree fetch -- `new_from_welcome` extracts it automatically.
- The DS routes solely by `recipientKey` (Bob's Ed25519 public key) and the
optional `channelId`. It does not parse the Welcome, the Commit, or any MLS
structure.
---
## 3. Message Exchange Flow
After both Alice and Bob are group members, they exchange MLS Application
messages through the DS.
### Sequence Diagram
```text
Alice NodeService (DS) Bob
───── ────────────────── ───
│ │ │
│ ─── Alice sends a message to Bob ─── │
│ │ │
│ 1. send_message("hello bob") │ │
│ MLS create_message(): │ │
│ - Derive message key from │ │
│ epoch secret + gen counter│ │
│ - Encrypt plaintext with │ │
│ AES-128-GCM │ │
│ - Produce MlsMessageOut │ │
│ (PrivateMessage variant) │ │
│ - TLS-encode to bytes │ │
│ │ │
│ 2. enqueue(bob_pk, ciphertext) │ │
│ ───────────────────────────────>│ │
│ │ 3. Store in bob's queue │
│ │ 4. Notify bob_pk waiters │
│ │ │
│ │ (time passes) │
│ │ │
│ │ 5. Bob polls for messages │
│ │ <─────────────────────────────│
│ │ fetchWait(bob_pk, 30000) │
│ │ │
│ │ 6. Drain bob's queue │
│ │ [ciphertext] │
│ │ ─────────────────────────────>│
│ │ │
│ │ │ 7. receive_message(ct)
│ │ │ MLS process_message():
│ │ │ - Identify sender from
│ │ │ PrivateMessage header
│ │ │ - Derive decryption key
│ │ │ from epoch secret
│ │ │ - Decrypt AES-128-GCM
│ │ │ - Return plaintext:
│ │ │ "hello bob"
│ │ │
│ ─── Bob replies to Alice ─── │
│ │ │
│ │ │ 8. send_message("hello alice")
│ │ │ (same MLS encrypt flow)
│ │ │
│ │ 9. enqueue(alice_pk, ct) │
│ │ <─────────────────────────────│
│ │ 10. Store + notify │
│ │ │
│ 11. fetch(alice_pk) │ │
│ ───────────────────────────────>│ │
│ [ciphertext] │ │
│ <───────────────────────────────│ │
│ │ │
│ 12. receive_message(ct) │ │
│ -> "hello alice" │ │
│ │ │
```
### Key Points
- **MLS provides forward secrecy**: each message is encrypted with a key
derived from the current epoch secret and a per-sender generation counter.
Compromising a member's current keys does not reveal messages encrypted under earlier keys.
- **The DS is a dumb relay**: it does not decrypt, inspect, or reorder
messages. It stores opaque byte blobs in a FIFO queue keyed by recipient.
- **Long-polling** via `fetchWait` avoids the need for persistent connections
or WebSocket-style push. The client specifies a timeout in milliseconds; the
server blocks up to that duration using `tokio::sync::Notify`. The `recv
--stream` CLI flag loops `fetchWait` indefinitely for continuous message
reception.
- **Channel-aware routing** is supported: the `channelId` field in `enqueue`
and `fetch` allows scoping queues by channel (e.g., a 16-byte UUID for
1:1 conversations). When `channelId` is empty, messages go to the default
(legacy) queue.
---
## Control-Plane vs. Data-Plane Summary
```text
┌─────────────────────────────────────────────────────────────────────┐
│ Control Plane (AS) │
│ │
│ uploadKeyPackage ────> Store KeyPackage for identity │
│ fetchKeyPackage <──── Pop and return one KeyPackage │
│ uploadHybridKey ────> Store hybrid PQ public key │
│ fetchHybridKey <──── Return hybrid PQ public key │
│ │
│ Traffic: Infrequent. Once per group join (upload before, │
│ fetch during group add). │
└─────────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────────┐
│ Data Plane (DS) │
│ │
│ enqueue ────> Append payload to recipient queue │
│ fetch <──── Drain and return all queued payloads │
│ fetchWait <──── Long-poll drain with timeout │
│ │
│ Traffic: High-frequency. Every MLS message (Welcome, Commit, │
│ Application) flows through the DS. │
└─────────────────────────────────────────────────────────────────────┘
```
The separation means the AS can be rate-limited or placed behind stricter
access controls without affecting message throughput on the DS.
---
## State Transitions
The following diagram summarises the client-side state machine across all three
flows:
```text
┌──────────────┐
│ No State │
└──────┬───────┘
IdentityKeypair::generate()
┌──────────────┐
│ Identity │ Ed25519 keypair exists
│ Generated │ No KeyPackage, no group
└──────┬───────┘
generate_key_package() + uploadKeyPackage()
┌──────────────┐
│ Registered │ KeyPackage on AS
│ │ HPKE init key in DiskKeyStore
└──────┬───────┘
┌──────────────┴──────────────┐
│ │
create_group() join_group(welcome)
│ │
▼ ▼
┌─────────────┐ ┌──────────────┐
│ Group Owner │ │ Group Member │
│ (epoch 0) │ │ (epoch N) │
└──────┬──────┘ └──────┬───────┘
│ │
add_member() │
│ │
▼ ▼
┌──────────────────────────────────────────┐
│ Active Group Member │
│ │
│ send_message() -> enqueue via DS │
│ receive_message() <- fetch from DS │
│ │
│ Epoch advances on each Commit │
└──────────────────────────────────────────┘
```
---
## Further Reading
- [Architecture Overview](overview.md) -- system diagram and two-service model
- [Service Architecture](service-architecture.md) -- RPC method details and long-polling internals
- [GroupMember Lifecycle](../internals/group-member-lifecycle.md) -- detailed MLS state machine
- [KeyPackage Exchange Flow](../internals/keypackage-exchange.md) -- single-use semantics and AS internals
- [MLS (RFC 9420)](../protocol-layers/mls.md) -- key schedule, ratchet tree, and ciphersuite details
- [Forward Secrecy](../cryptography/forward-secrecy.md) -- how MLS provides forward secrecy
- [Post-Compromise Security](../cryptography/post-compromise-security.md) -- group healing after key compromise

View File

@@ -0,0 +1,171 @@
# Architecture Overview
quicnprotochat is an end-to-end encrypted group messaging system built in Rust.
This page describes the high-level architecture: the services that compose the
system, the dual-key cryptographic model, and how the pieces fit together.
---
## Two-Service Model
The server exposes two logical services through a single **NodeService** RPC
interface, bound to **port 7000** over QUIC + TLS 1.3:
| Logical Service | Responsibility |
|--------------------------|-----------------------------------------------------------------|
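| **Authentication Service (AS)** | Stores and distributes single-use MLS KeyPackages. Clients upload KeyPackages after identity generation; peers fetch them to add new members to a group. Also stores and serves each identity's hybrid (X25519 + ML-KEM-768) public key via `uploadHybridKey` / `fetchHybridKey`. |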
| **Delivery Service (DS)** | Store-and-forward relay for opaque payloads. The DS never inspects MLS ciphertext -- it routes solely by recipient Ed25519 public key (and optional channel ID). |
Combining both services into a single endpoint simplifies deployment and
reduces round-trips. The schema is defined in
[`schemas/node.capnp`](../wire-format/node-service-schema.md) as a unified
`NodeService` interface.
See [Service Architecture](service-architecture.md) for per-method details,
connection lifecycle, and the long-polling `fetchWait` mechanism.
---
## Dual-Key Model
quicnprotochat uses two independent asymmetric key pairs per client, each
serving a distinct role:
```text
quicnprotochat Key Model
┌──────────────────────────────────────────────────┐
│ │
│ X25519 static keypair (Noise transport) │
│ ───────────────────────────────────── │
│ - Generated once per node identity │
│ - Used in the Noise_XX handshake (M1 stack) │
│ - Provides mutual authentication + │
│ channel confidentiality at the TCP layer │
│ - Classical only (no PQ protection) │
│ - Managed by NoiseKeypair, zeroize-on-drop │
│ │
│ Ed25519 signing keypair (MLS identity) │
│ ────────────────────────────────────── │
│ - Generated once per user/device │
│ - Embedded in MLS BasicCredential │
│ - Signs KeyPackages, Commits, and group ops │
│ - Raw 32-byte public key is the AS index │
│ - Managed by IdentityKeypair, zeroize-on-drop │
│ │
└──────────────────────────────────────────────────┘
```
| Property | X25519 (Noise) | Ed25519 (MLS) |
|-------------------|-------------------------------------|--------------------------------------------|
| Curve | Curve25519 (Montgomery) | Ed25519 (Twisted Edwards) |
| Purpose | Transport authentication + secrecy | Identity binding, signing, MLS credentials |
| Crate | `x25519-dalek` | `ed25519-dalek` |
| Zeroize on drop | Yes (`StaticSecret`) | Yes (`Zeroizing<[u8; 32]>`) |
| PQ protection | None (classical X25519) | MLS key schedule uses DHKEM(X25519); hybrid PQ KEM available at envelope level |
For details on the cryptographic properties of each key type, see
[Ed25519 Identity Keys](../cryptography/identity-keys.md) and
[X25519 Transport Keys](../cryptography/transport-keys.md).
---
## System Diagram
```text
┌─────────────────┐ ┌─────────────────┐
│ Alice Client │ │ Bob Client │
│ │ │ │
│ IdentityKeypair │ │ IdentityKeypair │
│ (Ed25519) │ │ (Ed25519) │
│ │ │ │
│ GroupMember │ │ GroupMember │
│ (MLS state) │ │ (MLS state) │
│ │ │ │
│ NoiseKeypair* │ │ NoiseKeypair* │
│ (X25519, M1) │ │ (X25519, M1) │
└────────┬─────────┘ └────────┬─────────┘
│ │
│ QUIC + TLS 1.3 (quinn/rustls) │
│ ─── or ─── │
│ Noise_XX over TCP (snow, M1 stack) │
│ │
▼ ▼
┌────────────────────────────────────────────────────────────────────────────┐
│ NodeService (port 7000) │
│ │
│ ┌──────────────────────────┐ ┌───────────────────────────────────┐ │
│ │ Authentication Service │ │ Delivery Service │ │
│ │ │ │ │ │
│ │ uploadKeyPackage() │ │ enqueue(recipientKey, payload) │ │
│ │ fetchKeyPackage() │ │ fetch(recipientKey) │ │
│ │ uploadHybridKey() │ │ fetchWait(recipientKey, timeout) │ │
│ │ fetchHybridKey() │ │ │ │
│ │ │ │ Queues: DashMap + FileBackedStore│ │
│ │ Store: DashMap + │ │ │ │
│ │ FileBackedStore │ │ │ │
│ └──────────────────────────┘ └───────────────────────────────────┘ │
│ │
│ health() │
│ │
└────────────────────────────────────────────────────────────────────────────┘
```
**Key observations:**
1. The server never sees plaintext message content. MLS ciphertext is opaque to
the DS -- it merely routes by `recipientKey`.
2. KeyPackages are single-use (RFC 9420 requirement). The AS atomically removes
a KeyPackage on fetch to enforce this invariant.
3. The QUIC + TLS 1.3 stack is the primary transport (M3+). The Noise_XX over
TCP stack from M1 remains available for environments where QUIC is blocked.
---
## Protocol Layering
The system stacks three protocol layers:
1. **Transport** -- QUIC + TLS 1.3 (primary) or Noise_XX over TCP (M1
fallback). Provides confidentiality, integrity, and server authentication.
See [Protocol Stack](protocol-stack.md).
2. **Framing / RPC** -- Cap'n Proto serialisation and RPC. Provides zero-copy
typed messages, schema versioning, and async method dispatch.
See [Cap'n Proto Serialisation and RPC](../protocol-layers/capn-proto.md).
3. **End-to-End Encryption** -- MLS (RFC 9420). Provides group key agreement,
forward secrecy, and post-compromise security. The server never holds group
keys.
See [MLS (RFC 9420)](../protocol-layers/mls.md).
An optional fourth layer -- the **hybrid KEM envelope** (X25519 + ML-KEM-768)
-- wraps MLS payloads for post-quantum confidentiality at the per-message level.
See [Hybrid KEM](../protocol-layers/hybrid-kem.md).
---
## Crate Map
The implementation is split across four workspace crates:
| Crate | Role |
|----------------------------|-------------------------------------------------------------------|
| `quicnprotochat-core` | Crypto primitives, Noise transport, MLS state machine, hybrid KEM |
| `quicnprotochat-proto` | Cap'n Proto schemas, codegen, and serialisation helpers |
| `quicnprotochat-server` | QUIC listener, NodeService RPC, storage |
| `quicnprotochat-client` | QUIC client, CLI subcommands, state persistence |
See [Crate Responsibilities](crate-responsibilities.md) for a full breakdown
and dependency diagram.
---
## Further Reading
- [Protocol Stack](protocol-stack.md) -- layered comparison of the two transport stacks
- [Service Architecture](service-architecture.md) -- NodeService RPC methods, connection lifecycle, long-polling
- [End-to-End Data Flow](data-flow.md) -- registration, group creation, and message exchange sequence diagrams
- [Wire Format Overview](../wire-format/overview.md) -- Cap'n Proto schema reference
- [Cryptography Overview](../cryptography/overview.md) -- detailed cryptographic properties and threat model

View File

@@ -0,0 +1,208 @@
# Protocol Stack
quicnprotochat layers three protocol stages to move a plaintext message from
sender to recipient with end-to-end encryption, typed RPC framing, and
authenticated transport. This page describes each layer, explains why both the
QUIC and Noise transport stacks exist, and provides a side-by-side comparison.
---
## Primary Stack (M3+): QUIC + TLS 1.3
Starting from milestone M3, the primary transport is QUIC over UDP with TLS 1.3
negotiated by `quinn` and `rustls`. Cap'n Proto RPC rides on a bidirectional
QUIC stream.
```text
┌─────────────────────────────────────────────┐
│ Application / MLS ciphertext │ <- group key ratchet (RFC 9420)
├─────────────────────────────────────────────┤
│ Cap'n Proto RPC │ <- typed, schema-versioned framing
├─────────────────────────────────────────────┤
│ QUIC + TLS 1.3 (quinn / rustls) │ <- server auth + transport secrecy
└─────────────────────────────────────────────┘
```
### What each layer provides
**QUIC + TLS 1.3** (`quinn`, `rustls`)
- Encrypted, authenticated transport with a 1-RTT handshake (0-RTT when a
previous session is resumed).
- TLS 1.3 provides perfect forward secrecy per connection via ephemeral ECDHE.
- The server presents a self-signed certificate by default; the client pins
the server certificate via `--ca-cert`.
- ALPN protocol identifier: `capnp`.
- Multiplexed streams over a single UDP socket -- one bidirectional stream
per RPC session.
**Cap'n Proto RPC** (`capnp`, `capnp-rpc`)
- Zero-copy, schema-versioned serialisation.
- Asynchronous RPC with promise pipelining (multiple in-flight calls).
- The `NodeService` interface (defined in `schemas/node.capnp`) multiplexes
Authentication and Delivery operations on a single connection.
- The two-party VatNetwork runs over `tokio::io::compat` adapters wrapping
QUIC send/recv streams.
**MLS (RFC 9420)** (`openmls`, `openmls_rust_crypto`)
- Group key agreement with ratchet-tree-based key schedule.
- Forward secrecy: messages from past epochs remain confidential even if a
member's current key material is later compromised.
- Post-compromise security (PCS): the group heals after a compromise once an
honest update occurs.
- Identity binding: each member's Ed25519 public key is embedded in the MLS
`BasicCredential`.
- Ciphersuite: `MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519`.
---
## M1 Stack: Noise_XX over TCP
The original milestone-1 transport uses a Noise Protocol Framework handshake
directly over TCP. This stack is retained for environments where QUIC (UDP) is
blocked by middleboxes.
```text
TCP connection
└── Noise_XX handshake (snow)
└── Authenticated encrypted channel (ChaCha20-Poly1305)
└── [u32 frame_len LE][Cap'n Proto encoded message]
└── Cap'n Proto RPC (capnp-rpc)
```
### Layer details
**TCP**
- Reliable, ordered byte stream.
- No built-in encryption or authentication.
**Noise_XX** (`snow`)
- Pattern: `Noise_XX_25519_ChaChaPoly_BLAKE2s`.
- Three-message handshake that mutually authenticates both peers' static
X25519 keys:
```text
XX handshake (3 messages):
-> e (initiator sends ephemeral public key)
<- e, ee, s, es (responder: DH + static key)
-> s, se (initiator: static key + final DH)
```
- After the handshake, every frame is encrypted with ChaCha20-Poly1305 (AEAD)
using session keys derived from the Noise key schedule.
- Maximum Noise message size: 65,535 bytes.
**Length-Prefixed Codec** (`LengthPrefixedCodec` in `quicnprotochat-core`)
- Each frame is prefixed by a 4-byte little-endian `u32` length field.
- Little-endian was chosen for consistency with Cap'n Proto's segment table
encoding.
- Wire format:
```text
┌──────────────────────────┬──────────────────────────────────────┐
│ length (4 bytes, LE u32)│ payload (length bytes) │
└──────────────────────────┴──────────────────────────────────────┘
```
- Maximum payload size is `NOISE_MAX_MSG` (65,535 bytes), enforced on both
encode and decode.
- See [Length-Prefixed Framing Codec](../wire-format/framing-codec.md) for the
full specification.
**Cap'n Proto RPC**
- Same schema and RPC interface as the QUIC stack.
- The `NoiseTransport::into_capnp_io()` method bridges the message-oriented
Noise channel to the byte-stream interface that `capnp-rpc`'s
`twoparty::VatNetwork` expects, using a `tokio::io::duplex` pipe and a
background shuttle task.
---
## Why Both Stacks Exist
| Concern | QUIC + TLS 1.3 | Noise_XX over TCP |
|------------------------|----------------------------------------|----------------------------------------|
| **Milestone** | M3+ (primary) | M1 (original, retained) |
| **UDP availability** | Requires UDP; may be blocked on some networks | TCP-only; works everywhere |
| **Connection setup** | 1-RTT (or 0-RTT on resumption) | 1-RTT TCP + 1.5-RTT Noise handshake |
| **Multiplexing** | Native QUIC stream multiplexing | Single TCP connection, single stream |
| **Authentication** | Server cert (self-signed / CA-issued) | Mutual static-key authentication |
| **PQ gap** | TLS 1.3 key exchange is classical ECDHE | Noise key exchange is classical X25519 |
| **Crate** | `quinn`, `rustls` | `snow` |
Both stacks carry the same Cap'n Proto RPC and MLS layers on top, so
application logic is transport-agnostic. The Noise_XX stack may also serve as a
peer-to-peer transport in future mesh topologies where a QUIC server
certificate model does not apply.
---
## Comparison Table
| Layer | Provides | Crate(s) |
|-------------|------------------------------------------------------------------|-----------------------------------------|
| **Transport: QUIC + TLS 1.3** | Confidentiality, server authentication, forward secrecy, multiplexed streams, congestion control | `quinn`, `rustls` |
| **Transport: Noise_XX** | Confidentiality, mutual authentication, forward secrecy (per-session) | `snow` |
| **Framing: Cap'n Proto** | Zero-copy typed serialisation, schema versioning, async RPC with promise pipelining | `capnp`, `capnp-rpc` |
| **Encryption: MLS** | Group key agreement, forward secrecy, post-compromise security, identity binding | `openmls`, `openmls_rust_crypto` |
| **Encryption: Hybrid KEM** (optional) | Post-quantum confidentiality for individual payloads (X25519 + ML-KEM-768) | `ml-kem`, `x25519-dalek`, `chacha20poly1305`, `hkdf` |
---
## Data Path Summary
A plaintext message traverses the stack as follows:
```text
Sender Recipient
────── ─────────
plaintext bytes
MLS create_message()
│ ── encrypts with group AEAD key (AES-128-GCM) ──
TLS-encoded MlsMessageOut (opaque ciphertext blob)
Cap'n Proto: enqueue(recipientKey, payload)
│ ── serialised into NodeService RPC call ──
QUIC stream (TLS 1.3 encrypted) ─── or ─── Noise frame (ChaCha20-Poly1305)
│ │
▼ ▼
╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ network ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌
│ │
▼ ▼
Server: NodeService.enqueue() stores payload in FIFO queue
Cap'n Proto: fetch() / fetchWait() returns payload
MLS process_message()
│ ── decrypts with group AEAD key ──
plaintext bytes
```
The server **never** holds the MLS group key. It sees only the encrypted
`MlsMessageOut` blob.
---
## Further Reading
- [Architecture Overview](overview.md) -- high-level system diagram and dual-key model
- [Noise_XX Handshake](../protocol-layers/noise-xx.md) -- deep dive into the three-message handshake
- [QUIC + TLS 1.3](../protocol-layers/quic-tls.md) -- QUIC configuration, ALPN, and certificate handling
- [Cap'n Proto Serialisation and RPC](../protocol-layers/capn-proto.md) -- schema design and VatNetwork wiring
- [MLS (RFC 9420)](../protocol-layers/mls.md) -- ciphersuite selection, key schedule, and ratchet tree
- [Hybrid KEM: X25519 + ML-KEM-768](../protocol-layers/hybrid-kem.md) -- post-quantum envelope encryption

View File

@@ -0,0 +1,259 @@
# Service Architecture
The quicnprotochat server exposes a single **NodeService** RPC endpoint that
combines Authentication and Delivery operations. This page documents the RPC
interface, per-connection lifecycle, storage model, long-polling mechanism, and
authentication context.
---
## NodeService Endpoint
A single QUIC + TLS 1.3 listener on **port 7000** serves all operations.
The schema is defined in `schemas/node.capnp` and documented in
[NodeService Schema](../wire-format/node-service-schema.md).
```text
NodeService (port 7000)
├── Authentication methods
│ ├── uploadKeyPackage(identityKey, package, auth) -> fingerprint
│ ├── fetchKeyPackage(identityKey, auth) -> package
│ ├── uploadHybridKey(identityKey, hybridPublicKey) -> ()
│ └── fetchHybridKey(identityKey) -> hybridPublicKey
├── Delivery methods
│ ├── enqueue(recipientKey, payload, channelId, version, auth) -> ()
│ ├── fetch(recipientKey, channelId, version, auth) -> payloads
│ └── fetchWait(recipientKey, channelId, version, timeoutMs, auth) -> payloads
└── Operational
└── health() -> status
```
---
## RPC Method Reference
### Authentication Service Methods
| Method | Params | Returns | Semantics |
|----------------------|-------------------------------------|------------------|-----------|
| `uploadKeyPackage` | `identityKey` (32 B Ed25519 pk), `package` (TLS-encoded KeyPackage), `auth` | `fingerprint` (SHA-256 of package) | Appends the KeyPackage to a per-identity FIFO queue. The fingerprint lets the client detect server-side tampering. Max package size: 1 MB. |
| `fetchKeyPackage` | `identityKey` (32 B), `auth` | `package` (or empty `Data`) | Atomically pops and returns the oldest KeyPackage for the identity. Returns empty bytes if none are stored. Single-use semantics per RFC 9420. |
| `uploadHybridKey` | `identityKey` (32 B), `hybridPublicKey` (X25519 pk + ML-KEM-768 ek) | `()` | Stores (or replaces) the hybrid PQ public key for envelope-level post-quantum encryption. |
| `fetchHybridKey` | `identityKey` (32 B) | `hybridPublicKey` (or empty `Data`) | Returns the stored hybrid public key for a peer, or empty if none. |
### Delivery Service Methods
| Method | Params | Returns | Semantics |
|--------------|------------------------------------------------------------------------|----------------------|-----------|
| `enqueue` | `recipientKey` (32 B), `payload` (opaque), `channelId`, `version`, `auth` | `()` | Appends `payload` to the recipient's FIFO queue. Max payload: 5 MB. Wakes any `fetchWait` waiter for this recipient. Supported versions: 0 (legacy), 1 (current). |
| `fetch` | `recipientKey` (32 B), `channelId`, `version`, `auth` | `payloads: List(Data)` | Atomically drains and returns the full queue in FIFO order. Returns empty list if nothing is pending. |
| `fetchWait` | `recipientKey` (32 B), `channelId`, `version`, `timeoutMs`, `auth` | `payloads: List(Data)` | Same as `fetch`, but if the queue is empty and `timeoutMs > 0`, blocks up to `timeoutMs` milliseconds waiting for a `Notify` signal from `enqueue`. Returns whatever is in the queue when the wait completes or times out. |
### Operational Methods
| Method | Params | Returns | Semantics |
|----------|--------|-----------------|-----------|
| `health` | none | `status: Text` | Returns `"ok"`. Used for liveness/readiness probes. |
---
## Per-Connection Lifecycle
Each incoming QUIC connection follows this sequence:
```text
┌──────────────────────────────────────────────────────────────────────┐
│ Client Server │
│ │
│ 1. UDP packet -> │
│ QUIC INITIAL │
│ │
│ 2. <- QUIC HANDSHAKE │
│ TLS 1.3 ServerHello + │
│ Certificate (self-signed) │
│ ALPN: "capnp" │
│ │
│ 3. Client verifies server │
│ cert against pinned CA │
│ cert (--ca-cert flag) │
│ │
│ 4. QUIC connection established │
│ │
│ 5. Client opens bidirectional ──────────> Server accepts bi stream │
│ QUIC stream (open_bi) (accept_bi) │
│ │
│ 6. tokio_util::compat adapters wrap the send/recv halves │
│ into AsyncRead + AsyncWrite │
│ │
│ 7. capnp-rpc twoparty::VatNetwork │
│ Client Side::Client Server Side::Server │
│ │
│ 8. RpcSystem::new() starts │
│ promise-pipelined RPC loop │
│ │
│ 9. Client bootstraps │
│ node_service::Client NodeServiceImpl created │
│ (shares Arc<FileBackedStore>, │
│ Arc<DashMap<..., Notify>>) │
│ │
│ 10. RPC calls flow over the bidirectional stream │
│ until either side closes the connection. │
└──────────────────────────────────────────────────────────────────────┘
```
### LocalSet requirement
`capnp-rpc` uses `Rc<RefCell<>>` internally, making it `!Send`. Therefore:
- The server runs the entire accept loop inside a `tokio::task::LocalSet`.
- Each connection handler is `spawn_local`, ensuring all RPC futures stay on a
single thread.
- The client wraps each subcommand invocation in its own `LocalSet::run_until`.
This is a fundamental constraint of the Cap'n Proto RPC runtime in Rust.
Attempts to spawn RPC futures on the multi-threaded Tokio executor (for example
with `tokio::spawn`) will fail with a compile error, because those futures are
not `Send`.
---
## Storage Model
`NodeServiceImpl` holds two pieces of shared state:
### FileBackedStore
```text
FileBackedStore
├── key_packages: Mutex<HashMap<Vec<u8>, VecDeque<Vec<u8>>>>
│ Key: Ed25519 public key (32 bytes)
│ Value: FIFO queue of TLS-encoded KeyPackage blobs
│ File: data/keypackages.bin (bincode)
├── deliveries: Mutex<HashMap<ChannelKey, VecDeque<Vec<u8>>>>
│ ChannelKey: { channel_id: Vec<u8>, recipient_key: Vec<u8> }
│ Value: FIFO queue of opaque payload blobs
│ File: data/deliveries.bin (bincode, v2 format)
└── hybrid_keys: Mutex<HashMap<Vec<u8>, Vec<u8>>>
Key: Ed25519 public key (32 bytes)
Value: serialised HybridPublicKey blob
File: data/hybridkeys.bin (bincode)
```
Every mutation (upload, fetch, enqueue) acquires the relevant `Mutex`, modifies
the in-memory `HashMap`, and then flushes the entire map to disk as a bincode
blob. This is intentionally simple for MVP-scale workloads. A production
deployment would replace this with an embedded database or external store.
The delivery map supports a **v1 -> v2 upgrade path**: if `deliveries.bin`
contains the legacy `QueueMapV1` format (keyed by `recipientKey` only), the
store transparently upgrades entries by wrapping them in `ChannelKey` with an
empty `channel_id`.
### DashMap Waiters
```text
Arc<DashMap<Vec<u8>, Arc<Notify>>>
Key: recipient Ed25519 public key (32 bytes)
Value: tokio::sync::Notify instance
```
The waiters map is orthogonal to `FileBackedStore`. It lives entirely in
memory and serves the `fetchWait` long-polling mechanism:
1. `enqueue` calls `waiter(&recipient_key).notify_waiters()` after storing the
payload.
2. `fetchWait` first tries a regular `fetch`. If the queue is empty and
`timeoutMs > 0`:
- Look up or insert a `Notify` for the recipient.
- `tokio::time::timeout(Duration::from_millis(timeoutMs), notify.notified())`
- When notified (or on timeout), perform a second `fetch` and return
whatever is available.
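This design avoids busy-polling while keeping lock contention low: the waiters
map is not strictly lock-free (DashMap uses sharded RwLocks internally), but
each lookup only locks the shard that holds the recipient's entry.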
---
## Auth Struct
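The KeyPackage and delivery RPC methods, which modify or read user-specific
state, accept an `Auth` parameter (the hybrid-key methods, as listed in the
method reference above, currently do not):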
```capnp
struct Auth {
version @0 :UInt16; # 0 = legacy/none, 1 = token-based auth
accessToken @1 :Data; # opaque bearer token
deviceId @2 :Data; # optional UUID for auditing/rate limiting
}
```
### Version semantics
| Version | Meaning |
|---------|------------------------------------------------------------|
| 0 | Legacy / no authentication. The server accepts the request without checking credentials. Suitable for development and testing. |
| 1 | Token-based authentication. The `accessToken` field should contain an opaque bearer token issued at login. The server validates the token against a token store (not yet implemented -- see [Auth, Devices, and Tokens](../roadmap/authz-plan.md)). |
The server validates the `version` field on every request via `validate_auth()`.
Requests with unsupported versions are rejected with a Cap'n Proto error.
### Client-side usage
The client CLI accepts `--access-token` and `--device-id` flags (or the
corresponding environment variables). These are bundled into a `ClientAuth`
struct and injected into every outgoing RPC call via the `set_auth()` helper.
Currently, the client sends `version = 0` with empty token and device ID by
default. When the token-based auth flow is implemented, the client will populate
these fields.
---
## Validation and Limits
The server enforces the following constraints on every RPC call:
| Constraint | Value | Error on violation |
|-----------------------------|--------------------|--------------------|
| `identityKey` / `recipientKey` length | Exactly 32 bytes | Cap'n Proto error: "must be exactly 32 bytes" |
| KeyPackage size | <= 1 MB | Cap'n Proto error: "package exceeds max size" |
| Payload size | <= 5 MB | Cap'n Proto error: "payload exceeds max size" |
| Wire version | 0 or 1 | Cap'n Proto error: "unsupported wire version" |
| Auth version | 0 or 1 | Cap'n Proto error: "unsupported auth version" |
| KeyPackage non-empty | `package.len() > 0`| Cap'n Proto error: "package must not be empty" |
| Payload non-empty | `payload.len() > 0`| Cap'n Proto error: "payload must not be empty" |
---
## Configuration
The server binary is configured via CLI flags or environment variables:
| Flag | Env var | Default | Description |
|----------------|----------------------------|----------------------|-------------|
| `--listen` | `QUICNPROTOCHAT_LISTEN` | `0.0.0.0:7000` | QUIC listen address (host:port). |
| `--data-dir` | `QUICNPROTOCHAT_DATA_DIR` | `data` | Directory for persisted KeyPackages, delivery queues, and hybrid keys. |
| `--tls-cert` | `QUICNPROTOCHAT_TLS_CERT` | `data/server-cert.der` | Path to TLS certificate (DER). Auto-generated if missing. |
| `--tls-key` | `QUICNPROTOCHAT_TLS_KEY` | `data/server-key.der` | Path to TLS private key (DER). Auto-generated if missing. |
If the TLS certificate or key files do not exist at startup, the server
auto-generates a self-signed certificate for `localhost`, `127.0.0.1`, and
`::1` using `rcgen`.
Logging level is controlled by the `RUST_LOG` environment variable (default:
`info`).
---
## Further Reading
- [Architecture Overview](overview.md) -- two-service model and dual-key overview
- [NodeService Schema](../wire-format/node-service-schema.md) -- full Cap'n Proto schema
- [End-to-End Data Flow](data-flow.md) -- sequence diagrams showing registration, group creation, and messaging
- [Delivery Service Internals](../internals/delivery-service.md) -- queue routing and channel-aware delivery
- [Authentication Service Internals](../internals/authentication-service.md) -- KeyPackage lifecycle
- [Storage Backend](../internals/storage-backend.md) -- FileBackedStore details and upgrade path
- [Auth, Devices, and Tokens](../roadmap/authz-plan.md) -- planned token-based authentication

View File

@@ -0,0 +1,269 @@
# Coding Standards
This page defines the engineering standards for quicnprotochat. These are
non-negotiable -- all code merged into the repository must conform to these
rules. The standards exist to ensure that every milestone produces
production-ready, auditable, and secure code.
---
## Production-Ready Only
Every deliverable must be complete and functional. The following are prohibited
in any merged code:
- `todo!()` or `unimplemented!()` macros
- Stub implementations or placeholder logic
- Mock objects in production code paths (mocks are acceptable only in test code)
- Commented-out code blocks
- `#[allow(unused)]` on production code (acceptable on generated code from
Cap'n Proto codegen)
If a feature is out of scope for the current milestone, it is **explicitly
omitted** with a documented reason (in an ADR or code comment explaining why it
is deferred), not silently stubbed.
---
## YAGNI / KISS / DRY
- **YAGNI (You Aren't Gonna Need It):** Do not add features, abstractions, or
generic type parameters that are not required by the current milestone.
- **KISS (Keep It Simple):** Favour clarity over cleverness. A straightforward
`match` statement is preferred over a complex trait hierarchy.
- **DRY (Don't Repeat Yourself):** Extract shared logic into functions or
modules, but do not create abstractions prematurely. Two occurrences is a
coincidence; three is a pattern worth extracting.
---
## Spec-First Development
Document the design before implementing it:
1. **ADR (Architecture Decision Record)** for significant design decisions. ADRs
live in `docs/src/design-rationale/` and are referenced from the
[Design Rationale](../design-rationale/overview.md) section.
2. **Doc comments** on every public API (`///` for items, `//!` for modules).
Doc comments must explain:
- What the function/type does.
- Invariants and preconditions.
- Error conditions and what each error variant means.
- Examples where the API is non-obvious.
```rust
/// Creates a new MLS group with the given group ID and returns
/// the initial `GroupMember` state.
///
/// # Errors
///
/// Returns `GroupError::CryptoBackend` if the MLS crypto provider
/// fails to generate the initial key schedule.
///
/// Returns `GroupError::InvalidGroupId` if `group_id` is empty.
pub fn create_group(
group_id: &[u8],
identity: &IdentityKeypair,
) -> Result<GroupMember, GroupError> {
// ...
}
```
---
## Security-by-Design
### Secrets and Key Material
- All private key material must be wrapped in `Zeroizing<T>` (from the `zeroize`
crate) or implement `Zeroize + ZeroizeOnDrop`.
- No secret material in log output at any level (`TRACE`, `DEBUG`, `INFO`,
`WARN`, `ERROR`).
- When logging key-related operations, log only fingerprints (SHA-256 of the
public key), never the key bytes themselves.
### Error Handling
- No `unwrap()` or `expect()` on cryptographic operations. All crypto errors
must be typed and propagated.
- Use `thiserror` for library error types (`quicnprotochat-core`,
`quicnprotochat-proto`) and `anyhow` for application-level error handling
(`quicnprotochat-server`, `quicnprotochat-client`).
- `unwrap()` is acceptable only in:
- Test code.
- Cases where the invariant is provably guaranteed by the type system
(e.g., indexing a fixed-size array with a constant).
### Constant-Time Comparisons
- Use constant-time comparison (`subtle::ConstantTimeEq` or equivalent) when
comparing authentication tokens, key fingerprints, or any value that could
be used in a timing side-channel attack.
- Never use `==` to compare secrets or tokens in authentication paths.
### Input Validation
- Validate all incoming data at the boundary (RPC handler entry point) before
passing it to internal logic.
- Length checks: group ID (32 bytes), identity key (32 bytes), channel ID
(16 bytes), payload (max 5 MB).
- Reject unexpected enum variants or unknown wire versions.
---
## Containerisation
- The server runs in Docker. The `Dockerfile` and `docker-compose.yml` must
always be kept up to date with the current build.
- Multi-stage build: `rust:bookworm` builder stage, `debian:bookworm-slim`
runtime stage.
- The Docker image must build and run correctly after every merge to the main
branch.
---
## Dependency Hygiene
### Pinned Major Versions
All dependencies are pinned to a major version in `Cargo.toml`. Minor and patch
updates are allowed; major version bumps require justification and review.
### Preferred Ecosystem
| Domain | Preferred Crate(s) |
|--------|-------------------|
| Classical crypto (signing) | `ed25519-dalek` |
| Classical crypto (key exchange) | `x25519-dalek` |
| Noise protocol | `snow` |
| MLS | `openmls`, `openmls_rust_crypto` |
| Post-quantum KEM | `ml-kem` |
| Serialisation / RPC | `capnp`, `capnp-rpc` |
| Async runtime | `tokio` |
| Zeroisation | `zeroize` |
Do not introduce new dependencies without justification. In particular:
- No alternative async runtimes (async-std, smol).
- No alternative serialisation formats (protobuf, MessagePack, JSON) for wire
protocol use.
- No alternative crypto libraries unless the preferred crate lacks required
functionality.
### Dependency Auditing
- `cargo audit` must pass in CI with no known vulnerabilities.
- `cargo deny check` for license compatibility and duplicate detection.
---
## Git Standards
### Signed Commits
All commits must be GPG-signed. Unsigned commits will be rejected by CI.
Configure signing:
```bash
git config --global commit.gpgsign true
git config --global user.signingkey <YOUR_GPG_KEY_ID>
```
### Conventional Commits
Commit messages follow the [Conventional Commits](https://www.conventionalcommits.org/)
specification:
| Prefix | Use |
|--------|-----|
| `feat:` | A new feature or capability |
| `fix:` | A bug fix |
| `chore:` | Maintenance (deps, CI, config) |
| `docs:` | Documentation changes |
| `test:` | Adding or updating tests |
| `refactor:` | Code restructuring without behaviour change |
### Commit Message Format
```
<type>: <short description>
<body: explain WHY, not just WHAT>
```
The body describes the motivation and context for the change, not just a
restatement of the diff. Why was this change necessary? What problem does it
solve? What alternatives were considered?
Example:
```
feat: add KeyPackage TTL eviction on fetch
KeyPackages older than 24 hours are now filtered out at fetch time,
preventing stale key material from being used in MLS group additions.
Background sweep is deferred to M6 (requires persistent storage).
```
### Branch Strategy
- Feature branches per milestone: `feat/m1-noise-transport`,
`feat/m2-keypackage-as`, etc.
- Branch names use lowercase with hyphens.
- All work happens on feature branches; direct commits to the main branch are
prohibited.
---
## Code Style
### Formatting
- `cargo fmt` with default settings. No custom `rustfmt.toml` overrides.
- CI rejects unformatted code.
### Linting
- `cargo clippy` with default lints. No `#[allow(clippy::...)]` without a
comment explaining why the lint is suppressed.
- CI treats clippy warnings as errors.
### Naming
- Types: `PascalCase` (Rust convention).
- Functions and variables: `snake_case`.
- Constants: `SCREAMING_SNAKE_CASE`.
- Module names: `snake_case`, matching the file name.
### Module Organisation
- One concept per file. A file should not exceed ~500 lines (guideline, not
hard rule).
- Public API at the top of the file; private helpers at the bottom.
- `mod.rs` files are avoided; use `foo.rs` + `foo/` directory when a module
has submodules.
---
## Review Checklist
Before presenting any code for review, verify:
- [ ] No missing error handling (all `Result` values handled).
- [ ] No security gaps (secrets zeroized, no secret logging, typed crypto errors).
- [ ] No incomplete implementations (no `todo!()`, `unimplemented!()`, stubs).
- [ ] No deviation from these standards.
- [ ] Doc comments on all public items.
- [ ] Tests for all new functionality (see [Testing Strategy](testing.md)).
- [ ] `cargo fmt`, `cargo clippy`, and `cargo test --workspace` all pass.
---
## Cross-references
- [Testing Strategy](testing.md) -- test structure and conventions
- [Design Rationale](../design-rationale/overview.md) -- ADR index
- [Milestones](../roadmap/milestones.md) -- what each milestone delivers
- [Production Readiness WBS](../roadmap/production-readiness.md) -- governance and CI requirements

View File

@@ -0,0 +1,239 @@
# Testing Strategy
This page describes the testing structure, conventions, and current coverage for
quicnprotochat. All tests run with `cargo test --workspace` and must pass before
any code is merged.
For the coding standards that tests must follow, see
[Coding Standards](coding-standards.md).
---
## Test Organisation
### Unit Tests
Unit tests live alongside the code they test, in `#[cfg(test)] mod tests` blocks
at the bottom of each source file. They test individual functions and types in
isolation.
**quicnprotochat-core:**
| Module | Tests | What they cover |
|--------|-------|----------------|
| `codec` | 7 tests | Length-prefixed frame encoding/decoding, edge cases (empty payload, max size, partial frame, exact boundary) |
| `keypair` | 3 tests | Ed25519 keypair generation, public key extraction, deterministic re-derivation |
| `group` | 2 tests | Group round-trip (create + add + join + send + recv), group\_id lifecycle |
| `hybrid_kem` | 11 tests | Encapsulate/decapsulate round-trip, key generation, combiner correctness, wrong-key rejection, serialisation |
**quicnprotochat-proto:**
| Module | Tests | What they cover |
|--------|-------|----------------|
| `lib` | 3 tests | Cap'n Proto builder/reader round-trip, canonical serialisation, schema validation |
### Integration Tests
Integration tests live in `crates/quicnprotochat-client/tests/` and test the
full client-server interaction. Each test spawns a server using `tokio::spawn`
within the same test binary, then runs client operations against it.
| File | Milestone | What it covers |
|------|-----------|---------------|
| `noise_transport.rs` | M1 | Noise\_XX handshake over TCP, Ping/Pong frame exchange, connection lifecycle |
| `auth_service.rs` | M2 | KeyPackage upload via AS, KeyPackage fetch (single-use consume semantics), identity key validation |
| `mls_group.rs` | M3 | Full MLS round-trip: register state, create group, add member via Welcome, send encrypted message, receive and decrypt |
### Test Pattern
All integration tests follow the same pattern:
```rust
#[tokio::test]
async fn test_something() {
// 1. Start server in background
let server_handle = tokio::spawn(async move {
server::run(config).await.unwrap();
});
// 2. Wait for server to be ready
tokio::time::sleep(Duration::from_millis(100)).await;
// 3. Run client operations
let result = client::do_something(server_addr).await;
// 4. Assert
assert!(result.is_ok());
// ...
// 5. Cleanup
server_handle.abort();
}
```
This pattern ensures tests are self-contained and do not require an external
server process.
---
## Running Tests
### Full Workspace
```bash
cargo test --workspace
```
This runs all unit tests and integration tests across all four crates.
### Single Crate
```bash
cargo test -p quicnprotochat-core
cargo test -p quicnprotochat-proto
cargo test -p quicnprotochat-server
cargo test -p quicnprotochat-client
```
### Single Test
```bash
cargo test -p quicnprotochat-core -- codec::tests::test_round_trip
cargo test -p quicnprotochat-client --test mls_group
```
### With Output
```bash
cargo test --workspace -- --nocapture
```
---
## Current Results
All tests pass as of the M3 milestone on branch `feat/m1-noise-transport`.
Summary:
| Crate | Unit Tests | Integration Tests | Total |
|-------|-----------|-------------------|-------|
| `quicnprotochat-core` | 23 | -- | 23 |
| `quicnprotochat-proto` | 3 | -- | 3 |
| `quicnprotochat-server` | 0 | -- | 0 |
| `quicnprotochat-client` | 0 | 6 | 6 |
| **Total** | **26** | **6** | **32** |
---
## Test Conventions
### Naming
Test functions use descriptive names that state what is being tested and the
expected outcome:
```rust
#[test]
fn encode_decode_round_trip_preserves_payload() { ... }
#[test]
fn empty_payload_produces_length_zero_frame() { ... }
#[test]
fn fetch_consumes_keypackage_single_use() { ... }
```
### Assertions
- Prefer `assert_eq!(actual, expected)` over `assert!(actual == expected)` so that a failure prints both values.
- Use `assert!(result.is_ok(), "descriptive message: {result:?}")` for
`Result` checks.
- For crypto operations, assert on specific error variants, not just
`is_err()`.
### No External Dependencies
Tests must not depend on external services, network access, or filesystem state
outside the test's temporary directory. The `tokio::spawn` pattern for
client-server tests ensures everything runs in-process.
### Determinism
Tests must be deterministic. If randomness is needed (e.g., key generation),
the test must not depend on specific random values -- only on the properties of
the output (correct length, successful round-trip, etc.).
---
## Planned Testing Enhancements
The following testing improvements are planned for future milestones:
### Fuzzing Targets (M5+)
Fuzz testing for parser and deserialisation code:
- **Cap'n Proto message parser:** Feed arbitrary bytes to the Cap'n Proto reader
and verify it either parses correctly or returns a typed error (no panics,
no undefined behaviour).
- **MLS message handler:** Feed arbitrary `MLSMessage` bytes to the
`GroupMember::receive_message` path.
- **Length-prefixed codec:** Fuzz the frame decoder with arbitrary byte streams.
Tool: `cargo-fuzz` with `libfuzzer`.
### Golden-Wire Fixtures (M5+)
Serialised test vectors for regression testing across versions:
- Capture the wire bytes of known-good Cap'n Proto messages (Envelope, Auth,
Delivery structs) at the current version.
- Store as `.bin` files in `tests/fixtures/`.
- Each test deserialises the fixture and verifies the expected field values.
- When the wire format changes, fixtures are updated with a version bump.
This catches accidental wire-format regressions that would break client-server
compatibility.
### N-1 Compatibility Tests (M5+)
Test that a client built at version N can communicate with a server built at
version N-1 (and vice versa):
- Build two versions of the binary (current and previous release).
- Run the older server with the newer client and verify all RPCs succeed.
- Run the newer server with the older client and verify graceful degradation
(legacy mode works, new features return clean errors).
### Criterion Benchmarks (M5)
Performance benchmarks using [Criterion.rs](https://docs.rs/criterion/):
- Key generation latency (Ed25519, X25519, ML-KEM-768).
- MLS encap/decap (KeyPackage generation, Welcome processing).
- Group-add latency scaling: 2, 10, 100, 1000 members.
- Cap'n Proto serialise/deserialise throughput.
- Noise handshake latency.
Benchmarks run separately from tests (`cargo bench`) and are not part of the
CI gate, but are tracked for regression detection.
### Docker-based E2E Tests (Phase 5)
End-to-end tests using `testcontainers-rs` (see
[Future Research: Testcontainers-rs](../roadmap/future-research.md#testcontainers-rs)):
- Spin up server container from the Docker image.
- Run client operations from the test process against the containerised server.
- Verify real network boundaries, container startup, and multi-process
interactions.
---
## Cross-references
- [Coding Standards](coding-standards.md) -- quality requirements for test code
- [Milestones](../roadmap/milestones.md) -- which tests were added at each milestone
- [Production Readiness WBS](../roadmap/production-readiness.md) -- Phase 5 (E2E Harness and Security Tests)
- [Future Research: Testcontainers-rs](../roadmap/future-research.md#testcontainers-rs) -- Docker-based testing

View File

@@ -0,0 +1,205 @@
# Forward Secrecy
Forward secrecy (FS), also called perfect forward secrecy (PFS), is a property
of a cryptographic protocol that guarantees: **if a long-term secret key is
compromised, past session keys cannot be recovered.** In other words, an
attacker who obtains today's long-term key cannot use it to decrypt messages
recorded yesterday.
quicnprotochat provides forward secrecy at two independent layers: the transport
layer and the application layer. Even if one layer's FS mechanism is defeated,
the other continues to protect message confidentiality.
## Transport Layer Forward Secrecy
### TLS 1.3 (QUIC)
The QUIC transport (via `quinn 0.11` + `rustls 0.23`) uses TLS 1.3, which
removes the non-forward-secret key exchanges of earlier versions. Unlike
TLS 1.2, which allowed static RSA key exchange (no FS), every
certificate-authenticated TLS 1.3 handshake uses an ephemeral Diffie-Hellman
exchange -- in practice ECDHE (Elliptic Curve Diffie-Hellman Ephemeral).
In each TLS 1.3 handshake:
1. Both client and server generate ephemeral ECDHE key pairs.
2. They exchange public keys and compute a shared secret via Diffie-Hellman.
3. Session keys are derived from the shared secret using HKDF.
4. The ephemeral private keys are discarded after key derivation.
Because the ephemeral keys exist only for the duration of the handshake,
compromising the server's long-term TLS certificate key (currently self-signed
in quicnprotochat) does not reveal past session keys.
### Noise\_XX
Inside the QUIC stream, the Noise\_XX handshake
(`Noise_XX_25519_ChaChaPoly_BLAKE2s`) provides an additional layer of forward
secrecy. The Noise\_XX pattern uses both ephemeral and static X25519 keys:
```text
→ e               Initiator sends ephemeral public key
← e, ee, s, es    Responder: ephemeral, DH(e,e), static, DH(e,s)
→ s, se           Initiator: static, DH(s,e)
```
The `ee` DH (ephemeral-ephemeral) provides forward secrecy: even if both
parties' static keys (`s`) are later compromised, the ephemeral keys that
contributed to `ee` have already been discarded.
The `es` and `se` DH operations mix in the static keys for authentication, but
the session key depends on the ephemeral contribution. An attacker who
compromises only the static key learns the identity of the parties but cannot
recover the session key without the ephemeral key.
See [X25519 Transport Keys](transport-keys.md) for details on the static
keypair.
## Application Layer Forward Secrecy
### MLS Epoch Ratchet
The MLS protocol (RFC 9420) provides forward secrecy at the application layer
through its epoch ratchet mechanism. This is independent of the transport
layer's FS and protects message content even if transport session keys are
leaked.
Each MLS group maintains a **ratchet tree** -- a binary tree where each leaf
represents a group member and internal nodes hold derived key material. The
tree defines a current **epoch**, which determines the encryption keys for all
messages in that epoch.
When the epoch advances (via a Commit message):
1. The ratchet tree is updated with new key material from the committing member.
2. New epoch keys are derived from the updated tree.
3. **Old epoch keys are deleted.**
This deletion is the mechanism that provides forward secrecy: once old epoch
keys are erased, messages encrypted under those keys cannot be decrypted, even
if the current group state is compromised.
In quicnprotochat, epoch advancement occurs when:
- `add_member()` is called, which creates a Commit and calls
`merge_pending_commit()`.
- A received Commit is processed via `receive_message()`, which calls
`merge_staged_commit()`.
```rust
// Epoch advances here -- old keys deleted internally by openmls
group.merge_pending_commit(&self.backend)?; // sender side
group.merge_staged_commit(&self.backend, *staged)?; // receiver side
```
### Single-Use KeyPackages
MLS KeyPackages contain a single-use HPKE init public key. Each init key is
used exactly once -- to encrypt the Welcome message that bootstraps a new
member's group state. After the Welcome is processed, the init private key is
consumed and deleted from the `DiskKeyStore`.
This single-use design provides forward secrecy for the initial key exchange:
- Even if a member's long-term Ed25519 identity key is later compromised, the
attacker cannot reconstruct the HPKE init private key that was used to decrypt
the Welcome.
- The init key was ephemeral to the join operation and no longer exists.
This property is critical because the Welcome message contains the full ratchet
tree state, including the secrets needed to decrypt messages in the initial
epoch. If the init key could be reused or recovered, an attacker could
reconstruct the entire initial group state.
See [Key Lifecycle and Zeroization](key-lifecycle.md) for the full lifecycle of
HPKE init keys.
## Layered Forward Secrecy
A distinctive property of quicnprotochat's design is that forward secrecy
operates at two independent layers:
```text
+------------------------------------------------------+
| Network Adversary (records ciphertext) |
+------------------------------------------------------+
|
v
+------------------------------------------------------+
| TLS 1.3 / Noise_XX |
| Forward secrecy via ephemeral ECDHE / X25519 DH |
| Even if TLS cert or Noise static key is compromised,|
| past transport sessions are protected. |
+------------------------------------------------------+
|
v
+------------------------------------------------------+
| MLS (RFC 9420) |
| Forward secrecy via epoch ratchet |
| Even if current MLS state is compromised, |
| past epochs are protected (keys deleted). |
+------------------------------------------------------+
|
v
+------------------------------------------------------+
| Plaintext message content |
+------------------------------------------------------+
```
**Why this matters:** If the transport layer's forward secrecy is broken (e.g.,
an attacker obtains a TLS session key through a side channel), the MLS layer
still protects message content independently. The attacker would see the MLS
ciphertext but could not decrypt it without the MLS epoch keys.
Conversely, if MLS epoch keys are somehow leaked, the transport layer prevents
a network-level attacker from correlating them with specific network flows
unless they also break the transport encryption.
## Comparison with Signal
Signal's Double Ratchet protocol also provides forward secrecy, but the
mechanisms differ:
| Property | Signal Double Ratchet | MLS (quicnprotochat) |
|----------|----------------------|---------------------|
| Scope | Pairwise (1:1 sessions) | Group (n-party) |
| Ratchet granularity | Per message (symmetric ratchet) + per DH round (DH ratchet) | Per epoch (Commit) |
| FS granularity | Individual messages | All messages in an epoch |
| Group support | Sender Keys (no per-message FS in groups) | Native group FS via ratchet tree |
| Efficiency | O(1) per message | O(log n) per Commit, O(1) per message |
Signal achieves finer-grained forward secrecy in 1:1 conversations (per message
via the symmetric ratchet), but in group settings, Signal uses Sender Keys,
which do **not** provide per-message forward secrecy. A compromised Sender Key
reveals all past messages from that sender.
MLS provides forward secrecy at the epoch level for the entire group. Within an
epoch, all messages share the same key material. The trade-off is that FS
granularity is coarser (per epoch rather than per message), but it applies
uniformly to all group members.
## Practical Implications
1. **Epoch advancement frequency:** More frequent Commits provide
finer-grained forward secrecy. In the current implementation, epochs advance
when members are added. Future milestones will add periodic Update proposals
to advance epochs even without membership changes.
2. **Key deletion timing:** Forward secrecy depends on old keys being actually
deleted from memory and disk. The `DiskKeyStore`'s flush-on-write behavior
ensures that consumed HPKE init keys are removed from the persistent store.
MLS epoch key deletion is handled internally by openmls.
3. **State file security:** The client state file contains the Ed25519 identity
seed and potentially the DiskKeyStore contents. If this file is compromised,
the attacker obtains the current identity key and any stored HPKE init keys
(for pending Welcome messages). Past epoch keys are not in the state file
(they have been deleted), so forward secrecy is preserved for past epochs.
## Related Pages
- [Cryptography Overview](overview.md) -- algorithm inventory
- [Key Lifecycle and Zeroization](key-lifecycle.md) -- when keys are created and destroyed
- [Post-Compromise Security](post-compromise-security.md) -- the complementary property (protecting the future)
- [Threat Model](threat-model.md) -- attacker models and what FS protects against
- [X25519 Transport Keys](transport-keys.md) -- Noise ephemeral DH details
- [Ed25519 Identity Keys](identity-keys.md) -- the long-term key whose compromise forward secrecy guards against

View File

@@ -0,0 +1,199 @@
# Ed25519 Identity Keys
The Ed25519 identity keypair is the long-term cryptographic identity of a
quicnprotochat client. It is generated once, persisted across sessions, and used
for MLS credential signing, Authentication Service registration, and delivery
queue addressing.
**Source:** `crates/quicnprotochat-core/src/identity.rs`
## Structure
The `IdentityKeypair` struct holds two fields:
```rust
pub struct IdentityKeypair {
/// Raw 32-byte private seed -- zeroized on drop.
seed: Zeroizing<[u8; 32]>,
/// Corresponding 32-byte public verifying key.
verifying: VerifyingKey,
}
```
| Field | Type | Size | Secret? |
|-------|------|------|---------|
| `seed` | `Zeroizing<[u8; 32]>` | 32 bytes | Yes -- zeroized on drop |
| `verifying` | `ed25519_dalek::VerifyingKey` | 32 bytes | No -- public |
The private seed is stored as raw bytes wrapped in `Zeroizing<[u8; 32]>` rather
than directly as a `SigningKey`. This design choice avoids a conflict with
`ed25519-dalek`'s own `Zeroize` implementation: the `Zeroizing<T>` wrapper
requires `T: Zeroize`, which `[u8; 32]` satisfies (arrays of `u8` implement
`Zeroize`) but `SigningKey` does not.
## Key Generation
A fresh identity keypair is generated from the OS CSPRNG (`OsRng`) via
`ed25519-dalek`:
```rust
use quicnprotochat_core::identity::IdentityKeypair;
let identity = IdentityKeypair::generate();
// The signing key seed is generated from OsRng (getrandom on Linux).
// The verifying key is derived from the seed automatically.
```
Internally, `generate()` calls `SigningKey::generate(&mut OsRng)`, extracts the
32-byte seed with `to_bytes()`, wraps it in `Zeroizing`, and derives the
`VerifyingKey`:
```rust
pub fn generate() -> Self {
use rand::rngs::OsRng;
let signing = SigningKey::generate(&mut OsRng);
let verifying = signing.verifying_key();
let seed = Zeroizing::new(signing.to_bytes());
Self { seed, verifying }
}
```
## Fingerprint Computation
The fingerprint is a SHA-256 digest of the raw 32-byte Ed25519 public key. It
serves as a compact, collision-resistant identifier for logging and protocol
indexing:
```rust
pub fn fingerprint(&self) -> [u8; 32] {
let mut hasher = Sha256::new();
hasher.update(self.verifying.to_bytes());
hasher.finalize().into()
}
```
The `Debug` implementation uses the first 4 bytes of the fingerprint as a
human-readable prefix:
```rust
// Output example:
// IdentityKeypair { fingerprint: "a1b2c3d4...", .. }
```
This ensures the private seed is never accidentally printed to logs.
## Zeroization
The 32-byte private seed is wrapped in `Zeroizing<[u8; 32]>` from the `zeroize`
crate. When the `IdentityKeypair` struct is dropped, the `Zeroizing` wrapper
overwrites the seed bytes with zeros before deallocation. This mitigates the
risk of key material lingering in memory after the struct is no longer needed.
Key points about the zeroization strategy:
- **On drop:** The seed is overwritten with zeros automatically.
- **Serialization:** `seed_bytes()` returns a plain `[u8; 32]` copy for
persistence. The caller is responsible for securely handling this copy.
- **Reconstruction:** `from_seed(seed)` wraps the provided bytes in a fresh
`Zeroizing` immediately.
- **No `Clone`/`Copy`:** `IdentityKeypair` does not implement `Clone` or
`Copy`, preventing accidental duplication of secret material.
See [Key Lifecycle and Zeroization](key-lifecycle.md) for the full lifecycle of
this key type.
## Role in MLS
The `IdentityKeypair` implements the `openmls_traits::signatures::Signer` trait,
allowing it to be passed directly to `KeyPackage::builder().build(...)`:
```rust
impl Signer for IdentityKeypair {
fn sign(&self, payload: &[u8]) -> Result<Vec<u8>, MlsError> {
let sk = self.signing_key();
let sig: ed25519_dalek::Signature = sk.sign(payload);
Ok(sig.to_bytes().to_vec())
}
fn signature_scheme(&self) -> SignatureScheme {
SignatureScheme::ED25519
}
}
```
This integration means `IdentityKeypair`:
1. Signs MLS Commits, Proposals, and KeyPackages with Ed25519.
2. Is embedded in `BasicCredential` as the raw 32-byte public key bytes.
3. Provides the `signature_key` field in `CredentialWithKey` used throughout
the `GroupMember` lifecycle.
The MLS ciphersuite (`MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519`) mandates
Ed25519 for signing, making the `IdentityKeypair` the natural fit.
## Role in the Authentication Service
The Ed25519 public key bytes (`public_key_bytes()`) are used as the
`identityKey` in `auth.capnp` RPC calls. The Authentication Service stores
KeyPackages indexed by this key, and the Delivery Service routes messages to
queues indexed by the same key.
## Distinction from the X25519 Noise Keypair
It is critical to understand that the Ed25519 identity key and the X25519
transport key are **separate keys on different curves serving different
purposes**:
| Property | Ed25519 Identity Key | X25519 Noise Key |
|----------|---------------------|-----------------|
| Curve | Twisted Edwards (Ed25519) | Montgomery (Curve25519) |
| Operation | Digital signatures | Diffie-Hellman key exchange |
| Purpose | MLS credentials, AS registration | Noise\_XX mutual authentication |
| Lifetime | Permanent (per client) | Per server process or per connection |
| Persistence | Serialized to state file | Not serialized (deferred to M6) |
| Source | `identity.rs` | `keypair.rs` |
Although both curves are related (Curve25519 is birationally equivalent to
Ed25519's curve), the keys are **not interchangeable**. Converting between them
requires explicit birational mapping, which quicnprotochat intentionally avoids
to maintain clean separation of concerns.
## Serialization
`IdentityKeypair` implements `Serialize` and `Deserialize` (serde) by
serializing only the 32-byte seed. On deserialization, `from_seed()` is called
to reconstruct the verifying key:
```rust
impl Serialize for IdentityKeypair {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where S: serde::Serializer,
{
serializer.serialize_bytes(&self.seed[..])
}
}
impl<'de> Deserialize<'de> for IdentityKeypair {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where D: serde::Deserializer<'de>,
{
let bytes: Vec<u8> = serde::Deserialize::deserialize(deserializer)?;
let seed: [u8; 32] = bytes
.as_slice()
.try_into()
.map_err(|_| serde::de::Error::custom("identity seed must be 32 bytes"))?;
Ok(IdentityKeypair::from_seed(seed))
}
}
```
This means the state file contains only the 32-byte seed, and the verifying key
is deterministically re-derived on load.
## Related Pages
- [Cryptography Overview](overview.md) -- algorithm inventory
- [X25519 Transport Keys](transport-keys.md) -- the other keypair
- [Key Lifecycle and Zeroization](key-lifecycle.md) -- full lifecycle diagram
- [Post-Compromise Security](post-compromise-security.md) -- how MLS credentials interact with PCS
- [Threat Model](threat-model.md) -- what identity keys protect and do not protect

View File

@@ -0,0 +1,402 @@
# Key Lifecycle and Zeroization
quicnprotochat uses multiple key types with different lifetimes, creation
patterns, and destruction guarantees. This page provides a comprehensive
lifecycle diagram for every key type in the system, from generation through
zeroization.
## Lifecycle Overview
```text
Key Type          Creation             Distribution         Use                    Destruction
--------------------------------------------------------------------------------------------------------------
Ed25519 Identity  Once per client      AS registration      MLS signing,           Zeroizing<[u8;32]>
                  (OsRng)              + MLS credential     credential binding     on struct drop

X25519 Noise      Per server process   Noise_XX handshake   DH key exchange        ZeroizeOnDrop
                  or per client conn   (in-band)            (transport session)    on struct drop

HPKE Init Key     Per KeyPackage       Uploaded to AS       Decrypt Welcome        Consumed by openmls;
                  (openmls backend)    in KeyPackage        (join_group)           deleted from keystore

MLS Epoch Keys    Per Commit           Internal (ratchet    Encrypt/decrypt        Old epoch keys deleted
                  (openmls ratchet)    tree derivation)     application messages   after processing Commit

Hybrid KEM Keys   Per peer (future)    Public portion       X25519+ML-KEM-768      Ephemeral per encrypt;
                  (OsRng)              sent to peers        hybrid encryption      static part on drop
```
## Ed25519 Identity Key
**Source:** `crates/quicnprotochat-core/src/identity.rs`
The Ed25519 identity key is the most long-lived secret in the system. It
represents the client's cryptographic identity across all sessions and groups.
### Lifecycle
```text
+-----------------+
| OsRng |
| (getrandom) |
+--------+--------+
|
generate()
|
+--------v--------+
| IdentityKeypair |
| seed: Zeroizing | <-- 32-byte Ed25519 seed
| verifying: Vk | <-- 32-byte public key
+--------+--------+
|
+--------------+--------------+
| | |
Persist to Register with Embed in MLS
state file Auth Service BasicCredential
(seed_bytes) (public_key) (CredentialWithKey)
| | |
| | Sign KeyPackages,
| | Commits, Proposals
| | |
Load on next Server stores Used for lifetime
client start public key of client
(from_seed) as queue index |
| | |
+------+-------+--------------+
|
struct dropped
|
+--------v--------+
| Zeroizing<T> |
| overwrites |
| seed with 0x00 |
+-----------------+
```
### Key Properties
- **Generation:** `SigningKey::generate(&mut OsRng)` produces 32 bytes of
entropy from the OS CSPRNG. The seed is immediately wrapped in `Zeroizing`.
- **Persistence:** The `seed_bytes()` method returns a plain `[u8; 32]` for
serialization to the client state file. The caller must handle this copy
securely.
- **Reconstruction:** `from_seed(seed)` re-derives the `VerifyingKey` from the
seed deterministically. The seed is wrapped in `Zeroizing` upon construction.
- **Destruction:** When the `IdentityKeypair` struct is dropped, the
`Zeroizing<[u8; 32]>` wrapper overwrites the 32 seed bytes with zeros.
- **No Clone/Copy:** The struct does not implement `Clone` or `Copy`, preventing
accidental duplication of the secret seed.
### Fingerprint
The fingerprint (`SHA-256(public_key_bytes)`) is derived from the public key and
is used as a compact identifier in logs. It is not secret and does not require
zeroization.
## X25519 Noise Key
**Source:** `crates/quicnprotochat-core/src/keypair.rs`
The X25519 Noise key provides mutual authentication during the Noise\_XX
handshake. It is shorter-lived than the identity key and is not currently
persisted.
### Lifecycle
```text
+-----------------+
| OsRng |
| (getrandom) |
+--------+--------+
|
generate()
|
+--------v--------+
| NoiseKeypair |
| private: Secret | <-- StaticSecret (ZeroizeOnDrop)
| public: PubKey | <-- 32-byte public key
+--------+--------+
|
+--------------+--------------+
| |
private_bytes() public_bytes()
-> Zeroizing<[u8;32]> -> [u8; 32]
| |
Passed to snow::Builder Exchanged during
local_private_key() Noise_XX handshake
| |
Zeroizing copy drops Stored by peer
immediately after use (not secret)
| |
+-------------+---------------+
|
Noise handshake completes
|
+-------------v--------------+
| Transport session holds |
| derived symmetric keys |
| (managed by snow) |
+-------------+--------------+
|
Connection closes
|
+-------------v--------------+
| NoiseKeypair dropped |
| StaticSecret::drop() |
| overwrites scalar with 0 |
+----------------------------+
```
### Key Properties
- **Generation:** `StaticSecret::random_from_rng(OsRng)` generates a 32-byte
Curve25519 scalar.
- **Dual zeroization:** The `StaticSecret` itself implements `ZeroizeOnDrop`,
and `private_bytes()` returns a `Zeroizing<[u8; 32]>` wrapper.
- **Debug redaction:** The `Debug` impl shows only the first 4 bytes of the
public key and prints `[redacted]` for the private key.
- **No serialization:** `NoiseKeypair` does not implement `Serialize`. Persistence
is deferred to M6.
- **Current lifetime:** Per server process start (server) or per connection
attempt (client). After M6, keys may be persisted with passphrase encryption.
## HPKE Init Keys
**Source:** `crates/quicnprotochat-core/src/keystore.rs` and
`crates/quicnprotochat-core/src/group.rs`
HPKE init keys are generated by the openmls backend as part of MLS KeyPackage
creation. They are single-use: each init key is consumed exactly once when
processing a Welcome message.
### Lifecycle
```text
+----------------------------+
| generate_key_package() |
| (GroupMember method) |
+-------------+--------------+
|
openmls internally generates
HPKE init keypair (X25519)
|
+-------------v--------------+
| DiskKeyStore / StoreCrypto |
| stores init private key |
| keyed by init key ref |
+-------------+--------------+
|
KeyPackage (containing init
public key) is TLS-encoded
and uploaded to Auth Service
|
+-------------v--------------+
| Peer fetches KeyPackage |
| from AS; includes it in |
| Welcome message |
+-------------+--------------+
|
+-------------v--------------+
| join_group(welcome) |
| openmls calls |
| new_from_welcome() |
| reads init private key |
| from DiskKeyStore |
| decrypts Welcome's HPKE |
| ciphertext |
+-------------+--------------+
|
Init key is consumed
(never reused per MLS spec)
|
+-------------v--------------+
| Key deleted from store |
| (openmls manages cleanup) |
+----------------------------+
```
### Key Properties
- **Generation:** Handled internally by the openmls backend (`StoreCrypto`).
The `generate_key_package()` method on `GroupMember` triggers this.
- **Storage:** The `DiskKeyStore` is either ephemeral (in-memory `HashMap`) or
persistent (bincode-serialized to disk). The init private key is stored as a
JSON-serialized `MlsEntity` keyed by the init key reference bytes.
- **Single use:** Per RFC 9420, each HPKE init key is used exactly once. This
prevents replay attacks and ensures forward secrecy of the initial key exchange.
- **Critical constraint:** The same `GroupMember` instance (and therefore the same
`StoreCrypto` backend) that called `generate_key_package()` must later call
`join_group()`. If a different backend is used, the init private key will not be
found and `new_from_welcome()` will fail.
- **Persistence mode:** `DiskKeyStore::persistent(path)` writes the entire
key-value map to disk on every store/delete operation, ensuring HPKE init keys
survive process restarts.
## MLS Epoch Keys
**Managed by:** `openmls` (internal to the `MlsGroup` state machine)
MLS epoch keys are derived internally by the openmls ratchet tree. They are not
directly accessible in quicnprotochat code but are critical to understanding the
system's security properties.
### Lifecycle
```text
+----------------------------+
| create_group(group_id) |
| or join_group(welcome) |
+-------------+--------------+
|
Epoch 0: initial key material
derived from ratchet tree
|
+-------------v--------------+
| send_message(plaintext) |
| encrypts with current |
| epoch's application key |
+-------------+--------------+
|
+-------------v--------------+
| add_member() / Commit |
| merge_pending_commit() |
| or merge_staged_commit() |
+-------------+--------------+
|
Epoch advances: new key
material derived from
updated ratchet tree
|
+-------------v--------------+
| Old epoch keys deleted |
| by openmls internally |
| (forward secrecy) |
+----------------------------+
```
### Key Properties
- **Derivation:** Each epoch's key material is derived from the ratchet tree, a
binary tree where each leaf represents a group member. Internal nodes hold
derived key material. See
[Post-Compromise Security](post-compromise-security.md) for details.
- **Advancement:** Epochs advance when a Commit is processed --
`merge_pending_commit()` (for the sender) or `merge_staged_commit()` (for
receivers).
- **Deletion:** Old epoch keys are deleted after the Commit is processed. This
deletion is what provides [forward secrecy](forward-secrecy.md) at the MLS
layer.
- **No direct access:** quicnprotochat code interacts with epoch keys only
indirectly through `send_message()` and `receive_message()`.
## Hybrid KEM Keys (Future -- M5+)
**Source:** `crates/quicnprotochat-core/src/hybrid_kem.rs`
The hybrid KEM keypair combines X25519 (classical) with ML-KEM-768
(post-quantum) for content encryption that resists both classical and quantum
attacks.
### Lifecycle
```text
+----------------------------+
| HybridKeypair::generate() |
| OsRng for both components |
+-------------+--------------+
|
+-------------v--------------+
| HybridKeypair |
| x25519_sk: StaticSecret | 32B (ZeroizeOnDrop)
| x25519_pk: PublicKey | 32B
| mlkem_dk: DecapsulationKey | 2400B
| mlkem_ek: EncapsulationKey | 1184B
+-------------+--------------+
|
+--------------+--------------+
| |
public_key() to_bytes()
-> HybridPublicKey -> HybridKeypairBytes
(32B + 1184B) (for persistence)
| |
Distributed to peers Stored securely
for encryption (serde Serialize)
|
+----v----+
| Sender |
| hybrid_encrypt(pk, pt) |
| 1. Ephemeral X25519 DH |
| 2. ML-KEM-768 encapsulate |
| 3. HKDF(x25519_ss||mlkem_ss)|
| 4. ChaCha20-Poly1305 AEAD |
+----+----+
|
Envelope: ver(1) | eph_pk(32) | mlkem_ct(1088) | nonce(12) | ct(var)
|
+----v----+
| Recipient |
| hybrid_decrypt(kp, env) |
| 1. X25519 DH with eph_pk |
| 2. ML-KEM-768 decapsulate |
| 3. HKDF derive same key |
| 4. ChaCha20-Poly1305 decrypt|
+---------+
```
### Key Properties
- **Generation:** Both X25519 and ML-KEM-768 components use `OsRng`.
- **Key sizes:** X25519 secret: 32B, ML-KEM-768 decapsulation key: 2400B,
encapsulation key: 1184B.
- **Serialization:** `HybridKeypairBytes` (serde) stores all components for
persistence. `HybridPublicKey` stores only the public portions.
- **Zeroization of IKM:** The combined shared secret (`x25519_ss || mlkem_ss`)
is wrapped in `Zeroizing<Vec<u8>>` and cleared after HKDF derivation.
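- **Ephemeral per encryption:** Each call to `hybrid_encrypt` generates a fresh
`EphemeralSecret` for X25519 and a fresh ML-KEM encapsulation, so every envelope
is protected by an independent shared secret and the sender retains nothing
that can decrypt it later. This does **not** provide forward secrecy against
compromise of the recipient's static `HybridKeypair`: an attacker who obtains
it can decrypt previously recorded envelopes addressed to that key.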
- **Integration timeline:** M5 will integrate this into the MLS crypto provider.
See [Post-Quantum Readiness](post-quantum-readiness.md).
## Zeroization Summary
| Key Type | Zeroization Mechanism | When |
|----------|----------------------|------|
| Ed25519 seed | `Zeroizing<[u8; 32]>` | `IdentityKeypair` drop |
| Ed25519 seed (accessor) | Plain `[u8; 32]` copy | Caller responsibility |
| X25519 private | `ZeroizeOnDrop` (x25519-dalek) | `NoiseKeypair` drop |
| X25519 private (accessor) | `Zeroizing<[u8; 32]>` | Accessor drop |
| HPKE init private | Managed by openmls/`DiskKeyStore` | After Welcome processing |
| MLS epoch keys | Managed by openmls internally | After Commit processing |
| Hybrid IKM | `Zeroizing<Vec<u8>>` | After HKDF derivation |
| Hybrid X25519 static | `ZeroizeOnDrop` (x25519-dalek) | `HybridKeypair` drop |
| Hybrid ephemeral | `EphemeralSecret` (x25519-dalek) | After DH computation |
## Security Considerations
1. **Memory residue:** Zeroization prevents key material from lingering in freed
memory, but it does not protect against an attacker with live memory access
(e.g., a debugger or cold-boot attack). Full protection would require
hardware-backed key storage (e.g., HSM, TPM, or OS keychain), which is not
yet implemented.
2. **Swap and core dumps:** If the process's memory is swapped to disk or
written to a core dump, key material may persist on non-volatile storage.
Mitigations include `mlock()` (not yet implemented) and disabling core dumps.
3. **Compiler optimizations:** The `zeroize` crate uses volatile writes
(`core::ptr::write_volatile`) followed by a compiler fence
(`core::sync::atomic::compiler_fence`) to prevent the optimizer from eliding
the zeroing write as a dead store.
4. **Copies via `seed_bytes()`:** The `IdentityKeypair::seed_bytes()` method
returns a plain `[u8; 32]`. The caller (typically the persistence layer) is
responsible for zeroizing this copy after writing it to disk.
## Related Pages
- [Cryptography Overview](overview.md) -- algorithm inventory
- [Ed25519 Identity Keys](identity-keys.md) -- identity key details
- [X25519 Transport Keys](transport-keys.md) -- transport key details
- [Forward Secrecy](forward-secrecy.md) -- how key deletion enables FS
- [Post-Compromise Security](post-compromise-security.md) -- epoch advancement
- [Post-Quantum Readiness](post-quantum-readiness.md) -- hybrid KEM integration

View File

@@ -0,0 +1,102 @@
# Cryptography Overview
quicnprotochat layers multiple cryptographic protocols to provide confidentiality,
integrity, authentication, forward secrecy, and post-compromise security. This
page catalogues every algorithm in the system, the crate that supplies it, and
the security margin it provides.
## Algorithm Inventory
| Algorithm | Purpose | Crate | Security Level |
|-----------|---------|-------|----------------|
| Ed25519 | Identity signing, MLS credentials | `ed25519-dalek 2` | 128-bit classical |
| X25519 | Noise DH, MLS HPKE key exchange | `x25519-dalek 2` | 128-bit classical |
| ChaCha20-Poly1305 | Noise AEAD, hybrid KEM AEAD | `chacha20poly1305 0.10` | 256-bit key |
| AES-128-GCM | MLS AEAD | `openmls` (via RustCrypto) | 128-bit |
| BLAKE2s | Noise hash function | `snow 0.9` (built-in) | 128-bit |
| SHA-256 | Key fingerprints, HKDF | `sha2 0.10` | 128-bit collision resistance |
| ML-KEM-768 | Post-quantum KEM | `ml-kem 0.2` | NIST Level 3 (~192-bit PQ) |
| HKDF-SHA256 | Key derivation | `hkdf 0.12` | Depends on input entropy |
> **Note:** The system provides 128-bit classical security throughout. When the
> hybrid KEM is active (M5 onward), content encryption gains 192-bit
> post-quantum security via ML-KEM-768.
## Where Each Algorithm Appears
### Transport Layer
The transport layer uses two independent encryption substrates:
1. **QUIC/TLS 1.3** (via `quinn 0.11` + `rustls 0.23`): Provides the
outermost encrypted tunnel. The TLS 1.3 handshake negotiates an ephemeral
ECDHE key exchange (X25519 or P-256, depending on the peer) and an AEAD
cipher (AES-128-GCM or ChaCha20-Poly1305). This layer protects connection
metadata from passive network observers.
2. **Noise\_XX** (via `snow 0.9`): Runs inside the QUIC stream. The Noise
pattern `Noise_XX_25519_ChaChaPoly_BLAKE2s` provides mutual authentication
using static X25519 keys, with ChaCha20-Poly1305 as the AEAD and BLAKE2s
as the hash function. See [X25519 Transport Keys](transport-keys.md) for
details on the keypair.
### Application Layer
1. **MLS (RFC 9420)** (via `openmls 0.5`): Provides end-to-end encrypted
group messaging. The ciphersuite is
`MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519`, which uses:
- X25519 for DHKEM (HPKE key exchange)
- AES-128-GCM for content encryption
- SHA-256 for the KDF and transcript hashing
- Ed25519 for signing Commits, Proposals, and credentials
2. **Hybrid KEM** (via `ml-kem 0.2` + `x25519-dalek 2` + `hkdf 0.12`):
An outer encryption layer combining X25519 and ML-KEM-768. The combined
shared secret is derived through HKDF-SHA256 and used with
ChaCha20-Poly1305 for AEAD. See
[Post-Quantum Readiness](post-quantum-readiness.md) for integration plans.
### Identity Layer
- **Ed25519** provides long-term identity signing. Each client generates a
single Ed25519 keypair that serves as the MLS `BasicCredential`, the
Authentication Service registration key, and the delivery queue index. See
[Ed25519 Identity Keys](identity-keys.md).
- **SHA-256** computes key fingerprints -- a 32-byte digest of the Ed25519
public key bytes used for compact, collision-resistant identification in logs
and protocol messages.
## Security Level Summary
All classical algorithms in the system target at least 128-bit security. The
post-quantum component (ML-KEM-768) targets NIST Level 3, which corresponds to
roughly 192-bit security against quantum adversaries.
The weakest classical link is the 128-bit security level of AES-128-GCM in the
MLS ciphersuite. This is consistent with the IETF's recommended MLS ciphersuite
and is considered adequate for the foreseeable future.
```text
Layer Classical Security Post-Quantum Security
--------------------------------------------------------------------
QUIC/TLS 1.3 128-bit (ECDHE) None (classical only)
Noise_XX 128-bit (X25519) None (classical only)
MLS (content) 128-bit (AES-128-GCM) None (classical only)
Hybrid KEM (M5+) 128-bit (X25519) ~192-bit (ML-KEM-768)
```
See the [Threat Model](threat-model.md) for a discussion of what is and is not
protected, and [Forward Secrecy](forward-secrecy.md) and
[Post-Compromise Security](post-compromise-security.md) for the advanced
security properties these algorithms enable.
## Related Pages
- [Ed25519 Identity Keys](identity-keys.md) -- long-term signing keypair
- [X25519 Transport Keys](transport-keys.md) -- Noise handshake keypair
- [Key Lifecycle and Zeroization](key-lifecycle.md) -- creation through destruction
- [Forward Secrecy](forward-secrecy.md) -- past message protection
- [Post-Compromise Security](post-compromise-security.md) -- future message recovery
- [Post-Quantum Readiness](post-quantum-readiness.md) -- ML-KEM-768 hybrid KEM
- [Threat Model](threat-model.md) -- attacker models and known gaps

View File

@@ -0,0 +1,239 @@
# Post-Compromise Security
Post-compromise security (PCS) is a property of a cryptographic protocol that
guarantees: **after an attacker compromises a participant's state, the protocol
automatically heals so that future messages are protected.** The attacker's
window of access lasts only until the compromised member's leaf key is replaced:
once that member issues an Update or Commit with fresh key material (or is
removed from the group by another member), the group state is re-randomized and
the attacker is locked out.
PCS is the complement of [forward secrecy](forward-secrecy.md):
- **Forward secrecy** protects the **past** from a future compromise.
- **Post-compromise security** protects the **future** from a past compromise.
MLS (RFC 9420) is specifically designed to provide both properties simultaneously
for group messaging. This is a key differentiator of quicnprotochat's design.
## How MLS Provides PCS
### The Ratchet Tree
At the heart of MLS's PCS mechanism is the **ratchet tree**, a binary tree where:
- Each **leaf** represents a group member and contains their public key material.
- Each **internal node** holds derived key material computed from its children.
- The **root** of the tree determines the epoch's group key material.
```text
[Root]
/ \
[A,B] [C,D]
/ \ / \
[A] [B] [C] [D]
```
When a member updates their leaf (by generating fresh key material and issuing an
Update proposal or Commit), the change propagates up the tree path from the leaf
to the root:
```text
[Root]* <- re-derived
/ \
[A,B]* [C,D] <- re-derived
/ \ / \
[A]* [B] [C] [D] <- A updated leaf
```
Every node marked with `*` receives new key material. This means the new epoch's
group secrets depend on A's freshly generated randomness. An attacker who
previously compromised A's state would have to compromise A again to obtain the
new key material: it was generated after the compromise and cannot be derived
from the stale state the attacker holds.
### Cost
The path from a leaf to the root has length O(log n) for a group of n members.
This means:
- An Update/Commit produces O(log n) encrypted path secrets.
- Each group member processes the Update in O(log n) time.
- This is dramatically more efficient than pairwise rekeying (O(n)) or
broadcast encryption.
For a group of 1,000 members, the path length is approximately 10 nodes --
making PCS practical even for large groups.
## Epoch Advancement in quicnprotochat
In the current implementation, epoch advancement occurs through the `GroupMember`
methods in `group.rs`:
### Adding a Member (Commit)
When `add_member()` is called, openmls creates a Commit that adds the new member
and updates the ratchet tree:
```rust
// Alice adds Bob -- this creates a Commit + Welcome
let (commit_bytes, welcome_bytes) = alice.add_member(&bob_kp)?;
// Alice merges the pending Commit, advancing her epoch
// Internally, openmls re-derives the ratchet tree with Bob's leaf
group.merge_pending_commit(&self.backend)?;
```
After `merge_pending_commit()`, Alice's group is in a new epoch with fresh key
material. Any attacker who had compromised Alice's state before this Commit
must now also compromise the new epoch's keys.
### Receiving a Commit
When a member receives and processes a Commit from another member:
```rust
ProcessedMessageContent::StagedCommitMessage(staged) => {
// Merge the Commit into local state -- epoch advances
group.merge_staged_commit(&self.backend, *staged)?;
Ok(None)
}
```
This advances the receiver's epoch, incorporating the committer's fresh key
material into the ratchet tree.
### Future: Periodic Updates
The current implementation only advances epochs when members are added. A more
robust PCS strategy involves periodic Update proposals, where members
re-randomize their leaf key material on a regular schedule (e.g., every hour, or
every N messages). This is planned for future milestones and will look like:
```text
1. Member generates fresh leaf key material
2. Member creates an Update proposal
3. Any member (or the updater) creates a Commit including the Update
4. All members process the Commit and advance to the new epoch
5. The attacker's compromised state is now stale
```
## PCS vs Forward Secrecy
These two properties are often confused but protect against different attack
scenarios:
| Property | Protects | Mechanism | Threat |
|----------|----------|-----------|--------|
| Forward Secrecy | Past messages | Delete old epoch keys | Attacker compromises state **now**, tries to read **past** |
| Post-Compromise Security | Future messages | Re-randomize ratchet tree | Attacker compromised state **before**, tries to read **future** |
Together, they provide a strong guarantee: the attacker's window of access is
limited to the **current epoch only**. Past epochs are protected by FS (old keys
deleted), and future epochs are protected by PCS (new key material generated).
```text
Past epochs Current epoch Future epochs
+-----------------+ +-----------------+ +-----------------+
| Protected by | | Attacker has | | Protected by |
| Forward | | access if they | | Post-Compromise|
| Secrecy | | hold current | | Security |
| (keys deleted) | | epoch keys | | (tree updated) |
+-----------------+ +-----------------+ +-----------------+
```
## Comparison with Signal Groups
Signal's group messaging uses **Sender Keys**, a fundamentally different
mechanism from MLS's ratchet tree. The comparison is instructive because it
highlights why MLS was chosen for quicnprotochat:
### Signal Sender Keys
In Signal's group protocol:
1. Each member generates a Sender Key -- a symmetric key used to encrypt all
messages they send to the group.
2. The Sender Key is distributed to all group members via pairwise Signal
sessions.
3. Each message from a sender is encrypted with their Sender Key.
4. The Sender Key includes a symmetric ratchet (hash ratchet) that advances per
message, providing forward secrecy within a sender's chain.
**The critical limitation:** Sender Keys do **not** provide post-compromise
security. If an attacker compromises a member's Sender Key:
- The attacker can derive all future message keys from that sender (the hash
ratchet is one-way, but the attacker has the current state).
- The key is only rotated when the member manually refreshes it or when group
membership changes.
- There is no automatic healing mechanism analogous to MLS's ratchet tree.
### MLS Ratchet Tree (quicnprotochat)
In contrast, MLS's ratchet tree provides PCS because:
1. Any member can issue an Update that re-randomizes their leaf.
2. The Commit propagates new key material up the tree, affecting the group
secret.
3. The attacker, who holds old state, cannot predict the new randomness.
4. The group automatically heals after at most one epoch advance.
| Property | Signal Sender Keys | MLS Ratchet Tree |
|----------|-------------------|-----------------|
| Forward secrecy (group) | Per-sender hash ratchet | Per-epoch (Commit) |
| Post-compromise security | **No** -- compromised Sender Key reveals all future messages from that sender | **Yes** -- any Commit/Update heals the tree |
| Key rotation | Manual or on membership change | Any Commit (add/remove/update) |
| Healing time | Until manual rotation | Next epoch (automatic) |
| Cost per update | O(n) pairwise re-encryption | O(log n) tree path |
## Practical Implications
### What happens during a compromise?
Suppose an attacker compromises Member A's MLS state (including the current epoch
keys):
1. **Current epoch:** The attacker can decrypt all messages in the current epoch
from all members (because epoch keys are shared group secrets).
2. **Past epochs:** Protected by forward secrecy. Old epoch keys have been
deleted by openmls.
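3. **After A's leaf is replaced:** Once A (after recovering) issues an Update or
Commit with fresh leaf key material, or another member removes A, the ratchet
tree no longer encrypts path secrets to keys the attacker holds. The
attacker's stale state cannot derive the new epoch keys. The attacker is
locked out. A Commit by another member that leaves A's compromised leaf key in
place does not by itself exclude the attacker.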
### How quickly does healing occur?
In the current implementation, healing occurs whenever:
- A new member is added (`add_member()` issues a Commit).
- A member is removed (not yet implemented, but will issue a Commit).
- In the future: periodic Update proposals are issued on a timer.
The practical healing window is the time between Commits. For active groups
with frequent membership changes, this window is small. For static groups,
periodic Updates (planned) will bound the healing window.
### Server compromise does not prevent PCS
The quicnprotochat server is MLS-unaware -- it stores and forwards encrypted
MLS messages without access to the group state. A compromised server cannot:
- Silently prevent PCS by selectively dropping Commits or Update proposals (it
can deny service outright, but selective suppression is detectable because
MLS epoch numbers must be sequential).
- Inject fraudulent Commits (it lacks the signing key of any group member).
See [Threat Model](threat-model.md) for the full analysis of a compromised
server.
## Related Pages
- [Cryptography Overview](overview.md) -- algorithm inventory
- [Forward Secrecy](forward-secrecy.md) -- the complementary property
- [Key Lifecycle and Zeroization](key-lifecycle.md) -- how key deletion enables FS and PCS
- [Threat Model](threat-model.md) -- attacker models including compromised clients
- [Post-Quantum Readiness](post-quantum-readiness.md) -- PQ protection for PCS mechanisms
- [MLS (RFC 9420)](../protocol-layers/mls.md) -- protocol deep dive

View File

@@ -0,0 +1,256 @@
# Post-Quantum Readiness
quicnprotochat includes a fully implemented and tested hybrid key encapsulation
mechanism (KEM) combining X25519 (classical) with ML-KEM-768 (post-quantum).
This page describes the current implementation, the integration plan, the
security rationale, and the known gaps.
**Source:** `crates/quicnprotochat-core/src/hybrid_kem.rs`
## Current State
The hybrid KEM is **fully implemented and tested** in `quicnprotochat-core`. The
implementation provides:
- `HybridKeypair::generate()` -- generate a combined X25519 + ML-KEM-768 keypair
- `hybrid_encrypt(recipient_pk, plaintext)` -- encrypt to a hybrid public key
- `hybrid_decrypt(keypair, envelope)` -- decrypt with the hybrid private key
- Serialization/deserialization for both keypairs and public keys
- Comprehensive test coverage: round-trip, wrong-key rejection, tampering
detection, version validation, large payload handling
The test suite in `hybrid_kem.rs` includes 10 tests covering:
- Basic encrypt/decrypt round-trip
- Wrong key decryption failure
- Tampered AEAD ciphertext detection
- Tampered ML-KEM ciphertext detection
- Tampered X25519 ephemeral public key detection
- Unsupported version rejection
- Envelope-too-short rejection
- Keypair serialization round-trip
- Public key serialization round-trip
- Large payload (50 KB) round-trip
## ML-KEM-768 (FIPS 203)
ML-KEM (Module-Lattice-Based Key Encapsulation Mechanism) is the NIST-standardized
post-quantum KEM, published as FIPS 203. quicnprotochat uses ML-KEM-768, the
middle parameter set:
| Parameter Set | NIST Level | Security (PQ) | EK Size | CT Size | SS Size |
|---------------|-----------|---------------|---------|---------|---------|
| ML-KEM-512 | 1 | ~128-bit | 800 B | 768 B | 32 B |
| **ML-KEM-768** | **3** | **~192-bit** | **1184 B** | **1088 B** | **32 B** |
| ML-KEM-1024 | 5 | ~256-bit | 1568 B | 1568 B | 32 B |
### Why ML-KEM-768 (not 512 or 1024)?
- **Level 3 provides a strong security margin.** The ~192-bit post-quantum
security level exceeds the 128-bit classical security of the rest of the
system, providing headroom against future cryptanalytic advances.
- **Moderate key and ciphertext sizes.** The encapsulation key (1184 bytes) and
ciphertext (1088 bytes) are large compared to X25519 (32 bytes each) but
manageable for a messaging protocol. ML-KEM-1024 would add ~400 bytes to each
with diminishing returns.
- **Consistent with industry practice.** Google Chrome and Cloudflare have
deployed ML-KEM-768 (or its predecessor Kyber-768) in production, and Signal's
PQXDH uses the same KEM family (Kyber-1024).
The `ml-kem 0.2` crate provides a pure-Rust implementation of FIPS 203 with all
three parameter sets compiled in by default.
## Hybrid Construction
The hybrid KEM follows the combiner approach described in
`draft-ietf-tls-hybrid-design`. Both a classical and a post-quantum KEM are
executed, and their shared secrets are combined through a KDF:
### Key Derivation
```text
ikm = X25519_shared_secret(32 bytes) || ML-KEM_shared_secret(32 bytes)
key = HKDF-SHA256(salt=[], ikm, info="quicnprotochat-hybrid-v1", L=32)
nonce = HKDF-SHA256(salt=[], ikm, info="quicnprotochat-hybrid-nonce-v1", L=12)
```
The combined IKM (input key material) is wrapped in `Zeroizing<Vec<u8>>` and
cleared after HKDF expansion.
### Why a Hybrid?
A pure ML-KEM deployment would be vulnerable if lattice-based cryptography is
broken (which, while considered unlikely, cannot be ruled out for a newly
standardized algorithm). A pure X25519 deployment provides no post-quantum
protection. The hybrid approach provides a "belt and suspenders" guarantee:
- If ML-KEM-768 is broken, X25519 still provides 128-bit classical security.
- If X25519 is broken (by a quantum computer), ML-KEM-768 still provides
~192-bit post-quantum security.
- Both must be broken simultaneously to compromise the shared secret.
## Wire Format
The hybrid envelope uses a versioned binary format:
```text
Offset Length Field
------ ------ -----
0 1 Version byte (0x01)
1 32 X25519 ephemeral public key
33 1088 ML-KEM-768 ciphertext
1121 12 ChaCha20-Poly1305 nonce
1133 var ChaCha20-Poly1305 AEAD ciphertext (plaintext + 16-byte tag)
```
Total overhead: **1133 bytes** of header + 16 bytes of AEAD tag = **1149 bytes**
per message (in addition to the plaintext length).
The version byte enables future algorithm agility. Version `0x01` denotes
X25519 + ML-KEM-768 + HKDF-SHA256 + ChaCha20-Poly1305.
## Encryption Flow
```text
Sender Recipient
------ ---------
HybridKeypair::generate()
├── x25519_sk, x25519_pk
└── mlkem_dk, mlkem_ek
public_key()│
┌─── HybridPublicKey ◄──────────────────────────────┘
│ (x25519_pk: 32B, mlkem_ek: 1184B)
│ hybrid_encrypt(pk, plaintext):
│ 1. eph_sk = EphemeralSecret::random()
│ 2. eph_pk = PublicKey::from(&eph_sk)
│ 3. x25519_ss = eph_sk.diffie_hellman(pk.x25519_pk)
│ 4. (mlkem_ct, mlkem_ss) = pk.mlkem_ek.encapsulate()
│ 5. ikm = x25519_ss || mlkem_ss
│ 6. (key, nonce) = HKDF(ikm, info)
│ 7. ct = ChaCha20Poly1305::encrypt(key, nonce, plaintext)
│ 8. envelope = ver || eph_pk || mlkem_ct || nonce || ct
└──────────── envelope ──────────────────────────────►
hybrid_decrypt(kp, envelope):
1. Parse ver, eph_pk, mlkem_ct, nonce, ct
2. x25519_ss = kp.x25519_sk.dh(eph_pk)
3. mlkem_ss = kp.mlkem_dk.decapsulate(mlkem_ct)
4. ikm = x25519_ss || mlkem_ss
5. (key, _) = HKDF(ikm, info)
6. plaintext = ChaCha20Poly1305::decrypt(key, nonce, ct)
```
## Integration Plan (M5)
The hybrid KEM is currently a standalone module. Milestone M5 will integrate it
into the MLS pipeline by creating a custom `OpenMlsCryptoProvider` that uses the
hybrid KEM for HPKE init key exchange:
1. **Custom crypto provider:** Wrap the existing `StoreCrypto` with a hybrid KEM
layer that intercepts HPKE operations and replaces the classical X25519 DHKEM
with the hybrid X25519 + ML-KEM-768 KEM.
2. **KeyPackage extension:** Store the hybrid public key (1216 bytes:
32B X25519 + 1184B ML-KEM) in a custom MLS extension within the KeyPackage.
3. **Welcome encryption:** When creating a Welcome message, the hybrid KEM
encrypts the group secrets instead of (or in addition to) the standard HPKE.
4. **Backward compatibility:** Groups can negotiate whether to use hybrid KEM
via the MLS group context extensions. Classical-only clients can still
participate in groups that do not require PQ protection.
## The PQ Gap (ADR-006)
There is an important asymmetry in quicnprotochat's post-quantum protection:
```text
Layer Classical Protection Post-Quantum Protection
---------------------------------------------------------------------
QUIC/TLS 1.3 Yes (ECDHE) No
Noise_XX Yes (X25519) No
MLS content (M5+) Yes (X25519 DHKEM) Yes (hybrid KEM)
```
**What this means:**
- **Message content** (the MLS application data) is protected against quantum
adversaries from M5 onward. An attacker with a quantum computer cannot decrypt
the message payload.
- **Transport metadata** (who connects to the server, when, message sizes) is
protected only by classical cryptography. A quantum attacker who recorded the
TLS/Noise handshake transcripts could, in theory, recover the transport session
keys and observe the metadata.
This is the **PQ gap**: content is safe, but metadata is not.
### Why not PQ transport?
Post-quantum TLS (via ML-KEM in the TLS 1.3 handshake) is being standardized by
the IETF and is supported by some TLS libraries, but `rustls` does not yet
support it in a stable release. When `rustls` adds ML-KEM support, quicnprotochat
will adopt it to close the PQ gap at the transport layer.
Similarly, post-quantum Noise patterns are an active research area but are not
yet standardized. The `snow` crate does not currently support post-quantum DH
primitives.
## Harvest-Now, Decrypt-Later Risk
The "harvest-now, decrypt-later" (HNDL) threat model assumes an adversary who:
1. Records all encrypted traffic today (inexpensive storage).
2. Waits for a sufficiently powerful quantum computer (years or decades).
3. Decrypts the recorded traffic retroactively.
In quicnprotochat's case:
- **Content is safe from M5 onward.** The hybrid KEM wrapping MLS content uses
ML-KEM-768, which resists quantum attacks. Even if the recorded traffic is
decrypted at the transport layer, the MLS ciphertext inside is still protected.
- **Transport metadata is at risk.** An HNDL attacker who records TLS/Noise
handshakes today could, with a future quantum computer, recover the transport
session keys and observe:
- Which clients connected to the server and when.
- Message sizes and timing patterns.
- The encrypted MLS blobs (which they still cannot decrypt if hybrid KEM is
active).
- **Content before M5 is at risk.** Messages sent before the hybrid KEM
integration (M5) use classical-only MLS encryption. If the HPKE init key
exchange used X25519-only DHKEM, a quantum attacker could recover the HPKE
shared secret and decrypt the Welcome message, gaining access to the group
state.
This risk is the primary motivation for deploying the hybrid KEM as early as
possible.
## Key Sizes and Performance
| Component | Key/Ciphertext | Size |
|-----------|---------------|------|
| X25519 public key | `x25519_pk` | 32 bytes |
| X25519 shared secret | DH result | 32 bytes |
| ML-KEM-768 encapsulation key | `mlkem_ek` | 1,184 bytes |
| ML-KEM-768 decapsulation key | `mlkem_dk` | 2,400 bytes |
| ML-KEM-768 ciphertext | `mlkem_ct` | 1,088 bytes |
| ML-KEM-768 shared secret | KEM result | 32 bytes |
| Hybrid public key | `x25519_pk + mlkem_ek` | 1,216 bytes |
| Hybrid envelope overhead | Header + AEAD tag | 1,149 bytes |
The ML-KEM-768 operations (keygen, encapsulate, decapsulate) are fast in
software -- typically sub-millisecond on modern hardware. The primary cost is
bandwidth, not computation.
## Related Pages
- [Cryptography Overview](overview.md) -- algorithm inventory including ML-KEM-768
- [Key Lifecycle and Zeroization](key-lifecycle.md) -- hybrid KEM key lifecycle
- [Forward Secrecy](forward-secrecy.md) -- how FS interacts with PQ protection
- [Threat Model](threat-model.md) -- harvest-now-decrypt-later in context
- [Hybrid KEM: X25519 + ML-KEM-768](../protocol-layers/hybrid-kem.md) -- protocol layer details

View File

@@ -0,0 +1,332 @@
# Threat Model
This page defines the attacker models quicnprotochat is designed to resist,
catalogues what is and is not protected, identifies known gaps in the current
implementation, and outlines future mitigations.
## Attacker Models
### 1. Passive Eavesdropper
**Capabilities:** Records all network traffic between clients and the server.
Can observe IP addresses, connection timing, message sizes, and encrypted
payloads. Cannot modify traffic.
**What they learn:**
- Connection metadata: which IP addresses connect to the server and when.
- Message timing and sizes: observable patterns (e.g., message frequency,
payload lengths) that could reveal communication patterns.
- Encrypted payloads: TLS 1.3 ciphertext containing Noise ciphertext containing
MLS ciphertext. Three layers of encryption must be broken to access content.
**What they cannot learn:**
- Message content: protected by MLS encryption inside Noise inside TLS.
- Group membership details: MLS Commits are encrypted.
- Which specific recipient a message is destined for (from the network
perspective, all messages go to the server).
**Residual risk:** Traffic analysis. Even without decryption, the timing and
size of messages can reveal communication patterns. For example, a message sent
by Alice followed shortly by a message to Bob may indicate they are in the same
group. See [Future Mitigations](#future-mitigations) for countermeasures.
### 2. Active Network Attacker (MITM)
**Capabilities:** Can intercept, modify, drop, and inject network traffic.
Positioned between the client and server (e.g., compromised router, ISP, or
state-level adversary).
**What they can do:**
- Attempt TLS 1.3 MITM: TLS 1.3 prevents this if the client validates the
server's certificate. However, quicnprotochat currently uses **self-signed
certificates**, which means the client has no CA chain to verify. On the first
connection, a MITM could present their own certificate and intercept the
session (trust-on-first-use vulnerability).
- Denial of service: drop or delay packets.
- Traffic analysis: same as passive eavesdropper, with the added ability to
inject timing perturbations.
**What they cannot do (assuming no cert MITM):**
- Decrypt TLS/Noise traffic: both use authenticated ephemeral key exchange.
- Forge MLS messages: MLS Commits and application messages are signed by the
sender's Ed25519 identity key. The attacker does not possess any member's
signing key.
- Inject members into MLS groups: adding a member requires a valid KeyPackage
signed by the new member's identity key.
**Current weakness:** Self-signed TLS certificates. See
[Known Gaps](#known-gaps).
### 3. Compromised Server
**Capabilities:** Full access to the server's memory, disk, and network
interfaces. Can read all data stored on the server, modify server behavior,
and observe all client connections.
**What the server sees:**
- Connection metadata: which clients connect, when, how often, from which IPs.
- Ed25519 public keys: used as delivery queue indices. The server knows the
public identity key of every registered client.
- Message sizes and timing: the server forwards MLS messages and can observe
their sizes and the timing of enqueue/fetch operations.
- Encrypted MLS blobs: the server stores and forwards MLS ciphertext but cannot
decrypt it (the server is MLS-unaware by design, per ADR-004).
**What the server cannot do:**
- **Decrypt message content:** The server does not hold any MLS group keys. MLS
application messages are encrypted end-to-end between group members. The
server sees only opaque ciphertext.
- **Forge MLS messages:** MLS messages are signed by the sender's Ed25519 key.
The server does not possess any member's signing key and cannot produce valid
MLS signatures.
- **Read past messages:** Even if the server stored old MLS ciphertext, it
cannot decrypt it. Forward secrecy at the MLS layer (epoch key deletion)
ensures that even compromising a member's state in the future does not reveal
past epoch keys.
**What the server can do maliciously:**
- **Traffic analysis:** Correlate senders and recipients based on timing,
message sizes, and queue access patterns.
- **Selective denial of service:** Drop or delay specific messages or refuse
service to specific clients.
- **Metadata correlation:** Link Ed25519 public keys to IP addresses and
connection patterns.
- **Replay (limited):** Re-deliver an MLS message. MLS has replay protection
via epoch numbers and message counters, so the recipient will reject the
duplicate.
- **KeyPackage manipulation:** Withhold or substitute KeyPackages during the
join flow. If the server substitutes a KeyPackage, the resulting MLS group
would include the attacker's key, but the legitimate member would not be able
to join (they would not receive a matching Welcome). This is detectable.
### 4. Compromised Client
**Capabilities:** Full access to a group member's device, including the MLS
group state, Ed25519 identity key, and any stored messages.
**What the attacker learns:**
- **Current epoch messages:** The attacker can decrypt all messages in the
current MLS epoch from all group members (epoch keys are shared group secrets).
- **Identity key:** The attacker obtains the member's Ed25519 signing key and
can impersonate the member (sign messages, create KeyPackages).
**What the attacker cannot learn:**
- **Past epoch messages:** Protected by [forward secrecy](forward-secrecy.md).
Old epoch keys have been deleted by openmls.
- **Future epoch messages (after healing):** Protected by
[post-compromise security](post-compromise-security.md). After the next
Commit or Update, the ratchet tree is re-randomized and the attacker is
locked out.
**Healing mechanism:**
1. The compromised member (or any other member) issues a Commit.
2. The ratchet tree is updated with fresh key material.
3. The attacker's stale state cannot derive the new epoch keys.
4. The attacker is locked out of future epochs.
The healing window is the time between the compromise and the next Commit. See
[Post-Compromise Security](post-compromise-security.md) for details.
## What Is Protected
| Asset | Protection Mechanism | Against |
|-------|---------------------|---------|
| Message content | MLS end-to-end encryption (AES-128-GCM) | Passive eavesdropper, active MITM, compromised server |
| Message integrity | MLS signing (Ed25519) | Forgery by server or network |
| Group membership changes | MLS Commits (signed, authenticated) | Unauthorized modification |
| Key exchange material | Single-use HPKE init keys | Replay, forward compromise |
| Transport confidentiality | TLS 1.3 + Noise\_XX (double encryption) | Passive eavesdropper |
| Transport integrity | TLS 1.3 AEAD + Noise AEAD | Active network attacker |
| Past messages | Forward secrecy (epoch key deletion) | Future client compromise |
| Future messages | Post-compromise security (ratchet tree update) | Past client compromise |
## What Is NOT Protected (Current State)
| Asset | Visibility | Observer |
|-------|-----------|----------|
| Transport metadata (who connects, when) | IP addresses, connection timing | Network adversary, server |
| Message timing and sizes | Observable in TLS records | Network adversary, server |
| Recipient identity | Server routes by Ed25519 public key | Server |
| Sender identity (partial) | Server can correlate connections to senders | Server |
| Number of groups a client belongs to | Observable via message patterns | Server (with analysis) |
| Client IP address | Visible in TCP/QUIC connection | Server, network adversary |
## Known Gaps
### Self-Signed TLS Certificates
The server uses self-signed TLS certificates generated at startup via `rcgen`.
Clients currently accept any server certificate without CA chain validation.
This makes the system vulnerable to a man-in-the-middle attack on every
connection, not only the first: an attacker positioned between the client and
server can present their own certificate and intercept all traffic.
**Impact:** Complete loss of transport confidentiality and integrity for affected
connections. MLS content remains protected (the MITM cannot decrypt MLS
ciphertext or forge MLS signatures), but the attacker can observe encrypted MLS
blobs, perform traffic analysis, and potentially block or delay messages.
**Mitigation path:** Implement certificate pinning (trust-on-first-use) or
integrate with a public CA (e.g., Let's Encrypt). Certificate transparency logs
could provide an additional detection mechanism.
### No Client Authentication on the Delivery Service
The Delivery Service does not currently authenticate clients. Anyone who knows
a recipient's Ed25519 public key can enqueue messages for that recipient. This
enables spam and potential denial-of-service by flooding a recipient's queue.
**Impact:** Queue flooding, spam delivery. MLS provides its own authentication
(the recipient will reject messages not signed by a group member), so forged
content will not be accepted, but the recipient must still download and attempt
to process the spam.
**Mitigation path:** The AUTHZ\_PLAN introduces token-based authentication,
binding identityKey to accounts and requiring valid access tokens for all
DS operations.
### No Rate Limiting
The server does not currently enforce per-client or per-IP rate limits. A
malicious client could flood the server with requests, consuming resources and
degrading service for other users.
**Impact:** Denial of service.
**Mitigation path:** The AUTHZ\_PLAN specifies per-IP and per-account/device
rate limits (e.g., 50 requests/second, 5 MB payload cap).
### BasicCredential Only
MLS `BasicCredential` contains only the raw Ed25519 public key bytes. There is
no certificate authority chain, no credential revocation mechanism, and no
binding to a human-readable identity (e.g., phone number, email).
**Impact:** No way to verify that a public key belongs to a specific person
without out-of-band verification (e.g., comparing fingerprints in person). An
attacker who compromises the Authentication Service could substitute public keys.
**Mitigation path:** Implement X.509-based MLS credentials with a certificate
chain, or integrate with a Key Transparency system that provides a verifiable
log of public key bindings.
### Classical-Only Transport
As discussed in [Post-Quantum Readiness](post-quantum-readiness.md), the
transport layer (TLS 1.3, Noise\_XX) uses classical-only cryptography. An
adversary performing harvest-now-decrypt-later (HNDL) could record transport
traffic today and decrypt it with a future quantum computer, revealing transport
metadata.
**Impact:** Future exposure of transport metadata (not content, assuming
hybrid KEM is active for MLS).
**Mitigation path:** Adopt post-quantum TLS (ML-KEM in TLS 1.3 handshake) when
`rustls` supports it. Investigate post-quantum Noise patterns.
## Future Mitigations
### Sealed Sender
**Goal:** Hide the sender's identity from the server.
**Approach:** Encrypt the sender's identity inside the MLS ciphertext. The
server cannot determine who sent a message -- it only knows the recipient
(delivery queue index). Signal implements a version of this as "Sealed Sender."
**Benefit:** Reduces the server's metadata visibility from "who sent to whom"
to "someone sent to this recipient."
### Private Information Retrieval (PIR)
**Goal:** Fetch messages without revealing the recipient's identity to the
server.
**Approach:** Use PIR protocols (e.g., SealPIR, SimplePIR) to query the
delivery queue without the server learning which queue was accessed.
**Benefit:** Combined with Sealed Sender, this would make the server metadata-blind:
it would know only that "someone sent something to someone."
**Trade-off:** PIR is computationally expensive and may increase latency
significantly, especially for large mailboxes.
### Key Transparency
**Goal:** Detect public key substitution attacks.
**Approach:** Publish all Ed25519 public keys in a verifiable, append-only log
(similar to Certificate Transparency for TLS). Clients can audit the log to
verify that their public key has not been replaced by an attacker.
**Benefit:** Detects attacks where the server (or an attacker who compromised
the server) substitutes a victim's public key with the attacker's key.
### OPAQUE Authentication
**Goal:** Zero-knowledge password authentication.
**Approach:** Use the OPAQUE protocol (RFC 9807) for client-server
authentication. OPAQUE allows the client to prove knowledge of a password
without revealing it to the server, even during registration.
**Benefit:** The server never learns the client's password, preventing
credential theft in a server compromise.
### Tor/I2P Integration
**Goal:** Hide client IP addresses from the server and network adversaries.
**Approach:** Route QUIC connections through the Tor network or I2P. The server
sees only the Tor exit node's IP, not the client's real IP.
**Benefit:** Prevents the server and network adversaries from linking
connections to physical locations or ISP accounts.
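**Trade-off:** Significant latency increase. Tor carries only TCP streams, so
QUIC (which runs over UDP) cannot be routed through it directly and would need
a TCP-based fallback transport or tunnel. Any such setup also requires careful
configuration to avoid leaking the real IP through WebRTC-style mechanisms.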
### Padding and Traffic Shaping
**Goal:** Defeat traffic analysis based on message sizes and timing.
**Approach:** Pad all messages to fixed sizes (or random sizes from a
distribution) and send dummy messages at regular intervals to mask real
communication patterns.
**Benefit:** Makes it harder for network adversaries and the server to infer
communication patterns from traffic analysis.
**Trade-off:** Increased bandwidth usage.
## Summary Table
| Threat | Current Protection | Gap | Planned Fix |
|--------|-------------------|-----|-------------|
| Passive eavesdropper | TLS + Noise + MLS (3 layers) | Traffic analysis | Padding, Tor |
| Active MITM | TLS 1.3 + Noise\_XX | Self-signed certs | Cert pinning, CA |
| Compromised server | MLS E2E encryption | Metadata visible | Sealed Sender, PIR |
| Compromised client | FS + PCS | Current epoch exposed | Periodic Updates |
| Spam/flooding | None | No auth on DS | AUTHZ\_PLAN |
| Key substitution | None | BasicCredential only | Key Transparency |
| Quantum adversary (content) | Hybrid KEM (M5+) | Pre-M5 messages | Deploy hybrid ASAP |
| Quantum adversary (transport) | None | Classical TLS/Noise | PQ TLS, PQ Noise |
## Related Pages
- [Cryptography Overview](overview.md) -- algorithm inventory and security levels
- [Forward Secrecy](forward-secrecy.md) -- protecting past messages
- [Post-Compromise Security](post-compromise-security.md) -- protecting future messages
- [Post-Quantum Readiness](post-quantum-readiness.md) -- ML-KEM-768 and the PQ gap
- [Ed25519 Identity Keys](identity-keys.md) -- identity key used for MLS credentials
- [Key Lifecycle and Zeroization](key-lifecycle.md) -- key destruction guarantees

View File

@@ -0,0 +1,191 @@
# X25519 Transport Keys
The X25519 transport keypair is used for mutual authentication in the Noise\_XX
handshake. Unlike the [Ed25519 identity key](identity-keys.md), which is a
signing key, the X25519 key performs Diffie-Hellman key exchange to establish
encrypted transport sessions.
**Source:** `crates/quicnprotochat-core/src/keypair.rs`
## Structure
The `NoiseKeypair` struct holds two fields:
```rust
pub struct NoiseKeypair {
/// Private scalar -- zeroized on drop via x25519_dalek's ZeroizeOnDrop impl.
private: StaticSecret,
/// Corresponding public key -- derived from private at construction time.
public: PublicKey,
}
```
| Field | Type | Size | Secret? |
|-------|------|------|---------|
| `private` | `x25519_dalek::StaticSecret` | 32 bytes | Yes -- `ZeroizeOnDrop` |
| `public` | `x25519_dalek::PublicKey` | 32 bytes | No -- safe to log/transmit |
## Key Generation
A fresh keypair is generated from the OS CSPRNG:
```rust
use quicnprotochat_core::keypair::NoiseKeypair;
let keypair = NoiseKeypair::generate();
// private: random 32-byte scalar from OsRng
// public: derived via Curve25519 scalar multiplication
```
Internally:
```rust
pub fn generate() -> Self {
let private = StaticSecret::random_from_rng(OsRng);
let public = PublicKey::from(&private);
Self { private, public }
}
```
The `StaticSecret::random_from_rng` call uses the operating system's CSPRNG
(`getrandom` on Linux, `SecRandomCopyBytes` on macOS) and is suitable for
generating long-lived static identity keys.
## Accessing Key Material
### Private Key Bytes
The `private_bytes()` method returns the raw 32-byte private scalar wrapped in
`Zeroizing<[u8; 32]>`:
```rust
pub fn private_bytes(&self) -> Zeroizing<[u8; 32]> {
Zeroizing::new(self.private.to_bytes())
}
```
The `Zeroizing` wrapper ensures the caller's copy of the key material is
overwritten with zeros when it goes out of scope. The intended usage pattern is
to pass the bytes directly to `snow::Builder` and let the wrapper drop
immediately:
```rust
let private = keypair.private_bytes();
let session = snow::Builder::new(params)
.local_private_key(&private[..])
.build_initiator()?;
// private is zeroized here when it falls out of scope.
```
### Public Key Bytes
The `public_bytes()` method returns a plain `[u8; 32]`:
```rust
pub fn public_bytes(&self) -> [u8; 32] {
self.public.to_bytes()
}
```
The public key is not secret and may be freely cloned, logged, or transmitted
over the wire.
## Zeroization Strategy
The `NoiseKeypair` has two layers of zeroization protection:
1. **`StaticSecret` (inner):** The `x25519_dalek` crate implements
`ZeroizeOnDrop` on `StaticSecret`. When the `NoiseKeypair` struct is dropped,
the private scalar is automatically overwritten with zeros.
2. **`Zeroizing<[u8; 32]>` (accessor):** When callers use `private_bytes()`, the
returned copy is also wrapped in `Zeroizing`, so the caller's copy is zeroed
on drop too.
This dual-layer approach ensures that key material does not linger in memory
whether the key is accessed by value or held in the struct.
## Debug Redaction
The `Debug` implementation intentionally redacts the private key and shows only
the first 4 bytes of the public key as a sanity identifier:
```rust
impl std::fmt::Debug for NoiseKeypair {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let pub_bytes = self.public_bytes();
write!(
f,
"NoiseKeypair {{ public: {:02x}{:02x}{:02x}{:02x}..., private: [redacted] }}",
pub_bytes[0], pub_bytes[1], pub_bytes[2], pub_bytes[3],
)
}
}
```
This prevents accidental leakage of secret material through logging or
`println!("{:?}", keypair)`.
## Role in Noise\_XX
The Noise\_XX handshake pattern performs mutual authentication: both initiator
and responder prove possession of their static X25519 keys. The handshake
proceeds in three messages:
```text
→ e (initiator sends ephemeral public key)
← e, ee, s, es (responder sends ephemeral + static, DH results)
→ s, se (initiator sends static, final DH result)
```
After the handshake completes, both parties have:
- Authenticated each other's static X25519 public keys.
- Derived symmetric transport keys from the DH shared secrets.
- Established forward secrecy via the ephemeral keys (which are discarded).
The `NoiseKeypair` provides the `s` (static) key in this pattern. Ephemeral keys
(`e`) are generated internally by `snow` during each handshake.
## Ephemeral vs Static
In the context of Noise\_XX:
- **Ephemeral keys** are generated per-handshake by `snow` and discarded after
key derivation. They provide forward secrecy.
- **Static keys** (the `NoiseKeypair`) are longer-lived and provide identity.
In the current implementation, the server generates a new `NoiseKeypair` per
process start, and the client generates one per connection.
## Persistence
`NoiseKeypair` **intentionally does not implement `Serialize`**. Key persistence
to disk is deferred to milestone M6, which will add:
- Appropriate file permission checks (e.g., `0600` on Unix).
- Optional passphrase-based encryption of the key file.
- A key rotation mechanism.
Until M6, the transport key is ephemeral to the process lifetime. This is
acceptable because the Noise key is not used for MLS group membership -- that
role belongs to the [Ed25519 identity key](identity-keys.md).
## Comparison with Ed25519 Identity Key
| Property | X25519 Noise Key | Ed25519 Identity Key |
|----------|-----------------|---------------------|
| Curve | Montgomery (Curve25519) | Twisted Edwards (Ed25519) |
| Operation | Diffie-Hellman key exchange | Digital signatures |
| Purpose | Noise\_XX mutual authentication | MLS credentials, AS registration |
| Lifetime | Per process / per connection | Permanent (per client) |
| Serialization | Not implemented | Serde (seed bytes) |
| Zeroize | `ZeroizeOnDrop` (x25519-dalek) | `Zeroizing<[u8; 32]>` (manual) |
| Source file | `keypair.rs` | `identity.rs` |
## Related Pages
- [Cryptography Overview](overview.md) -- algorithm inventory
- [Ed25519 Identity Keys](identity-keys.md) -- the other keypair
- [Key Lifecycle and Zeroization](key-lifecycle.md) -- full lifecycle diagram
- [Forward Secrecy](forward-secrecy.md) -- how ephemeral DH provides FS at the transport layer
- [Noise\_XX Handshake](../protocol-layers/noise-xx.md) -- protocol details

View File

@@ -0,0 +1,118 @@
# ADR-001: Noise\_XX for Transport Authentication
**Status:** Accepted
---
## Context
quicnprotochat needs mutual authentication at the transport layer: both client and server must prove their identity before any application data is exchanged. The standard solution is TLS with X.509 certificates, but this brings significant operational complexity:
- A Certificate Authority (CA) must be operated or purchased from.
- Certificates must be provisioned, rotated, and revoked.
- Client certificate authentication in TLS is cumbersome and poorly supported by many libraries.
- The X.509 PKI is a large attack surface with a long history of CA compromises.
An alternative is needed that provides mutual authentication with simpler key management, ideally using raw public keys rather than certificates.
### Alternatives considered
1. **TLS 1.3 with X.509 certificates.** Standard, widely deployed, but requires CA infrastructure. Client certificate authentication is possible but adds complexity. Later adopted for the QUIC transport (M3+), where server authentication is sufficient and client auth is handled at the application layer via the `Auth` struct.
2. **TLS 1.3 with Raw Public Keys (RFC 7250).** Eliminates the CA dependency but has limited library support. The `rustls` crate did not support RPK at the time of the M1 design.
3. **Noise Protocol Framework.** Purpose-built for authenticated key exchange using raw static keys. Multiple handshake patterns available. Mature specification with formal security analysis. Well-supported by the `snow` crate in Rust.
4. **WireGuard-style handshake.** Based on Noise\_IK. Assumes the initiator already knows the responder's static key. Does not provide identity hiding for the initiator.
---
## Decision
Use the **Noise\_XX handshake pattern** for the M1 transport layer. Both parties hold static X25519 keypairs that are registered out-of-band (e.g., via a future directory service, QR code, or manual configuration).
### Why Noise\_XX specifically?
The Noise Protocol Framework defines several handshake patterns, differing in which static keys are transmitted during the handshake:
| Pattern | Initiator static key | Responder static key | Identity hiding |
|---|---|---|---|
| **NN** | Not transmitted | Not transmitted | No authentication |
| **NK** | Not transmitted | Known to initiator | Server-only auth |
| **KK** | Known to responder | Known to initiator | Mutual auth, no identity hiding |
| **XX** | Transmitted (encrypted) | Transmitted (encrypted) | **Mutual auth + identity hiding for initiator** |
| **IK** | Transmitted (encrypted) | Known to initiator | Mutual auth, initiator identity hidden from passive observers |
**XX** was chosen because:
1. **Mutual authentication.** Both parties prove possession of their static private keys during the handshake. The server verifies the client's identity, and the client verifies the server's identity.
2. **Identity hiding for the initiator.** The initiator's static public key is transmitted encrypted under an ephemeral key, so a passive network observer cannot determine who is connecting. The responder's static key is also transmitted encrypted, though an active attacker performing a man-in-the-middle on the first message could learn it (this is inherent to any pattern where the responder's key is not pre-known).
3. **No pre-shared keys required.** Unlike IK or KK, the XX pattern does not require either party to know the other's static key before the handshake begins. This simplifies bootstrapping: a new client can connect to a server without prior key exchange.
4. **Three-message handshake.** XX completes in 3 messages (-> e, <- e ee s es, -> s se), which is one message (half a round-trip) more than IK but provides stronger identity hiding guarantees.
### Cryptographic parameters
| Parameter | Value |
|---|---|
| Handshake pattern | `Noise_XX_25519_ChaChaPoly_SHA256` |
| DH function | X25519 (Curve25519) |
| AEAD cipher | ChaCha20-Poly1305 |
| Hash function | SHA-256 |
| Static key size | 32 bytes (X25519 public key) |
| Ephemeral key size | 32 bytes (X25519 public key) |
### Implementation
The Noise handshake is implemented using the `snow` crate (`snow 0.9`). Key source files:
- `crates/quicnprotochat-core/src/noise.rs` -- `NoiseTransport` struct, handshake state machine, encrypted read/write methods.
- `crates/quicnprotochat-core/src/codec.rs` -- `LengthPrefixedCodec` that frames Noise handshake and transport messages over TCP.
- `crates/quicnprotochat-core/src/error.rs` -- `CoreError::Noise` variant for handshake and transport errors.
---
## Consequences
### Benefits
- **No CA infrastructure.** Key management is reduced to generating, storing, and distributing raw 32-byte X25519 public keys. No certificates, no expiration, no revocation lists.
- **Simpler key management.** Each node has a single static X25519 keypair. The public key is its transport-layer identity.
- **Identity hiding.** Passive network observers cannot determine which client is connecting to the server.
- **Well-analyzed security.** The Noise Protocol Framework has formal security proofs (Kobeissi et al., 2019). The XX pattern specifically has been analyzed for identity hiding and key compromise impersonation resistance.
- **Lightweight.** The `snow` crate is small, auditable, and has no transitive dependency on OpenSSL or ring (it uses pure-Rust cryptography).
### Costs and trade-offs
- **Three-message handshake.** XX requires 3 messages (1.5 round-trips) compared to TLS 1.3's 1-RTT handshake (or 0-RTT with resumption). This adds latency to connection establishment. In practice, this is only significant for short-lived connections.
- **No PQ protection.** The Noise handshake uses classical X25519. A quantum adversary performing a harvest-now-decrypt-later attack could recover the handshake transcript and learn the static keys. This is accepted as a known risk (see [ADR-006: PQ Gap](adr-006-pq-gap.md)).
- **Out-of-band key distribution.** Without a CA or directory service, clients must obtain the server's static public key through some out-of-band mechanism. This is currently handled by hardcoding or configuration.
- **Superseded for client-server transport.** With the move to QUIC + TLS 1.3 in M3, the Noise transport is no longer the primary client-server path. It remains available for direct peer-to-peer connections and as a fallback in environments where QUIC/UDP is blocked.
### Residual risks
- **Harvest-now-decrypt-later for metadata.** An adversary who records the Noise handshake today and obtains a quantum computer in the future could decrypt the handshake transcript, revealing the static public keys of both parties (identity metadata). However, no long-lived content secrets transit the handshake -- MLS provides its own key agreement. See [ADR-006](adr-006-pq-gap.md) for the full analysis.
- **Key compromise impersonation (KCI).** If a party's static private key is compromised, an attacker can impersonate other parties to the compromised party. This is inherent to any DH-based mutual authentication scheme without a PKI. Mitigated by key rotation and secure key storage.
---
## Code references
| File | Relevance |
|---|---|
| `crates/quicnprotochat-core/src/noise.rs` | `NoiseTransport` implementation: handshake, encrypted read/write |
| `crates/quicnprotochat-core/src/codec.rs` | `LengthPrefixedCodec`: frames Noise messages over TCP |
| `crates/quicnprotochat-core/src/error.rs` | `CoreError::Noise`, `CodecError` error types |
---
## Further reading
- [Design Decisions Overview](overview.md) -- index of all ADRs
- [ADR-003: RPC Inside the Noise Tunnel](adr-003-rpc-inside-noise.md) -- how Cap'n Proto RPC runs over the Noise channel
- [ADR-006: PQ Gap in Noise Transport](adr-006-pq-gap.md) -- analysis of the post-quantum gap
- [Framing Codec](../wire-format/framing-codec.md) -- the codec that frames Noise messages
- [Protocol Layers Overview](../protocol-layers/overview.md) -- how Noise fits in the protocol stack
- [Noise Protocol Framework specification](https://noiseprotocol.org/noise.html) -- upstream specification

View File

@@ -0,0 +1,140 @@
# ADR-002: Cap'n Proto over MessagePack
**Status:** Accepted
---
## Context
quicnprotochat needs an efficient, typed wire format for client-server communication. The format must support:
1. **Typed messages** with compile-time schema enforcement to eliminate hand-rolled serialisation bugs.
2. **Schema evolution** so that new fields and methods can be added without breaking existing clients.
3. **RPC support** for clean method dispatch, eliminating the need for manual message-type routing.
4. **Efficient encoding** to minimize overhead on constrained networks and high-throughput server paths.
5. **Canonical serialisation** so that identical logical messages produce identical byte sequences, enabling reliable signing.
The original M0 prototype used MessagePack (via the `rmp-serde` crate) with hand-rolled dispatch based on integer message-type tags. This approach had several problems:
- **No schema enforcement.** The wire format was defined implicitly by Rust `#[derive(Serialize, Deserialize)]` annotations. There was no single source of truth for the wire format, and changes to Rust struct layout silently changed the wire format.
- **No RPC.** Message dispatch was a manual `match` on a `MsgType` enum. Adding a new message type required modifying the dispatch table in both client and server, with no compile-time guarantee that all cases were handled.
- **No canonical form.** MessagePack's map encoding does not guarantee key ordering, so the same logical message could produce different byte sequences depending on the Rust `HashMap` iteration order. This made signing over serialised data unreliable.
- **Deserialization overhead.** MessagePack requires a full decode pass that allocates and copies data. For a messaging system processing many small messages, this overhead is unnecessary.
### Alternatives considered
1. **MessagePack (status quo).** Keep the existing format. Rejected because of the schema, dispatch, and canonicity problems described above.
2. **Protocol Buffers (Protobuf).** Schema-defined, binary, widely used. However:
- Protobuf does not guarantee canonical serialisation (default value elision, field ordering, and unknown field handling can vary between implementations).
- Protobuf RPC requires a separate framework (gRPC), which brings in HTTP/2 and a heavy runtime.
- Protobuf deserialization requires an allocation and copy pass (not zero-copy).
3. **FlatBuffers.** Zero-copy, schema-defined. However:
- No built-in RPC framework.
- The FlatBuffers Rust crate ecosystem was less mature than Cap'n Proto's at the time of evaluation.
- No canonical serialisation guarantee.
4. **Cap'n Proto.** Zero-copy, schema-defined, canonical serialisation, built-in async RPC. The `capnp` and `capnp-rpc` Rust crates are mature and actively maintained.
---
## Decision
Replace MessagePack with **Cap'n Proto** for all wire-format serialisation and RPC dispatch. Define all message types and service interfaces in `.capnp` schema files, and use the `capnpc` compiler for Rust code generation.
### Key properties of Cap'n Proto
**Zero-copy deserialization:**
Cap'n Proto's wire format is designed so that the byte layout on the wire is identical to the byte layout in memory. A receiver can traverse the message in-place using pointer arithmetic, without allocating or copying data. For a messaging server that processes many small messages per second, this eliminates a significant class of allocation overhead.
```text
Traditional serialisation: wire bytes -> decode -> allocate -> application struct
Cap'n Proto: wire bytes == application struct (traverse in place)
```
**Schema enforcement:**
All messages and RPC interfaces are defined in `.capnp` schema files checked into the repository. The `capnpc` compiler generates Rust code with type-safe builders and readers. A mismatched field type or missing field is caught at compile time, not at runtime.
**Canonical serialisation:**
Cap'n Proto defines a canonical form for messages: fields are laid out in a deterministic order with deterministic padding. Two implementations that canonicalize the same logical message produce identical byte sequences. Note that ordinary builder output is not guaranteed to be canonical, so a message must be explicitly canonicalized before it is signed or hashed. This property is essential for signing: the MLS layer signs over serialised Cap'n Proto data, and non-deterministic serialisation would make signature verification unreliable.
**Built-in async RPC:**
The `capnp-rpc` crate provides a full RPC framework built on top of Cap'n Proto serialisation. Features include:
- **Method dispatch:** Each interface method has a unique ordinal, and the RPC runtime dispatches incoming calls to the correct handler automatically.
- **Promise pipelining:** A client can call a method on the result of a previous call before the first call has returned. The RPC runtime resolves the pipeline when the result is available.
- **Cancellation:** An in-flight RPC call can be cancelled by the client, and the server is notified.
- **Level 1 RPC:** The `capnp-rpc` crate implements Cap'n Proto's Level 1 RPC protocol, which supports most features needed for client-server communication.
**Schema evolution:**
Cap'n Proto supports forward-compatible schema evolution:
- New fields can be added to structs (with the next available field number). Old readers ignore unknown fields.
- New methods can be added to interfaces (with the next available ordinal). Old clients cannot call them; old servers reject unknown method calls.
- Fields and methods can never be removed or renumbered, but they can be deprecated.
- The `version` field in the `Auth` struct provides application-level versioning on top of structural evolution.
### Schema files
The Cap'n Proto schemas are stored in the `schemas/` directory:
| File | Content | Documentation |
|---|---|---|
| `schemas/envelope.capnp` | Legacy `Envelope` struct and `MsgType` enum | [Envelope Schema](../wire-format/envelope-schema.md) |
| `schemas/auth.capnp` | `AuthenticationService` interface | [Auth Schema](../wire-format/auth-schema.md) |
| `schemas/delivery.capnp` | `DeliveryService` interface | [Delivery Schema](../wire-format/delivery-schema.md) |
| `schemas/node.capnp` | `NodeService` interface and `Auth` struct | [NodeService Schema](../wire-format/node-service-schema.md) |
---
## Consequences
### Benefits
- **Eliminated hand-rolled dispatch.** The manual `MsgType` match table is replaced by Cap'n Proto RPC's automatic method dispatch. Adding a new operation means adding a method to the `.capnp` schema and implementing the handler -- no dispatch table to update.
- **Compile-time type safety.** Schema violations are caught at compile time by the generated Rust code. A field type mismatch or missing required parameter is a compile error, not a runtime panic.
- **Zero-copy performance.** The server avoids deserialization overhead for messages it routes but does not inspect (which is most messages, since the DS is MLS-unaware). The server can read the routing fields (recipient key, channel ID) directly from the wire bytes.
- **Canonical form for signing.** MLS operations that sign over serialised data can rely on Cap'n Proto producing deterministic byte sequences.
- **Schema as documentation.** The `.capnp` files serve as the authoritative specification of the wire format, readable by both humans and tools.
### Costs and trade-offs
- **Build-time code generation.** The `capnpc` compiler must run during the build (via `build.rs` in `quicnprotochat-proto`). This adds a build dependency and increases compile times slightly.
- **Learning curve.** Cap'n Proto's builder/reader API is different from typical `serde`-based Rust serialisation. Developers must learn the Cap'n Proto programming model (builders for construction, readers for traversal, owned messages for storage).
- **Generated code verbosity.** The generated Rust code is verbose and not intended to be read directly. Application code interacts with it through the builder/reader traits.
- **Smaller ecosystem than Protobuf.** Cap'n Proto has fewer users, fewer tutorials, and fewer third-party tools than Protobuf. However, the core Rust crates are well-maintained.
- **No dynamic reflection.** Unlike Protobuf (which supports `Any` and `DynamicMessage`), Cap'n Proto does not provide runtime reflection over unknown schemas. This has not been a limitation in practice.
### Residual risks
- **Crate maintenance.** The `capnp` and `capnp-rpc` crates are maintained primarily by David Renshaw. If maintenance lapses, the project would need to fork or switch serialisation formats. Mitigated by the crates' maturity and the relatively stable Cap'n Proto specification.
- **RPC limitations.** The Rust `capnp-rpc` crate implements Level 1 of the Cap'n Proto RPC protocol. Level 3 features (three-party handoffs) are not supported. This has not been a limitation for quicnprotochat's client-server architecture.
---
## Code references
| File | Relevance |
|---|---|
| `schemas/envelope.capnp` | Legacy Envelope struct definition |
| `schemas/auth.capnp` | AuthenticationService RPC interface |
| `schemas/delivery.capnp` | DeliveryService RPC interface |
| `schemas/node.capnp` | NodeService unified RPC interface |
| `crates/quicnprotochat-proto/build.rs` | Build script that invokes `capnpc` for code generation |
| `crates/quicnprotochat-proto/src/lib.rs` | Re-exports generated Cap'n Proto modules |
---
## Further reading
- [Design Decisions Overview](overview.md) -- index of all ADRs
- [Wire Format Overview](../wire-format/overview.md) -- how Cap'n Proto fits in the serialisation pipeline
- [ADR-003: RPC Inside the Noise Tunnel](adr-003-rpc-inside-noise.md) -- how Cap'n Proto RPC runs over the encrypted transport
- [Why This Design, Not Signal/Matrix/...](why-not-signal.md) -- serialisation comparison against Protobuf and JSON
- [Cap'n Proto encoding specification](https://capnproto.org/encoding.html) -- upstream specification

View File

@@ -0,0 +1,147 @@
# ADR-003: RPC Inside the Noise Tunnel
**Status:** Accepted
---
## Context
Cap'n Proto RPC provides typed method dispatch, promise pipelining, and automatic serialisation -- but it has **no built-in transport security**. The RPC protocol assumes it operates over a trusted byte stream. If that byte stream is a raw TCP connection, all RPC traffic (method names, parameters, return values) is transmitted in cleartext.
quicnprotochat requires that all client-server communication be encrypted and authenticated. The question is: how should encryption and RPC be composed?
### Alternatives considered
1. **RPC over raw TCP, with application-level encryption.** Each RPC payload would be individually encrypted by the application before passing it to Cap'n Proto. This is complex, error-prone, and does not protect RPC metadata (method ordinals, message structure).
2. **RPC over TLS.** Use TLS 1.3 as the transport for the Cap'n Proto RPC byte stream. This is the conventional approach for web services (gRPC uses TLS). However, in the M1 design, TLS with mutual authentication required CA infrastructure that we wanted to avoid (see [ADR-001](adr-001-noise-xx.md)).
3. **RPC over Noise.** Use the Noise\_XX handshake to establish an encrypted, authenticated session, then feed the Cap'n Proto RPC byte stream through the Noise transport layer. The RPC layer is completely unaware of the encryption beneath it.
4. **RPC over QUIC.** Use QUIC + TLS 1.3 as the transport. Cap'n Proto RPC operates over a QUIC bidirectional stream. This is the approach adopted in M3+.
---
## Decision
Cap'n Proto RPC operates over the encrypted byte stream provided by the transport layer. The transport layer -- whether Noise\_XX (M1) or QUIC + TLS 1.3 (M3+) -- owns all security properties (confidentiality, integrity, authentication). Cap'n Proto owns all framing and dispatch properties (serialisation, method routing, schema enforcement).
This is a **separation of concerns** at the protocol layer boundary:
```text
┌─────────────────────────────────┐
│ Cap'n Proto RPC │ Dispatch, serialisation, typing
│ (capnp-rpc crate) │
├─────────────────────────────────┤
│ Encrypted byte stream │ Confidentiality, integrity, auth
│ (Noise_XX or QUIC/TLS 1.3) │
├─────────────────────────────────┤
│ TCP or UDP │ Reliable (TCP) or datagram (UDP)
└─────────────────────────────────┘
```
### Noise transport path (M1)
In the M1 stack, the composition works as follows:
1. Client and server perform a Noise\_XX handshake over a TCP connection, establishing a shared session key.
2. The resulting `NoiseTransport` wraps the TCP stream, providing `AsyncRead + AsyncWrite` that transparently encrypts/decrypts all data.
3. Cap'n Proto RPC is instantiated over this `NoiseTransport`. The RPC runtime reads and writes to the `NoiseTransport` as if it were a plain byte stream.
4. Each RPC message is framed by the [LengthPrefixedCodec](../wire-format/framing-codec.md) before encryption and after decryption.
```text
Client Server
| |
| --- Noise_XX handshake (3 messages) -----------> |
| <-- Noise_XX handshake ------------------------- |
| |
| [Noise-encrypted Cap'n Proto RPC traffic] |
| --- uploadKeyPackage(identityKey, pkg, auth) --> |
| <-- (fingerprint) -------------------------------- |
| --- enqueue(recipientKey, payload, ch, v, a) --> |
| <-- () ------------------------------------------ |
| ... |
```
### QUIC transport path (M3+)
In the M3+ stack, the composition is:
1. Client connects to the server via QUIC, which performs a TLS 1.3 handshake internally.
2. The client opens a bidirectional QUIC stream.
3. Cap'n Proto RPC is instantiated over the QUIC stream. The `quinn` crate provides `AsyncRead + AsyncWrite` for each stream.
4. The `LengthPrefixedCodec` is **not used** in this path -- QUIC provides native stream framing, and `capnp-rpc` handles message delimitation internally.
```text
Client Server
| |
| --- QUIC handshake (TLS 1.3) -----------------> |
| <-- QUIC handshake ---------------------------- |
| |
| [QUIC-encrypted Cap'n Proto RPC traffic] |
| --- uploadKeyPackage(identityKey, pkg, auth) --> |
| <-- (fingerprint) -------------------------------- |
| --- fetchWait(recipientKey, ch, v, t, a) ------> |
| <-- (payloads) ---------------------------------- |
| ... |
```
### Transport agnosticism
The key architectural property is that **Cap'n Proto RPC is transport-agnostic**. The same RPC interface (`NodeService`) works identically over both transport paths. The server implementation does not know or care which transport the client used -- it receives the same typed method calls either way.
This is achieved by abstracting the transport behind Rust's `AsyncRead + AsyncWrite` traits. The `capnp-rpc` crate accepts any type that implements these traits as its underlying stream.
---
## Consequences
### Benefits
- **Clean layering.** Each layer has a single, well-defined responsibility. The transport layer does not need to understand Cap'n Proto. Cap'n Proto does not need to understand encryption. This makes each layer independently testable and replaceable.
- **Transport flexibility.** Switching from Noise to QUIC (or adding a future transport) required no changes to the RPC interface or the application logic. Only the transport initialization code changed.
- **Full metadata protection.** Because encryption wraps the entire RPC byte stream, not just individual payloads, all in-band RPC metadata is protected: method ordinals, parameter values, and return values. The size and timing pattern of RPC calls are hidden only within the limits of the transport's traffic analysis resistance (see "Side channels" under residual risks).
- **No double encryption.** The application layer does not need to encrypt RPC payloads separately. The transport layer provides confidentiality for the entire stream.
- **Composable security.** The Noise/QUIC layer provides transport security (server authentication, channel confidentiality). MLS provides end-to-end security (group key agreement, forward secrecy, PCS). The RPC layer is the bridge between them, carrying MLS ciphertext as opaque blobs. No single layer needs to provide all security properties.
### Costs and trade-offs
- **No end-to-end RPC security.** The RPC layer trusts the transport for confidentiality. If the transport is compromised (e.g., a TLS vulnerability), all RPC traffic is exposed. This is mitigated by MLS providing a second layer of encryption for message content.
- **Transport must be established first.** The Noise handshake or QUIC connection must complete before any RPC call can be made. This adds latency to the first interaction. In the QUIC path, this is mitigated by 0-RTT resumption.
- **Debugging complexity.** Because all traffic is encrypted, debugging wire-level issues requires either decrypting the transport (which requires the session keys) or logging at the application layer. This is an inherent trade-off of transport encryption.
### Residual risks
- **Transport-layer vulnerability.** A vulnerability in `snow` (Noise) or `rustls` (TLS) could expose the RPC byte stream. Mitigated by keeping dependencies updated and by the fact that MLS ciphertext within the stream is independently encrypted.
- **Side channels.** The transport encrypts content but may not fully hide message sizes or timing patterns. A sophisticated adversary could infer information from traffic analysis. This is a known limitation of any encrypted transport and is orthogonal to the RPC-inside-transport decision.
---
## Code references
| File | Relevance |
|---|---|
| `crates/quicnprotochat-core/src/noise.rs` | `NoiseTransport`: encrypted `AsyncRead + AsyncWrite` wrapper |
| `crates/quicnprotochat-core/src/codec.rs` | `LengthPrefixedCodec`: frames messages in the Noise path |
| `crates/quicnprotochat-server/src/main.rs` | Server: accepts QUIC connections, instantiates Cap'n Proto RPC over QUIC streams |
| `crates/quicnprotochat-client/src/main.rs` | Client: connects via QUIC, instantiates Cap'n Proto RPC client |
| `schemas/node.capnp` | `NodeService` RPC interface definition |
---
## Further reading
- [Design Decisions Overview](overview.md) -- index of all ADRs
- [ADR-001: Noise\_XX for Transport Auth](adr-001-noise-xx.md) -- the Noise transport that RPC runs inside (M1)
- [ADR-002: Cap'n Proto over MessagePack](adr-002-capnproto.md) -- why Cap'n Proto was chosen for serialisation
- [Wire Format Overview](../wire-format/overview.md) -- the full serialisation pipeline
- [Framing Codec](../wire-format/framing-codec.md) -- length-prefixed framing in the Noise path
- [NodeService Schema](../wire-format/node-service-schema.md) -- the RPC interface that runs over the encrypted tunnel
- [Protocol Layers Overview](../protocol-layers/overview.md) -- how all protocol layers compose

View File

@@ -0,0 +1,124 @@
# ADR-004: MLS-Unaware Delivery Service
**Status:** Accepted
---
## Context
The Delivery Service (DS) is the server-side component that stores and forwards messages between clients. A fundamental design question is: **should the DS understand MLS messages?**
An MLS-aware DS could inspect message types and perform optimizations:
- **Fan-out:** When a client sends a Commit or Application message intended for all group members, an MLS-aware DS could parse the group membership and deliver the message to all members automatically, instead of requiring the client to enqueue separately for each recipient.
- **Membership validation:** An MLS-aware DS could verify that a sender is actually a member of the group before accepting a message, preventing spam from non-members.
- **Epoch filtering:** An MLS-aware DS could reject messages from stale epochs, reducing the processing burden on recipients.
- **Tree optimization:** An MLS-aware DS could cache the ratchet tree and assist with tree synchronization.
However, an MLS-aware DS would also:
- Have access to MLS message metadata (group IDs, epoch numbers, sender positions in the tree).
- Require an MLS library dependency on the server.
- Be more complex to implement, test, and audit.
- Potentially violate the MLS architecture's trust model.
### What RFC 9420 says
RFC 9420 Section 4 defines the DS as a component that:
> "is responsible for ordering handshake messages and delivering them to each client."
Critically, the RFC specifies that the DS **does not have access to group keys** and treats message content as opaque. The DS's role is limited to:
1. Ordering: ensuring that handshake messages (Commits) are applied in a consistent order across all group members.
2. Delivery: routing messages to the correct recipients.
3. Optional: enforcing access control (e.g., only group members can send to the group).
The RFC explicitly envisions that the DS operates on opaque blobs, not on decrypted MLS content.
---
## Decision
The quicnprotochat Delivery Service is **MLS-unaware**. It routes opaque byte strings by `(recipientKey, channelId)` without parsing, inspecting, or validating any MLS content.
### What the DS sees
```text
DS perspective:
enqueue(recipientKey=0x1234..., payload=<opaque bytes>, channelId=<uuid>, version=1)
fetch(recipientKey=0x1234..., channelId=<uuid>, version=1) -> [<opaque bytes>, ...]
DS does NOT see:
- Whether the payload is a Welcome, Commit, or Application message
- The MLS group ID or epoch number
- The sender's position in the ratchet tree
- Any plaintext content
```
### Routing responsibility
Because the DS does not parse MLS messages, the **client** is responsible for routing:
| MLS Operation | Client's Routing Responsibility |
|---|---|
| `add_members()` | Enqueue the Welcome message to the new member's `recipientKey`. Enqueue the Commit to each existing member's `recipientKey`. |
| `remove_members()` | Enqueue the Commit to each remaining member's `recipientKey`. |
| `create_message()` | Enqueue the Application message to each group member's `recipientKey`. |
| `self_update()` | Enqueue the Commit to each other member's `recipientKey`. |
This means that sending a message to a group of n members requires n-1 enqueue calls (one per recipient, excluding the sender). The client must maintain its own copy of the group membership list.
---
## Consequences
### Benefits
- **Correct MLS architecture.** The DS does not hold group keys or inspect group state, which is the architecture recommended by RFC 9420 Section 4. A compromised DS learns nothing about message content or group structure beyond the routing metadata (recipient keys and channel IDs).
- **Audit-friendly.** The DS's audit log is a simple append-only sequence of `(timestamp, recipientKey, channelId, payload_hash)` entries. There is no complex state machine to audit. The server's behavior is trivially verifiable: it accepts blobs and returns them in FIFO order.
- **No MLS dependency on the server.** The server does not depend on `openmls` or any MLS library. This reduces the server's attack surface, compile time, and binary size. It also means the server is completely decoupled from MLS version upgrades.
- **Simplicity.** The DS is a hash map of FIFO queues. The entire implementation fits in a few hundred lines of Rust. There are no edge cases around epoch transitions, tree synchronization, or membership conflicts.
- **Protocol agnosticism.** The DS can carry any payload, not just MLS messages. Future protocol extensions (e.g., signaling for voice/video, file transfer metadata) can reuse the same delivery infrastructure without modification.
### Costs and trade-offs
- **No server-side fan-out.** The client must enqueue separately for each recipient. For a group of n members, this means n-1 enqueue calls per message, compared to 1 call if the DS could fan out. Because every call re-sends the same payload together with its own routing metadata, client upload bandwidth per message grows by a factor of approximately n-1.
- **No server-side membership validation.** The DS cannot verify that a sender is a member of the group. A malicious client could enqueue messages to any recipient key, potentially causing the recipient to process (and reject) invalid MLS messages. This is mitigated by MLS's own authentication: invalid messages are rejected during MLS processing.
- **No server-side ordering guarantees.** RFC 9420 envisions the DS providing a consistent ordering of handshake messages. The current DS provides FIFO ordering per `(recipientKey, channelId)` queue, but it does not provide global ordering across all group members. In practice, MLS handles out-of-order delivery gracefully (Commits include the epoch number, and clients can buffer messages for future epochs).
- **Client complexity.** The client must maintain the group membership list and perform per-recipient routing. This is additional state that the client must manage correctly. An incorrect membership list results in some members not receiving messages.
### Residual risks
- **Metadata exposure.** While the DS does not see message content, it does see routing metadata: which recipient keys receive messages, when, and on which channels. This metadata can reveal communication patterns. Mitigation: use channel IDs that are not correlated with real-world identifiers, and consider padding to hide message sizes.
- **Denial of service.** Because the DS does not validate senders, a malicious client could flood a recipient's queue with garbage payloads. Mitigation: rate limiting (planned for a future milestone) and the `Auth` struct for sender identification.
---
## Code references
| File | Relevance |
|---|---|
| `schemas/delivery.capnp` | DeliveryService RPC interface (opaque `Data` payloads) |
| `schemas/node.capnp` | NodeService: `enqueue`, `fetch`, `fetchWait` methods |
| `crates/quicnprotochat-server/src/storage.rs` | Server-side queue storage (DashMap-based FIFO queues) |
| `crates/quicnprotochat-server/src/main.rs` | NodeService RPC handler implementation |
---
## Further reading
- [Design Decisions Overview](overview.md) -- index of all ADRs
- [Delivery Schema](../wire-format/delivery-schema.md) -- the DS RPC interface definition
- [NodeService Schema](../wire-format/node-service-schema.md) -- the unified interface that includes DS methods
- [ADR-005: Single-Use KeyPackages](adr-005-single-use-keypackages.md) -- related AS design decision
- [Architecture Overview](../architecture/overview.md) -- system-level view showing DS in context
- [Why This Design, Not Signal/Matrix/...](why-not-signal.md) -- broader protocol comparison

View File

@@ -0,0 +1,114 @@
# ADR-005: Single-Use KeyPackages
**Status:** Accepted
---
## Context
MLS (RFC 9420) specifies that KeyPackages must be used at most once. A KeyPackage contains the client's public HPKE init key, which is used during the `add_members()` operation to encrypt the Welcome message. If the same KeyPackage is used twice, both Welcome messages are encrypted to the same init key, so the client cannot delete the corresponding private key after the first use, and a later compromise of that key exposes both group additions. This destroys the forward secrecy of the initial key exchange.
The Authentication Service (AS) stores KeyPackages uploaded by clients and serves them to peers who want to add the client to a group. The design question is: **how should the AS enforce single-use semantics?**
### Alternatives considered
1. **Mark-as-used.** The AS could keep a "used" flag on each KeyPackage and reject subsequent fetch requests for packages already marked as used. This preserves the package on the server (for auditing or retry) but requires additional state tracking and introduces a race condition: if two peers fetch the same package concurrently, one of them will receive a "used" package unless the flag is set atomically with the first fetch.
2. **Reference counting.** The AS could allow a KeyPackage to be fetched a configurable number of times. This would support use cases like "allow the same package to be used in N group additions." However, MLS requires strict single-use, making this approach non-compliant.
3. **Atomic removal on fetch.** The AS removes the KeyPackage from storage in the same operation that returns it. The first fetch succeeds and returns the package; subsequent fetches for the same package find nothing. This is the simplest approach and provides the strongest guarantee.
---
## Decision
The Authentication Service **atomically removes** a KeyPackage when it is fetched. The `fetchKeyPackage` method is destructive: it returns the package and deletes it in a single operation. If no packages are stored for the requested identity, an empty response is returned.
### Implementation
The server stores KeyPackages in a per-identity queue (currently backed by `DashMap` with `Vec<Vec<u8>>` values). The `fetchKeyPackage` operation:
1. Locks the entry for the requested identity key.
2. Pops the first KeyPackage from the queue (FIFO order).
3. Returns the popped package.
4. Releases the lock.
If the queue is empty (or no entry exists for the identity key), the method returns empty `Data`.
```text
Before fetch:
identity_key_0x1234 -> [KP_1, KP_2, KP_3]
After fetchKeyPackage(identity_key=0x1234):
Returns: KP_1
identity_key_0x1234 -> [KP_2, KP_3]
```
### Client responsibilities
Because the AS consumes KeyPackages on fetch, clients must manage their KeyPackage supply:
1. **Pre-upload multiple KeyPackages.** After generating their identity, a client should upload several KeyPackages (e.g., 10-100) so that multiple peers can add them to groups concurrently.
2. **Monitor supply.** Clients should periodically check (via a future monitoring endpoint or heuristic) whether their KeyPackage supply on the server is running low, and replenish by uploading more.
3. **Handle empty responses.** A client trying to add a peer whose KeyPackage supply is exhausted will receive an empty response from `fetchKeyPackage`. The client should handle this gracefully -- e.g., by notifying the user that the peer needs to upload more KeyPackages.
### Fingerprint for tamper detection
The `uploadKeyPackage` method returns a SHA-256 fingerprint of the uploaded package. This fingerprint serves as a tamper-detection mechanism:
1. The uploading client records the fingerprint.
2. When a peer fetches the KeyPackage, they can compute the SHA-256 hash of the received package.
3. If the fetched package's hash does not match the expected fingerprint (communicated out-of-band), the server may have tampered with the package.
This is a defense-in-depth measure. In practice, MLS's own signature verification on KeyPackages also detects tampering, since the KeyPackage includes a signature over its contents using the uploader's Ed25519 identity key.
---
## Consequences
### Benefits
- **Forward secrecy of initial key exchange.** Each `add_members()` operation uses a fresh HPKE init key, so the shared secret derived from the Welcome message is unique. Compromising one group addition does not compromise others.
- **Simplicity.** Atomic removal is the simplest possible implementation of single-use semantics. There is no "used" flag, no reference count, no expiration timer. The package is either in the store (available) or not (consumed).
- **No race conditions.** Because removal is atomic with fetch, two concurrent fetches for the same identity key will each receive a different KeyPackage (or one will receive an empty response if only one package remains). There is no window where two fetchers could receive the same package.
- **Compliance with RFC 9420.** The single-use semantics are a direct implementation of MLS's requirement that each KeyPackage's HPKE init key be used at most once.
### Costs and trade-offs
- **Client must manage supply.** Unlike a reusable credential, single-use KeyPackages are a consumable resource. Clients must proactively upload packages and monitor their supply. A client that goes offline for an extended period may exhaust its supply, becoming unreachable for new group additions.
- **No retry after fetch.** If a client fetches a KeyPackage and then fails to complete the `add_members()` operation (e.g., due to a crash or network error), the KeyPackage is consumed and wasted. The client must fetch a new one and retry.
- **Storage scaling.** If each client uploads N KeyPackages and there are M clients, the AS must store up to M * N packages. For reasonable values (e.g., 1000 clients, 100 packages each), this is 100,000 packages -- well within the capacity of an in-memory store. For larger deployments, persistent storage would be needed.
### Residual risks
- **KeyPackage exhaustion attack.** A malicious client could repeatedly fetch a target's KeyPackages without using them, draining the target's supply and preventing legitimate peers from adding the target to groups. Mitigation: rate limiting on `fetchKeyPackage` calls (planned for a future milestone) and the `Auth` struct for identifying and blocking abusive clients.
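- **Server-side compromise.** KeyPackages contain only public key material, so an attacker who compromises the AS cannot decrypt Welcome messages merely by reading the stored KeyPackages. The attacker could, however, withhold or drain KeyPackages, or serve a KeyPackage of their own choosing in place of the victim's, unless the fetching client verifies the KeyPackage signature against an identity key it already trusts. Mitigation: this is inherent to any prekey distribution service (Signal has the same risk with X3DH prekey bundles). MLS's post-compromise security means that even if the initial key exchange is compromised, subsequent epoch updates restore security.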
---
## Code references
| File | Relevance |
|---|---|
| `schemas/auth.capnp` | `AuthenticationService` interface: `uploadKeyPackage`, `fetchKeyPackage` |
| `schemas/node.capnp` | `NodeService` interface: same methods with `Auth` parameter |
| `crates/quicnprotochat-server/src/storage.rs` | Server-side KeyPackage storage (DashMap-backed queue) |
| `crates/quicnprotochat-server/src/main.rs` | RPC handler: `fetchKeyPackage` implementation with atomic removal |
---
## Further reading
- [Design Decisions Overview](overview.md) -- index of all ADRs
- [Auth Schema](../wire-format/auth-schema.md) -- the RPC interface for KeyPackage operations
- [NodeService Schema](../wire-format/node-service-schema.md) -- the unified interface including auth methods
- [ADR-004: MLS-Unaware Delivery Service](adr-004-mls-unaware-ds.md) -- related design decision for the DS
- [Architecture Overview](../architecture/overview.md) -- system-level view showing the AS in context

View File

@@ -0,0 +1,119 @@
# ADR-006: PQ Gap in Noise Transport
**Status:** Accepted
---
## Context
quicnprotochat's security architecture has two encryption layers:
1. **Transport layer** (Noise\_XX or QUIC + TLS 1.3): encrypts the byte stream between client and server using classical Diffie-Hellman key exchange (X25519).
2. **Content layer** (MLS, RFC 9420): provides end-to-end group key agreement using DHKEM(X25519, HKDF-SHA256) in the current ciphersuite, with a hybrid KEM (X25519 + ML-KEM-768) available at the envelope level and planned for integration into the MLS ciphersuite at M5.
The content layer will have post-quantum protection from M5 onward via the hybrid KEM. However, the transport layer uses classical X25519 exclusively. This creates a **post-quantum gap**: the transport layer is vulnerable to a quantum adversary, even after the content layer is PQ-protected.
### The threat: harvest-now, decrypt-later
An adversary who does not yet have a cryptographically relevant quantum computer (CRQC) can still:
1. **Record** all encrypted traffic transiting the network today.
2. **Store** the recordings until a CRQC becomes available.
3. **Decrypt** the recorded traffic using Shor's algorithm to break X25519.
This is known as the "harvest-now, decrypt-later" (HNDL) attack. The question is: **what is the practical impact of HNDL on quicnprotochat's transport layer?**
### What a quantum adversary learns from breaking the transport
If the Noise\_XX handshake is broken, the adversary learns:
| Data | Sensitivity | Exposure |
|---|---|---|
| Static X25519 public keys of both parties | Identity metadata | Reveals which client connected to which server |
| Timing and size of RPC calls | Traffic metadata | Reveals communication patterns |
| Cap'n Proto RPC traffic (method calls, parameters) | Routing metadata | Reveals recipient keys, channel IDs, and message timestamps |
| MLS ciphertext (payload bytes) | **Still encrypted** | MLS uses its own key agreement; breaking the transport does not break MLS |
Critically, **no long-lived content secrets transit the Noise handshake**. The MLS key schedule derives group keys independently of the transport. Even with full transport decryption, the adversary sees only MLS ciphertext, which they cannot decrypt without breaking MLS's own key exchange (which will be PQ-protected from M5).
### Why not use PQ-Noise now?
The Noise Protocol Framework community has drafted extensions for post-quantum Noise (PQ-Noise), which replace or augment X25519 with PQ key exchange mechanisms (e.g., Kyber/ML-KEM). However:
1. **The `snow` crate does not support PQ-Noise.** As of snow 0.9, there is no API for PQ handshake patterns. Adding PQ support would require forking `snow` or switching to a different Noise implementation.
2. **PQ-Noise is not yet standardized.** The draft specifications (e.g., `draft-noise-pq`) are still evolving. Adopting an unstable specification risks incompatibility with future versions.
3. **Performance and size concerns.** ML-KEM-768 ciphertexts are 1,088 bytes, and encapsulation keys are 1,184 bytes. These are significantly larger than X25519's 32-byte keys. In a Noise handshake, where multiple key exchanges occur, the handshake size and latency increase substantially.
4. **The QUIC path uses TLS 1.3.** The primary transport in M3+ is QUIC + TLS 1.3, which has its own PQ migration path (via `rustls` and the `x25519-mlkem768` TLS key exchange group). This path is more likely to receive PQ support before `snow` does.
---
## Decision
Accept the PQ gap in the Noise transport layer for milestones M1 through M5. The content layer (MLS) will be PQ-protected from M5 via the hybrid KEM. The transport layer will gain PQ protection when either:
- The `snow` crate adds PQ-Noise support, or
- The QUIC/TLS path gains PQ key exchange support via `rustls`, or
- A PQ-Noise Rust implementation becomes available and is adopted.
Until then, the transport layer uses classical X25519, and the PQ gap is accepted as a known, bounded risk.
---
## Consequences
### What is protected
- **Message content** is protected by MLS's own key agreement. Even if the transport is broken, MLS ciphertext remains secure (assuming MLS uses a PQ-safe ciphersuite, which is the plan for M5).
- **MLS key material** (epoch secrets, application secrets) never transits the Noise handshake. They are derived from the MLS tree, not from the transport.
- **Forward secrecy of content** is provided by MLS epoch ratcheting, independent of the transport.
### What is exposed
- **Identity metadata.** A quantum adversary who breaks the Noise handshake learns the static X25519 public keys of both parties. This reveals *which* client connected to *which* server, and *when*.
- **Timing metadata.** The adversary learns the timing and size pattern of RPC calls, which can reveal communication patterns (e.g., "Alice and Bob exchanged messages at 3pm").
- **Routing metadata.** The adversary learns the recipient keys and channel IDs in RPC calls (since Cap'n Proto RPC traffic is visible after transport decryption). This reveals *who* is communicating with *whom*, even though the message content remains encrypted by MLS.
### Practical impact assessment
| Risk Factor | Assessment |
|---|---|
| **Timeline to CRQC** | Most estimates place cryptographically relevant quantum computers at 10-20+ years away. In the near term, the PQ gap is a risk only from adversaries who record traffic today and can store it over very long horizons. |
| **Value of metadata** | Identity and timing metadata is sensitive for high-value targets but less critical than message content for most users. |
| **Content protection** | Message content is independently protected by MLS. Breaking the transport does not break content encryption. |
| **Migration path** | PQ key exchange for TLS 1.3 is being standardized (ML-KEM in TLS). The QUIC/TLS path is likely to gain PQ protection before the Noise path. |
| **Overall risk** | **Low to moderate.** The PQ gap exposes metadata only, not content. The risk is limited to adversaries who (a) can record traffic today, (b) will have a CRQC in the future, and (c) are interested in metadata about quicnprotochat users. |
### Mitigation timeline
| Milestone | Transport PQ Status | Content PQ Status |
|---|---|---|
| M1 | Classical X25519 (Noise) | Classical DHKEM (MLS) |
| M2 | Classical X25519 (Noise) | Classical DHKEM (MLS) |
| M3 | Classical X25519 (QUIC/TLS 1.3) | Classical DHKEM (MLS) + hybrid KEM at envelope level |
| M4 | Classical X25519 (QUIC/TLS 1.3) | Classical DHKEM (MLS) + hybrid KEM at envelope level |
| M5 | Classical X25519 (QUIC/TLS 1.3) | **PQ-protected** (hybrid KEM integrated into MLS ciphersuite) |
| Future | **PQ-protected** (PQ key exchange in TLS or PQ-Noise) | PQ-protected |
---
## Code references
| File | Relevance |
|---|---|
| `crates/quicnprotochat-core/src/noise.rs` | Noise\_XX handshake using classical X25519 (`Noise_XX_25519_ChaChaPoly_SHA256`) |
| `crates/quicnprotochat-core/src/hybrid_kem.rs` | Hybrid KEM (X25519 + ML-KEM-768) for content-layer PQ protection |
| `crates/quicnprotochat-server/src/main.rs` | QUIC server using `rustls` with classical TLS 1.3 |
| `crates/quicnprotochat-client/src/main.rs` | QUIC client using `rustls` with classical TLS 1.3 |
---
## Further reading
- [Design Decisions Overview](overview.md) -- index of all ADRs
- [ADR-001: Noise\_XX for Transport Auth](adr-001-noise-xx.md) -- the Noise transport that has the PQ gap
- [Why This Design, Not Signal/Matrix/...](why-not-signal.md) -- PQ readiness comparison across protocols
- [Protocol Layers Overview](../protocol-layers/overview.md) -- how transport and content layers compose
- [Noise Protocol Framework specification](https://noiseprotocol.org/noise.html) -- upstream Noise specification

View File

@@ -0,0 +1,63 @@
# Design Decisions Overview
This section collects the Architecture Decision Records (ADRs) that document the key design choices in quicnprotochat. Each ADR follows a standard format: context (why the decision was needed), decision (what was chosen), and consequences (trade-offs, benefits, and residual risks).
These decisions are not immutable. Each ADR has a status field and can be superseded by a later ADR if circumstances change. The goal is to preserve the reasoning behind each choice so that future contributors understand *why* the system works the way it does, not just *how*.
---
## ADR index
| ADR | Title | Status | One-line summary |
|---|---|---|---|
| [ADR-001](adr-001-noise-xx.md) | Noise\_XX for Transport Auth | Accepted | Mutual authentication via static X25519 keys; no CA infrastructure required. |
| [ADR-002](adr-002-capnproto.md) | Cap'n Proto over MessagePack | Accepted | Zero-copy, schema-enforced serialisation with built-in async RPC replaces hand-rolled MessagePack dispatch. |
| [ADR-003](adr-003-rpc-inside-noise.md) | RPC Inside the Noise Tunnel | Accepted | Cap'n Proto RPC operates over the encrypted byte stream; transport owns security, RPC owns dispatch. |
| [ADR-004](adr-004-mls-unaware-ds.md) | MLS-Unaware Delivery Service | Accepted | The DS routes opaque blobs by recipient key; it never inspects MLS content. |
| [ADR-005](adr-005-single-use-keypackages.md) | Single-Use KeyPackages | Accepted | The AS atomically removes a KeyPackage on fetch to preserve MLS forward secrecy. |
| [ADR-006](adr-006-pq-gap.md) | PQ Gap in Noise Transport | Accepted | Classical X25519 in Noise is accepted for M1-M5; MLS content is PQ-protected separately. |
---
## Design comparison
For a broader comparison of quicnprotochat's design against alternative messaging protocols (Signal, Matrix/Olm/Megolm), see [Why This Design, Not Signal/Matrix/...](why-not-signal.md).
---
## How to read an ADR
Each ADR page follows this structure:
1. **Status** -- One of: Proposed, Accepted, Deprecated, Superseded. All current ADRs are Accepted.
2. **Context** -- The problem or force that motivated the decision. What constraints existed? What alternatives were considered?
3. **Decision** -- The specific choice that was made. What was selected and what was rejected?
4. **Consequences** -- The trade-offs that result from the decision. What are the benefits? What are the costs? What residual risks remain?
5. **Code references** -- Links to the source files where the decision is implemented.
---
## Cross-cutting themes
Several themes recur across multiple ADRs:
### Layered security
ADR-001, ADR-003, and ADR-006 all concern the separation between transport-layer security (Noise or QUIC/TLS) and application-layer security (MLS). The core principle is that **no single layer is trusted alone**. Transport encryption protects metadata and provides authentication; MLS provides end-to-end content encryption with forward secrecy and post-compromise security.
### Server minimalism
ADR-004 and ADR-005 reflect a design philosophy where the server does as little as possible. The DS does not parse MLS messages. The AS enforces single-use semantics through atomic removal rather than complex state tracking. This minimalism reduces the server's attack surface and makes it easier to audit.
### Schema-first design
ADR-002 and ADR-003 establish Cap'n Proto as the single source of truth for the wire format. Every message and RPC call is defined in `.capnp` schema files, which are checked into the repository and used for code generation. This eliminates the class of bugs that arises from hand-rolled serialisation and ensures that the wire format is documented, versioned, and evolvable.
---
## Further reading
- [Why This Design, Not Signal/Matrix/...](why-not-signal.md) -- comparative analysis against alternative protocols
- [Wire Format Overview](../wire-format/overview.md) -- the serialisation pipeline that implements these decisions
- [Architecture Overview](../architecture/overview.md) -- system-level view
- [Protocol Layers Overview](../protocol-layers/overview.md) -- how the protocol layers stack

View File

@@ -0,0 +1,164 @@
# Why This Design, Not Signal/Matrix/...
This page compares quicnprotochat's protocol choices against two widely deployed secure messaging systems -- the Signal Protocol and the Matrix ecosystem (Olm/Megolm) -- to explain why a different architecture was chosen. The comparison covers four dimensions: group key agreement, transport, serialisation, and overall trade-offs.
---
## Group key agreement
The choice of group key agreement protocol is the most consequential architectural decision in any end-to-end encrypted group messenger. It determines the cryptographic properties available to the application, the cost of group operations, and the complexity of the client state machine.
### Signal Protocol (Double Ratchet + X3DH + Sender Keys)
The Signal Protocol was designed for **1:1 messaging** and later extended to groups via Sender Keys.
**1:1 (Double Ratchet + X3DH):**
- X3DH performs an initial key agreement between two parties using prekey bundles (analogous to MLS KeyPackages).
- The Double Ratchet derives per-message keys using a combination of a Diffie-Hellman ratchet and a symmetric hash ratchet.
- Provides forward secrecy (past messages are protected after key compromise) and post-compromise security (future messages are protected after a compromise is healed by a new DH exchange).
- Well-studied and battle-tested for over a decade. Formal security analysis by Cohn-Gordon et al. (2017).
**Groups (Sender Keys):**
- Each group member generates a Sender Key and distributes it to all other members via pairwise Double Ratchet channels.
- Sender Keys provide a symmetric ratchet for forward secrecy, but **no post-compromise security**. If a Sender Key is compromised, all future messages from that sender are compromised until the key is manually rotated.
- Group membership changes require O(n) pairwise Sender Key distributions. Adding or removing a member requires the affected member to generate a new Sender Key and distribute it to all n-1 other members.
- The pairwise key exchange for initial setup is O(n^2): each of n members must establish a Double Ratchet session with each of the other n-1 members.
**Limitations for quicnprotochat's use case:**
- O(n^2) pairwise setup cost limits practical group size.
- No post-compromise security for groups is a significant gap.
- The protocol requires a central server for X3DH prekey bundle distribution (similar to quicnprotochat's AS, but tightly coupled to the Signal server).
### Matrix / Olm / Megolm
The Matrix ecosystem uses two distinct cryptographic protocols:
**Olm (1:1):**
- An implementation of the Double Ratchet, similar to Signal's 1:1 protocol.
- Used to establish pairwise encrypted channels between devices.
- Provides forward secrecy and post-compromise security for 1:1 sessions.
**Megolm (groups):**
- A symmetric sender ratchet. Each sender in a group generates a Megolm session and distributes the initial ratchet state to all other members via Olm channels.
- The ratchet is **forward-only**: it provides forward secrecy (a compromised ratchet state cannot decrypt past messages) but **no post-compromise security** (a compromised ratchet state decrypts all future messages from that sender until a new Megolm session is created).
- Session rotation is typically triggered by membership changes or periodic timers, but it is not cryptographically enforced.
**Additional Matrix-specific considerations:**
- **Federation** adds significant complexity. Messages may traverse multiple homeservers, each of which sees encrypted ciphertext but also metadata (sender, recipient, room ID, timestamps). Federation increases metadata exposure compared to a single-server architecture.
- **Eventually consistent state** model means that room membership, key sharing, and message ordering can diverge between homeservers. The client must reconcile these inconsistencies, adding complexity to the state machine.
- **Device verification** is a persistent UX challenge. The cross-signing mechanism is powerful but difficult for users to understand.
**Limitations for quicnprotochat's use case:**
- No post-compromise security for groups (same limitation as Signal's Sender Keys).
- Federation adds latency, metadata exposure, and state management complexity that quicnprotochat does not need.
- JSON-based wire format is inefficient (see serialisation comparison below).
### quicnprotochat: MLS (RFC 9420)
quicnprotochat uses the **Messaging Layer Security (MLS)** protocol, standardized as RFC 9420 by the IETF.
**Key properties:**
- **Native group key agreement.** MLS was designed from the ground up for groups, not bolted onto a pairwise protocol. The ratchet tree structure provides O(log n) cost for group operations (add, remove, update), compared to O(n) or O(n^2) for pairwise-based schemes.
- **Post-compromise security.** Any group member can issue an Update proposal that replaces their leaf in the ratchet tree, generating a new group secret. This heals the tree: even if a member's key material was previously compromised, the new group secret is unknown to the attacker. This property is **not available** in Signal Sender Keys or Megolm.
- **Forward secrecy.** Each epoch (a new group state after a Commit) derives fresh keys. Past epoch keys are deleted and cannot decrypt old messages.
- **Single Commit to update all members.** A Commit message applies one or more proposals (Add, Remove, Update) atomically and is processed by all group members with a single message. No pairwise distribution is needed.
- **Standardized.** RFC 9420 was published by the IETF in July 2023 after years of design, analysis, and interoperability testing. Multiple independent implementations exist (openmls, mls-rs, Cisco's MLS, etc.).
**Cost of group operations:**
| Operation | Signal (Sender Keys) | Matrix (Megolm) | MLS (quicnprotochat) |
|---|---|---|---|
| Add member | O(n) Sender Key distributions | O(n) Megolm session shares | O(log n) tree update |
| Remove member | O(n) Sender Key rotations | O(n) new Megolm session | O(log n) tree update |
| Update (PCS heal) | Not supported | Not supported (session rotation is coarse) | O(log n) path update |
| Per-message encrypt | O(1) symmetric ratchet | O(1) symmetric ratchet | O(1) symmetric ratchet |
---
## Transport comparison
The transport layer determines how encrypted payloads reach the server and how client-server authentication is performed.
| Property | Signal | Matrix | quicnprotochat |
|---|---|---|---|
| **Transport protocol** | TLS over TCP (HTTP/2) | HTTPS (TLS over TCP) | QUIC (UDP) + TLS 1.3 |
| **Multiplexing** | HTTP/2 stream multiplexing | HTTP/1.1 or HTTP/2 | Native QUIC stream multiplexing |
| **Head-of-line blocking** | Mitigated by HTTP/2 streams, but TCP HOL blocking remains | Same as Signal | Eliminated: QUIC streams are independent at the transport layer |
| **Connection establishment** | 1-RTT (TLS 1.3) or 0-RTT (TLS resumption) | 1-RTT (TLS 1.3) or 0-RTT | 0-RTT capable (QUIC resumption) or 1-RTT |
| **Client authentication** | Bearer tokens over TLS | Bearer tokens over TLS | TLS client certs (rustls/quinn) or bearer tokens via `Auth` struct |
| **Fallback** | TCP only | TCP only | Noise\_XX over TCP (M1 stack) for environments where UDP/QUIC is blocked |
**Why QUIC?**
QUIC eliminates TCP head-of-line blocking, which is particularly important for a messaging application where multiple independent conversations may be active simultaneously. A lost packet in one QUIC stream does not block delivery of packets in other streams. QUIC also provides built-in connection migration (useful for mobile clients changing networks) and 0-RTT resumption for reduced latency on reconnection.
---
## Serialisation comparison
The serialisation format determines the overhead of encoding and decoding messages, the type safety of the wire format, and the feasibility of schema evolution.
| Property | Signal (Protobuf) | Matrix (JSON) | quicnprotochat (Cap'n Proto) |
|---|---|---|---|
| **Format** | Binary, schema-defined | Text, schema-optional (JSON Schema exists but is not enforced by the wire format) | Binary, schema-defined |
| **Deserialization cost** | Requires a decode pass (allocates and copies) | Requires a parse pass (allocates, copies, and handles UTF-8) | **Zero-copy**: the wire bytes are the in-memory representation. Readers traverse pointers in-place. |
| **Schema enforcement** | Compile-time via protoc codegen | Runtime only (if at all) | Compile-time via capnpc codegen |
| **Schema evolution** | Forward-compatible (unknown fields preserved) | Forward-compatible (unknown keys ignored) | Forward-compatible (unknown fields and methods ignored) |
| **RPC support** | Separate framework (gRPC) | REST/HTTP (no built-in RPC) | **Built-in async RPC** (capnp-rpc). Method dispatch, pipelining, and cancellation are part of the serialisation layer. |
| **Canonical form** | Not guaranteed (field ordering, default elision vary) | Not guaranteed (key ordering is implementation-dependent) | **Canonical serialisation** (deterministic byte output for identical messages). Suitable for signing. |
| **Overhead** | Low (varint encoding, no field names on wire) | High (field names as strings, quoting, escaping, UTF-8) | Very low (8-byte aligned, fixed-width fields, pointer-based data) |
**Why Cap'n Proto over Protobuf?**
While Protobuf is a reasonable choice (and Signal uses it successfully), Cap'n Proto provides three features that are particularly valuable for quicnprotochat:
1. **Zero-copy deserialization** eliminates a class of allocation and performance overhead. In a messaging system that processes many small messages, avoiding deserialization copies adds up.
2. **Built-in RPC** means that Cap'n Proto is both the serialisation format and the RPC framework. There is no need for a separate gRPC or HTTP layer. The same `.capnp` schema file defines both the data structures and the service interface.
3. **Canonical form** means that two implementations producing the same logical message will generate identical bytes. This is important for signatures: the MLS layer signs over serialised data, and non-deterministic serialisation would make signature verification unreliable.
---
## Summary comparison table
| Dimension | Signal | Matrix | quicnprotochat |
|---|---|---|---|
| **1:1 encryption** | Double Ratchet (FS + PCS) | Olm / Double Ratchet (FS + PCS) | MLS (FS + PCS) |
| **Group encryption** | Sender Keys (FS only) | Megolm (FS only) | MLS (FS + PCS) |
| **Group PCS** | No | No | **Yes** (any member can heal the tree) |
| **Group op cost** | O(n) to O(n^2) | O(n) | **O(log n)** |
| **Transport** | TLS/TCP (HTTP/2) | TLS/TCP (HTTPS) | **QUIC/UDP** (0-RTT, no HOL blocking) |
| **Serialisation** | Protobuf | JSON | **Cap'n Proto** (zero-copy, canonical, built-in RPC) |
| **Standardization** | De facto standard | Matrix spec (open, community-governed) | **IETF RFC 9420** (MLS) + Noise Protocol Framework |
| **Federation** | No (centralized) | Yes (decentralized) | No (single server per deployment) |
| **PQ readiness** | PQXDH (X3DH + ML-KEM) in 1:1, not in groups | Not yet | Hybrid KEM (X25519 + ML-KEM-768) at envelope layer; MLS PQ integration planned (M5) |
| **Maturity** | 10+ years, billions of users | 7+ years, millions of users | Early development (M1-M3) |
---
## What quicnprotochat gives up
No design is without trade-offs. Compared to Signal and Matrix, quicnprotochat:
- **Has no federation.** A single server per deployment means no decentralized architecture. This is a deliberate simplification -- federation adds significant complexity and metadata exposure.
- **Is less mature.** Signal and Matrix have years of production hardening, formal security audits, and battle-tested implementations. quicnprotochat is in early development.
- **Has a smaller ecosystem.** Signal and Matrix have extensive client libraries, bridges, and integrations. quicnprotochat is a standalone Rust implementation.
- **Requires MLS client complexity.** MLS clients must maintain a ratchet tree, process Commits, and handle epoch transitions. This is more complex than a simple symmetric ratchet (Sender Keys / Megolm), though the complexity buys post-compromise security.
---
## Further reading
- [Design Decisions Overview](overview.md) -- index of all ADRs
- [ADR-001: Noise\_XX for Transport Auth](adr-001-noise-xx.md) -- transport authentication choice
- [ADR-002: Cap'n Proto over MessagePack](adr-002-capnproto.md) -- serialisation format choice
- [Protocol Layers Overview](../protocol-layers/overview.md) -- how quicnprotochat's layers compose
- [MLS (RFC 9420)](../protocol-layers/mls.md) -- deep dive into the MLS protocol layer
- [Architecture Overview](../architecture/overview.md) -- system-level architecture

View File

@@ -0,0 +1,139 @@
# Building from Source
This page covers compiling the workspace, running the test suite, and understanding the build-time Cap'n Proto code generation step.
---
## Building the workspace
From the repository root:
```bash
cargo build --workspace
```
This compiles all four crates:
| Crate | Type | Purpose |
|---|---|---|
| `quicnprotochat-core` | library | Crypto primitives, Noise transport, MLS `GroupMember` state machine, frame codec |
| `quicnprotochat-proto` | library | Cap'n Proto schemas, generated types, envelope serialisation helpers |
| `quicnprotochat-server` | binary | Unified Authentication + Delivery Service (`NodeService`) |
| `quicnprotochat-client` | binary | CLI client with subcommands (`ping`, `register`, `send`, `recv`, etc.) |
For a release build with thin LTO, symbol stripping, and a single codegen unit:
```bash
cargo build --workspace --release
```
The release profile is configured in the workspace `Cargo.toml`:
```toml
[profile.release]
opt-level = 3
lto = "thin"
codegen-units = 1
strip = "symbols"
```
---
## Running the test suite
```bash
cargo test --workspace
```
The test suite includes:
- **`quicnprotochat-proto`**: Round-trip serialisation tests for Cap'n Proto `Envelope` messages (Ping, Pong, corrupted-input error handling).
- **`quicnprotochat-core`**: Two-party MLS round-trip (`create_group` / `add_member` / `send_message` / `receive_message`), group ID lifecycle assertions.
- **`quicnprotochat-client`**: Integration tests for MLS group operations and auth service interactions (require a running server or use in-process mocks).
To run tests for a single crate:
```bash
cargo test -p quicnprotochat-core
```
---
## Cap'n Proto code generation
The `quicnprotochat-proto` crate does not contain hand-written Rust types for wire messages. Instead, its `build.rs` script invokes the `capnp` compiler at build time to generate Rust source from the `.capnp` schema files.
### How it works
1. `build.rs` locates the `schemas/` directory at the workspace root (two levels above `crates/quicnprotochat-proto/`).
2. It invokes `capnpc::CompilerCommand` on all four schema files:
- `schemas/envelope.capnp` -- top-level wire envelope with `MsgType` discriminant
- `schemas/auth.capnp` -- `AuthenticationService` RPC interface
- `schemas/delivery.capnp` -- `DeliveryService` RPC interface
- `schemas/node.capnp` -- `NodeService` RPC interface (unified AS + DS)
3. The generated Rust source is written to `$OUT_DIR` (Cargo's build output directory).
4. `src/lib.rs` includes the generated code via `include!(concat!(env!("OUT_DIR"), "/envelope_capnp.rs"))` and similar macros for each schema.
### Rebuild triggers
The `build.rs` script emits `cargo:rerun-if-changed` directives for each schema file. If you modify a `.capnp` file, the next `cargo build` will automatically re-run code generation.
### Schema include path
The `src_prefix` is set to the `schemas/` directory so that inter-schema imports (e.g., `using Auth = import "auth.capnp".Auth;` inside `node.capnp`) resolve correctly.
### Design constraints of quicnprotochat-proto
The proto crate is intentionally restricted:
- **No crypto** -- key material never enters this crate.
- **No I/O** -- callers own the transport; this crate only converts bytes to types and back.
- **No async** -- pure synchronous data-layer code.
For details on the wire format, see the [Wire Format Reference](../wire-format/overview.md).
---
## Troubleshooting
### `capnp` binary not found
**Symptom:**
```
Cap'n Proto schema compilation failed.
Is `capnp` installed? (apt-get install capnproto / brew install capnp)
```
**Fix:** Install the Cap'n Proto compiler for your platform. See [Prerequisites](prerequisites.md) for platform-specific instructions.
Verify it is on your `PATH`:
```bash
which capnp
capnp --version
```
### Version mismatch between `capnp` CLI and `capnpc` Rust crate
The workspace uses `capnpc = "0.19"` (the Rust bindings for the Cap'n Proto compiler). If your system `capnp` binary is significantly older or newer, generated code may be incompatible. The recommended approach is to use a `capnp` binary whose major version matches the `capnpc` crate version. On most systems, the package manager version is compatible.
### Linker errors on macOS with Apple Silicon
If you see linker errors related to `ring` or `aws-lc-sys` (used transitively by `rustls`), ensure you have Xcode Command Line Tools installed:
```bash
xcode-select --install
```
### Slow first build
The first build downloads and compiles all dependencies (including `openmls`, `quinn`, `rustls`, `capnp-rpc`, etc.). This can take several minutes depending on your hardware. Subsequent builds are incremental and much faster.
---
## Next steps
- [Running the Server](running-the-server.md) -- start the NodeService endpoint
- [Running the Client](running-the-client.md) -- CLI subcommands and usage examples
- [Docker Deployment](docker.md) -- build and run in containers

View File

@@ -0,0 +1,330 @@
# Demo Walkthrough: Alice and Bob
This page walks through a complete end-to-end encrypted conversation between two participants -- Alice and Bob -- using the persistent group CLI. By the end, you will have started a server, registered two identities, created an MLS group, exchanged a Welcome, and sent encrypted messages in both directions.
You will need **three terminal windows**: one for the server, one for Alice, and one for Bob.
---
## Overview
```
┌─────────┐ ┌──────────────────┐ ┌─────────┐
│ Alice │ │ Server │ │ Bob │
│ (client) │──── QUIC ────│ AS + DS (:7000) │──── QUIC ────│ (client)│
└─────────┘ └──────────────────┘ └─────────┘
```
---
## Sequence diagram
```
Alice Server (AS+DS) Bob
│ │ │
│ 1. register-state │ │
│ ─── uploadKeyPackage ─────> │ │
│ <── fingerprint ─────────── │ │
│ │ │
│ │ 2. register-state │
│ │ <── uploadKeyPackage ───────── │
│ │ ─── fingerprint ────────────> │
│ │ │
│ 3. create-group │ │
│ (local: epoch 0) │ │
│ │ │
│ 4. invite --peer-key <bob> │ │
│ ─── fetchKeyPackage ──────> │ (Bob's KP removed from AS) │
│ <── package ────────────── │ │
│ (local: add_member → Commit + Welcome) │
│ ─── enqueue(Welcome) ─────> │ (queued for Bob) │
│ │ │
│ │ 5. join │
│ │ <── fetch ──────────────────── │
│ │ ─── Welcome ────────────────> │
│ │ (local: new_from_welcome) │
│ │ │
│ 6. send --msg "Hi Bob" │ │
│ (local: create_message → PrivateMessage) │
│ ─── enqueue(ciphertext) ──> │ (queued for Bob) │
│ │ │
│ │ 7. recv │
│ │ <── fetch ──────────────────── │
│ │ ─── ciphertext ─────────────> │
│ │ (local: process_message) │
│ │ plaintext: "Hi Bob" │
│ │ │
│ │ 8. send --msg "Hi Alice" │
│ │ (local: create_message) │
│ │ <── enqueue(ciphertext) ────── │
│ │ │
│ 9. recv │ │
│ ─── fetch ────────────────> │ │
│ <── ciphertext ─────────── │ │
│ (local: process_message) │ │
│ plaintext: "Hi Alice" │ │
│ │ │
```
---
## Step-by-step instructions
### Step 1: Start the server
In **Terminal 1** (Server):
```bash
cargo run -p quicnprotochat-server
```
Wait for the log line confirming it is accepting connections:
```
INFO quicnprotochat_server: accepting QUIC connections addr="0.0.0.0:7000"
```
If this is the first run, you will also see a log line about generating the self-signed TLS certificate. The certificate is written to `data/server-cert.der`, which the client will use for TLS verification.
### Step 2: Alice registers her identity
In **Terminal 2** (Alice):
```bash
cargo run -p quicnprotochat-client -- register-state \
--state alice.bin \
--server 127.0.0.1:7000
```
This command:
- Generates a fresh Ed25519 identity keypair (or loads one from `alice.bin` if it already exists).
- Creates an MLS KeyPackage signed with that identity.
- Uploads the KeyPackage to the server's Authentication Service.
- Saves the identity seed and key store to `alice.bin` and `alice.ks`.
**Output:**
```
identity_key : <ALICE_KEY> (64 hex chars)
fingerprint : <fingerprint>
KeyPackage uploaded successfully.
```
**Copy the `identity_key` value** -- Bob will need it in Step 8, when he sends his reply to Alice.
### Step 3: Bob registers his identity
In **Terminal 3** (Bob):
```bash
cargo run -p quicnprotochat-client -- register-state \
--state bob.bin \
--server 127.0.0.1:7000
```
**Output:**
```
identity_key : <BOB_KEY> (64 hex chars)
fingerprint : <fingerprint>
KeyPackage uploaded successfully.
```
**Copy the `identity_key` value** -- Alice will need it in Step 4.
### Step 4: Alice creates a group and invites Bob
In **Terminal 2** (Alice):
First, create the group:
```bash
cargo run -p quicnprotochat-client -- create-group \
--state alice.bin \
--group-id "demo-chat"
```
```
group created: demo-chat
```
Alice is now the sole member of the group at epoch 0.
Next, invite Bob using his identity key from Step 3:
```bash
cargo run -p quicnprotochat-client -- invite \
--state alice.bin \
--peer-key <BOB_KEY> \
--server 127.0.0.1:7000
```
This command:
1. Fetches Bob's KeyPackage from the AS (this atomically removes it -- single-use).
2. Calls `add_member()` on Alice's local MLS group, producing a Commit (applied locally) and a Welcome.
3. Enqueues the Welcome message to the DS, addressed to Bob's identity key.
```
invited peer (welcome queued)
```
Alice's group state has now advanced to epoch 1.
### Step 5: Bob joins the group
In **Terminal 3** (Bob):
```bash
cargo run -p quicnprotochat-client -- join \
--state bob.bin \
--server 127.0.0.1:7000
```
This command:
1. Fetches all pending messages for Bob's identity key from the DS.
2. Finds the Welcome message that Alice enqueued.
3. Calls `MlsGroup::new_from_welcome()`, which decrypts the Welcome using the HPKE init private key from Bob's key store (`bob.ks`).
4. Saves the joined group state to `bob.bin`.
```
joined group successfully
```
Bob is now a member of the group at epoch 1, sharing the same group secret as Alice.
### Step 6: Alice sends an encrypted message
In **Terminal 2** (Alice):
```bash
cargo run -p quicnprotochat-client -- send \
--state alice.bin \
--peer-key <BOB_KEY> \
--msg "Hello Bob, this is encrypted with MLS!" \
--server 127.0.0.1:7000
```
This command:
1. Calls `create_message()` on Alice's MLS group, encrypting the plaintext as an MLS `PrivateMessage`.
2. Enqueues the ciphertext to the DS for Bob's identity key.
```
message sent
```
### Step 7: Bob receives and decrypts
In **Terminal 3** (Bob):
```bash
cargo run -p quicnprotochat-client -- recv \
--state bob.bin \
--server 127.0.0.1:7000
```
This command:
1. Fetches all pending messages from the DS.
2. For each message, calls `process_message()` on Bob's MLS group, which decrypts the `PrivateMessage` and returns the plaintext.
```
[0] plaintext: Hello Bob, this is encrypted with MLS!
```
### Step 8: Bob replies
In **Terminal 3** (Bob):
```bash
cargo run -p quicnprotochat-client -- send \
--state bob.bin \
--peer-key <ALICE_KEY> \
--msg "Hi Alice, received loud and clear!" \
--server 127.0.0.1:7000
```
```
message sent
```
### Step 9: Alice receives Bob's reply
In **Terminal 2** (Alice):
```bash
cargo run -p quicnprotochat-client -- recv \
--state alice.bin \
--server 127.0.0.1:7000
```
```
[0] plaintext: Hi Alice, received loud and clear!
```
---
## Automated demo (single command)
If you want to see the entire flow in a single command without managing three terminals, use the `demo-group` subcommand. This creates both Alice and Bob in-process with ephemeral identities and runs the full round-trip:
```bash
# Ensure the server is running, then:
cargo run -p quicnprotochat-client -- demo-group --server 127.0.0.1:7000
```
```
Alice -> Bob plaintext: hello bob
Bob -> Alice plaintext: hello alice
demo-group complete
```
---
## What happened under the hood
Here is a summary of the cryptographic operations and network calls that occurred during this walkthrough:
| Step | Client | Crypto operation | Network RPC |
|---|---|---|---|
| 2 | Alice | Ed25519 keygen, MLS KeyPackage creation | `uploadKeyPackage` |
| 3 | Bob | Ed25519 keygen, MLS KeyPackage creation | `uploadKeyPackage` |
| 4a | Alice | `MlsGroup::new_with_group_id` (epoch 0) | -- |
| 4b | Alice | `MlsGroup::add_members` (Commit + Welcome, epoch 0 -> 1) | `fetchKeyPackage`, `enqueue` |
| 5 | Bob | `MlsGroup::new_from_welcome` (HPKE decrypt, epoch 1) | `fetch` |
| 6 | Alice | `MlsGroup::create_message` (AES-128-GCM encrypt) | `enqueue` |
| 7 | Bob | `MlsGroup::process_message` (AES-128-GCM decrypt) | `fetch` |
| 8 | Bob | `MlsGroup::create_message` (AES-128-GCM encrypt) | `enqueue` |
| 9 | Alice | `MlsGroup::process_message` (AES-128-GCM decrypt) | `fetch` |
The MLS ciphersuite used throughout is `MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519`:
- **DHKEM(X25519, HKDF-SHA256)** for the HPKE key encapsulation in KeyPackages
- **AES-128-GCM** for symmetric encryption of application messages
- **SHA-256** for the key schedule hash function
- **Ed25519** for signing KeyPackages, Commits, and leaf nodes
---
## Troubleshooting
### `join` fails with "HPKE init key not found"
This happens when the key store file (`.ks`) was deleted or when `join` is run with a different `--state` path than `register-state`. The HPKE init private key generated during KeyPackage creation must be available at join time. Solution: use the same `--state` path for both `register-state` and `join`, and do not delete the `.ks` file between them.
### `invite` fails with "server returned empty KeyPackage for peer"
The peer has not registered yet, or their KeyPackage was already consumed by a previous `invite` or `fetch-key`. Ask the peer to run `register-state` again to upload a fresh KeyPackage.
### `join` fails with "no Welcome found in DS for this identity"
The Welcome message has not been enqueued yet (the inviter has not run `invite`), or it was already consumed by a previous `join`. Check that `invite` completed successfully before running `join`.
### TLS verification fails
Ensure the client has access to the server's TLS certificate. By default, both server and client use `data/server-cert.der`. If the server regenerated its certificate (e.g., after deleting the `data/` directory), clients must pick up the new certificate.
---
## Next steps
- [Running the Client](running-the-client.md) -- full CLI reference
- [MLS (RFC 9420)](../protocol-layers/mls.md) -- how the MLS group operations work
- [GroupMember Lifecycle](../internals/group-member-lifecycle.md) -- internal state machine details
- [Delivery Service Internals](../internals/delivery-service.md) -- how the DS queues and delivers messages

View File

@@ -0,0 +1,196 @@
# Docker Deployment
quicnprotochat includes a multi-stage Dockerfile and a Docker Compose configuration for building and running the server in containers.
---
## Quick start
```bash
docker compose up
```
This builds the server image (if not already built) and starts a single `server` service listening on port `7000`. The server will generate a self-signed TLS certificate on first launch and begin accepting QUIC connections.
To rebuild after code changes:
```bash
docker compose up --build
```
To run in the background:
```bash
docker compose up -d
```
---
## Docker Compose configuration
The `docker-compose.yml` at the repository root defines a single service:
```yaml
services:
server:
build:
context: .
dockerfile: docker/Dockerfile
ports:
- "7000:7000"
environment:
RUST_LOG: "info"
QUICNPROTOCHAT_LISTEN: "0.0.0.0:7000"
healthcheck:
test: ["CMD", "bash", "-c", "echo '' > /dev/tcp/localhost/7000"]
interval: 5s
timeout: 3s
retries: 10
start_period: 10s
restart: unless-stopped
```
### Port mapping
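The container exposes port `7000`, and the `ports` directive maps host port `7000` to the container's `7000`. QUIC runs over UDP, but a Compose port mapping without a protocol suffix publishes TCP only; use `"7000:7000/udp"` to publish the QUIC port to the host, and ensure your firewall allows UDP traffic on it.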
### Health check
The health check opens a TCP connection to `localhost:7000` using bash's `/dev/tcp` pseudo-device. Because QUIC is a UDP protocol, this probe only succeeds if something inside the container accepts TCP connections on that port; it does not exercise the QUIC endpoint itself. A QUIC-aware health check (e.g., using the client's `ping` command) would be more precise but requires the client binary in the runtime image.
### Restart policy
`restart: unless-stopped` ensures the server restarts automatically after crashes but stays stopped if you explicitly `docker compose stop` or `docker compose down`.
---
## Multi-stage Docker build
The Dockerfile at `docker/Dockerfile` uses a two-stage build to produce a minimal runtime image.
### Stage 1: Builder (`rust:bookworm`)
```dockerfile
FROM rust:bookworm AS builder
RUN apt-get update \
&& apt-get install -y --no-install-recommends capnproto \
&& rm -rf /var/lib/apt/lists/*
```
Key steps:
1. **Base image**: `rust:bookworm` (Debian Bookworm with the Rust toolchain pre-installed).
2. **Install `capnproto`**: Required by `quicnprotochat-proto/build.rs` to compile `.capnp` schemas at build time.
3. **Copy manifests first**: `Cargo.toml` and `Cargo.lock` are copied before source code. Dummy `main.rs` / `lib.rs` stubs are created so that `cargo build` can resolve and cache the dependency graph. This ensures that dependency compilation is cached in a separate Docker layer -- subsequent builds that only change source code skip the dependency compilation step entirely.
4. **Copy schemas**: The `schemas/` directory is copied before the dependency build because `quicnprotochat-proto/build.rs` requires the `.capnp` files during compilation.
5. **Copy real source and build**: After the dependency cache layer, real source files are copied in and `cargo build --release` is run.
### Stage 2: Runtime (`debian:bookworm-slim`)
```dockerfile
FROM debian:bookworm-slim AS runtime
RUN apt-get update \
&& apt-get install -y --no-install-recommends ca-certificates \
&& rm -rf /var/lib/apt/lists/*
COPY --from=builder /build/target/release/quicnprotochat-server /usr/local/bin/quicnprotochat-server
EXPOSE 7000
ENV RUST_LOG=info \
QUICNPROTOCHAT_LISTEN=0.0.0.0:7000
USER nobody
CMD ["quicnprotochat-server"]
```
Key characteristics:
- **Minimal image**: No Rust toolchain, no `capnp` compiler, no build artifacts.
- **`ca-certificates`**: Included for future HTTPS calls (e.g., ACME certificate provisioning or key sync endpoints).
- **Non-root execution**: The container runs as `nobody` for defense in depth.
- **Default port**: The Dockerfile sets `QUICNPROTOCHAT_LISTEN=0.0.0.0:7000`, and `docker-compose.yml` sets the same value explicitly, so the server listens on port `7000` with or without Compose.
> **Note**: The Dockerfile (`EXPOSE 7000`, `QUICNPROTOCHAT_LISTEN=0.0.0.0:7000`) and `docker-compose.yml` (`QUICNPROTOCHAT_LISTEN: "0.0.0.0:7000"`) agree on the port, so the server listens on `7000` whether you start it with Compose or run the image directly.
---
## Volume persistence
The server stores its state (TLS certificates, KeyPackages, delivery queues, hybrid keys) in the data directory (default `data/`). To persist this data across container restarts, mount a volume:
```yaml
services:
server:
# ... existing config ...
volumes:
- server-data:/data
environment:
QUICNPROTOCHAT_DATA_DIR: "/data"
volumes:
server-data:
```
Or use a bind mount for easier inspection:
```bash
docker compose run \
-v $(pwd)/server-data:/data \
-e QUICNPROTOCHAT_DATA_DIR=/data \
server
```
Without a volume, all server state (including TLS certificates and message queues) is lost when the container is removed. The server will generate a new self-signed certificate on each fresh start, which means clients will need the new certificate to connect.
---
## Building just the image
To build the Docker image without starting a container:
```bash
docker build -t quicnprotochat-server -f docker/Dockerfile .
```
To run it manually:
```bash
docker run -d \
--name quicnprotochat \
-p 7000:7000/udp \
-e QUICNPROTOCHAT_LISTEN=0.0.0.0:7000 \
-e RUST_LOG=info \
quicnprotochat-server
```
Note the `/udp` suffix on the port mapping -- QUIC runs over UDP.
---
## Connecting the client to a containerised server
When the server runs in Docker with `docker compose up`, the client can connect from the host:
```bash
# Extract the server's TLS cert from the container
docker compose cp server:/data/server-cert.der ./data/server-cert.der
# Connect
cargo run -p quicnprotochat-client -- ping \
--ca-cert ./data/server-cert.der \
--server-name localhost
```
If you mounted a volume (e.g., `./server-data:/data`), the certificate is directly accessible at `./server-data/server-cert.der`.
---
## Next steps
- [Running the Server](running-the-server.md) -- server configuration without Docker
- [Running the Client](running-the-client.md) -- CLI subcommands
- [Demo Walkthrough](demo-walkthrough.md) -- step-by-step messaging scenario

View File

@@ -0,0 +1,101 @@
# Prerequisites
Before building quicnprotochat you need a Rust toolchain and the Cap'n Proto schema compiler. Docker is optional but useful for reproducible builds and deployment.
---
## Rust toolchain
**Minimum supported Rust version: 1.77+ (stable)**
quicnprotochat uses the 2021 edition and workspace resolver v2. Any stable Rust release from 1.77 onward should work. Install or update via [rustup](https://rustup.rs/):
```bash
# Install rustup (if not already present)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# Ensure you are on a recent stable release
rustup update stable
rustup default stable
# Verify
rustc --version # should print 1.77.0 or later
cargo --version
```
The workspace depends on several crates that use procedural macros (`serde_derive`, `clap_derive`, `tls_codec_derive`, `thiserror`). These compile during the build step and require no additional system libraries beyond what `rustc` ships.
---
## Cap'n Proto compiler (`capnp`)
The `quicnprotochat-proto` crate runs a `build.rs` script that invokes the `capnp` binary at compile time to generate Rust types from the `.capnp` schema files in `schemas/`. The `capnp` binary must be on your `PATH`.
### Debian / Ubuntu
```bash
sudo apt-get update
sudo apt-get install -y capnproto
```
### macOS (Homebrew)
```bash
brew install capnp
```
### Verify installation
```bash
capnp --version
# Expected output: Cap'n Proto version X.Y.Z
```
If `capnp` is not found, the build will fail with an error from `capnpc::CompilerCommand`:
```
Cap'n Proto schema compilation failed. Is `capnp` installed?
(apt-get install capnproto / brew install capnp)
```
See [Building from Source -- Troubleshooting](building.md#troubleshooting) for more details.
### Other platforms
| Platform | Install command |
|---|---|
| Fedora / RHEL | `dnf install capnproto` |
| Arch Linux | `pacman -S capnproto` |
| Nix | `nix-env -iA nixpkgs.capnproto` |
| Windows (vcpkg) | `vcpkg install capnproto` |
| From source | [capnproto.org/install.html](https://capnproto.org/install.html) |
---
## Optional: Docker and Docker Compose
If you prefer to build and run quicnprotochat in containers, you will need:
- **Docker Engine** 20.10+ (or Docker Desktop)
- **Docker Compose** v2+ (the `docker compose` plugin, not the legacy `docker-compose` binary)
```bash
docker --version # 20.10+
docker compose version # v2+
```
The provided `docker/Dockerfile` is a multi-stage build that installs `capnproto` in the builder stage, so you do **not** need the `capnp` binary on your host when building via Docker.
See [Docker Deployment](docker.md) for full instructions.
---
## Summary checklist
| Dependency | Required? | How to check |
|---|---|---|
| Rust stable 1.77+ | Yes | `rustc --version` |
| `capnp` CLI | Yes (host builds) | `capnp --version` |
| Docker + Compose | No (container builds only) | `docker --version` / `docker compose version` |
Once all prerequisites are satisfied, proceed to [Building from Source](building.md).

View File

@@ -0,0 +1,285 @@
# Running the Client
The quicnprotochat CLI client provides subcommands for connectivity testing, identity registration, KeyPackage exchange, and persistent group messaging. All commands connect to the server over QUIC + TLS 1.3 and issue Cap'n Proto RPC calls against the `NodeService` endpoint.
---
## Global flags
These flags apply to every subcommand:
| Flag | Env var | Default | Purpose |
|---|---|---|---|
| `--ca-cert` | `QUICNPROTOCHAT_CA_CERT` | `data/server-cert.der` | Path to the server's TLS certificate (DER format). The client uses this to verify the server's identity during the TLS handshake. |
| `--server-name` | `QUICNPROTOCHAT_SERVER_NAME` | `localhost` | Expected TLS server name. Must match a SAN in the server's certificate. |
Most subcommands also accept `--server` (default `127.0.0.1:7000`) to specify the server address.
---
## Connectivity
### `ping`
Send a health probe to the server and print the round-trip time.
```bash
cargo run -p quicnprotochat-client -- ping
```
```bash
cargo run -p quicnprotochat-client -- ping --server 192.168.1.10:7000
```
**Output:**
```
health=ok rtt=3ms
```
This exercises the full QUIC + TLS 1.3 connection setup plus a single Cap'n Proto `health()` RPC call. Useful for verifying that the server is reachable and TLS verification succeeds.
---
## Ephemeral identity commands
These commands generate a fresh identity keypair in memory each time they run. The identity is not persisted and is discarded when the process exits. They are useful for quick tests and for the automated `demo-group` scenario.
### `register`
Generate a fresh Ed25519 identity, create an MLS KeyPackage, and upload it to the Authentication Service.
```bash
cargo run -p quicnprotochat-client -- register
```
**Output:**
```
identity_key : a1b2c3d4e5f6... (64 hex chars = 32 bytes)
fingerprint : 9f8e7d6c5b4a... (SHA-256 of the KeyPackage)
KeyPackage uploaded successfully.
```
Share the `identity_key` value with peers who want to add you to a group. They will pass it to `fetch-key` or `invite --peer-key`.
### `fetch-key <identity_key>`
Fetch a peer's KeyPackage from the Authentication Service by their Ed25519 public key.
```bash
cargo run -p quicnprotochat-client -- fetch-key a1b2c3d4e5f6...
```
The `identity_key` argument must be exactly 64 lowercase hex characters (32 bytes).
**Output (success):**
```
fingerprint : 9f8e7d6c5b4a...
package_len : 742 bytes
KeyPackage fetched successfully.
```
**Output (no KeyPackage available):**
```
No KeyPackage available for this identity.
```
KeyPackages are single-use: fetching a KeyPackage atomically removes it from the server. If the peer needs to be added to another group, they must upload a new KeyPackage.
### `demo-group`
Run a complete Alice-and-Bob MLS round-trip against a live server. Both identities are created in-process; both communicate through the server's AS and DS.
```bash
cargo run -p quicnprotochat-client -- demo-group --server 127.0.0.1:7000
```
**Output:**
```
Alice -> Bob plaintext: hello bob
Bob -> Alice plaintext: hello alice
demo-group complete
```
This is the fastest way to verify that the entire stack (QUIC + TLS + Cap'n Proto RPC + MLS group operations + DS relay) is working end to end. For a detailed breakdown of what happens during `demo-group`, see the [Demo Walkthrough](demo-walkthrough.md).
---
## Persistent group commands
These commands use a state file (`--state`, default `quicnprotochat-state.bin`) to persist the Ed25519 identity seed and MLS group state between invocations. A companion key store file (same path with `.ks` extension) holds HPKE init private keys.
All persistent commands share the `--state` flag, and those that contact the server also accept `--server`:
| Flag | Env var | Default |
|---|---|---|
| `--state` | `QUICNPROTOCHAT_STATE` | `quicnprotochat-state.bin` |
| `--server` | `QUICNPROTOCHAT_SERVER` | `127.0.0.1:7000` |
### `register-state`
Create or load a persistent identity, generate a KeyPackage, and upload it to the AS.
```bash
cargo run -p quicnprotochat-client -- register-state \
--state alice.bin \
--server 127.0.0.1:7000
```
If `alice.bin` does not exist, a new identity is generated and saved. If it already exists, the existing identity is loaded and a new KeyPackage is generated from it.
**Output:**
```
identity_key : a1b2c3d4e5f6...
fingerprint : 9f8e7d6c5b4a...
KeyPackage uploaded successfully.
```
### `create-group`
Create a new MLS group. The caller becomes the sole member at epoch 0.
```bash
cargo run -p quicnprotochat-client -- create-group \
--state alice.bin \
--group-id "project-chat"
```
**Output:**
```
group created: project-chat
```
The group state is saved to the state file. You can now invite peers with `invite`.
### `invite`
Fetch a peer's KeyPackage from the AS, add them to the group, and deliver the Welcome message via the DS.
```bash
cargo run -p quicnprotochat-client -- invite \
--state alice.bin \
--peer-key b9a8c7d6e5f4... \
--server 127.0.0.1:7000
```
This command performs three operations in sequence:
1. Fetches the peer's KeyPackage from the AS (`fetchKeyPackage` RPC).
2. Calls `add_member()` on the local MLS group, producing a Commit and a Welcome.
3. Enqueues the Welcome to the DS for the peer's identity key (`enqueue` RPC).
**Output:**
```
invited peer (welcome queued)
```
### `join`
Join a group by consuming a Welcome message from the DS.
```bash
cargo run -p quicnprotochat-client -- join \
--state bob.bin \
--server 127.0.0.1:7000
```
The command fetches all pending messages for the local identity from the DS and expects to find a Welcome. The Welcome is processed by `MlsGroup::new_from_welcome()`, which decrypts it using the HPKE init private key stored in the key store.
**Output:**
```
joined group successfully
```
### `send`
Encrypt and send an application message to a peer via the DS.
```bash
cargo run -p quicnprotochat-client -- send \
--state alice.bin \
--peer-key b9a8c7d6e5f4... \
--msg "hello from alice" \
--server 127.0.0.1:7000
```
The message is encrypted as an MLS `PrivateMessage` using the current epoch's key schedule, then enqueued to the DS for the specified recipient.
**Output:**
```
message sent
```
### `recv`
Receive and decrypt all pending messages from the DS.
```bash
cargo run -p quicnprotochat-client -- recv \
--state bob.bin \
--server 127.0.0.1:7000
```
**Output:**
```
[0] plaintext: hello from alice
```
Additional flags:
| Flag | Default | Purpose |
|---|---|---|
| `--wait-ms` | `0` | Long-poll timeout in milliseconds. If no messages are queued, wait up to this long before returning. Uses the `fetchWait` RPC. |
| `--stream` | `false` | Continuously long-poll for messages. The process will not exit until interrupted. |
```bash
# Wait up to 5 seconds for messages
cargo run -p quicnprotochat-client -- recv \
--state bob.bin \
--wait-ms 5000
# Stream messages continuously
cargo run -p quicnprotochat-client -- recv \
--state bob.bin \
--stream --wait-ms 10000
```
---
## HPKE init key lifecycle warning
The MLS protocol requires that the HPKE init private key generated during KeyPackage creation is available when processing the corresponding Welcome message. In quicnprotochat, this private key is stored in the key store file (`.ks` extension alongside the state file).
**The same state file and key store must be used for both `register-state` and `join`.** If you:
- Run `register-state` with `--state bob.bin` (which generates `bob.ks`)
- Delete or move `bob.ks` before running `join`
- Or use a different `--state` path for `join`
...then `join` will fail because the HPKE init private key cannot be found.
In ephemeral mode (`register` and `demo-group`), the key is held in process memory and is only valid for the lifetime of that process.
---
## Command reference summary
| Command | Persistent? | Description |
|---|---|---|
| `ping` | No | Health check, prints RTT |
| `register` | No | Generate ephemeral identity + KeyPackage, upload to AS |
| `fetch-key <hex>` | No | Fetch a peer's KeyPackage from AS |
| `demo-group` | No | Automated Alice-and-Bob round-trip |
| `register-state` | Yes | Upload KeyPackage for persistent identity |
| `create-group` | Yes | Create MLS group (sole member, epoch 0) |
| `invite` | Yes | Add peer to group, deliver Welcome via DS |
| `join` | Yes | Consume Welcome from DS, join group |
| `send` | Yes | Encrypt and enqueue application message via DS |
| `recv` | Yes | Fetch, decrypt, and display pending messages |
---
## Next steps
- [Demo Walkthrough](demo-walkthrough.md) -- step-by-step narrative with three terminals (server, Alice, Bob)
- [Running the Server](running-the-server.md) -- server configuration and TLS setup
- [MLS (RFC 9420)](../protocol-layers/mls.md) -- how MLS group operations work under the hood

View File

@@ -0,0 +1,166 @@
# Running the Server
The quicnprotochat server is a single binary (`quicnprotochat-server`) that exposes a unified **NodeService** endpoint combining Authentication Service (KeyPackage management) and Delivery Service (message relay) operations over a single QUIC + TLS 1.3 connection.
---
## Quick start
```bash
cargo run -p quicnprotochat-server
```
On first launch the server will:
1. Create the `data/` directory if it does not exist.
2. Generate a self-signed TLS certificate and private key (`data/server-cert.der`, `data/server-key.der`) with SANs `localhost`, `127.0.0.1`, and `::1`.
3. Open a QUIC endpoint on `0.0.0.0:7000`.
4. Begin accepting connections.
You should see output similar to:
```
2025-01-01T00:00:00.000000Z INFO quicnprotochat_server: generated self-signed TLS certificate cert="data/server-cert.der" key="data/server-key.der"
2025-01-01T00:00:00.000000Z INFO quicnprotochat_server: accepting QUIC connections addr="0.0.0.0:7000"
```
---
## Configuration
Most settings can be supplied either as a CLI flag or as an environment variable; the log level is controlled only through `RUST_LOG`. When both a flag and its environment variable are specified, the CLI flag takes precedence.
| Purpose | CLI flag | Env var | Default |
|---|---|---|---|
| QUIC listen address | `--listen` | `QUICNPROTOCHAT_LISTEN` | `0.0.0.0:7000` |
| TLS certificate (DER) | `--tls-cert` | `QUICNPROTOCHAT_TLS_CERT` | `data/server-cert.der` |
| TLS private key (DER) | `--tls-key` | `QUICNPROTOCHAT_TLS_KEY` | `data/server-key.der` |
| Data directory | `--data-dir` | `QUICNPROTOCHAT_DATA_DIR` | `data` |
| Log level | -- | `RUST_LOG` | `info` |
### Examples
```bash
# Listen on a custom port
cargo run -p quicnprotochat-server -- --listen 0.0.0.0:9000
# Use pre-existing TLS credentials
cargo run -p quicnprotochat-server -- \
--tls-cert /etc/quicnprotochat/cert.der \
--tls-key /etc/quicnprotochat/key.der
# Via environment variables
QUICNPROTOCHAT_LISTEN=0.0.0.0:9000 \
RUST_LOG=debug \
cargo run -p quicnprotochat-server
```
---
## TLS certificate handling
### Self-signed certificate auto-generation
If the files at `--tls-cert` and `--tls-key` do not exist when the server starts, it generates a self-signed certificate using the `rcgen` crate. The generated certificate includes three Subject Alternative Names:
- `localhost`
- `127.0.0.1`
- `::1`
The certificate and key are written in DER format. Parent directories are created automatically.
### Using your own certificate
To use a certificate issued by a CA or a custom self-signed certificate:
1. Convert your certificate and key to DER format if they are in PEM:
```bash
openssl x509 -in cert.pem -outform DER -out cert.der
openssl pkcs8 -topk8 -inform PEM -outform DER -in key.pem -out key.der -nocrypt
```
2. Point the server at them:
```bash
cargo run -p quicnprotochat-server -- \
--tls-cert cert.der \
--tls-key key.der
```
3. Distribute the certificate (or its CA root) to clients so they can verify the server. The client's `--ca-cert` flag accepts a DER file.
### TLS configuration details
The server's TLS stack is configured as follows:
- **Protocol versions**: TLS 1.3 only (`rustls::version::TLS13`). TLS 1.2 and below are rejected.
- **Client authentication**: Disabled (`with_no_client_auth()`). The server does not request a client certificate. Client identity is established at the MLS layer via Ed25519 credentials, not at the TLS layer.
- **ALPN**: The server advertises `b"capnp"` as the application-layer protocol.
---
## ALPN negotiation
Both the server and client must agree on the ALPN token `b"capnp"` during the TLS handshake. This token is hardcoded in the server's TLS configuration:
```rust
tls.alpn_protocols = vec![b"capnp".to_vec()];
```
If a client connects with a different (or no) ALPN token, the QUIC handshake will fail with an ALPN mismatch error.
---
## Storage
The server persists its state to the data directory (`--data-dir`, default `data/`):
| File | Contents |
|---|---|
| `data/server-cert.der` | TLS certificate (DER) |
| `data/server-key.der` | TLS private key (DER) |
| `data/keypackages.bin` | `bincode`-serialised map of identity keys to KeyPackage queues |
| `data/deliveries.bin` | `bincode`-serialised map of `(channelId, recipientKey)` to message queues |
| `data/hybridkeys.bin` | `bincode`-serialised map of identity keys to hybrid (X25519 + ML-KEM-768) public keys |
Storage is implemented by the `FileBackedStore` in `crates/quicnprotochat-server/src/storage.rs`. Every mutation (upload, enqueue, fetch) flushes the entire map to disk synchronously. This is suitable for proof-of-concept workloads but not production traffic. See [Storage Backend](../internals/storage-backend.md) for details.
---
## Connection handling
Each incoming QUIC connection is handled in a `tokio::task::spawn_local` task on a shared `LocalSet`. The `capnp-rpc` library uses `Rc<RefCell<>>` internally, making it `!Send`, which is why all RPC tasks must run on a `LocalSet` rather than being spawned with `tokio::spawn`.
The connection lifecycle:
1. Accept incoming QUIC connection.
2. Complete TLS 1.3 handshake.
3. Accept a bidirectional QUIC stream.
4. Wrap the stream in a `capnp_rpc::twoparty::VatNetwork`.
5. Bootstrap a `NodeService` RPC endpoint.
6. Serve requests until the client disconnects or an error occurs.
---
## Logging
The server uses `tracing` with `tracing-subscriber` and respects the `RUST_LOG` environment variable:
```bash
# Default: info level
RUST_LOG=info cargo run -p quicnprotochat-server
# Debug level for detailed RPC tracing
RUST_LOG=debug cargo run -p quicnprotochat-server
# Trace level for maximum verbosity
RUST_LOG=trace cargo run -p quicnprotochat-server
# Filter to specific crates
RUST_LOG=quicnprotochat_server=debug,quinn=warn cargo run -p quicnprotochat-server
```
---
## Next steps
- [Running the Client](running-the-client.md) -- connect to the server and exercise the CLI
- [Demo Walkthrough](demo-walkthrough.md) -- step-by-step Alice-and-Bob group messaging scenario
- [Service Architecture](../architecture/service-architecture.md) -- how the NodeService combines AS and DS

View File

@@ -0,0 +1,279 @@
# Authentication Service Internals
The Authentication Service (AS) stores and distributes single-use MLS
KeyPackages. It is one of the two logical services exposed through the unified
`NodeService` RPC interface. The AS also stores hybrid (X25519 + ML-KEM-768)
public keys for post-quantum envelope encryption.
This page covers the server-side implementation of KeyPackage storage, the
`Auth` struct validation logic, and the hybrid key endpoints.
**Sources:**
- `crates/quicnprotochat-server/src/main.rs` (RPC handlers, auth validation)
- `crates/quicnprotochat-server/src/storage.rs` (FileBackedStore)
- `schemas/node.capnp` (wire schema)
---
## KeyPackage Storage
### Data Model
KeyPackages are stored in a `FileBackedStore` using a `Mutex`-protected
`HashMap`:
```text
key_packages: Mutex<HashMap<Vec<u8>, VecDeque<Vec<u8>>>>
^ ^
| |
identity_key FIFO queue of
(32-byte Ed25519 TLS-encoded
public key) KeyPackage bytes
```
Each identity can have multiple KeyPackages queued. This is essential because
KeyPackages are single-use (per RFC 9420): once fetched by a peer, they are
permanently removed. Clients should upload several KeyPackages to handle
concurrent group invitations.
The map is persisted to `data/keypackages.bin` using bincode serialization,
wrapped in the `QueueMapV1` struct. See [Storage Backend](storage-backend.md)
for persistence details.
### uploadKeyPackage
```capnp
uploadKeyPackage @0 (identityKey :Data, package :Data, auth :Auth)
-> (fingerprint :Data);
```
**Handler logic:**
1. **Parse parameters.** Extract `identityKey`, `package`, and `auth`.
2. **Validate auth.** Call `validate_auth()` (see [Auth Validation](#auth-validation)
below).
3. **Validate inputs:**
| Check | Constraint | Error Message |
|-------|------------|---------------|
| Identity key length | Exactly 32 bytes | `"identityKey must be exactly 32 bytes, got {n}"` |
| Package non-empty | `package.len() > 0` | `"package must not be empty"` |
| Package size cap | `package.len() <= 1,048,576` | `"package exceeds max size (1048576 bytes)"` |
4. **Compute fingerprint.** `SHA-256(package_bytes)` produces a 32-byte digest.
5. **Store.** `FileBackedStore::upload_key_package(identity_key, package)` pushes
the package to the back of the identity's `VecDeque` and flushes to disk.
6. **Return fingerprint.** The SHA-256 hash is set in the response.
The fingerprint allows the uploading client to verify that the server stored the
exact bytes it sent. See [KeyPackage Exchange Flow](keypackage-exchange.md) for
the client-side verification logic.
### fetchKeyPackage
```capnp
fetchKeyPackage @1 (identityKey :Data, auth :Auth) -> (package :Data);
```
**Handler logic:**
1. **Parse and validate** `identityKey` (32 bytes) and `auth`.
2. **Pop from queue.** `FileBackedStore::fetch_key_package(identity_key)` calls
`VecDeque::pop_front()` on the identity's queue, removing and returning the
oldest KeyPackage. The updated map is flushed to disk.
3. **Return.** If a KeyPackage was available, set it in the response. If the
queue was empty (or the identity has no entry), return empty `Data`.
**Single-use semantics:** The `pop_front()` operation ensures each KeyPackage is
returned exactly once. This is critical for MLS security -- reusing a KeyPackage
would allow conflicting group states. The removal is atomic with respect to the
`Mutex` lock, so concurrent fetch requests will not receive the same package.
**Empty response handling:** The client checks `package.is_empty()` to
distinguish between "no packages available" and "package fetched." An empty
response is not an error -- it means the target identity has exhausted their
KeyPackage supply and needs to upload more.
---
## Auth Validation
Most `NodeService` RPC methods accept an `Auth` struct (`health`, `uploadHybridKey`, and `fetchHybridKey`, as shown on this page, take none):
```capnp
struct Auth {
version @0 :UInt16; # 0 = legacy/none, 1 = token-based
accessToken @1 :Data; # opaque bearer token
deviceId @2 :Data; # optional UUID for auditing
}
```
The server validates this struct through the `validate_auth` function:
```text
validate_auth(cfg, auth)
|
+-- version == 0?
| +-- cfg.allow_legacy_v0 == true? -> OK
| +-- cfg.allow_legacy_v0 == false? -> ERROR "auth version 0 disabled"
|
+-- version == 1?
| +-- accessToken empty? -> ERROR "requires non-empty accessToken"
| +-- cfg.required_token is Some?
| | +-- token matches? -> OK
| | +-- token mismatch? -> ERROR "invalid accessToken"
| +-- cfg.required_token is None? -> OK (any non-empty token accepted)
|
+-- version >= 2? -> ERROR "unsupported auth version"
```
### AuthConfig
The server's auth behavior is controlled by `AuthConfig`:
```rust
struct AuthConfig {
required_token: Option<Vec<u8>>, // None = accept any token
allow_legacy_v0: bool, // true = accept version 0 (no auth)
}
```
Configured via CLI flags / environment variables:
| Flag / Env Var | Default | Purpose |
|-----------------------------------|---------|---------|
| `--auth-token` / `QUICNPROTOCHAT_AUTH_TOKEN` | None | Required bearer token. If unset, any non-empty token is accepted for version 1. |
| `--allow-auth-v0` / `QUICNPROTOCHAT_ALLOW_AUTH_V0` | `true` | Whether to accept `auth.version=0` (legacy, unauthenticated) requests. |
### Version Semantics
| Version | Meaning | Token Required? |
|---------|---------|-----------------|
| 0 | Legacy / unauthenticated | No. Token is ignored. Server must have `allow_legacy_v0 = true`. |
| 1 | Token-based authentication | Yes. Must be non-empty. Must match `required_token` if configured. |
| 2+ | Reserved for future use | Rejected. |
### Current Limitations
The current auth implementation is intentionally minimal:
- **No identity binding.** The access token is not tied to a specific Ed25519
identity. Any valid token can upload or fetch KeyPackages for any identity.
- **No rate limiting.** There is no per-identity or per-IP rate limiting.
- **No token rotation.** Tokens are static strings configured at server startup.
- **No device management.** The `deviceId` field is accepted but not used for
authorization decisions.
The [Auth, Devices, and Tokens](../roadmap/authz-plan.md) roadmap item
addresses these gaps with a proper token issuance and validation system.
---
## Hybrid Key Endpoints
The AS also stores hybrid (X25519 + ML-KEM-768) public keys for post-quantum
envelope encryption. Unlike KeyPackages, hybrid keys are **not single-use** --
they are stored persistently and can be fetched multiple times.
### uploadHybridKey
```capnp
uploadHybridKey @6 (identityKey :Data, hybridPublicKey :Data) -> ();
```
**Handler logic:**
1. Validate `identityKey` (32 bytes) and `hybridPublicKey` (non-empty).
2. `FileBackedStore::upload_hybrid_key(identity_key, hybrid_pk)` stores the key,
overwriting any previous value for this identity.
3. Flushes to `data/hybridkeys.bin`.
The storage model is simpler than KeyPackages: a flat
`HashMap<Vec<u8>, Vec<u8>>` (identity key to hybrid public key bytes). There is
no queue -- each identity has at most one hybrid public key.
### fetchHybridKey
```capnp
fetchHybridKey @7 (identityKey :Data) -> (hybridPublicKey :Data);
```
**Handler logic:**
1. Validate `identityKey` (32 bytes).
2. Look up the hybrid public key in the store. Unlike `fetchKeyPackage`, this
does **not** remove the key -- it can be fetched repeatedly.
3. Return the key bytes, or empty `Data` if none is stored.
See [Hybrid KEM](../protocol-layers/hybrid-kem.md) for how the client uses
these keys to wrap MLS payloads in post-quantum envelopes.
---
## NodeServiceImpl Structure
The server-side implementation struct:
```rust
struct NodeServiceImpl {
store: Arc<FileBackedStore>, // shared across connections
waiters: Arc<DashMap<Vec<u8>, Arc<Notify>>>, // long-poll notification
auth_cfg: Arc<AuthConfig>, // auth policy
}
```
All connections share the same `store` and `waiters` via `Arc`. The
`DashMap<Vec<u8>, Arc<Notify>>` is keyed by recipient key and provides the
push-notification mechanism for `fetchWait`. See
[Delivery Service Internals](delivery-service.md) for the long-polling
implementation.
---
## Connection Model
```text
QUIC endpoint (port 7000)
+-- TLS 1.3 handshake (self-signed cert by default)
+-- Accept bidirectional stream
+-- capnp-rpc VatNetwork (Side::Server)
+-- NodeServiceImpl { store, waiters, auth_cfg }
```
Each QUIC connection opens one bidirectional stream for Cap'n Proto RPC. The
`capnp-rpc` crate uses `Rc<RefCell<>>` internally, making it `!Send`. All RPC
tasks run on a `tokio::task::LocalSet` to satisfy this constraint.
The server generates a self-signed TLS certificate on first start if no
certificate files exist. Certificate and key paths are configurable via
`--tls-cert` and `--tls-key`.
---
## Health Endpoint
```capnp
health @5 () -> (status :Text);
```
A simple readiness probe. Returns `"ok"` unconditionally. No auth validation is
performed. Useful for infrastructure health checks and measuring QUIC round-trip
time.
---
## Related Pages
- [KeyPackage Exchange Flow](keypackage-exchange.md) -- end-to-end upload and fetch flow including client-side logic
- [Delivery Service Internals](delivery-service.md) -- the DS half of NodeService
- [Storage Backend](storage-backend.md) -- FileBackedStore persistence model
- [GroupMember Lifecycle](group-member-lifecycle.md) -- how KeyPackages are generated and consumed
- [Auth, Devices, and Tokens](../roadmap/authz-plan.md) -- planned auth improvements
- [NodeService Schema](../wire-format/node-service-schema.md) -- Cap'n Proto schema reference
- [Hybrid KEM](../protocol-layers/hybrid-kem.md) -- post-quantum envelope encryption

View File

@@ -0,0 +1,337 @@
# Delivery Service Internals
The Delivery Service (DS) is a store-and-forward relay for opaque MLS payloads.
It never inspects, decrypts, or validates MLS ciphertext -- it routes solely by
recipient identity key and channel identifier. The DS exposes three operations
through the `NodeService` RPC interface: `enqueue`, `fetch`, and `fetchWait`.
**Sources:**
- `crates/quicnprotochat-server/src/main.rs` (RPC handlers)
- `crates/quicnprotochat-server/src/storage.rs` (queue storage)
- `schemas/node.capnp` (wire schema)
---
## Architecture
```text
NodeService (port 7000)
=======================
enqueue(recipientKey, payload, channelId)
|
v
+---------------------------------------------------------+
| FileBackedStore |
| |
| deliveries: Mutex<HashMap<ChannelKey, VecDeque<Vec<u8>>>>|
| ^ ^ |
| | | |
| ChannelKey { FIFO queue of |
| channel_id, opaque payload |
| recipient_key bytes |
| } |
| |
| Persisted to: data/deliveries.bin (bincode, V2 format) |
+---------------------------------------------------------+
|
v
notify_waiters() --> DashMap<Vec<u8>, Arc<Notify>>
^
|
keyed by recipient_key
wakes blocked fetchWait calls
```
The DS is intentionally MLS-unaware. This design decision is documented in
[ADR-004: MLS-Unaware Delivery Service](../design-rationale/adr-004-mls-unaware-ds.md).
From the server's perspective, every payload is an opaque blob -- it could be
a Welcome, a Commit, an application message, or a hybrid-encrypted envelope.
---
## Queue Model
### ChannelKey
Delivery queues are indexed by a compound key:
```rust
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug)]
pub struct ChannelKey {
pub channel_id: Vec<u8>,
pub recipient_key: Vec<u8>,
}
```
| Field | Size | Purpose |
|-----------------|-------------|---------|
| `channel_id` | Variable (typically a 16-byte UUID, or empty) | Isolates messages by conversation. Empty for legacy/default channel. |
| `recipient_key` | 32 bytes | Ed25519 public key of the intended recipient. |
The `ChannelKey` implements `Hash` manually, hashing `channel_id` followed by
`recipient_key`.
**Channel-aware routing** ensures that messages for different conversations do
not interfere with each other. A client fetching from channel A will not see
messages enqueued for channel B, even if both target the same recipient. For
legacy clients (or single-channel usage), `channel_id` is left empty.
### Queue Structure
Each `ChannelKey` maps to a `VecDeque<Vec<u8>>`:
```text
ChannelKey("chan-1", "alice-pk") -> [msg_1, msg_2, msg_3]
ChannelKey("chan-1", "bob-pk") -> [msg_4]
ChannelKey("chan-2", "alice-pk") -> [msg_5, msg_6]
ChannelKey("", "alice-pk") -> [msg_7] (legacy/default channel)
```
Messages within a queue are ordered FIFO (first-in, first-out). This preserves
MLS epoch ordering, which is critical: a recipient must process a Welcome before
application messages, and Commits in the order they were produced.
---
## RPC Operations
### enqueue
Appends a payload to the recipient's queue and wakes any blocked long-poll
waiters.
```capnp
enqueue @2 (recipientKey :Data, payload :Data, channelId :Data,
version :UInt16, auth :Auth) -> ();
```
**Handler logic:**
1. **Parse parameters.** Extract `recipientKey`, `payload`, `channelId`,
`version`, and `auth` from the Cap'n Proto request.
2. **Validate auth.** Call `validate_auth()` to check the `Auth` struct. See
[Authentication Service Internals](authentication-service.md) for auth
validation details.
3. **Validate inputs:**
| Field | Constraint | Error on Violation |
|----------------|-------------------------|--------------------|
| `recipientKey` | Exactly 32 bytes | `"recipientKey must be exactly 32 bytes, got {n}"` |
| `payload` | Non-empty | `"payload must not be empty"` |
| `payload` | At most 5 MB | `"payload exceeds max size (5242880 bytes)"` |
| `version` | 0 (legacy) or 1 (current) | `"unsupported wire version {v} (expected 0 or 1)"` |
4. **Store.** Call `FileBackedStore::enqueue(recipient_key, channel_id, payload)`,
which constructs a `ChannelKey` from the channel ID and recipient key, then
pushes the payload to the back of the corresponding `VecDeque`. The entire
delivery map is flushed to disk.
5. **Notify waiters.** Look up or create a `tokio::sync::Notify` for the
recipient key in `DashMap<Vec<u8>, Arc<Notify>>` and call
`notify_waiters()`. This wakes all `fetchWait` calls currently blocked on
this recipient.
### fetch
Atomically drains the entire queue for a recipient+channel and returns all
payloads.
```capnp
fetch @3 (recipientKey :Data, channelId :Data, version :UInt16, auth :Auth)
-> (payloads :List(Data));
```
**Handler logic:**
1. Parse and validate `recipientKey` (32 bytes), `version` (0 or 1), and
`auth`.
2. Call `FileBackedStore::fetch(recipient_key, channel_id)`, which:
- Constructs a `ChannelKey`.
- Calls `VecDeque::drain(..)` on the matching queue, collecting all messages.
- Flushes the updated map (in which this queue is now empty) to disk.
- Returns the drained messages as `Vec<Vec<u8>>`.
3. Build a `List(Data)` response with all the payload bytes.
**Important:** The drain is atomic with respect to the `Mutex` lock. No
interleaving with concurrent `enqueue` calls is possible. The returned list
preserves FIFO order.
### fetchWait (Long-Polling)
Combines `fetch` with a blocking wait. If the queue is empty, the server waits
for up to `timeoutMs` milliseconds for a new message to arrive.
```capnp
fetchWait @4 (recipientKey :Data, channelId :Data, version :UInt16,
timeoutMs :UInt64, auth :Auth) -> (payloads :List(Data));
```
**Handler logic:**
```text
1. validate inputs (same as fetch)
2. messages = store.fetch(recipient_key, channel_id)
3. if messages.is_empty() AND timeout_ms > 0:
a. waiter = waiters.entry(recipient_key).or_insert(Arc::new(Notify::new()))
b. tokio::time::timeout(Duration::from_millis(timeout_ms), waiter.notified()).await
c. messages = store.fetch(recipient_key, channel_id) // re-fetch after wake
4. return messages
```
The implementation uses `Promise::from_future(async move { ... })` because the
`tokio::time::timeout` call is async. This is the only DS handler that produces
an async `Promise`.
**Timeout behavior:**
- If `timeout_ms == 0`, `fetchWait` behaves identically to `fetch` (immediate
return).
- If a message arrives before the timeout, `notify_waiters()` from `enqueue`
wakes the `Notify`, and the handler re-fetches immediately.
- If the timeout expires without a message, the handler re-fetches (which will
return empty) and returns an empty list.
**Waiter model:** The `DashMap<Vec<u8>, Arc<Notify>>` is keyed by recipient key
(not by `ChannelKey`). This means a notification for any channel targeting the
same recipient will wake all blocked `fetchWait` calls for that recipient. This
is a deliberate simplification -- the re-fetch after waking will only return
messages from the requested channel, so a cross-channel wake-up merely makes the
call return an empty list before its timeout elapses rather than delivering
messages from the wrong channel.
---
## Version Validation
The `version` field in `enqueue`, `fetch`, and `fetchWait` enables future
schema evolution:
| Version | Meaning |
|---------|---------|
| 0 | Legacy (pre-versioning). `channelId` is treated as empty. |
| 1 | Current wire format. `channelId` is a meaningful field. |
| 2+ | Rejected with `"unsupported wire version"`. |
Both 0 and 1 are accepted on the server side. The constant
`CURRENT_WIRE_VERSION = 1` is used in validation:
```rust
if version != 0 && version != CURRENT_WIRE_VERSION {
return Promise::err(/* unsupported version */);
}
```
The client library always sends `version: 1` for new operations.
---
## Notification System
The waiter map provides a lightweight push-notification mechanism:
```text
enqueue() fetchWait()
| |
v v
store.enqueue(key, ch, payload) messages = store.fetch(key, ch)
| |
v | (if empty)
waiter = waiters.entry(key) v
.or_insert(Notify::new()) waiter = waiters.entry(key)
| .or_insert(Notify::new())
v |
waiter.notify_waiters() v
| timeout(duration, waiter.notified())
| |
+------- wakes ----------------------->+
|
v
messages = store.fetch(key, ch)
|
v
return messages
```
`tokio::sync::Notify` is a broadcast notification primitive. `notify_waiters()`
wakes all tasks currently awaiting `.notified()`. If no tasks are waiting, the
notification is lost (there is no stored permit in the `notify_waiters()` path).
This is acceptable because `fetchWait` always performs a fetch before blocking,
so messages that arrive before the wait begins are returned immediately.
---
## Data Flow Example: Two-Party Message Exchange
```text
Alice Server DS Bob
| | |
| encrypt("hello bob") | |
| -> ct_bytes | |
| | |
| enqueue(bob_pk, ct_bytes) | |
| -------------------------> | |
| | queue[("", bob_pk)] += ct |
| | notify_waiters(bob_pk) |
| | |
| | <--- fetchWait(bob_pk, 30s) |
| | (was blocked, now woken)|
| | |
| | drain queue[("", bob_pk)] |
| | ---- [ct_bytes] -----------> |
| | |
| | decrypt(ct_bytes) |
| | -> "hello bob" |
```
---
## Server Constants
| Constant | Value | Purpose |
|-------------------------|-----------|---------|
| `MAX_PAYLOAD_BYTES` | 5,242,880 (5 MB) | Maximum size of a single enqueued payload |
| `MAX_KEYPACKAGE_BYTES` | 1,048,576 (1 MB) | Maximum size of a KeyPackage (AS) |
| `CURRENT_WIRE_VERSION` | 1 | Current schema version; rejects > 1 |
---
## Persistence
Delivery queues are persisted to `data/deliveries.bin` using bincode
serialization. The V2 format uses `ChannelKey` as the map key:
```rust
#[derive(Serialize, Deserialize, Default)]
struct QueueMapV2 {
map: HashMap<ChannelKey, VecDeque<Vec<u8>>>,
}
```
On load, the server attempts V2 deserialization first. If that fails, it falls
back to V1 format (keyed by `Vec<u8>` recipient key only) and migrates in
memory by assigning empty `channel_id` to each entry:
```rust
for (recipient_key, queue) in legacy.map.into_iter() {
upgraded.insert(
ChannelKey { channel_id: Vec::new(), recipient_key },
queue,
);
}
```
See [Storage Backend](storage-backend.md) for the full persistence model.
---
## Related Pages
- [Authentication Service Internals](authentication-service.md) -- KeyPackage storage and retrieval
- [GroupMember Lifecycle](group-member-lifecycle.md) -- how `send_message()` and `receive_message()` produce and consume the payloads
- [Storage Backend](storage-backend.md) -- `FileBackedStore` persistence and migration
- [NodeService Schema](../wire-format/node-service-schema.md) -- Cap'n Proto schema reference
- [ADR-004: MLS-Unaware Delivery Service](../design-rationale/adr-004-mls-unaware-ds.md) -- design rationale
- [End-to-End Data Flow](../architecture/data-flow.md) -- sequence diagrams for registration, group creation, and messaging

View File

@@ -0,0 +1,316 @@
# GroupMember Lifecycle
The `GroupMember` struct in `quicnprotochat-core` is the core MLS state machine
that manages a single client's membership in an MLS group. It wraps an openmls
`MlsGroup`, a persistent crypto backend, and the long-term Ed25519 identity
keypair. Every MLS operation -- key package generation, group creation, member
addition, joining, sending, and receiving -- flows through this struct.
**Source:** `crates/quicnprotochat-core/src/group.rs`
---
## Struct Fields
```rust
pub struct GroupMember {
backend: StoreCrypto, // persistent crypto backend (key store + RustCrypto)
identity: Arc<IdentityKeypair>, // long-term Ed25519 signing keypair
group: Option<MlsGroup>, // active MLS group (None before create/join)
config: MlsGroupConfig, // shared group configuration
}
```
| Field | Type | Purpose |
|------------|-------------------------|---------|
| `backend` | `StoreCrypto` | Implements `OpenMlsCryptoProvider`. Couples a `RustCrypto` engine with a `DiskKeyStore` that holds HPKE init private keys. The backend is **persistent** -- the same instance must be used from `generate_key_package()` through `join_group()`. See [Storage Backend](storage-backend.md) for details on `DiskKeyStore`. |
| `identity` | `Arc<IdentityKeypair>` | The client's long-term Ed25519 keypair. Used as the MLS `Signer` for all group operations (signing Commits, KeyPackages, credentials). Also used to build the MLS `BasicCredential`. See [Ed25519 Identity Keys](../cryptography/identity-keys.md). |
| `group` | `Option<MlsGroup>` | `None` until the client creates or joins a group. Once set, all message operations (`send_message`, `receive_message`) operate on this group. |
| `config` | `MlsGroupConfig` | Shared configuration for all groups created by this member. Built once in the constructor. |
### MlsGroupConfig
The configuration is constructed as:
```rust
MlsGroupConfig::builder()
.use_ratchet_tree_extension(true)
.build()
```
Setting `use_ratchet_tree_extension = true` embeds the ratchet tree inside
Welcome messages (in the `GroupInfo` extension). This means `new_from_welcome`
can be called with `ratchet_tree = None` -- openmls extracts the tree from the
Welcome itself. This simplifies the protocol by eliminating the need for a
separate ratchet tree distribution mechanism.
---
## State Transition Diagram
```text
GroupMember::new(identity) -----> [No Group]
| group = None
|
+-- generate_key_package() --> [Has KeyPackage, waiting for Welcome]
| Returns TLS-encoded HPKE init key stored in backend
| KeyPackage bytes
|
+-- create_group(group_id) --> [Group Creator, epoch 0]
| group = Some(MlsGroup) Sole member of the group
| |
| +-- add_member(kp_bytes) --> [epoch N+1]
| Returns (commit_bytes, welcome_bytes)
| Pending commit merged locally
| Creator ready to encrypt immediately
|
+-- join_group(welcome_bytes) --> [Group Member, epoch N]
group = Some(MlsGroup) Joined via Welcome
|
+-- send_message(plaintext) --> encrypted PrivateMessage bytes
|
+-- receive_message(bytes) --> Some(plaintext) [ApplicationMessage]
| None [Commit or Proposal]
```
### Transitions in Detail
1. **`new(identity)`** -- Creates a `GroupMember` with an ephemeral
`DiskKeyStore` and no active group. The `StoreCrypto` backend is initialized
fresh. An alternative constructor, `new_with_state`, accepts a pre-existing
`DiskKeyStore` and optional serialized `MlsGroup` for session resumption.
2. **`generate_key_package()`** -- Generates a fresh single-use MLS KeyPackage.
The HPKE init private key is stored in `self.backend`'s key store. Returns
TLS-encoded KeyPackage bytes suitable for upload to the
[Authentication Service](authentication-service.md).
3. **`create_group(group_id)`** -- Creates a new MLS group where the caller
becomes the sole member at epoch 0. The `group_id` can be any non-empty byte
string (SHA-256 of a human-readable name is recommended).
4. **`add_member(key_package_bytes)`** -- Adds a peer using their TLS-encoded
KeyPackage. Produces a Commit and a Welcome. The Commit is merged locally
(advancing the epoch), so the creator is immediately ready to encrypt. The
caller is responsible for distributing the Welcome to the new member via the
[Delivery Service](delivery-service.md).
5. **`join_group(welcome_bytes)`** -- Joins an existing group from a TLS-encoded
Welcome message. The caller must have previously called
`generate_key_package()` on **this same instance** so the HPKE init private
key is available in the backend.
6. **`send_message(plaintext)`** -- Encrypts plaintext as an MLS Application
message (PrivateMessage variant). Returns TLS-encoded bytes for delivery.
7. **`receive_message(bytes)`** -- Processes an incoming MLS message. Returns
`Some(plaintext)` for application messages, `None` for Commits (which advance
the group epoch) and Proposals (which are stored for a future Commit).
---
## Critical Invariant: Backend Identity
The same `GroupMember` instance must be used from `generate_key_package()`
through `join_group()`. This is the most important invariant in the system.
**Why:** When `generate_key_package()` runs, openmls creates an HPKE key pair
and stores the private key in the `StoreCrypto` backend's in-memory key store
(the `DiskKeyStore`). When `join_group()` later processes the Welcome, openmls
calls `new_from_welcome`, which reads the HPKE init private key from the key
store to decrypt the Welcome's encrypted group secrets. If a different backend
instance is used, the private key will not be found, and `new_from_welcome` will
fail with a key-not-found error.
```text
generate_key_package() join_group(welcome)
| |
v v
KeyPackage::builder().build() MlsGroup::new_from_welcome()
| |
v v
backend.key_store().store( backend.key_store().read(
init_key_ref, hpke_private_key) init_key_ref) -> hpke_private_key
| |
+----------- MUST BE SAME BACKEND ------+
```
For persistent clients, the `DiskKeyStore::persistent(path)` constructor is used
so that the HPKE init keys survive process restarts. The client state file
stores the path alongside the identity seed and serialized group, and
`new_with_state` reconstructs the `GroupMember` with the persisted key store.
---
## Credential Construction
The `make_credential_with_key` helper builds the MLS `CredentialWithKey` used
for KeyPackage generation and group creation:
```rust
fn make_credential_with_key(&self) -> Result<CredentialWithKey, CoreError> {
let credential = Credential::new(
self.identity.public_key_bytes().to_vec(),
CredentialType::Basic,
)?;
Ok(CredentialWithKey {
credential,
signature_key: self.identity.public_key_bytes().to_vec().into(),
})
}
```
Key points:
- **Credential type:** `CredentialType::Basic` -- the simplest MLS credential
form, containing only the raw public key bytes.
- **Credential identity:** The raw 32-byte Ed25519 public key. This is what
peers use to identify the member within the group.
- **Signature key:** The same Ed25519 public key bytes, wrapped in the openmls
`SignaturePublicKey` type.
- **Signer:** The `IdentityKeypair` struct implements the openmls `Signer`
trait directly, so it can be passed to `KeyPackage::builder().build()` and
`MlsGroup::new_with_group_id()` without the external
`openmls_basic_credential` crate.
---
## MLS Ciphersuite
All operations use a single ciphersuite:
```text
MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519
```
This provides:
| Component | Algorithm | Security Level |
|---------------|--------------------|----------------|
| HPKE KEM | DHKEM(X25519) | 128-bit classical |
| AEAD | AES-128-GCM | 128-bit |
| KDF / Hash | SHA-256 | 128-bit collision resistance |
| Signature | Ed25519 | 128-bit classical |
See [Cryptography Overview](../cryptography/overview.md) for the full algorithm
inventory across all protocol layers.
---
## KeyPackage Deserialization (openmls 0.5)
openmls 0.5 separates serializable and deserializable types. `KeyPackage`
derives `TlsSerialize` but not `TlsDeserialize`. To deserialize an incoming
KeyPackage:
```rust
let key_package: KeyPackage =
KeyPackageIn::tls_deserialize(&mut bytes.as_ref())?
.validate(backend.crypto(), ProtocolVersion::Mls10)?;
```
The `KeyPackageIn` type derives `TlsDeserialize` and provides `validate()`,
which verifies the KeyPackage's signature and returns a trusted `KeyPackage`.
Similarly, `MlsMessageIn` is used to deserialize incoming MLS messages, and its
`extract()` method returns the inner message body (`MlsMessageInBody`). The
`into_welcome()` and `into_protocol_message()` methods that existed in earlier
openmls versions are feature-gated in 0.5; `extract()` with pattern matching is
the public API:
```rust
let msg_in = MlsMessageIn::tls_deserialize(&mut bytes.as_ref())?;
match msg_in.extract() {
MlsMessageInBody::Welcome(w) => { /* join_group path */ }
MlsMessageInBody::PrivateMessage(m) => ProtocolMessage::PrivateMessage(m),
MlsMessageInBody::PublicMessage(m) => ProtocolMessage::PublicMessage(m),
_ => { /* error: unexpected message type */ }
}
```
---
## Message Processing
`receive_message` handles four variants of `ProcessedMessageContent`:
| Variant | Action | Return Value |
|----------------------------------|--------------------------------------------|--------------|
| `ApplicationMessage` | Extract plaintext bytes | `Some(plaintext)` |
| `StagedCommitMessage` | `merge_staged_commit()` -- epoch advances | `None` |
| `ProposalMessage` | `store_pending_proposal()` -- cached | `None` |
| `ExternalJoinProposalMessage` | `store_pending_proposal()` -- cached | `None` |
For Commit messages, `merge_staged_commit` advances the group's epoch and
updates the ratchet tree. Proposals are stored for inclusion in a future Commit;
this allows the group to accumulate multiple proposals before committing them as
a batch.
---
## Error Handling
All `GroupMember` methods return `Result<_, CoreError>`. The MLS-specific error
variant is:
```rust
#[error("MLS error: {0}")]
Mls(String)
```
The inner string is the debug representation of the openmls error. This is a
deliberate design choice: openmls error types are complex enums with many
variants, and wrapping the debug output provides sufficient diagnostic
information without coupling `CoreError` to openmls's internal error hierarchy.
Common error scenarios:
| Operation | Failure Mode |
|------------------------|-------------------------------------------------|
| `generate_key_package` | Backend RNG failure (extremely unlikely) |
| `create_group` | Group already exists in state |
| `add_member` | Malformed KeyPackage, no active group |
| `join_group` | Welcome does not match any stored init key |
| `send_message` | No active group |
| `receive_message` | Malformed message, decryption failure, wrong epoch |
---
## Accessors
| Method | Returns | Purpose |
|-------------------|---------------------------------------|---------|
| `group_id()` | `Option<Vec<u8>>` | MLS group ID bytes, or `None` if no group is active |
| `identity()` | `&IdentityKeypair` | Reference to the long-term Ed25519 keypair |
| `identity_seed()` | `[u8; 32]` | Private seed bytes for state persistence |
| `backend()` | `&StoreCrypto` | Reference to the crypto provider |
| `group_ref()` | `Option<&MlsGroup>` | Reference to the MLS group for serialization |
---
## Unit Tests
The `two_party_mls_round_trip` test exercises the complete lifecycle:
1. Alice and Bob each create a `GroupMember` with fresh identities.
2. Bob generates a KeyPackage (stored in his backend).
3. Alice creates a group and adds Bob using his KeyPackage.
4. Bob joins via the Welcome message.
5. Alice sends "hello bob" -- Bob decrypts and verifies.
6. Bob sends "hello alice" -- Alice decrypts and verifies.
This test runs entirely in-memory (no server) and validates that the HPKE init
key invariant is maintained when the same `GroupMember` instance is used
throughout.
---
## Related Pages
- [KeyPackage Exchange Flow](keypackage-exchange.md) -- upload and fetch of KeyPackages via the server
- [Delivery Service Internals](delivery-service.md) -- how Commits and Welcomes are relayed
- [Authentication Service Internals](authentication-service.md) -- server-side KeyPackage storage
- [Storage Backend](storage-backend.md) -- `DiskKeyStore` and `FileBackedStore` persistence
- [Cryptography Overview](../cryptography/overview.md) -- algorithm inventory
- [Ed25519 Identity Keys](../cryptography/identity-keys.md) -- the `IdentityKeypair` struct

View File

@@ -0,0 +1,326 @@
# KeyPackage Exchange Flow
MLS KeyPackages are single-use tokens that enable a group creator to add a new
member. The KeyPackage contains the member's HPKE init public key, their MLS
credential (Ed25519 public key), and a signature proving ownership. The
quicnprotochat Authentication Service (AS) provides a simple upload/fetch
interface for distributing KeyPackages between clients.
This page describes the end-to-end flow: from client-side generation through
server-side storage to peer-side retrieval and consumption.
**Sources:**
- `crates/quicnprotochat-core/src/group.rs` (client-side generation)
- `crates/quicnprotochat-server/src/main.rs` (server-side handlers)
- `crates/quicnprotochat-server/src/storage.rs` (server-side persistence)
- `crates/quicnprotochat-client/src/lib.rs` (client-side RPC calls)
- `schemas/node.capnp` (wire schema)
---
## Upload Flow
The upload flow moves a freshly generated KeyPackage from a client to the
server, where it is stored for later retrieval by a peer.
```text
Client Server (AS)
| |
| 1. GroupMember::generate_key_package() |
| -> TLS-encoded KeyPackage bytes |
| -> HPKE init key stored in backend |
| |
| 2. uploadKeyPackage RPC |
| identityKey = Ed25519 pub key (32 B) |
| package = TLS-encoded bytes |
| auth = Auth struct |
| ----------------------------------------> |
| | 3. Validate inputs:
| | - identityKey == 32 bytes
| | - package non-empty
| | - package <= 1 MB
| | - auth version valid
| |
| | 4. Compute SHA-256(package)
| |
| | 5. Store: push_back to
| | HashMap<Vec<u8>, VecDeque<Vec<u8>>>
| | keyed by identity_key
| |
| 6. Response: fingerprint (SHA-256 hash) |
| <---------------------------------------- |
| |
| 7. Verify: local SHA-256 == server SHA-256|
| |
```
### Step-by-Step
1. **Client generates KeyPackage.** The client calls
`GroupMember::generate_key_package()`, which internally:
- Builds an MLS `CredentialWithKey` from the Ed25519 public key
(`CredentialType::Basic`).
- Calls `KeyPackage::builder().build()` with the ciphersuite
`MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519`, the `StoreCrypto` backend,
and the `IdentityKeypair` as the signer.
- openmls generates an ephemeral HPKE key pair (X25519) and stores the
private key in the backend's `DiskKeyStore`.
- Returns the TLS-serialized KeyPackage bytes.
See [GroupMember Lifecycle](group-member-lifecycle.md) for the critical
invariant about backend identity.
2. **Client sends `uploadKeyPackage` RPC.** The request includes:
- `identityKey`: The raw 32-byte Ed25519 public key.
- `package`: The TLS-encoded KeyPackage bytes.
- `auth`: An [Auth struct](../wire-format/auth-schema.md) with version and
optional access token.
3. **Server validates inputs.** The server checks:
- `identityKey` is exactly 32 bytes (Ed25519 public key size).
- `package` is non-empty.
- `package` does not exceed `MAX_KEYPACKAGE_BYTES` (1 MB).
- The `Auth` struct version is acceptable (0 for legacy, 1 for token-based).
4. **Server computes fingerprint.** `SHA-256(package_bytes)` produces a 32-byte
digest used as a tamper-detection fingerprint.
5. **Server stores the KeyPackage.** The package bytes are pushed to the back of
a `VecDeque<Vec<u8>>` keyed by the identity key in the server's
`FileBackedStore`. This allows multiple KeyPackages per identity (clients
should upload several to handle concurrent invitations). The store flushes to
disk after every mutation.
6. **Server returns the fingerprint.** The SHA-256 digest is sent back in the
response's `fingerprint` field.
7. **Client verifies the fingerprint.** The client computes its own
`SHA-256(package_bytes)` and compares it to the server-returned value. A
mismatch indicates tampering (the server or a MITM modified the package in
transit) and the client aborts with a `fingerprint mismatch` error.
---
## Fetch Flow
The fetch flow allows a peer to retrieve a stored KeyPackage for a target
identity, consuming it in the process (single-use per RFC 9420).
```text
Peer Server (AS)
| |
| 1. fetchKeyPackage RPC |
| identityKey = target's Ed25519 pub key |
| auth = Auth struct |
| ----------------------------------------> |
| | 2. Validate inputs:
| | - identityKey == 32 bytes
| | - auth version valid
| |
| | 3. Pop front of VecDeque
| | (FIFO, single-use)
| | Flush updated map to disk
| |
| 4. Response: package bytes (or empty) |
| <---------------------------------------- |
| |
| 5. If non-empty: |
| KeyPackageIn::tls_deserialize() |
| .validate(crypto, MLS10) |
| -> trusted KeyPackage for add_member() |
| |
```
### Step-by-Step
1. **Peer sends `fetchKeyPackage` RPC.** The request includes the target's
Ed25519 public key (32 bytes) and an Auth context.
2. **Server validates inputs.** Same identity key length check as upload (32
bytes).
3. **Server pops from the front of the queue.** `VecDeque::pop_front()` returns
the oldest uploaded KeyPackage. This enforces FIFO ordering and **single-use
semantics**: once fetched, the KeyPackage is permanently removed from the
server. This is a hard requirement of the MLS specification -- reusing a
KeyPackage would allow an attacker to create conflicting group states.
The store is flushed to disk after the pop, ensuring the removal survives
server restarts.
4. **Server returns the package bytes.** If the queue was empty (no KeyPackages
available), the response contains an empty `Data` field. The client checks
for emptiness to distinguish "no packages available" from "package fetched."
5. **Peer deserializes and validates.** The peer uses `KeyPackageIn::tls_deserialize()`
followed by `.validate(crypto, ProtocolVersion::Mls10)` to verify the
KeyPackage signature. The validated `KeyPackage` can then be passed to
`GroupMember::add_member()`.
---
## Fingerprint Verification
The fingerprint mechanism provides a simple tamper-detection check:
```text
Client Server Client
SHA-256(pkg) ---------> store pkg -----------> SHA-256(pkg)
| SHA-256(pkg) --------> |
| | |
+---- compare: local_fp == server_fp --------+
```
**What it detects:**
- A server (or storage layer) that ends up hashing different bytes than the
client sent, e.g. because the package was corrupted, truncated, or replaced
before the hash was computed.
- A network-layer MITM modifying the package in transit (though QUIC/TLS
already prevents this).
**What it does NOT detect:**
- A malicious server that stores a substituted package but still reports the
fingerprint the client expects (the server computes the hash itself, so it
can return any value it likes). True KeyPackage authenticity requires
verifying the Ed25519 signature inside the KeyPackage, which openmls does
during `validate()`.
The fingerprint is best understood as a transport-level integrity check, not a
cryptographic proof of authenticity. The real authenticity guarantee comes from
the MLS KeyPackage signature verified on the receiving side.
---
## Storage Model
On the server, KeyPackages are stored in a `FileBackedStore`:
```text
FileBackedStore
+-- key_packages: Mutex<HashMap<Vec<u8>, VecDeque<Vec<u8>>>>
| ^ ^
| | |
| identity_key queue of TLS-encoded
| (32 bytes) KeyPackage bytes
|
+-- Persisted to: data/keypackages.bin (bincode serialized)
```
Each identity key maps to a FIFO queue of KeyPackage bytes. Because every fetch
consumes one KeyPackage, a client should upload several so that multiple peers
can each obtain one before the queue runs out. If the queue is exhausted,
fetches return empty until the client uploads more.
The storage format uses the `QueueMapV1` wrapper for bincode serialization:
```rust
#[derive(Serialize, Deserialize, Default)]
struct QueueMapV1 {
map: HashMap<Vec<u8>, VecDeque<Vec<u8>>>,
}
```
See [Storage Backend](storage-backend.md) for details on persistence,
flush-on-write semantics, and the V1/V2 delivery map migration.
---
## Input Validation Summary
| Field | Constraint | Error on Violation |
|----------------|---------------------------|--------------------|
| `identityKey` | Exactly 32 bytes | `"identityKey must be exactly 32 bytes, got {n}"` |
| `package` | Non-empty | `"package must not be empty"` |
| `package` | At most 1 MB (1,048,576) | `"package exceeds max size (1048576 bytes)"` |
| `auth.version` | 0 (legacy) or 1 (current) | `"unsupported auth version {v}"` |
| `auth.accessToken` | Non-empty when version=1 | `"auth.version=1 requires non-empty accessToken"` |
---
## Wire Schema
From `schemas/node.capnp`:
```capnp
uploadKeyPackage @0 (identityKey :Data, package :Data, auth :Auth)
-> (fingerprint :Data);
fetchKeyPackage @1 (identityKey :Data, auth :Auth)
-> (package :Data);
```
The `Auth` struct is shared across all RPC methods:
```capnp
struct Auth {
version @0 :UInt16; # 0 = legacy/none, 1 = token-based
accessToken @1 :Data; # opaque bearer token
deviceId @2 :Data; # optional UUID for auditing
}
```
See [NodeService Schema](../wire-format/node-service-schema.md) for the
complete schema reference.
---
## Client-Side Usage
The CLI exposes two commands for KeyPackage exchange:
### `register` / `register-state`
Generates a fresh KeyPackage and uploads it. `register` uses an ephemeral
identity; `register-state` loads from (or initializes) a persistent state file.
```bash
# Ephemeral registration (for testing)
quicnprotochat register --server 127.0.0.1:7000
# Persistent registration (production)
quicnprotochat register-state --state alice.bin --server 127.0.0.1:7000
```
Output:
```
identity_key : 7a3f... (64 hex chars, 32 bytes)
fingerprint : 9e1c... (SHA-256 of KeyPackage)
KeyPackage uploaded successfully.
```
### `fetch-key`
Fetches a peer's KeyPackage by their hex-encoded Ed25519 public key:
```bash
quicnprotochat fetch-key --server 127.0.0.1:7000 7a3f...
```
---
## Security Considerations
1. **Single-use enforcement.** The server's `pop_front()` semantics ensure each
KeyPackage is consumed exactly once, satisfying RFC 9420's requirement.
However, a malicious server could duplicate KeyPackages before deletion. True
single-use is enforced at the MLS protocol level: duplicate KeyPackage usage
would be detected when processing the Welcome (mismatched group state).
2. **No authentication on fetch.** Currently, anyone can fetch any identity's
KeyPackage. This is intentional for the MVP but means an attacker could
exhaust a victim's KeyPackage supply. The
[Auth, Devices, and Tokens](../roadmap/authz-plan.md) plan addresses this
with token-based access control.
3. **HPKE init key lifetime.** The HPKE init private key lives in the
`DiskKeyStore` from generation until the Welcome is processed. For persistent
clients using `DiskKeyStore::persistent()`, this key survives process
restarts. For ephemeral clients, the key exists only in memory and is lost if
the process exits before `join_group()` is called.
---
## Related Pages
- [GroupMember Lifecycle](group-member-lifecycle.md) -- the MLS state machine that generates and consumes KeyPackages
- [Authentication Service Internals](authentication-service.md) -- server-side KeyPackage handling
- [Delivery Service Internals](delivery-service.md) -- how the Welcome message is relayed after `add_member()`
- [Storage Backend](storage-backend.md) -- `FileBackedStore` persistence model
- [NodeService Schema](../wire-format/node-service-schema.md) -- Cap'n Proto schema reference

View File

@@ -0,0 +1,390 @@
# Storage Backend
quicnprotochat uses two storage backends: `FileBackedStore` on the server side
for KeyPackages, delivery queues, and hybrid public keys, and `DiskKeyStore` on
the client side for MLS cryptographic key material. Both follow the same
pattern: in-memory data structures backed by optional file persistence, with
full serialization on every write.
**Sources:**
- `crates/quicnprotochat-server/src/storage.rs` (FileBackedStore)
- `crates/quicnprotochat-core/src/keystore.rs` (DiskKeyStore, StoreCrypto)
---
## FileBackedStore (Server-Side)
`FileBackedStore` provides persistent storage for the server's three data
domains: KeyPackages, delivery queues, and hybrid public keys.
### Structure
```rust
pub struct FileBackedStore {
kp_path: PathBuf, // keypackages.bin
ds_path: PathBuf, // deliveries.bin
hk_path: PathBuf, // hybridkeys.bin
key_packages: Mutex<HashMap<Vec<u8>, VecDeque<Vec<u8>>>>, // identity -> KP queue
deliveries: Mutex<HashMap<ChannelKey, VecDeque<Vec<u8>>>>, // (channel, recipient) -> msg queue
hybrid_keys: Mutex<HashMap<Vec<u8>, Vec<u8>>>, // identity -> hybrid PK
}
```
Each domain has its own `Mutex`-protected in-memory map and its own disk file.
The `Mutex` (not `RwLock`) is used because the fetch paths themselves mutate
state (e.g., `pop_front` in `fetch_key_package`) and therefore need exclusive
access.
### Initialization
```rust
FileBackedStore::open(dir: impl AsRef<Path>) -> Result<Self, StorageError>
```
1. Creates the directory if it does not exist.
2. Loads each map from its respective file, or initializes an empty map if the
file is missing.
3. Returns the initialized store.
File paths:
- `{dir}/keypackages.bin` -- KeyPackage queues
- `{dir}/deliveries.bin` -- Delivery queues
- `{dir}/hybridkeys.bin` -- Hybrid public keys
The default data directory is `data/`, configurable via `--data-dir` /
`QUICNPROTOCHAT_DATA_DIR`.
### Flush-on-Every-Write
Every mutation serializes the entire in-memory map to disk:
```text
upload_key_package(identity_key, package)
|
+-- lock key_packages Mutex
|
+-- map.entry(identity_key).or_default().push_back(package)
|
+-- flush_kp_map(path, &map)
| +-- QueueMapV1 { map: map.clone() }
| +-- bincode::serialize(&payload)
| +-- fs::write(path, bytes)
|
+-- unlock Mutex
```
This approach is deliberately simple and correct:
- **Crash safety:** Every successful RPC response guarantees the data has been
written to the filesystem.
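- **Whole-map snapshots:** The entire map is serialized in memory and then
written with a single `fs::write` call. The write itself is not atomic (there
is no temp file with rename), so a crash mid-write can still leave a truncated
file -- this is an MVP trade-off.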
- **Performance:** Not suitable for production scale. Every write serializes and
writes the full map, which is O(n) in the total number of stored entries.
**Production improvement path:** Replace with a proper database (SQLite, sled,
or similar) for incremental writes, WAL-based crash safety, and concurrent
access without full serialization.
### KeyPackage Operations
| Method | Behavior |
|--------|----------|
| `upload_key_package(identity_key, package)` | Push to back of VecDeque; flush |
| `fetch_key_package(identity_key)` | Pop from front (FIFO, single-use); flush |
The KeyPackage map uses the `QueueMapV1` serialization wrapper:
```rust
#[derive(Serialize, Deserialize, Default)]
struct QueueMapV1 {
map: HashMap<Vec<u8>, VecDeque<Vec<u8>>>,
}
```
### Delivery Queue Operations
| Method | Behavior |
|--------|----------|
| `enqueue(recipient_key, channel_id, payload)` | Construct ChannelKey; push to back; flush |
| `fetch(recipient_key, channel_id)` | Construct ChannelKey; drain entire VecDeque; flush |
The delivery map uses `QueueMapV2` with the compound `ChannelKey`:
```rust
#[derive(Serialize, Deserialize, Clone, Eq, PartialEq, Debug)]
pub struct ChannelKey {
pub channel_id: Vec<u8>,
pub recipient_key: Vec<u8>,
}
#[derive(Serialize, Deserialize, Default)]
struct QueueMapV2 {
map: HashMap<ChannelKey, VecDeque<Vec<u8>>>,
}
```
See [Delivery Service Internals](delivery-service.md) for the full queue model
and channel-aware routing semantics.
### V1/V2 Delivery Map Migration
The delivery map format evolved from V1 (keyed by recipient key only) to V2
(keyed by `ChannelKey` with channel ID + recipient key). The load function
handles both formats transparently:
```rust
fn load_delivery_map(path: &Path) -> Result<HashMap<ChannelKey, VecDeque<Vec<u8>>>> {
let bytes = fs::read(path)?;
// Try V2 format first (channel-aware).
if let Ok(map) = bincode::deserialize::<QueueMapV2>(&bytes) {
return Ok(map.map);
}
// Fallback to legacy V1 format: migrate by setting channel_id = empty.
let legacy: QueueMapV1 = bincode::deserialize(&bytes)?;
let mut upgraded = HashMap::new();
for (recipient_key, queue) in legacy.map.into_iter() {
upgraded.insert(
ChannelKey { channel_id: Vec::new(), recipient_key },
queue,
);
}
Ok(upgraded)
}
```
Migration strategy:
1. Attempt to deserialize as V2 (`QueueMapV2`). If successful, use as-is.
2. If V2 fails, deserialize as V1 (`QueueMapV1`). Migrate each entry by
wrapping the recipient key in a `ChannelKey` with an empty `channel_id`.
3. The next flush will write V2 format, completing the migration.
This in-place migration is transparent to clients. Legacy messages (pre-channel
routing) appear under the empty channel ID and can still be fetched by clients
that pass an empty `channelId`.
### Hybrid Key Operations
| Method | Behavior |
|--------|----------|
| `upload_hybrid_key(identity_key, hybrid_pk)` | Insert (overwrite); flush |
| `fetch_hybrid_key(identity_key)` | Read-only lookup; no flush needed |
The hybrid key map is a flat `HashMap<Vec<u8>, Vec<u8>>` serialized directly
with bincode. Unlike KeyPackages, hybrid keys are not single-use -- they persist
until overwritten.
### Error Type
```rust
#[derive(thiserror::Error, Debug)]
pub enum StorageError {
#[error("io error: {0}")]
Io(String),
#[error("serialization error")]
Serde,
}
```
I/O errors (disk full, permission denied) and serialization errors (corrupt
file) are the two failure modes. The server converts `StorageError` to
`capnp::Error` via the `storage_err` helper for RPC responses.
---
## DiskKeyStore (Client-Side)
`DiskKeyStore` is the client-side key store that implements the openmls
`OpenMlsKeyStore` trait. It holds MLS cryptographic key material -- most
importantly, the HPKE init private keys created during KeyPackage generation.
### Structure
```rust
pub struct DiskKeyStore {
path: Option<PathBuf>, // None = ephemeral (in-memory only)
values: RwLock<HashMap<Vec<u8>, Vec<u8>>>, // key reference -> serialized MLS entity
}
```
The `RwLock` (not `Mutex`) allows concurrent reads. Write operations (store,
delete) take an exclusive lock and flush to disk.
### Modes
| Mode | Constructor | Persistence |
|------|-------------|-------------|
| Ephemeral | `DiskKeyStore::ephemeral()` | None. Data exists only in memory. Lost on process exit. |
| Persistent | `DiskKeyStore::persistent(path)` | Yes. Every write flushes the full map to disk. Survives process restarts. |
**Ephemeral mode** is used for tests and the `register` / `demo-group` CLI
commands where session resumption is not needed.
**Persistent mode** is used for production clients (`register-state`, `invite`,
`join`, `send`, `recv` commands). The key store file path is derived from the
state file path by changing the extension to `.ks`:
```rust
fn keystore_path(state_path: &Path) -> PathBuf {
let mut path = state_path.to_path_buf();
path.set_extension("ks");
path
}
```
So `quicnprotochat-state.bin` produces a key store at `quicnprotochat-state.ks`.
### Persistence Format
The key store is serialized as a bincode-encoded `HashMap<Vec<u8>, Vec<u8>>`.
Individual values are serialized using `serde_json` (as required by openmls's
`MlsEntity` trait bound):
```rust
fn store<V: MlsEntity>(&self, k: &[u8], v: &V) -> Result<(), Self::Error> {
let value = serde_json::to_vec(v)?; // MlsEntity -> JSON bytes
let mut values = self.values.write().unwrap();
values.insert(k.to_vec(), value);
drop(values); // release lock before I/O
self.flush() // bincode serialize full map to disk
}
```
The two-layer serialization (JSON for values, bincode for the map) is a
consequence of openmls requiring `serde_json`-compatible serialization for MLS
entities, while the outer map uses bincode for compactness.
### OpenMlsKeyStore Implementation
| Trait Method | DiskKeyStore Behavior |
|--------------|-----------------------|
| `store(k, v)` | JSON-serialize value, insert into HashMap, flush to disk |
| `read(k)` | Look up key, JSON-deserialize value, return `Option<V>` |
| `delete(k)` | Remove from HashMap, flush to disk |
The `read` method does not flush because it does not modify the map. A failed
deserialization (corrupt value) returns `None` rather than an error, which
matches the openmls `OpenMlsKeyStore` trait signature.
### Flush Behavior
```rust
fn flush(&self) -> Result<(), DiskKeyStoreError> {
let Some(path) = &self.path else {
return Ok(()); // ephemeral: no-op
};
let values = self.values.read().unwrap();
let bytes = bincode::serialize(&*values)?;
fs::create_dir_all(path.parent())?; // ensure parent dir exists
fs::write(path, bytes)?;
Ok(())
}
```
Like `FileBackedStore`, the flush serializes the entire map on every write.
For client-side usage, the map is typically small (a handful of HPKE keys), so
this is not a performance concern.
### Error Type
```rust
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum DiskKeyStoreError {
#[error("serialization error")]
Serialization,
#[error("io error: {0}")]
Io(String),
}
```
---
## StoreCrypto
`StoreCrypto` is a composite type that bundles a `DiskKeyStore` with the
`RustCrypto` provider from `openmls_rust_crypto`. It implements the openmls
`OpenMlsCryptoProvider` trait, which is the single entry point that openmls
uses for all cryptographic operations:
```rust
pub struct StoreCrypto {
crypto: RustCrypto, // AES-GCM, SHA-256, X25519, Ed25519, etc.
key_store: DiskKeyStore, // HPKE init keys, MLS epoch secrets, etc.
}
impl OpenMlsCryptoProvider for StoreCrypto {
type CryptoProvider = RustCrypto;
type RandProvider = RustCrypto;
type KeyStoreProvider = DiskKeyStore;
fn crypto(&self) -> &RustCrypto { &self.crypto }
fn rand(&self) -> &RustCrypto { &self.crypto }
fn key_store(&self) -> &DiskKeyStore { &self.key_store }
}
```
`StoreCrypto` is the `backend` field of [`GroupMember`](group-member-lifecycle.md).
It is passed to every openmls operation -- `KeyPackage::builder().build()`,
`MlsGroup::new_with_group_id()`, `MlsGroup::new_from_welcome()`,
`create_message()`, `process_message()`, etc.
The critical property is that the **same `StoreCrypto` instance** (and therefore
the same `DiskKeyStore`) must be used from `generate_key_package()` through
`join_group()`, because the HPKE init private key is stored in and read from
this key store.
---
## Storage Architecture Summary
```text
Server Client
====== ======
FileBackedStore DiskKeyStore
+-- key_packages (Mutex<HashMap>) +-- values (RwLock<HashMap>)
| Persisted: keypackages.bin | Persisted: {state}.ks
| Format: bincode(QueueMapV1) | Format: bincode(HashMap)
| | Values: serde_json(MlsEntity)
+-- deliveries (Mutex<HashMap>) |
| Persisted: deliveries.bin +-- Wrapped by StoreCrypto
| Format: bincode(QueueMapV2) | implements OpenMlsCryptoProvider
| Migration: V1 -> V2 on load |
| +-- Used by GroupMember.backend
+-- hybrid_keys (Mutex<HashMap>)
Persisted: hybridkeys.bin
Format: bincode(HashMap)
```
### Shared Design Patterns
Both backends share these characteristics:
1. **Full-map serialization.** Every write serializes the entire map to disk.
Simple, correct, but O(n) per write.
2. **Bincode format.** The outer map is always bincode-serialized. Compact and
fast, but not human-readable and not forward-compatible without wrapper
structs.
3. **No WAL / journaling.** A crash during `fs::write` could leave a corrupt
file. For the MVP, this is acceptable -- the data can be regenerated (clients
re-upload KeyPackages; delivery messages are ephemeral).
4. **No compaction.** Empty queues are not removed from the map. Over time, the
serialized size can grow with stale entries. A production implementation
should periodically compact empty entries.
5. **Directory creation.** Both backends call `fs::create_dir_all` before
writing, ensuring parent directories exist.
---
## Related Pages
- [GroupMember Lifecycle](group-member-lifecycle.md) -- how `StoreCrypto` and `DiskKeyStore` are used during MLS operations
- [KeyPackage Exchange Flow](keypackage-exchange.md) -- upload and fetch through `FileBackedStore`
- [Delivery Service Internals](delivery-service.md) -- delivery queue operations
- [Authentication Service Internals](authentication-service.md) -- KeyPackage and hybrid key storage
- [Key Lifecycle and Zeroization](../cryptography/key-lifecycle.md) -- how HPKE keys are created and destroyed

100
docs/src/introduction.md Normal file
View File

@@ -0,0 +1,100 @@
# Introduction
**quicnprotochat** is a research-oriented, end-to-end encrypted group messaging system written in Rust. It layers the Messaging Layer Security protocol (MLS, [RFC 9420](https://datatracker.ietf.org/doc/rfc9420/)) on top of QUIC + TLS 1.3 transport (via [quinn](https://github.com/quinn-rs/quinn) and [rustls](https://github.com/rustls/rustls)), with all service RPCs and wire messages framed using [Cap'n Proto](https://capnproto.org/). The project exists to explore how modern transport encryption (QUIC), a formally specified group key agreement protocol (MLS), and a zero-copy serialisation format (Cap'n Proto) compose in practice -- and to provide a readable, auditable reference implementation for security researchers, protocol designers, and Rust developers who want to study or extend the design.
---
## Protocol stack
```
┌─────────────────────────────────────────────┐
│ Application / MLS ciphertext │ <- group key ratchet (RFC 9420)
├─────────────────────────────────────────────┤
│ Cap'n Proto RPC │ <- typed, schema-versioned framing
├─────────────────────────────────────────────┤
│ QUIC + TLS 1.3 (quinn/rustls) │ <- server auth + transport secrecy
└─────────────────────────────────────────────┘
```
Each layer addresses a distinct concern:
1. **QUIC + TLS 1.3** provides authenticated, confidential transport with 0-RTT connection establishment and multiplexed streams. The server presents a TLS 1.3 certificate (self-signed by default); the client verifies it against a local trust anchor. ALPN negotiation uses the token `b"capnp"`.
2. **Cap'n Proto RPC** defines the wire schema for all service operations (KeyPackage upload/fetch, message enqueue/fetch, health probes). Schemas live in `schemas/*.capnp` and are compiled to Rust at build time. Because Cap'n Proto uses a pointer-based layout, messages can be read in place without a separate decoding step. quicnprotochat currently uses the standard (unpacked) wire encoding rather than the optional packed encoding, for simplicity.
3. **MLS (RFC 9420)** provides the group key agreement layer. Each participant holds an Ed25519 identity keypair and generates single-use HPKE KeyPackages. The MLS epoch ratchet delivers forward secrecy and post-compromise security: compromising a member's state at epoch *n* does not reveal plaintext from epochs *< n* (forward secrecy) or *> n+1* (post-compromise security, once the compromised member updates).
---
## Security properties
| Property | Mechanism |
|---|---|
| Transport confidentiality | TLS 1.3 over QUIC (`rustls` with `TLS13` only) |
| Transport authentication | TLS 1.3 server certificate (self-signed, SANs: `localhost`, `127.0.0.1`, `::1`) |
| Group key agreement | `MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519` |
| Post-compromise security (PCS) | MLS epoch ratchet -- each Commit advances the key schedule |
| Identity | Ed25519 (`ed25519-dalek`); public key used as MLS `BasicCredential` |
| Framing | Cap'n Proto (unpacked wire format, schema-versioned) |
For a deeper discussion of the cryptographic guarantees, threat model, and known gaps, see:
- [Forward Secrecy](cryptography/forward-secrecy.md)
- [Post-Compromise Security](cryptography/post-compromise-security.md)
- [Threat Model](cryptography/threat-model.md)
---
## Who is this for?
**Security researchers** studying how MLS composes with QUIC transport and Cap'n Proto framing. The codebase is intentionally small (four crates, ~2 500 lines of non-generated Rust) so that every cryptographic boundary is auditable.
**Protocol designers** evaluating MLS deployment patterns. quicnprotochat implements a concrete Authentication Service (AS) and Delivery Service (DS) pair, demonstrating single-use KeyPackage lifecycle, Welcome routing, and epoch advancement in a live system.
**Rust developers** looking for a working example of:
- `quinn` + `rustls` server/client setup with self-signed certificates
- `capnp-rpc` over QUIC bidirectional streams (including the `!Send` / `LocalSet` constraint)
- `openmls` group creation, member addition, and application message encryption
- `zeroize`-on-drop key material handling
---
## Quick links
| Section | What you will find |
|---|---|
| [Prerequisites](getting-started/prerequisites.md) | Toolchain and system dependencies |
| [Building from Source](getting-started/building.md) | `cargo build`, Cap'n Proto codegen, troubleshooting |
| [Running the Server](getting-started/running-the-server.md) | Server startup, configuration, TLS cert generation |
| [Running the Client](getting-started/running-the-client.md) | All CLI subcommands with examples |
| [Docker Deployment](getting-started/docker.md) | `docker compose up`, multi-stage build |
| [Demo Walkthrough](getting-started/demo-walkthrough.md) | Step-by-step Alice-and-Bob narrative with sequence diagram |
| [Architecture Overview](architecture/overview.md) | Crate boundaries, service architecture, data flow |
| [Protocol Layers](protocol-layers/overview.md) | Deep dives into QUIC/TLS, Cap'n Proto, MLS, Hybrid KEM |
| [Wire Format Reference](wire-format/overview.md) | Cap'n Proto schema documentation |
| [Cryptography](cryptography/overview.md) | Identity keys, key lifecycle, forward secrecy, PCS, threat model |
| [Design Rationale](design-rationale/overview.md) | ADRs and "why not Signal/Matrix" comparison |
| [Roadmap](roadmap/milestones.md) | Milestone tracker and future research directions |
---
## Current status
quicnprotochat is a **proof of concept**. It has not been audited by a third party.
Known limitations:
- The server uses a **self-signed TLS certificate** by default. No certificate pinning or CA-based server identity is enforced.
- MLS credentials use `CredentialType::Basic` (raw public key). A production system would bind credentials to a certificate authority or use X.509 certificates.
- The Delivery Service performs **no authentication** of the `recipientKey` field -- anyone who knows a recipient's public key can enqueue messages for them. Access control is a future milestone.
- The HPKE init private key generated during `register-state` is held in process memory (or on disk when a persistent key store is used). With an in-memory key store, if the process exits before the corresponding Welcome is consumed, `join` will fail because the private key is lost.
- Group membership is currently limited to two-party groups in practice. Multi-party Commit fan-out is planned for milestone M5.
For the full milestone tracker, see [Milestones](roadmap/milestones.md).
---
## License
quicnprotochat is released under the **MIT** license. See `LICENSE` in the repository root.

View File

@@ -0,0 +1,278 @@
# Cap'n Proto Serialisation and RPC
quicnprotochat uses [Cap'n Proto](https://capnproto.org/) for both message serialisation and remote procedure calls. The serialisation layer encodes structured messages (Envelopes, Auth tokens, delivery payloads) into a compact binary format. The RPC layer provides the client-server interface for the Authentication Service, Delivery Service, and health checks -- all exposed through a single `NodeService` interface.
This page covers why Cap'n Proto was chosen, how schemas are compiled, the owned `ParsedEnvelope` type, serialisation helpers, and ALPN integration with QUIC.
## Why Cap'n Proto
Several serialisation formats were considered. The table below summarises the trade-offs:
| Format | Zero-copy reads | Schema enforcement | Built-in RPC | Canonical bytes for signing |
|---|---|---|---|---|
| **Cap'n Proto** | Yes | Yes (`.capnp` schemas) | Yes (`capnp-rpc`) | Yes (canonical serialisation mode) |
| Protocol Buffers | No (requires deserialisation) | Yes (`.proto` schemas) | Yes (`tonic`/gRPC) | No (non-deterministic field ordering) |
| MessagePack | No | No (untyped) | No | No |
| FlatBuffers | Yes | Yes (`.fbs` schemas) | No built-in RPC | Partial |
Cap'n Proto was selected for the following reasons:
1. **Zero-copy reads**: Cap'n Proto messages can be read directly from the wire buffer without deserialisation. The `Reader` type is a thin pointer into the original bytes. This eliminates allocation and copying on the hot path (message routing in the Delivery Service).
2. **Schema-enforced types**: All messages are defined in `.capnp` schema files. The compiler (`capnpc`) generates type-safe Rust code that prevents mismatched field types at compile time. This is especially valuable for a security-sensitive protocol where a type confusion bug could be exploitable.
3. **Canonical serialisation**: Cap'n Proto can produce deterministic byte representations of messages. This is critical for MLS, where Commits and KeyPackages must be signed -- the signature must cover exactly the same bytes that the verifier will see.
4. **Built-in async RPC**: The `capnp-rpc` crate provides a capability-based RPC system with promise pipelining. quicnprotochat uses it for the `NodeService` interface (KeyPackage upload/fetch, message enqueue/fetch, health checks, hybrid key operations). This avoids the need to hand-roll a request/response protocol.
5. **Compact wire format**: Cap'n Proto's wire format is more compact than JSON or XML and comparable to Protocol Buffers, with the advantage of no decode step.
## Schema compilation flow
Cap'n Proto schemas live in the workspace-root `schemas/` directory:
```text
schemas/
envelope.capnp -- Top-level wire message (MsgType enum + payload)
auth.capnp -- AuthenticationService RPC interface (legacy, pre-M3)
delivery.capnp -- DeliveryService RPC interface (legacy, pre-M3)
node.capnp -- Unified NodeService RPC interface (M3+)
```
### build.rs
The `quicnprotochat-proto` crate compiles these schemas at build time via `build.rs`:
```rust
capnpc::CompilerCommand::new()
.src_prefix(&schemas_dir)
.file(schemas_dir.join("envelope.capnp"))
.file(schemas_dir.join("auth.capnp"))
.file(schemas_dir.join("delivery.capnp"))
.file(schemas_dir.join("node.capnp"))
.run()
.expect("Cap'n Proto schema compilation failed.");
```
Key details:
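- **`src_prefix`**: Set to `schemas/` so that this prefix is stripped from schema paths when computing output file names, keeping generated module names (and references between schemas) free of the directory prefix.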
- **Output location**: Generated Rust source is written to `$OUT_DIR` (Cargo's build directory). The filenames follow the convention `{schema_name}_capnp.rs`.
- **Rerun triggers**: `cargo:rerun-if-changed` directives ensure the build script re-runs whenever any `.capnp` file changes.
- **Prerequisite**: The `capnp` CLI binary must be installed on the build machine (`apt-get install capnproto` or `brew install capnp`).
### Generated module inclusion
The generated code is spliced into the `quicnprotochat-proto` crate via `include!` macros:
```rust
pub mod envelope_capnp {
include!(concat!(env!("OUT_DIR"), "/envelope_capnp.rs"));
}
pub mod auth_capnp {
include!(concat!(env!("OUT_DIR"), "/auth_capnp.rs"));
}
pub mod delivery_capnp {
include!(concat!(env!("OUT_DIR"), "/delivery_capnp.rs"));
}
pub mod node_capnp {
include!(concat!(env!("OUT_DIR"), "/node_capnp.rs"));
}
```
Consumers import types from these modules. For example, `node_capnp::node_service::Server` is the trait that the server implements.
## The Envelope schema
The `Envelope` is the top-level wire message for all quicnprotochat traffic. Every frame exchanged between peers (whether over Noise or QUIC) is serialised as an Envelope:
```capnp
struct Envelope {
msgType @0 :MsgType;
groupId @1 :Data; # 32-byte SHA-256 digest of group name
senderId @2 :Data; # 32-byte SHA-256 digest of Ed25519 pubkey
payload @3 :Data; # Opaque payload (MLS blob or control data)
timestampMs @4 :UInt64; # Unix epoch milliseconds
enum MsgType {
ping @0;
pong @1;
keyPackageUpload @2;
keyPackageFetch @3;
keyPackageResponse @4;
mlsWelcome @5;
mlsCommit @6;
mlsApplication @7;
error @8;
}
}
```
The Delivery Service routes by `(groupId, msgType)` without inspecting `payload`. This design keeps the DS MLS-unaware -- see [ADR-004: MLS-Unaware Delivery Service](../design-rationale/adr-004-mls-unaware-ds.md).
## The `ParsedEnvelope` owned type
Cap'n Proto readers (`envelope_capnp::envelope::Reader`) borrow from the original byte buffer and cannot be sent across async task boundaries (`!Send`). This is a fundamental limitation of zero-copy reads.
To bridge this gap, `quicnprotochat-proto` defines `ParsedEnvelope`:
```rust
pub struct ParsedEnvelope {
pub msg_type: MsgType,
pub group_id: Vec<u8>,
pub sender_id: Vec<u8>,
pub payload: Vec<u8>,
pub timestamp_ms: u64,
}
```
`ParsedEnvelope` eagerly copies all byte fields out of the Cap'n Proto reader, making the type `Send + 'static`. This allows it to cross Tokio task boundaries, be stored in queues, and be passed through channels.
The trade-off is clear: `ParsedEnvelope` allocates and copies, defeating the zero-copy benefit. This is acceptable because:
1. The copying happens once per message at the protocol boundary.
2. Application-layer code (MLS encryption/decryption, routing) needs owned data anyway.
3. The performance-critical path (Delivery Service routing) works with opaque `Vec<u8>` payloads, not parsed Cap'n Proto readers.
### Invariants
- `group_id` and `sender_id` are either empty (for control messages like Ping/Pong) or exactly 32 bytes (SHA-256 digest).
- `payload` is empty for Ping and Pong; non-empty for all MLS variants.
## Serialisation helpers
Two functions handle the conversion between `ParsedEnvelope` and wire bytes:
### `build_envelope`
```rust
pub fn build_envelope(env: &ParsedEnvelope) -> Result<Vec<u8>, capnp::Error>
```
Serialises a `ParsedEnvelope` to unpacked Cap'n Proto wire bytes. The output includes the Cap'n Proto segment table header followed by the message data. These bytes are suitable as the body of a length-prefixed frame (the `LengthPrefixedCodec` in `quicnprotochat-core` prepends the 4-byte length) or as a payload within a QUIC stream.
Internally, it builds a `capnp::message::Builder`, populates an `Envelope` root, and serialises via `capnp::serialize::write_message`.
### `parse_envelope`
```rust
pub fn parse_envelope(bytes: &[u8]) -> Result<ParsedEnvelope, capnp::Error>
```
Deserialises unpacked Cap'n Proto wire bytes into a `ParsedEnvelope`. All data is copied out of the reader before returning, so the input slice is not retained.
It returns `capnp::Error` if:
- The bytes are not valid Cap'n Proto wire format.
- The `msgType` discriminant is not present in the current schema (forward-compatibility guard).
### Low-level helpers
Two additional functions provide raw byte-to-message conversions:
```rust
pub fn to_bytes<A: Allocator>(msg: &Builder<A>) -> Result<Vec<u8>, capnp::Error>
pub fn from_bytes(bytes: &[u8]) -> Result<Reader<OwnedSegments>, capnp::Error>
```
`from_bytes` uses `ReaderOptions::new()` with default limits:
- **Traversal limit**: 64 MiB (8 * 1024 * 1024 words)
- **Nesting limit**: 64 levels
These defaults are reasonable for trusted data. For untrusted data from the network, callers should consider tightening `traversal_limit_in_words` (and, where appropriate, `nesting_limit`) to prevent denial-of-service via excessively large or deeply nested messages. The server enforces its own size limits: 5 MB per payload (`MAX_PAYLOAD_BYTES`) and 1 MB per KeyPackage (`MAX_KEYPACKAGE_BYTES`).
## The NodeService RPC interface
The M3 unified RPC interface is defined in `schemas/node.capnp`:
```capnp
interface NodeService {
uploadKeyPackage @0 (identityKey :Data, package :Data, auth :Auth)
-> (fingerprint :Data);
fetchKeyPackage @1 (identityKey :Data, auth :Auth) -> (package :Data);
enqueue @2 (recipientKey :Data, payload :Data,
channelId :Data, version :UInt16, auth :Auth) -> ();
fetch @3 (recipientKey :Data, channelId :Data,
version :UInt16, auth :Auth) -> (payloads :List(Data));
fetchWait @4 (recipientKey :Data, channelId :Data,
version :UInt16, timeoutMs :UInt64, auth :Auth)
-> (payloads :List(Data));
health @5 () -> (status :Text);
uploadHybridKey @6 (identityKey :Data, hybridPublicKey :Data) -> ();
fetchHybridKey @7 (identityKey :Data) -> (hybridPublicKey :Data);
}
```
This combines Authentication Service operations (`uploadKeyPackage`, `fetchKeyPackage`), Delivery Service operations (`enqueue`, `fetch`, `fetchWait`), health monitoring (`health`), and hybrid key management (`uploadHybridKey`, `fetchHybridKey`) into a single RPC interface.
### Auth context
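Every KeyPackage and delivery RPC method (`uploadKeyPackage`, `fetchKeyPackage`, `enqueue`, `fetch`, `fetchWait`) accepts an `Auth` struct; `health`, `uploadHybridKey`, and `fetchHybridKey` currently take no `Auth` parameter: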
```capnp
struct Auth {
version @0 :UInt16; # 0 = legacy/none, 1 = token-based auth
accessToken @1 :Data; # opaque bearer token
deviceId @2 :Data; # optional UUID bytes for auditing
}
```
The server validates the `version` field and rejects unknown versions. Token validation is planned for a future milestone. See [Auth, Devices, and Tokens](../roadmap/authz-plan.md).
## ALPN integration
Cap'n Proto RPC rides directly on the QUIC bidirectional stream. The ALPN (Application-Layer Protocol Negotiation) extension in the TLS handshake identifies the protocol:
```rust
tls.alpn_protocols = vec![b"capnp".to_vec()];
```
Both client and server set the ALPN to `b"capnp"`. If the client and server disagree on the ALPN, the TLS handshake fails before any application data is exchanged.
On the QUIC path, the flow is:
```text
Client Server
| |
|── QUIC handshake (TLS 1.3) ────►| ALPN: "capnp"
| |
|── open_bi() ───────────────────►| Bidirectional QUIC stream
| |
|◄─────── capnp-rpc messages ────►| VatNetwork reads/writes on the stream
```
The `tokio-util` compat layer converts Quinn stream types into `futures::AsyncRead + AsyncWrite`, which `capnp-rpc`'s `VatNetwork` expects. See [QUIC + TLS 1.3](quic-tls.md) for the full connection setup.
On the legacy Noise path, the `into_capnp_io()` bridge serves the same purpose -- converting a Noise-encrypted TCP connection into a byte stream for `VatNetwork`. See [Noise\_XX Handshake](noise-xx.md) for details.
## Comparison with alternatives
### vs Protocol Buffers + gRPC
Protocol Buffers require a full deserialisation step to access any field. Cap'n Proto avoids this with zero-copy readers. gRPC requires HTTP/2 framing, which adds overhead on top of QUIC. Cap'n Proto RPC is leaner and maps naturally to a single QUIC stream.
### vs MessagePack
MessagePack is untyped -- there is no schema file, and type errors are caught at runtime. This is unacceptable for a security protocol where a misinterpreted field could be exploitable. MessagePack also has no RPC framework, requiring a hand-rolled request/response protocol.
### vs FlatBuffers
FlatBuffers supports zero-copy reads (like Cap'n Proto) but lacks a built-in RPC framework. The ecosystem and tooling are also less mature for Rust.
## Design constraints of `quicnprotochat-proto`
The `quicnprotochat-proto` crate enforces three design constraints:
1. **No crypto**: Key material never enters this crate. All encryption and signing happens in `quicnprotochat-core`.
2. **No I/O**: Callers own the transport. This crate only converts between bytes and types.
3. **No async**: Pure synchronous data-layer code. Async is the caller's responsibility.
These constraints keep the serialisation layer thin and auditable.
## Further reading
- [Envelope Schema](../wire-format/envelope-schema.md) -- Detailed field-by-field breakdown of the Envelope wire format.
- [NodeService Schema](../wire-format/node-service-schema.md) -- Full RPC interface documentation.
- [Auth Schema](../wire-format/auth-schema.md) -- Auth token structure and versioning.
- [MLS (RFC 9420)](mls.md) -- How MLS messages are carried as opaque payloads inside Cap'n Proto Envelopes.
- [ADR-002: Cap'n Proto over MessagePack](../design-rationale/adr-002-capnproto.md) -- Design rationale for choosing Cap'n Proto.
- [ADR-003: RPC Inside the Noise Tunnel](../design-rationale/adr-003-rpc-inside-noise.md) -- Why RPC runs inside the encrypted transport.

View File

@@ -0,0 +1,281 @@
# Hybrid KEM: X25519 + ML-KEM-768
quicnprotochat implements a hybrid Key Encapsulation Mechanism that combines classical X25519 Diffie-Hellman with post-quantum ML-KEM-768 (FIPS 203). The hybrid construction ensures that the system remains secure even if one of the two components is broken: X25519 protects against failures in ML-KEM, and ML-KEM protects against quantum computers breaking X25519.
The implementation lives in `quicnprotochat-core/src/hybrid_kem.rs`. It is fully implemented and tested but **not yet integrated into the MLS ciphersuite** -- integration is planned for the M5 milestone. Currently, the module can be used as a standalone envelope encryption layer to wrap MLS payloads in an outer post-quantum-resistant encryption before they transit the network.
## Design approach
The hybrid KEM follows the **combiner approach** from [draft-ietf-tls-hybrid-design](https://datatracker.ietf.org/doc/draft-ietf-tls-hybrid-design/). The core idea:
1. Perform both a classical key exchange (X25519) and a post-quantum key encapsulation (ML-KEM-768) against the recipient's public keys.
2. Combine the two shared secrets into a single AEAD key using HKDF.
3. Encrypt the payload with ChaCha20-Poly1305 using the derived key.
This ensures:
- **IND-CCA2 security** if *either* X25519 or ML-KEM-768 is secure.
- No reliance on a single hardness assumption.
- Graceful degradation: if ML-KEM is found to have a flaw, classical X25519 still protects the data.
## Component algorithms
| Component | Algorithm | Size | Security Level |
|---|---|---|---|
| Classical KEM | X25519 ECDH | 32-byte keys, 32-byte shared secret | 128-bit classical |
| Post-quantum KEM | ML-KEM-768 (FIPS 203) | 1184-byte EK, 2400-byte DK, 1088-byte CT, 32-byte SS | NIST Level 3 (128-bit quantum) |
| Key derivation | HKDF-SHA256 | 32-byte output key, 12-byte output nonce | 256-bit PRF security |
| Symmetric encryption | ChaCha20-Poly1305 | 32-byte key, 12-byte nonce, 16-byte tag | 256-bit security |
### ML-KEM-768 constants
These constants are defined in `hybrid_kem.rs` and match FIPS 203:
| Constant | Value | Description |
|---|---|---|
| `MLKEM_EK_LEN` | 1,184 bytes | Encapsulation (public) key size |
| `MLKEM_DK_LEN` | 2,400 bytes | Decapsulation (private) key size |
| `MLKEM_CT_LEN` | 1,088 bytes | Ciphertext size |
| Shared secret | 32 bytes | Output of encapsulate/decapsulate |
ML-KEM-768 was chosen over ML-KEM-512 (NIST Level 1) for a stronger security margin and over ML-KEM-1024 (NIST Level 5) because the additional key/ciphertext sizes are not justified for 128-bit target security.
## Wire format
Every hybrid-encrypted payload is packaged as a self-describing envelope:
```text
┌─────────┬──────────────────┬──────────────────┬──────────────┬──────────────────┐
│ version │ x25519_eph_pk │ mlkem_ct │ aead_nonce │ aead_ct │
│ (1 B) │ (32 B) │ (1088 B) │ (12 B) │ (variable) │
└─────────┴──────────────────┴──────────────────┴──────────────┴──────────────────┘
```
| Field | Offset | Size | Description |
|---|---|---|---|
| `version` | 0 | 1 byte | Envelope version. Currently `0x01`. |
| `x25519_eph_pk` | 1 | 32 bytes | Ephemeral X25519 public key (generated fresh per encryption). |
| `mlkem_ct` | 33 | 1,088 bytes | ML-KEM-768 ciphertext (encapsulation of the PQ shared secret). |
| `aead_nonce` | 1,121 | 12 bytes | ChaCha20-Poly1305 nonce (derived from HKDF). |
| `aead_ct` | 1,133 | variable | ChaCha20-Poly1305 ciphertext + 16-byte authentication tag. |
The total header (`HEADER_LEN`) is 1 + 32 + 1088 + 12 = **1,133 bytes**. The minimum valid envelope is `HEADER_LEN + 16` = 1,149 bytes (16 bytes for the AEAD tag on an empty plaintext).
The `version` byte enables future format evolution. Decryption rejects any version other than `0x01` with `HybridKemError::UnsupportedVersion`.
## Key derivation
The two shared secrets are combined via HKDF-SHA256 with domain separation:
```text
ikm = X25519_shared_secret(32 bytes) || ML-KEM_shared_secret(32 bytes)
salt = [] (empty)
key = HKDF-SHA256(salt, ikm, info="quicnprotochat-hybrid-v1", L=32)
nonce = HKDF-SHA256(salt, ikm, info="quicnprotochat-hybrid-nonce-v1", L=12)
```
The implementation in `derive_aead_material()`:
```rust
fn derive_aead_material(x25519_ss: &[u8], mlkem_ss: &[u8]) -> (Key, Nonce) {
let mut ikm = Zeroizing::new(vec![0u8; x25519_ss.len() + mlkem_ss.len()]);
ikm[..x25519_ss.len()].copy_from_slice(x25519_ss);
ikm[x25519_ss.len()..].copy_from_slice(mlkem_ss);
let hk = Hkdf::<Sha256>::new(None, &ikm);
let mut key_bytes = Zeroizing::new([0u8; 32]);
hk.expand(b"quicnprotochat-hybrid-v1", &mut *key_bytes).unwrap();
let mut nonce_bytes = [0u8; 12];
hk.expand(b"quicnprotochat-hybrid-nonce-v1", &mut nonce_bytes).unwrap();
(*Key::from_slice(&*key_bytes), *Nonce::from_slice(&nonce_bytes))
}
```
Key design decisions:
- **Concatenation order**: X25519 shared secret first, ML-KEM shared secret second. This is consistent with the draft-ietf-tls-hybrid-design convention.
- **Separate info strings**: The key and nonce are derived with different HKDF info strings to ensure domain separation. Using the same info string for both would be a cryptographic error.
- **Zeroization**: The concatenated IKM and the derived key bytes are wrapped in `Zeroizing` to ensure they are cleared from memory when dropped.
- **Empty salt**: HKDF is used in extract-then-expand mode with no salt. The IKM already has high entropy from both key-establishment operations (the X25519 DH and the ML-KEM encapsulation).
## `HybridKeypair`
Each peer holds a `HybridKeypair` combining classical and post-quantum key material:
```rust
pub struct HybridKeypair {
x25519_sk: StaticSecret, // 32 bytes
x25519_pk: X25519Public, // 32 bytes
mlkem_dk: DecapsulationKey<MlKem768Params>, // 2400 bytes
mlkem_ek: EncapsulationKey<MlKem768Params>, // 1184 bytes
}
```
### Generation
```rust
pub fn generate() -> Self {
let x25519_sk = StaticSecret::random_from_rng(OsRng);
let x25519_pk = X25519Public::from(&x25519_sk);
let (mlkem_dk, mlkem_ek) = MlKem768::generate(&mut OsRng);
// ...
}
```
Both key pairs are generated from the OS CSPRNG (`OsRng`). The X25519 key uses `x25519-dalek`'s `StaticSecret` (not `EphemeralSecret`) because the keypair is long-lived and must be stored.
### Serialisation
For persistence, `HybridKeypairBytes` provides a serialisable form:
```rust
pub struct HybridKeypairBytes {
pub x25519_sk: [u8; 32],
pub mlkem_dk: Vec<u8>, // 2400 bytes
pub mlkem_ek: Vec<u8>, // 1184 bytes
}
```
Round-trip: `keypair.to_bytes()` serialises, `HybridKeypair::from_bytes(&bytes)` reconstructs. The ML-KEM keys are reconstructed using `DecapsulationKey::from_bytes()` and `EncapsulationKey::from_bytes()`, which accept `Array` types converted from slices.
### Public key extraction
The public portion is extracted for distribution to peers:
```rust
pub struct HybridPublicKey {
pub x25519_pk: [u8; 32],
pub mlkem_ek: Vec<u8>, // 1184 bytes
}
```
`HybridPublicKey` can be serialised to a single byte blob: `x25519_pk(32) || mlkem_ek(1184)` = 1,216 bytes total. This is uploaded to the server via the `uploadHybridKey` RPC and fetched by peers via `fetchHybridKey`.
## Encryption flow: `hybrid_encrypt`
```rust
pub fn hybrid_encrypt(
recipient_pk: &HybridPublicKey,
plaintext: &[u8],
) -> Result<Vec<u8>, HybridKemError>
```
Step-by-step:
1. **Ephemeral X25519 DH**: Generate a fresh `EphemeralSecret`, compute the X25519 shared secret with the recipient's static public key. The ephemeral secret is consumed (moved) by `diffie_hellman()` and cannot be reused.
2. **ML-KEM-768 encapsulation**: Reconstruct the recipient's `EncapsulationKey` from the public key bytes, then call `encapsulate(&mut OsRng)`. This produces a ciphertext (1,088 bytes) and a shared secret (32 bytes).
3. **Key derivation**: Call `derive_aead_material()` with both shared secrets to produce a 32-byte ChaCha20-Poly1305 key and a 12-byte nonce.
4. **AEAD encryption**: Encrypt the plaintext with `ChaCha20Poly1305::encrypt()`. The output includes the 16-byte authentication tag.
5. **Envelope assembly**: Concatenate `version || x25519_eph_pk || mlkem_ct || nonce || aead_ct`.
## Decryption flow: `hybrid_decrypt`
```rust
pub fn hybrid_decrypt(
keypair: &HybridKeypair,
envelope: &[u8],
) -> Result<Vec<u8>, HybridKemError>
```
Step-by-step:
1. **Envelope parsing**: Verify minimum length (`HEADER_LEN + 16`), check version byte (`0x01`), then extract the five fields by offset.
2. **X25519 DH**: Compute the shared secret using the recipient's static private key (`keypair.x25519_sk`) and the sender's ephemeral public key from the envelope.
3. **ML-KEM-768 decapsulation**: Convert the ciphertext bytes to the `Array` type expected by `DecapsulationKey::decapsulate()`, then decapsulate to recover the shared secret.
4. **Key derivation**: Same `derive_aead_material()` call as encryption, producing the same key and nonce. AEAD decryption uses the nonce carried in the envelope; for an untampered envelope this is identical to the derived nonce, because the derivation is deterministic from the same shared secrets.
5. **AEAD decryption**: Decrypt and authenticate the ciphertext with `ChaCha20Poly1305::decrypt()`.
## Error handling
The `HybridKemError` enum covers all failure modes:
| Variant | Meaning |
|---|---|
| `EncryptionFailed` | AEAD encryption failed (should not happen with valid inputs) |
| `DecryptionFailed` | AEAD decryption failed -- wrong recipient key or tampered ciphertext |
| `UnsupportedVersion(u8)` | Envelope version byte is not `0x01` |
| `TooShort(usize)` | Envelope is shorter than `HEADER_LEN + 16` bytes |
| `InvalidMlKemKey` | ML-KEM encapsulation key bytes are malformed |
| `MlKemDecapsFailed` | ML-KEM decapsulation failed -- tampered ciphertext or wrong key |
The tests in `hybrid_kem.rs` verify:
- Round-trip encrypt/decrypt with correct keys.
- Decryption with wrong key fails (`DecryptionFailed`).
- Tampered AEAD ciphertext fails (`DecryptionFailed`).
- Tampered ML-KEM ciphertext fails (either `MlKemDecapsFailed` or `DecryptionFailed`).
- Tampered X25519 ephemeral public key fails (`DecryptionFailed`).
- Unsupported version is rejected.
- Too-short envelope is rejected.
- Keypair and public key serialisation round-trip.
- Large payloads (50 KB) round-trip successfully.
## Current status and roadmap
The hybrid KEM module is:
- **Implemented**: All types, encryption, decryption, serialisation, and key management are complete.
- **Tested**: Comprehensive unit tests cover all success and failure paths.
- **Server-supported**: The `NodeService` RPC interface includes `uploadHybridKey` and `fetchHybridKey` methods. The server stores hybrid public keys in its `FileBackedStore`.
- **Not yet integrated into MLS**: The MLS ciphersuite (`MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519`) uses classical DHKEM(X25519). Replacing it with a hybrid KEM requires either:
- A custom openmls ciphersuite that uses the hybrid KEM for HPKE (complex, requires forking openmls).
- An outer encryption layer that wraps MLS messages in a hybrid envelope before delivery (simpler, less tightly integrated).
The M5 milestone will integrate the hybrid KEM, likely as an outer encryption layer. Until then, MLS application data is protected by classical X25519 ECDH (128-bit security against classical computers, vulnerable to quantum computers).
The post-quantum gap in the transport layer ([QUIC + TLS 1.3](quic-tls.md) and [Noise\_XX](noise-xx.md)) is a separate concern tracked in [ADR-006: PQ Gap in Noise Transport](../design-rationale/adr-006-pq-gap.md).
## Security analysis
### Hybrid security guarantee
The combiner construction ensures that an attacker must break *both* X25519 and ML-KEM-768 to recover the plaintext. Specifically:
- A **classical attacker** cannot break X25519 (ECDLP is hard on Curve25519) and therefore cannot derive the AEAD key, regardless of whether they can break ML-KEM.
- A **quantum attacker** with a cryptographically relevant quantum computer could break X25519 via Shor's algorithm but cannot break ML-KEM-768 (based on the Module-LWE problem, believed to be quantum-resistant).
- An attacker who discovers a **flaw in ML-KEM** still faces X25519, which provides 128-bit classical security.
### Key reuse
The X25519 component of the hybrid keypair is a `StaticSecret` (long-lived), not an `EphemeralSecret`. This is safe because:
- Each encryption uses a fresh `EphemeralSecret` for the sender's X25519 contribution.
- The static secret is only used in the DH computation with the ephemeral public key; it never appears in the wire format.
- The ML-KEM encapsulation also generates fresh randomness per encryption.
### Nonce handling
The AEAD nonce is derived deterministically from the shared secrets via HKDF. Since each encryption uses a fresh ephemeral X25519 key and fresh ML-KEM randomness, the shared secrets (and therefore the derived key and nonce) are unique per encryption with overwhelming probability. Reusing a nonce under the same AEAD key would require both:
- The same ephemeral X25519 key (probability 2^{-256}).
- The same ML-KEM encapsulation randomness (probability 2^{-256}).
## Crate dependencies
| Crate | Version | Role |
|---|---|---|
| `ml-kem` | 0.2 | ML-KEM-768 (FIPS 203) implementation |
| `x25519-dalek` | 2 | X25519 ECDH (with `static_secrets` feature) |
| `chacha20poly1305` | 0.10 | AEAD symmetric encryption |
| `hkdf` | 0.12 | HKDF-SHA256 key derivation |
| `sha2` | 0.10 | SHA-256 (used by HKDF) |
| `zeroize` | 1 | Secure memory clearing for key material |
| `rand` | 0.8 | `OsRng` for CSPRNG |
| `serde` | 1 | Serialisation of keypair and public key types |
## Further reading
- [Post-Quantum Readiness](../cryptography/post-quantum-readiness.md) -- Broader discussion of quicnprotochat's PQ strategy.
- [MLS (RFC 9420)](mls.md) -- The MLS layer that the hybrid KEM will wrap.
- [Key Lifecycle and Zeroization](../cryptography/key-lifecycle.md) -- How hybrid key material is managed and cleared.
- [ADR-006: PQ Gap in Noise Transport](../design-rationale/adr-006-pq-gap.md) -- The accepted PQ gap in the transport layers.
- [Threat Model](../cryptography/threat-model.md) -- Where hybrid KEM fits in the overall threat model.
- [Milestone Tracker](../roadmap/milestones.md) -- M5 milestone for hybrid KEM integration into MLS.

View File

@@ -0,0 +1,420 @@
# MLS (RFC 9420)
The Messaging Layer Security protocol (RFC 9420) is the core cryptographic layer in quicnprotochat. It provides authenticated group key agreement with forward secrecy and post-compromise security -- properties that distinguish quicnprotochat from a simple transport-encrypted relay. This is the most detailed page in the Protocol Deep Dives section because MLS is the most complex layer in the stack.
The implementation lives in `quicnprotochat-core/src/group.rs` and `quicnprotochat-core/src/keystore.rs`, using the `openmls 0.5` crate.
## Background: what problem MLS solves
Before MLS, group messaging systems had two main approaches:
1. **Pairwise encryption (Signal/Double Ratchet)**: Each pair of group members maintains an independent encrypted session. A message to a group of *n* members requires *n - 1* separate encryptions. Adding or removing a member requires *O(n)* operations by each member. The total work for a group operation is *O(n^2)*.
2. **Server-side fan-out with shared key**: All members share a single group key. The server decrypts and re-encrypts for each member. This is not end-to-end encrypted -- the server sees plaintext.
MLS takes a fundamentally different approach: it uses a **ratchet tree** (a binary tree of Diffie-Hellman key pairs) to derive group keys. This gives:
- **O(log n) scaling**: A group operation (add, remove, update) requires only *O(log n)* DH operations, one per level of the tree, regardless of group size.
- **Forward secrecy**: Each epoch uses a fresh key derived from the ratchet tree. Compromising the current key does not reveal past messages.
- **Post-compromise security (PCS)**: After a member's key is compromised, a single Update Commit operation re-randomises the compromised node's path in the tree, restoring confidentiality for all subsequent messages.
- **End-to-end encryption**: The server (Delivery Service) never sees plaintext. It routes opaque MLS blobs by recipient key without parsing them.
## Ciphersuite
quicnprotochat uses:
```text
MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519
```
| Component | Algorithm | Purpose |
|---|---|---|
| **HPKE KEM** | DHKEM(X25519, HKDF-SHA256) | Key encapsulation for Welcome messages and tree operations |
| **AEAD** | AES-128-GCM | Symmetric encryption of application messages |
| **Hash** | SHA-256 | Key derivation, transcript hashing, tree hashing |
| **Signature** | Ed25519 | Credential binding, Commit signing, KeyPackage signing |
This ciphersuite provides 128-bit classical security. Post-quantum protection is handled by the [Hybrid KEM](hybrid-kem.md) layer wrapping MLS payloads at the transport level (planned for M5).
## The `GroupMember` state machine
The central type is `GroupMember`, defined in `quicnprotochat-core/src/group.rs`. It wraps an openmls `MlsGroup`, a persistent crypto backend (`StoreCrypto`), and the user's long-term Ed25519 identity keypair.
### Lifecycle diagram
```text
GroupMember::new(identity)
|
├── generate_key_package() → TLS-encoded KeyPackage bytes
| (upload to Authentication Service)
|
├── create_group(group_id) → Epoch 0; caller is sole member
| |
| └── add_member(kp_bytes) → (commit_bytes, welcome_bytes)
| | merge_pending_commit() called internally
| |
| ├── [commit_bytes → existing members via DS]
| └── [welcome_bytes → new member via DS]
|
└── join_group(welcome_bytes) → Join via Welcome; epoch matches inviter
|
├── send_message(plaintext) → MLS PrivateMessage bytes
|
└── receive_message(bytes) → Some(plaintext) for Application messages
None for Commits (state updated internally)
None for Proposals (stored for later Commit)
```
### Construction
```rust
pub fn new(identity: Arc<IdentityKeypair>) -> Self
```
Creates a new `GroupMember` with:
- A fresh `StoreCrypto` backend using an ephemeral (in-memory) key store.
- The provided Ed25519 identity keypair (used as the MLS `Signer`).
- No active group (`self.group = None`).
For state persistence across restarts, use:
```rust
pub fn new_with_state(
identity: Arc<IdentityKeypair>,
key_store: DiskKeyStore,
group: Option<MlsGroup>,
) -> Self
```
This constructor accepts a pre-existing `DiskKeyStore` (loaded from disk) and an optional previously persisted `MlsGroup` (already deserialised by the caller). The `MlsGroupConfig` is rebuilt with `use_ratchet_tree_extension(true)`.
### MLS group configuration
The group configuration is built once at construction time:
```rust
let config = MlsGroupConfig::builder()
.use_ratchet_tree_extension(true)
.build();
```
The critical setting is `use_ratchet_tree_extension(true)`: this embeds the full ratchet tree inside Welcome messages so that new members can reconstruct the group state without a separate tree-fetching step. The trade-off is larger Welcome messages, but this simplifies the protocol by eliminating a round-trip to a tree distribution service.
## Key operations
### `generate_key_package()`
```rust
pub fn generate_key_package(&mut self) -> Result<Vec<u8>, CoreError>
```
Generates a fresh, single-use MLS KeyPackage and returns it as TLS-encoded bytes.
**What happens internally:**
1. A `CredentialWithKey` is created from the identity keypair. The credential type is `Basic` -- the credential body is the raw Ed25519 public key bytes, and the `signature_key` field is the same public key.
2. `KeyPackage::builder().build()` is called with:
- `CryptoConfig::with_default_version(CIPHERSUITE)` -- specifies the MLS ciphersuite.
- `&self.backend` -- the `StoreCrypto` provider. During build, openmls generates an HPKE init keypair and stores the private key in the backend's key store.
- `self.identity.as_ref()` -- the `Signer` (Ed25519 private key) used to sign the KeyPackage.
- The `CredentialWithKey` binding the credential to the signature key.
3. The KeyPackage is serialised via `tls_serialize_detached()` (TLS presentation language encoding, as specified by RFC 9420).
**Critical invariant:** The HPKE init private key is stored in `self.backend`'s key store. The **same `GroupMember` instance** (or one reconstructed with the same `DiskKeyStore`) must later call `join_group()`, because `new_from_welcome()` looks up the init private key by reference to decrypt the Welcome. If a different `GroupMember` instance (with a fresh key store) tries to join, the lookup fails and the Welcome cannot be decrypted.
**Why KeyPackages are single-use:** Each KeyPackage contains a unique HPKE init public key. If the same KeyPackage were used for two different group joins, both Welcome messages would be encrypted to the same init key, so compromising that one private key would expose both joins and weaken forward secrecy. See [ADR-005: Single-Use KeyPackages](../design-rationale/adr-005-single-use-keypackages.md).
### `create_group(group_id)`
```rust
pub fn create_group(&mut self, group_id: &[u8]) -> Result<(), CoreError>
```
Creates a new MLS group at epoch 0 with the caller as the sole member.
**Parameters:**
- `group_id`: Any non-empty byte string. By convention, quicnprotochat uses the SHA-256 digest of a human-readable group name.
**What happens internally:**
1. A `CredentialWithKey` is created (same as `generate_key_package`).
2. `MlsGroup::new_with_group_id()` is called with the backend, signer, config, group ID, and credential.
3. The resulting `MlsGroup` is stored in `self.group`.
After this call, the group exists at epoch 0 with one member. Use `add_member()` to invite additional members.
### `add_member(key_package_bytes)`
```rust
pub fn add_member(
&mut self,
key_package_bytes: &[u8],
) -> Result<(Vec<u8>, Vec<u8>), CoreError>
```
Adds a new member to the group by their TLS-encoded KeyPackage. Returns `(commit_bytes, welcome_bytes)`.
**What happens internally:**
1. **KeyPackage deserialisation and validation**: The raw bytes are deserialised via `KeyPackageIn::tls_deserialize()`. Note the `In` suffix -- openmls 0.5 distinguishes between `KeyPackage` (trusted, locally-generated) and `KeyPackageIn` (untrusted, received from the network). The `validate()` method verifies the Ed25519 signature on the KeyPackage and returns a trusted `KeyPackage`.
```rust
let key_package: KeyPackage =
KeyPackageIn::tls_deserialize(&mut key_package_bytes.as_ref())?
.validate(self.backend.crypto(), ProtocolVersion::Mls10)?;
```
2. **Commit + Welcome creation**: `group.add_members()` produces three outputs:
- `commit_out` (`MlsMessageOut`): A Commit message that existing members process to update their state.
- `welcome_out` (`MlsMessageOut`): A Welcome message that bootstraps the new member into the group.
- `_group_info`: A GroupInfo for external commits (not used here).
3. **Merge pending commit**: `group.merge_pending_commit()` applies the Commit to the local state, advancing the epoch. This is called immediately because the creator of the Commit is also a group member.
4. **Serialisation**: Both `commit_out` and `welcome_out` are serialised to bytes via `.to_bytes()`.
**Caller responsibilities:**
- Send `commit_bytes` to all existing group members via the Delivery Service. (When the creator is the only existing member -- for example, when adding the second party to a two-party group -- the Commit can be discarded, because the creator has already merged it locally.)
- Send `welcome_bytes` to the new member via the Delivery Service.
### `join_group(welcome_bytes)`
```rust
pub fn join_group(&mut self, welcome_bytes: &[u8]) -> Result<(), CoreError>
```
Joins an existing group from a TLS-encoded Welcome message.
**Prerequisites:**
- `generate_key_package()` must have been called on **this same instance** (or one with the same `DiskKeyStore`) so that the HPKE init private key is available in the backend.
**What happens internally:**
1. **Deserialisation**: The bytes are deserialised as `MlsMessageIn`, then the inner body is extracted. The `into_welcome()` method is feature-gated in openmls 0.5, so the implementation uses `msg_in.extract()` with a match on `MlsMessageInBody::Welcome`.
```rust
let welcome = match msg_in.extract() {
MlsMessageInBody::Welcome(w) => w,
_ => return Err(CoreError::Mls("expected a Welcome message".into())),
};
```
2. **Group construction**: `MlsGroup::new_from_welcome()` is called with:
- `&self.backend` -- to look up the HPKE init private key.
- `&self.config` -- group configuration (ratchet tree extension enabled).
- The `Welcome` message.
- `ratchet_tree = None` -- because `use_ratchet_tree_extension = true` means the tree is embedded in the Welcome's `GroupInfo` extension. openmls extracts it automatically.
3. The resulting `MlsGroup` is stored in `self.group`.
### `send_message(plaintext)`
```rust
pub fn send_message(&mut self, plaintext: &[u8]) -> Result<Vec<u8>, CoreError>
```
Encrypts plaintext as an MLS Application message (PrivateMessage variant).
**What happens internally:**
1. `group.create_message()` is called with the backend, signer, and plaintext.
2. The resulting `MlsMessageOut` is serialised to bytes via `.to_bytes()`.
The output is a TLS-encoded MLS message ready for delivery. The Delivery Service treats it as an opaque blob.
### `receive_message(bytes)`
```rust
pub fn receive_message(&mut self, bytes: &[u8]) -> Result<Option<Vec<u8>>, CoreError>
```
Processes an incoming TLS-encoded MLS message.
**Return values:**
- `Ok(Some(plaintext))` -- for Application messages (PrivateMessage). The caller receives the decrypted plaintext.
- `Ok(None)` -- for Commit messages. The group state is updated internally (epoch advances) via `merge_staged_commit()`.
- `Ok(None)` -- for Proposal messages. The proposal is stored via `store_pending_proposal()` for inclusion in a future Commit.
- `Ok(None)` -- for External Join Proposal messages. Also stored as a pending proposal.
**What happens internally:**
1. **Deserialisation**: Bytes are deserialised as `MlsMessageIn`, then extracted as either `PrivateMessage` or `PublicMessage`. The extraction uses manual pattern matching because `into_protocol_message()` is feature-gated in openmls 0.5:
```rust
let protocol_message = match msg_in.extract() {
MlsMessageInBody::PrivateMessage(m) => ProtocolMessage::PrivateMessage(m),
MlsMessageInBody::PublicMessage(m) => ProtocolMessage::PublicMessage(m),
_ => return Err(CoreError::Mls("not a protocol message".into())),
};
```
2. **Processing**: `group.process_message()` decrypts (for PrivateMessage) or verifies (for PublicMessage) the message and returns a `ProcessedMessage`.
3. **Content dispatch**: The `ProcessedMessageContent` is matched:
- `ApplicationMessage`: Plaintext bytes are extracted and returned.
- `StagedCommitMessage`: The staged commit is merged, advancing the epoch.
- `ProposalMessage` / `ExternalJoinProposalMessage`: The proposal is stored for later.
## The `StoreCrypto` backend
The `StoreCrypto` struct (in `quicnprotochat-core/src/keystore.rs`) implements `OpenMlsCryptoProvider`, which openmls requires for all cryptographic operations:
```rust
pub struct StoreCrypto {
crypto: RustCrypto,
key_store: DiskKeyStore,
}
```
It couples two things:
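1. **`RustCrypto`**: The `openmls_rust_crypto` crate's implementation of MLS cryptographic primitives (HPKE, AEAD, hashing, signing). It serves as both the `CryptoProvider` and the `RandProvider` associated type of `OpenMlsCryptoProvider` (that is, it implements the `OpenMlsCrypto` and `OpenMlsRand` traits).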
2. **`DiskKeyStore`**: A key-value store that maps opaque byte keys to serialised MLS entities (HPKE private keys, epoch secrets, etc.). This is the critical piece -- openmls stores HPKE init private keys here during `KeyPackage::builder().build()` and retrieves them during `MlsGroup::new_from_welcome()`.
### Why the backend must persist
This is the most important implementation detail in the entire MLS layer:
When `generate_key_package()` is called, openmls generates an HPKE init keypair and stores the private key in the `DiskKeyStore` under a reference derived from the init public key. When `join_group()` is later called with a Welcome message, `new_from_welcome()` decrypts the Welcome using that stored private key.
**If the `DiskKeyStore` is lost between these two calls, the Welcome cannot be decrypted.**
This means:
- For ephemeral usage (tests, demos), `DiskKeyStore::ephemeral()` (in-memory `HashMap`) works as long as the same `GroupMember` instance is used throughout.
- For persistent usage (real clients), `DiskKeyStore::persistent(path)` must be used. It serialises the `HashMap` to disk via `bincode` on every `store` and `delete` operation.
### DiskKeyStore implementation
```rust
pub struct DiskKeyStore {
path: Option<PathBuf>,
values: RwLock<HashMap<Vec<u8>, Vec<u8>>>,
}
```
- **Ephemeral mode** (`path = None`): Pure in-memory. Fast but not restart-safe.
- **Persistent mode** (`path = Some(path)`): Flushes the entire `HashMap` to disk on every mutation. This is simple but not optimised -- a production system would use an append-only log or embedded database.
The `OpenMlsKeyStore` trait implementation:
- `store()`: Serialises the value via `serde_json`, inserts into the `HashMap`, then flushes to disk.
- `read()`: Deserialises from the `HashMap` via `serde_json`.
- `delete()`: Removes from the `HashMap`, then flushes to disk.
## openmls 0.5 API gotchas
Several openmls 0.5 API patterns are non-obvious and worth documenting:
### `KeyPackageIn` vs `KeyPackage`
openmls 0.5 separates untrusted wire types (`*In` suffix) from validated types. `KeyPackage` only derives `TlsSerialize`; `KeyPackageIn` derives `TlsDeserialize`. To go from bytes to a trusted `KeyPackage`:
```rust
KeyPackageIn::tls_deserialize(&mut bytes.as_ref())?
.validate(backend.crypto(), ProtocolVersion::Mls10)?
```
### Feature-gated methods
Several convenient methods (`into_welcome()`, `into_protocol_message()`) are feature-gated behind openmls feature flags that quicnprotochat does not enable. The workaround is to use `msg_in.extract()` and pattern-match on the `MlsMessageInBody` enum variants.
### MlsGroup is not Send
`MlsGroup` holds internal state that may not be `Send` depending on the crypto backend. In quicnprotochat, `StoreCrypto` uses `RwLock` (which is `Send + Sync`), so `GroupMember` is `Send`. However, all MLS operations must use the same backend instance, so `GroupMember` should not be cloned across tasks.
## Ratchet tree embedding
The ratchet tree is embedded in Welcome messages via the `use_ratchet_tree_extension(true)` configuration. This means:
1. When `add_member()` creates a Welcome, the full ratchet tree is included as a `GroupInfo` extension.
2. When `join_group()` calls `new_from_welcome()` with `ratchet_tree = None`, openmls extracts the tree from the extension automatically.
The trade-off:
- **Pro**: No need for a separate tree distribution service or additional round-trips.
- **Con**: Welcome messages grow with the group size (roughly O(n) for a group of n members, because the embedded ratchet tree has O(n) nodes).
For quicnprotochat's target group sizes (2-100 members), this trade-off is acceptable.
## Wire format
All MLS messages are serialised using TLS presentation language encoding (`tls_codec`). The TLS-encoded byte vectors are what the transport layer (Noise or QUIC) and the Delivery Service see. The DS routes these blobs without parsing them.
The key wire message types:
| MLS Type | Envelope MsgType | Direction |
|---|---|---|
| KeyPackage | `keyPackageUpload` | Client -> AS |
| Welcome | `mlsWelcome` | Inviter -> DS -> Joinee |
| Commit (PublicMessage) | `mlsCommit` | Committer -> DS -> Members |
| Application (PrivateMessage) | `mlsApplication` | Sender -> DS -> Recipient |
## Example: two-party round-trip
The following sequence shows a complete Alice-and-Bob scenario, matching the `two_party_mls_round_trip` test in `group.rs`:
```text
1. Alice = GroupMember::new(alice_identity)
2. Bob = GroupMember::new(bob_identity)
3. bob_kp = Bob.generate_key_package()
→ Bob's backend now holds the HPKE init private key
4. Alice.create_group(b"test-group")
→ Alice is sole member at epoch 0
5. (commit, welcome) = Alice.add_member(&bob_kp)
→ Alice's epoch advances to 1
→ commit is for existing members (Alice already merged it)
→ welcome is for Bob
6. Bob.join_group(&welcome)
→ Bob's backend retrieves the HPKE init key to decrypt the Welcome
→ Bob is now at the same epoch as Alice
7. ct = Alice.send_message(b"hello bob")
→ MLS PrivateMessage encrypted under the group key
8. pt = Bob.receive_message(&ct)
→ pt == Some(b"hello bob")
9. ct = Bob.send_message(b"hello alice")
10. pt = Alice.receive_message(&ct)
→ pt == Some(b"hello alice")
```
## Credential model
quicnprotochat uses MLS `Basic` credentials. The credential body is the raw Ed25519 public key bytes (32 bytes), and the `signature_key` is the same public key:
```rust
let credential = Credential::new(
self.identity.public_key_bytes().to_vec(),
CredentialType::Basic,
)?;
CredentialWithKey {
credential,
signature_key: self.identity.public_key_bytes().to_vec().into(),
}
```
This means the MLS identity *is* the Ed25519 key. There is no X.509 certificate chain or other PKI. The trust model is:
- Peers trust identity keys obtained out-of-band (e.g., verified via QR code, secure channel, or TOFU).
- The Authentication Service stores KeyPackages indexed by Ed25519 public key.
- The Delivery Service routes by Ed25519 public key.
A future milestone may introduce X.509 credentials for integration with external PKI.
## Further reading
- [Forward Secrecy](../cryptography/forward-secrecy.md) -- How MLS epoch ratcheting provides forward secrecy.
- [Post-Compromise Security](../cryptography/post-compromise-security.md) -- How MLS Update Commits restore security after key compromise.
- [Ed25519 Identity Keys](../cryptography/identity-keys.md) -- Key generation and management for the identity keypair used as the MLS Signer.
- [GroupMember Lifecycle](../internals/group-member-lifecycle.md) -- Detailed state transitions and error handling.
- [KeyPackage Exchange Flow](../internals/keypackage-exchange.md) -- How KeyPackages flow through the Authentication Service.
- [ADR-004: MLS-Unaware Delivery Service](../design-rationale/adr-004-mls-unaware-ds.md) -- Why the DS does not parse MLS messages.
- [ADR-005: Single-Use KeyPackages](../design-rationale/adr-005-single-use-keypackages.md) -- Why KeyPackages are single-use.
- [Hybrid KEM: X25519 + ML-KEM-768](hybrid-kem.md) -- Post-quantum outer encryption layer for MLS payloads.
- [Storage Backend](../internals/storage-backend.md) -- DiskKeyStore persistence and the FileBackedStore used by the server.

View File

@@ -0,0 +1,227 @@
# Noise\_XX Handshake
quicnprotochat's M1 milestone used the Noise Protocol Framework for transport-layer encryption between peers over raw TCP. The implementation lives in `quicnprotochat-core/src/noise.rs` and uses the `snow 0.9` crate. Although the M3 architecture migrated client-server communication to [QUIC + TLS 1.3](quic-tls.md), the Noise\_XX transport remains in the codebase for direct peer-to-peer connections and integration testing.
## The Noise\_XX pattern
quicnprotochat uses the `Noise_XX_25519_ChaChaPoly_BLAKE2s` parameter set:
| Component | Choice | Rationale |
|---|---|---|
| **Pattern** | XX | Mutual authentication with no pre-shared keys required |
| **DH** | X25519 | 128-bit security level; fast; widely reviewed |
| **AEAD** | ChaCha20-Poly1305 | Constant-time on all platforms (no AES-NI dependency) |
| **Hash** | BLAKE2s | Faster than SHA-256 in software; 256-bit digest (128-bit collision resistance) |
The XX pattern involves a three-message handshake:
```text
XX handshake (3 messages):
-> e Initiator sends ephemeral public key
<- e, ee, s, es Responder replies: ephemeral, DH(ee), static key, DH(es)
-> s, se Initiator sends static key, DH(se)
```
### Message-by-message breakdown
**Message 1: `-> e` (Initiator to Responder)**
The initiator generates an ephemeral X25519 keypair and sends the public half. At this point, no encryption is active. The ephemeral key is sent in the clear, but it reveals nothing about the initiator's identity.
**Message 2: `<- e, ee, s, es` (Responder to Initiator)**
The responder:
1. Generates its own ephemeral X25519 keypair and sends the public half (`e`).
2. Performs `DH(e_init, e_resp)` to establish a shared secret (`ee`).
3. Sends its static (long-term) X25519 public key encrypted under the `ee` shared secret (`s`).
4. Performs `DH(e_init, s_resp)` for an additional shared secret (`es`).
After this message, the initiator knows the responder's static key and can authenticate it.
**Message 3: `-> s, se` (Initiator to Responder)**
The initiator:
1. Sends its static X25519 public key encrypted under the accumulated handshake secrets (`s`).
2. Performs `DH(s_init, e_resp)` for the final shared secret (`se`).
After this message, both parties have authenticated each other's static keys and derived a symmetric session key for ChaCha20-Poly1305.
### Why XX
The XX pattern was chosen over other Noise patterns for several reasons:
- **No pre-shared keys**: Unlike IK or KK, XX does not require either party to know the other's static key before the handshake. This simplifies bootstrapping -- peers can connect to each other using only a network address.
- **Identity hiding for the initiator**: The initiator's static key is not sent until message 3, after the session is already encrypted. An eavesdropper cannot determine who is initiating the connection.
- **Mutual authentication**: Both parties prove possession of their static private keys through DH operations. Unlike the NK or NX patterns, neither party is anonymous.
- **Responder identity protection (partial)**: The responder's static key is encrypted under the `ee` DH secret in message 2, which hides it from passive eavesdroppers. It is not hidden from an active attacker: anyone who initiates a handshake with their own ephemeral key can compute `ee` and decrypt the responder's static key.
## Implementation
The core type is `NoiseTransport`, defined in `quicnprotochat-core/src/noise.rs`:
```rust
pub struct NoiseTransport {
framed: Framed<TcpStream, LengthPrefixedCodec>,
session: snow::TransportState,
remote_static: Option<Vec<u8>>,
}
```
The struct wraps three components:
1. **`framed`**: A `tokio_util::codec::Framed<TcpStream, LengthPrefixedCodec>` that handles length-prefixed byte framing over TCP. Each frame is prefixed with a 4-byte little-endian length field. See [Length-Prefixed Framing Codec](../wire-format/framing-codec.md) for details on the wire format.
2. **`session`**: A `snow::TransportState` that encrypts and decrypts Noise messages. This is obtained by calling `HandshakeState::into_transport_mode()` after the three-message handshake completes.
3. **`remote_static`**: The remote peer's static X25519 public key (32 bytes), captured from the `HandshakeState` before `into_transport_mode()` consumes it. This is stored explicitly because `snow` does not guarantee that `TransportState::get_remote_static()` survives the mode transition.
### Handshake functions
Two public async functions perform the handshake:
#### `handshake_initiator`
```rust
pub async fn handshake_initiator(
stream: TcpStream,
keypair: &NoiseKeypair,
) -> Result<NoiseTransport, CoreError>
```
The initiator:
1. Parses the Noise parameter string `Noise_XX_25519_ChaChaPoly_BLAKE2s` and builds a `snow::Builder` with the local private key.
2. Wraps the TCP stream in `Framed<TcpStream, LengthPrefixedCodec>`.
3. Allocates a scratch buffer of `NOISE_MAX_MSG` (65,535) bytes.
4. **Message 1** (`-> e`): Calls `session.write_message(&[], &mut buf)` to produce the ephemeral key, then sends it as a length-prefixed frame.
5. **Message 2** (`<- e, ee, s, es`): Receives a frame and calls `session.read_message()` to process it.
6. **Message 3** (`-> s, se`): Calls `session.write_message()` again and sends the result.
7. Zeroizes the scratch buffer (it contained plaintext key material during the handshake).
8. Captures the remote static key via `session.get_remote_static()`.
9. Transitions to transport mode via `session.into_transport_mode()`.
The private key bytes are held in a `Zeroizing` wrapper and dropped immediately after `snow::Builder` clones them internally.
#### `handshake_responder`
```rust
pub async fn handshake_responder(
stream: TcpStream,
keypair: &NoiseKeypair,
) -> Result<NoiseTransport, CoreError>
```
The responder mirrors the initiator but with reversed message directions:
1. Builds a `snow::Builder` with `build_responder()`.
2. **Message 1** (`<- e`): Receives and processes the initiator's ephemeral key.
3. **Message 2** (`-> e, ee, s, es`): Produces and sends the responder's reply.
4. **Message 3** (`<- s, se`): Receives and processes the initiator's static key.
5. Same zeroization, key capture, and mode transition as the initiator.
Both functions return `CoreError::HandshakeIncomplete` if the peer closes the connection mid-handshake, `CoreError::Noise` for any snow error, or `CoreError::Codec` for TCP I/O failures.
### Transport-layer I/O
After the handshake, `NoiseTransport` provides two levels of I/O:
**Frame-level** (raw bytes):
- `send_frame(&mut self, plaintext: &[u8])` -- Encrypts plaintext with ChaCha20-Poly1305 (adding a 16-byte AEAD tag) and sends it as a length-prefixed frame. Rejects payloads exceeding `MAX_PLAINTEXT_LEN` (65,519 bytes -- the Noise maximum of 65,535 minus the 16-byte AEAD tag).
- `recv_frame(&mut self)` -- Receives a length-prefixed frame and decrypts it.
**Envelope-level** (Cap'n Proto messages):
- `send_envelope(&mut self, env: &ParsedEnvelope)` -- Serialises a `ParsedEnvelope` to Cap'n Proto wire bytes via `build_envelope()`, then calls `send_frame()`.
- `recv_envelope(&mut self)` -- Calls `recv_frame()`, then deserialises the bytes via `parse_envelope()`.
## The capnp-rpc bridge: `into_capnp_io()`
The most architecturally interesting method on `NoiseTransport` is `into_capnp_io()`, which bridges the message-oriented Noise transport with the stream-oriented `capnp-rpc` library:
```rust
pub fn into_capnp_io(mut self) -> (ReadHalf<DuplexStream>, WriteHalf<DuplexStream>)
```
### Why this bridge exists
`capnp-rpc`'s `twoparty::VatNetwork` expects `AsyncRead + AsyncWrite` byte streams, but `NoiseTransport` is message-based -- each `send_frame`/`recv_frame` call encrypts/decrypts one discrete Noise message. These two models are incompatible: a byte stream has no inherent message boundaries, while Noise requires them for its AEAD authentication.
### How it works
The bridge uses `tokio::io::duplex` to create an in-process bidirectional byte channel:
```text
  capnp-rpc           duplex pipe           NoiseTransport
┌────────────┐    ┌─────────────────┐    ┌───────────────────┐
│ VatNetwork │◄──►│ app_stream      │◄──►│ bridge task       │◄──► TCP
│ (reads/    │    │ (ReadHalf +     │    │ (tokio::select!)  │
│  writes)   │    │  WriteHalf)     │    │                   │
└────────────┘    └─────────────────┘    └───────────────────┘
```
1. `into_capnp_io()` creates a `tokio::io::duplex(MAX_PLAINTEXT_LEN)` pipe.
2. It spawns a background Tokio task that uses `tokio::select!` to shuttle data bidirectionally:
- **Noise -> app**: Calls `self.recv_frame()`, writes the decrypted plaintext into the pipe.
- **App -> Noise**: Reads bytes from the pipe, calls `self.send_frame()` to encrypt and send them.
3. The returned `(ReadHalf, WriteHalf)` are the application ends of the pipe, suitable for passing to `VatNetwork::new()`.
The bridge task runs until either side of the pipe closes. When `capnp-rpc` drops the pipe halves, the bridge exits cleanly.
The pipe capacity is set to `MAX_PLAINTEXT_LEN` (65,519 bytes) so that one Noise frame's worth of plaintext can be buffered without blocking.
## Remote static key extraction
After a successful handshake, `NoiseTransport::remote_static_public_key()` returns the authenticated remote peer's X25519 public key:
```rust
pub fn remote_static_public_key(&self) -> Option<&[u8]> {
self.remote_static.as_deref()
}
```
This returns `Some(&[u8])` (32 bytes) in all normal cases. `None` would indicate a snow implementation bug where the XX handshake completed without exchanging static keys.
Applications use the remote static key to:
- Verify the peer's identity against a known-good key fingerprint.
- Index the peer in a roster or routing table.
- Derive additional key material for application-layer protocols.
## Post-quantum gap (ADR-006)
The Noise transport uses classical X25519 for all Diffie-Hellman operations. There is currently no standardised PQ-Noise extension in the `snow` crate. This means:
- **Handshake metadata** (ephemeral keys, encrypted static keys) could be harvested by a passive attacker and decrypted later with a quantum computer ("harvest now, decrypt later" attack).
- **Application data** encrypted by MLS is PQ-protected from the M5 milestone onward via the [Hybrid KEM](hybrid-kem.md) layer.
The residual risk (metadata exposure via handshake harvest) is accepted for M1 through M5. On the QUIC + TLS 1.3 path, the same gap exists: TLS 1.3 key exchange uses classical ECDHE. Both gaps are tracked in [ADR-006: PQ Gap in Noise Transport](../design-rationale/adr-006-pq-gap.md).
## Thread safety
`NoiseTransport` is `Send` but not `Clone` or `Sync`. It should be used from a single Tokio task. To share data across tasks, use channels or other message-passing mechanisms. The `Debug` implementation formats the first four bytes of the remote static key as hex for logging:
```rust
NoiseTransport { remote_static: Some("a1b2c3d4…"), .. }
```
## Error handling
All `NoiseTransport` methods return `Result<_, CoreError>` with these variants:
| Error | Meaning |
|---|---|
| `CoreError::HandshakeIncomplete` | Peer closed the connection during the handshake |
| `CoreError::Noise(snow::Error)` | Any Noise operation failed (pattern mismatch, bad DH, decryption failure) |
| `CoreError::Codec(CodecError)` | TCP I/O failure or frame size violation |
| `CoreError::ConnectionClosed` | Peer closed the connection during transport phase |
| `CoreError::MessageTooLarge { size }` | Plaintext exceeds `MAX_PLAINTEXT_LEN` (65,519 bytes) |
| `CoreError::Capnp(capnp::Error)` | Cap'n Proto serialisation error (envelope methods only) |
## Further reading
- [QUIC + TLS 1.3](quic-tls.md) -- The M3+ replacement for Noise\_XX on the client-server path.
- [Cap'n Proto Serialisation and RPC](capn-proto.md) -- The serialisation layer that rides on top of the Noise transport.
- [Length-Prefixed Framing Codec](../wire-format/framing-codec.md) -- The `LengthPrefixedCodec` used by `NoiseTransport`.
- [X25519 Transport Keys](../cryptography/transport-keys.md) -- Key generation and management for Noise static keys.
- [ADR-001: Noise\_XX for Transport Auth](../design-rationale/adr-001-noise-xx.md) -- Design rationale for choosing the XX pattern.
- [ADR-006: PQ Gap in Noise Transport](../design-rationale/adr-006-pq-gap.md) -- Accepted risk of classical-only key exchange.

View File

@@ -0,0 +1,87 @@
# Protocol Layers Overview
quicnprotochat composes five distinct protocol layers into a single security stack. Each layer addresses a specific class of threat and delegates everything else to the layers above or below it. No single layer is sufficient on its own; the composition is what delivers end-to-end confidentiality, mutual authentication, forward secrecy, post-compromise security, and post-quantum resistance.
This page provides a high-level comparison and a suggested reading order. The deep-dive pages that follow contain implementation details drawn directly from the source code.
## Layer comparison
| Layer | Standard / Spec | Crate(s) | Security Properties |
|---|---|---|---|
| **QUIC + TLS 1.3** | RFC 9000, RFC 9001 | `quinn 0.11`, `rustls 0.23` | Transport confidentiality, server authentication, 0-RTT resumption |
| **Noise\_XX** | [Noise Protocol Framework](https://noiseprotocol.org/noise.html) | `snow 0.9` | Mutual authentication, identity hiding, ChaCha20-Poly1305 session encryption |
| **Cap'n Proto** | [capnproto.org specification](https://capnproto.org/encoding.html) | `capnp 0.19`, `capnp-rpc 0.19` | Zero-copy deserialisation, schema-enforced types, canonical serialisation for signing, async RPC |
| **MLS** | [RFC 9420](https://www.rfc-editor.org/rfc/rfc9420.html) | `openmls 0.5` | Group key agreement, forward secrecy, post-compromise security (PCS) |
| **Hybrid KEM** | [draft-ietf-tls-hybrid-design](https://datatracker.ietf.org/doc/draft-ietf-tls-hybrid-design/) | `ml-kem 0.2`, `x25519-dalek 2` | Post-quantum resistance via ML-KEM-768 combined with X25519 |
## How the layers compose
Data flows through the stack from top to bottom on send and from bottom to top on receive:
```text
Application plaintext
|
v
+-----------+
| MLS | RFC 9420 group encryption (PrivateMessage)
+-----------+
|
v
+-----------+
| Cap'n Proto| Schema-typed serialisation into Envelope frames
+-----------+
|
v
+-----------+
| Noise_XX | Per-session ChaCha20-Poly1305 encryption (M1 TCP path)
+-----------+ -- OR --
+-----------+
| QUIC+TLS | QUIC transport encryption (M3+ QUIC path)
+-----------+
|
v
Network
```
In the current M3 architecture, the QUIC + TLS 1.3 layer has replaced the Noise\_XX layer for client-to-server transport. The Noise\_XX implementation remains in the codebase and is used for direct peer-to-peer connections in M1-era integration tests. Both paths carry Cap'n Proto messages as their inner payload.
The Hybrid KEM layer operates orthogonally: it wraps MLS payloads in an outer post-quantum encryption envelope before they enter the transport layer. It is implemented and tested but not yet integrated into the MLS ciphersuite (planned for the M5 milestone).
## Suggested reading order
The pages in this section are ordered to build understanding incrementally:
1. **[QUIC + TLS 1.3](quic-tls.md)** -- Start here. This is the outermost transport layer that every client-server connection uses today. Understanding QUIC stream multiplexing and the TLS 1.3 handshake is prerequisite to understanding how Cap'n Proto RPC rides on top.
2. **[MLS (RFC 9420)](mls.md)** -- The core cryptographic innovation. MLS provides the group key agreement that makes quicnprotochat an E2E encrypted group messenger rather than just a transport-encrypted relay. This is the longest and most detailed page.
3. **[Cap'n Proto Serialisation and RPC](capn-proto.md)** -- The serialisation and RPC layer that bridges MLS application data with the transport. Understanding the Envelope schema, the ParsedEnvelope owned type, and the NodeService RPC interface is essential for reading the server and client source code.
4. **[Noise\_XX Handshake](noise-xx.md)** -- The M1-era transport encryption layer. Even though QUIC has replaced it for client-server communication, the Noise\_XX code remains in the codebase and the design decisions it embodies (mutual authentication, identity hiding) inform the overall architecture.
5. **[Hybrid KEM: X25519 + ML-KEM-768](hybrid-kem.md)** -- The post-quantum encryption layer. Read this last because it builds on concepts from all other layers: key encapsulation (from MLS), wire format conventions (from Cap'n Proto), and AEAD encryption (from Noise).
## Cross-cutting concerns
Several topics span multiple layers and have their own dedicated pages elsewhere in this book:
- **Forward secrecy**: Provided by MLS epoch ratcheting. See [Forward Secrecy](../cryptography/forward-secrecy.md).
- **Post-compromise security**: Provided by MLS Update proposals. See [Post-Compromise Security](../cryptography/post-compromise-security.md).
- **Post-quantum readiness**: Currently provided by the standalone Hybrid KEM module; integration into MLS is planned for M5. See [Post-Quantum Readiness](../cryptography/post-quantum-readiness.md).
- **Key lifecycle and zeroization**: Private key material is zeroized after use across all layers. See [Key Lifecycle and Zeroization](../cryptography/key-lifecycle.md).
- **Wire format details**: The length-prefixed framing codec and Cap'n Proto schema definitions are documented in the [Wire Format Reference](../wire-format/overview.md) section.
- **Design rationale**: The ADR pages explain *why* each layer was chosen. See [Design Decisions Overview](../design-rationale/overview.md).
## Crate mapping
Each protocol layer maps to one or more workspace crates:
| Layer | Primary Crate | Source File(s) |
|---|---|---|
| QUIC + TLS 1.3 | `quicnprotochat-server`, `quicnprotochat-client` | `main.rs` (server and client entry points) |
| Noise\_XX | `quicnprotochat-core` | `src/noise.rs`, `src/codec.rs` |
| Cap'n Proto | `quicnprotochat-proto` | `src/lib.rs`, `build.rs`, `schemas/*.capnp` |
| MLS | `quicnprotochat-core` | `src/group.rs`, `src/keystore.rs` |
| Hybrid KEM | `quicnprotochat-core` | `src/hybrid_kem.rs` |
For a full crate responsibility breakdown, see [Crate Responsibilities](../architecture/crate-responsibilities.md).

View File

@@ -0,0 +1,177 @@
# QUIC + TLS 1.3
quicnprotochat uses QUIC (RFC 9000) with mandatory TLS 1.3 (RFC 9001) as its client-to-server transport layer. This page explains why QUIC was chosen over raw TCP, how the `quinn` and `rustls` crates are integrated, and what security properties the transport provides.
## Why QUIC over raw TCP
The M1 milestone used raw TCP sockets with a Noise\_XX handshake for transport encryption (see [Noise\_XX Handshake](noise-xx.md)). Starting from M3, the project migrated to QUIC for several reasons:
| Property | Raw TCP + Noise | QUIC + TLS 1.3 |
|---|---|---|
| **Multiplexed streams** | Single stream; application must multiplex manually | Native bidirectional streams; each RPC call gets its own stream |
| **0-RTT resumption** | Not available; full handshake every time | Built-in; returning clients can send data in the first flight |
| **Head-of-line blocking** | A lost TCP segment blocks all subsequent data | Only the affected stream is blocked; other streams proceed |
| **NAT traversal** | TCP requires keep-alives; NAT rebinding breaks connections | UDP-based; connection migration survives NAT rebinding |
| **TLS integration** | Separate Noise handshake layered on top of TCP | TLS 1.3 is integral to the QUIC handshake; no extra round-trips |
| **Ecosystem support** | Custom framing codec required | `capnp-rpc` can use QUIC bidirectional streams directly via the `tokio-util` compat layer |
The migration also simplified the codebase: the custom `LengthPrefixedCodec` framing layer and the `into_capnp_io()` bridge (documented in [Noise\_XX Handshake](noise-xx.md)) are no longer needed on the QUIC path because `capnp-rpc` reads and writes directly on the QUIC stream.
## Crate integration
quicnprotochat uses the following crates for QUIC and TLS:
- **`quinn 0.11`** -- The async QUIC implementation for Tokio. Provides `Endpoint`, `Connection`, and bidirectional stream types.
- **`quinn-proto 0.11`** -- The protocol-level types, including `QuicServerConfig` and `QuicClientConfig` wrappers that bridge `rustls` into `quinn`.
- **`rustls 0.23`** -- The TLS implementation. quicnprotochat uses it in strict TLS 1.3 mode with no fallback to TLS 1.2.
- **`rcgen 0.13`** -- Self-signed certificate generation for development and testing.
### Server configuration
The server builds its QUIC endpoint configuration in `build_server_config()` (in `quicnprotochat-server/src/main.rs`):
```rust
let mut tls = rustls::ServerConfig::builder_with_protocol_versions(&[&TLS13])
.with_no_client_auth()
.with_single_cert(cert_chain, key)?;
tls.alpn_protocols = vec![b"capnp".to_vec()];
let crypto = QuicServerConfig::try_from(tls)?;
Ok(ServerConfig::with_crypto(Arc::new(crypto)))
```
Key points:
1. **TLS 1.3 strict mode**: `builder_with_protocol_versions(&[&TLS13])` ensures no TLS 1.2 fallback. This is a hard requirement: TLS 1.2 lacks the 0-RTT and full forward secrecy guarantees that quicnprotochat relies on.
2. **No client certificate authentication**: `with_no_client_auth()` means the server does not verify client certificates at the TLS layer. Client authentication is handled at the application layer via Ed25519 identity keys and MLS credentials. This is a deliberate design choice -- MLS provides stronger authentication properties than TLS client certificates.
3. **ALPN negotiation**: The Application-Layer Protocol Negotiation extension is set to `b"capnp"`, advertising that this endpoint speaks Cap'n Proto RPC. Both client and server must agree on this protocol identifier or the TLS handshake fails.
4. **`QuicServerConfig` bridge**: The `quinn-proto` crate provides `QuicServerConfig::try_from(tls)` to adapt the `rustls::ServerConfig` for use with QUIC. This handles the QUIC-specific TLS parameters (transport parameters, QUIC header protection keys) automatically.
### Client configuration
The client performs the mirror operation. It loads the server's DER-encoded certificate from a local file and constructs a `rustls::ClientConfig`:
```rust
let mut roots = rustls::RootCertStore::empty();
roots.add(CertificateDer::from(cert_bytes))?;
let mut tls = rustls::ClientConfig::builder_with_protocol_versions(&[&TLS13])
.with_root_certificates(roots)
.with_no_client_auth();
tls.alpn_protocols = vec![b"capnp".to_vec()];
let crypto = QuicClientConfig::try_from(tls)?;
```
The client trusts exactly one certificate: the server's self-signed cert loaded from disk. There is no system trust store involved, which simplifies the trust model but requires out-of-band distribution of the server certificate.
### Per-connection handling
Each accepted QUIC connection spawns a handler task:
```rust
let (send, recv) = connection.accept_bi().await?;
let (reader, writer) = (recv.compat(), send.compat_write());
let network = twoparty::VatNetwork::new(reader, writer, Side::Server, Default::default());
let service: node_service::Client = capnp_rpc::new_client(NodeServiceImpl { store, waiters });
RpcSystem::new(Box::new(network), Some(service.client)).await?;
```
The `tokio-util` compat layer (`compat()` and `compat_write()`) converts Quinn's `RecvStream` and `SendStream` into types that implement `futures::AsyncRead` and `futures::AsyncWrite`, which `capnp-rpc`'s `VatNetwork` requires. The entire Cap'n Proto RPC system then runs over this single QUIC bidirectional stream.
Because `capnp-rpc` uses `Rc<RefCell<>>` internally (making it `!Send`), all RPC tasks run on a `tokio::task::LocalSet`. The server spawns each connection handler via `tokio::task::spawn_local`.
## Certificate trust model
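quicnprotochat currently uses self-signed certificates with a **trust-on-first-use (TOFU)**-style model: the client trusts exactly one server certificate, obtained out-of-band, rather than a CA hierarchy: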
1. On first start, the server generates a self-signed certificate using `rcgen::generate_simple_self_signed` with SANs for `localhost`, `127.0.0.1`, and `::1`.
2. The certificate and private key are persisted to disk as DER files (default: `data/server-cert.der` and `data/server-key.der`).
3. Clients must obtain the server's certificate file out-of-band and reference it via the `--ca-cert` flag or `QUICNPROTOCHAT_CA_CERT` environment variable.
This model is adequate for development and single-server deployments. The roadmap includes:
- **ACME integration** (Let's Encrypt) for production deployments with publicly-routable servers.
- **Certificate pinning** to detect MITM attacks even when a CA is compromised.
- **Certificate transparency** log monitoring for detecting misissued certificates.
## Self-signed certificate generation
The server's `generate_self_signed()` function:
```rust
let subject_alt_names = vec![
"localhost".to_string(),
"127.0.0.1".to_string(),
"::1".to_string(),
];
let issued = generate_simple_self_signed(subject_alt_names)?;
fs::write(cert_path, issued.cert.der())?;
fs::write(key_path, &issued.key_pair.serialize_der())?;
```
The generated certificate includes both DNS and IP SANs so that clients can connect using either `localhost` or an IP address. The client specifies the expected server name via `--server-name` (default: `localhost`), which must match one of the certificate's SANs.
## Security properties
The QUIC + TLS 1.3 layer provides:
| Property | Mechanism |
|---|---|
| **Transport confidentiality** | All application data is encrypted with AES-128-GCM or ChaCha20-Poly1305 (negotiated during the TLS handshake) |
| **Server authentication** | The client verifies the server's certificate against the locally-trusted DER file |
| **Forward secrecy** | TLS 1.3 exclusively uses ephemeral Diffie-Hellman key exchange; session keys are not derivable from the server's long-term key |
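| **Replay protection** | QUIC packet numbers and TLS 1.3's anti-replay mechanism prevent replay attacks on 1-RTT data; 0-RTT early data, if enabled, can be replayed and must be limited to idempotent requests |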
| **Connection migration** | QUIC connection IDs allow the client to change IP addresses without re-handshaking |
### What TLS does *not* provide
- **Client authentication**: Handled by MLS identity credentials at the application layer. See [MLS (RFC 9420)](mls.md).
- **End-to-end encryption**: TLS terminates at the server. The server can read the Cap'n Proto RPC framing and message routing metadata. Payload confidentiality is provided by MLS. See [MLS (RFC 9420)](mls.md).
- **Post-quantum resistance**: TLS 1.3 key exchange uses classical ECDHE. Post-quantum protection of application data is provided by the [Hybrid KEM](hybrid-kem.md) layer (M7 milestone).
- **Mutual peer authentication**: For peer-to-peer scenarios, the M1-era [Noise\_XX](noise-xx.md) transport provides mutual authentication with identity hiding.
## Comparison with Noise\_XX (M1 approach)
| Aspect | Noise\_XX (M1) | QUIC + TLS 1.3 (M3+) |
|---|---|---|
| **Transport** | Raw TCP | UDP (QUIC) |
| **Handshake** | 3-message Noise XX pattern | TLS 1.3 (1-RTT or 0-RTT) |
| **Mutual auth** | Both peers authenticate static X25519 keys | Server-only at TLS layer; mutual auth via MLS |
| **Identity hiding** | Initiator's identity hidden until message 3 | No identity hiding at TLS layer |
| **Stream multiplexing** | None (single stream) | Native QUIC streams |
| **RPC bridge** | `into_capnp_io()` with `tokio::io::duplex` | Direct `compat()` wrapper on QUIC stream |
| **Codebase location** | `quicnprotochat-core/src/noise.rs` | `quicnprotochat-server/src/main.rs`, client `lib.rs` |
The Noise\_XX path remains useful for direct peer-to-peer connections (without a central server) and as a fallback transport. Both paths carry identical Cap'n Proto message payloads, so the application layer is transport-agnostic.
## Configuration reference
### Server
| Environment Variable | CLI Flag | Default | Description |
|---|---|---|---|
| `QUICNPROTOCHAT_LISTEN` | `--listen` | `0.0.0.0:7000` | QUIC listen address |
| `QUICNPROTOCHAT_TLS_CERT` | `--tls-cert` | `data/server-cert.der` | TLS certificate path |
| `QUICNPROTOCHAT_TLS_KEY` | `--tls-key` | `data/server-key.der` | TLS private key path |
| `QUICNPROTOCHAT_DATA_DIR` | `--data-dir` | `data` | Persistent storage directory |
### Client
| Environment Variable | CLI Flag | Default | Description |
|---|---|---|---|
| `QUICNPROTOCHAT_CA_CERT` | `--ca-cert` | `data/server-cert.der` | Server certificate to trust |
| `QUICNPROTOCHAT_SERVER_NAME` | `--server-name` | `localhost` | Expected TLS server name (must match certificate SAN) |
| `QUICNPROTOCHAT_SERVER` | `--server` | `127.0.0.1:7000` | Server address (per-subcommand) |
## Further reading
- [Noise\_XX Handshake](noise-xx.md) -- The M1-era transport layer that QUIC replaced.
- [Cap'n Proto Serialisation and RPC](capn-proto.md) -- The RPC layer that runs on top of QUIC streams.
- [Service Architecture](../architecture/service-architecture.md) -- How the server's `NodeServiceImpl` binds to the QUIC endpoint.
- [ADR-006: PQ Gap in Noise Transport](../design-rationale/adr-006-pq-gap.md) -- Discusses the post-quantum gap in both the Noise and TLS transport layers.

View File

@@ -0,0 +1,256 @@
# Auth, Devices, and Tokens
This page describes the authentication, device management, and authorisation
design for quicnprotochat. It introduces account and device identities, gates
server operations by authenticated identity, enforces rate and size limits, and
binds MLS identity keys to accounts.
This design cuts across milestones M4 through M6. For the broader production
readiness plan, see [Production Readiness WBS](production-readiness.md).
---
## Goals
1. **Introduce accounts and devices** with authenticated access to `NodeService`.
2. **Gate operations by identity:** enqueue/fetch/fetchWait require a valid token
bound to the caller's account and device.
3. **Enforce rate and size limits** per account, per device, and per IP.
4. **Bind MLS identity keys to accounts:** a KeyPackage upload must be associated
with the uploading account, preventing impersonation.
5. **Keep wire changes minimal and versioned:** the `Auth` struct is additive
and uses a version field for backward compatibility.
---
## Data Model (Server)
### Accounts
| Field | Type | Description |
|-------|------|-------------|
| `account_id` | UUID | Unique account identifier |
| `created_at` | Timestamp | Account creation time |
| `status` | Enum | `active`, `suspended`, `deleted` |
### Devices
| Field | Type | Description |
|-------|------|-------------|
| `device_id` | UUID | Unique device identifier |
| `account_id` | UUID | Owning account (foreign key) |
| `device_pubkey` | Ed25519 public key (32 bytes) | Device signing key |
| `created_at` | Timestamp | Device registration time |
| `status` | Enum | `active`, `revoked` |
### Sessions / Tokens
| Field | Type | Description |
|-------|------|-------------|
| `session_id` | UUID | Unique session identifier |
| `account_id` | UUID | Owning account |
| `device_id` | UUID | Originating device |
| `access_token` | Opaque bytes | Short-lived bearer token |
| `refresh_token` | Opaque bytes | Long-lived token for renewal |
| `expires_at` | Timestamp | Access token expiry |
| `created_at` | Timestamp | Session creation time |
### Identity Binding
| Field | Type | Description |
|-------|------|-------------|
| `account_id` | UUID | Owning account |
| `mls_identity_key` | Ed25519 public key (32 bytes) | MLS credential public key |
| `verified_fp` | SHA-256 fingerprint (32 bytes) | Fingerprint of the bound key |
The identity binding table ensures that only the account that registered an
Ed25519 public key can upload KeyPackages for that key. This prevents a
compromised or malicious client from uploading KeyPackages under another
account's identity.
---
## Wire / API Changes
### Auth Struct
A new `Auth` struct is added to all `NodeService` RPC methods:
```capnp
struct Auth {
version @0 :UInt16; # 0 = legacy (no auth), 1 = token-based
accessToken @1 :Data; # opaque bearer token
deviceId @2 :Data; # optional UUID (16 bytes) for audit/rate limit
}
```
The `Auth` struct is included as a parameter in `enqueue`, `fetch`, `fetchWait`,
`uploadKeyPackage`, and `fetchKeyPackage`.
### Versioning
| Version | Meaning |
|---------|---------|
| 0 | Legacy mode: no authentication. Server can allow-list in development but defaults to rejecting in production. |
| 1 | Token-based authentication. `accessToken` is required and validated. |
The server rejects any `version` value higher than its current maximum. This
ensures that a newer client connecting to an older server fails cleanly rather
than silently skipping auth.
### Optional Device ID
The `deviceId` field is optional. When present, the server uses it for:
- Per-device rate limiting (in addition to per-account limits).
- Audit logging (which device performed which operation).
- Future: device revocation without revoking the entire account.
---
## Server Enforcement
### Token Validation
1. Extract `Auth` struct from the incoming RPC.
2. If `version == 0` and server is in production mode, reject with
`AUTHENTICATION_REQUIRED`.
3. If `version == 1`, validate `accessToken`:
- Token must exist in the session store.
- Token must not be expired (`expires_at > now`).
- Associated account must have `status == active`.
- Associated device (if `deviceId` present) must have `status == active`.
4. Map validated token to `(account_id, device_id)` for downstream authorisation.
### Identity Matching
- **uploadKeyPackage:** The `identityKey` in the RPC must match an identity
binding for the authenticated account. Reject with `IDENTITY_MISMATCH` if the
key is not bound to the caller's account.
- **fetchKeyPackage:** No identity restriction (any authenticated client can
fetch any identity's KeyPackage -- this is required for the MLS add-member flow).
- **enqueue:** If `channelId` is present, the caller's identity must be in the
channel membership. If `channelId` is absent (legacy mode), the operation is
allowed for any authenticated client.
- **fetch / fetchWait:** The `recipientKey` must correspond to an identity bound
to the caller's account.
### Rate Limits
| Limit | Scope | Default |
|-------|-------|---------|
| Request rate | Per IP | 50 requests/second |
| Request rate | Per account | 50 requests/second |
| Request rate | Per device | 50 requests/second |
| Payload size | Per RPC call | 5 MB |
| KeyPackage TTL | Per package | 24 hours |
| KeyPackage uploads | Per account | Configurable (prevents store exhaustion) |
Rate limit counters use a sliding window. When a limit is exceeded, the server
responds with `RATE_LIMITED` and includes a `Retry-After` hint.
### Audit Logging
The following events are logged at audit level:
- Authentication success (account, device, IP).
- Authentication failure (reason, IP).
- Token issuance and refresh (account, device).
- KeyPackage upload (account, identity key fingerprint).
- Enqueue (account, channel, recipient).
- Fetch / fetchWait (account, recipient).
- Rate limit exceeded (scope, account/IP, current rate).
All audit log entries include a timestamp and correlation ID. Sensitive fields
(token values, ciphertext, private keys) are never logged.
---
## Client Changes
### Login / Register Flow
1. **Register:** Client generates an Ed25519 identity keypair, sends the public
key to the server. Server creates an account, binds the identity key, and
returns an `(access_token, refresh_token)` pair.
2. **Login:** Client presents credentials (initially: signed challenge from
device key). Server validates and issues tokens.
3. **Token storage:** Access and refresh tokens stored in the client state file
(same location as identity keypair). The state file should be
permission-restricted (`0600`).
4. **Token refresh:** Client detects `TOKEN_EXPIRED` errors and uses the refresh
token to obtain a new access token without re-authenticating.
### RPC Integration
Every RPC call includes the `Auth` struct:
```rust
// Pseudocode for client RPC calls
let auth = Auth {
version: 1,
access_token: state.access_token.clone(),
device_id: Some(state.device_id),
};
node_service.enqueue(auth, recipient_key, channel_id, payload).await?;
```
### Identity Binding
At registration, the client's Ed25519 public key is bound to the new account.
The client must refuse to upload KeyPackages if the local identity key does not
match the bound key -- this prevents accidental identity confusion after key
rotation.
---
## Compatibility
### Wire Version Field
The `Auth` struct includes its own `version` field, independent of the delivery
message version. This allows auth changes to evolve separately from the delivery
protocol.
### Legacy Support
- `version == 0`: No auth. Server behaviour is configurable:
- **Development:** Allow legacy calls (default for `cargo run`).
- **Production:** Reject legacy calls (default for Docker deployment).
- `version == 1`: Full auth. This is the target for M4+.
### N-1 Integration Tests
Compatibility testing covers:
- New client (v1 auth) against new server -- expected: full auth flow works.
- Old client (v0 legacy) against new server in dev mode -- expected: legacy
calls succeed.
- Old client (v0 legacy) against new server in prod mode -- expected: clean
rejection with `AUTHENTICATION_REQUIRED`.
- New client (v1 auth) against old server -- expected: server ignores unknown
`Auth` struct fields; operations succeed if server does not enforce auth.
---
## Implementation Sequence
1. Extend Cap'n Proto schemas with the `Auth` struct and add it to all
`NodeService` methods.
2. Implement token validation middleware in server RPC handlers; add an in-memory
token store (upgradeable to SQLite at M6).
3. Bind `identityKey` to account on upload; enforce on fetch/enqueue.
4. Add tests: unit tests for token validation; integration tests for auth
success and failure paths.
5. Add rate limiting middleware with configurable thresholds.
6. Add audit logging for all auth-related events.
---
## Cross-references
- [Milestones](milestones.md) -- M4 and M6 deliverables
- [Production Readiness WBS](production-readiness.md) -- Phase 3 (Auth/Device/Server Hardening)
- [1:1 Channel Design](dm-channels.md) -- channel-level authz
- [Wire Format: NodeService Schema](../wire-format/node-service-schema.md) -- RPC schema
- [Coding Standards](../contributing/coding-standards.md) -- security-by-design requirements

View File

@@ -0,0 +1,261 @@
# 1:1 Channel Design
This page describes the design for first-class 1:1 (direct message) channels in
quicnprotochat. Channels provide per-conversation authorisation, MLS-encrypted
payloads, message retention with TTL eviction, and backward compatibility with
the legacy delivery model.
For the broader roadmap context, see [Milestones](milestones.md) and
[Production Readiness WBS](production-readiness.md) (Phase 4).
---
## Goals
1. **First-class 1:1 channels.** Each conversation between two participants has
a unique `channelId`, enabling per-channel authorisation, storage, and
eviction.
2. **Per-channel authorisation.** The server enforces that only the two channel
members can enqueue and fetch messages for a given channel.
3. **MLS-encrypted payloads.** All message content is MLS ciphertext. The server
never sees plaintext. Channel metadata (ID + participant keys) is the only
information the server holds.
4. **7-day message retention.** Messages older than 7 days are evicted. This is
configurable but defaults to 7 days.
5. **24-hour KeyPackage TTL.** KeyPackages expire after 24 hours. Clients must
rotate KeyPackages before expiry to remain reachable.
---
## Schema Changes (Cap'n Proto)
### New Fields
The following fields are added to the existing `NodeService` RPC methods:
| RPC Method | New Field | Type | Description |
|------------|-----------|------|-------------|
| `enqueue` | `channelId` | `Data` (UUID, 16 bytes) | Target channel |
| `fetch` | `channelId` | `Data` (UUID, 16 bytes) | Channel to fetch from |
| `fetchWait` | `channelId` | `Data` (UUID, 16 bytes) | Channel to long-poll |
| All messages | `version` | `UInt16` | Wire version for forward compat |
### Version Field
The `version` field on delivery messages allows the server to reject messages
with unknown versions. The current version is `1`. Clients that do not set
`channelId` are treated as version `0` (legacy mode).
### New RPC Method
A new `createChannel` method is added to `NodeService`:
```capnp
createChannel @N (
auth :Auth,
peerKey :Data # Ed25519 public key of the other participant
) -> (
channelId :Data # UUID, 16 bytes
);
```
The server generates the `channelId`, stores the membership, and returns the ID
to the caller. The peer discovers the channel when they receive a message
addressed to it (or via a separate discovery mechanism in a future milestone).
---
## AuthZ Model
### Channel Membership
Each channel has exactly two members, identified by their Ed25519 public keys:
```
Channel {
channelId: UUID (16 bytes)
members: {a_key: Ed25519PubKey, b_key: Ed25519PubKey}
created_at: Timestamp
}
```
The server stores this mapping and enforces it on every operation.
### Enqueue Authorisation
When a client calls `enqueue(auth, channelId, recipientKey, payload)`:
1. Validate the `Auth` token (see [Auth, Devices, and Tokens](authz-plan.md)).
2. Look up the channel by `channelId`.
3. Verify that the caller's identity (from the token) is one of the channel's
two members.
4. Verify that `recipientKey` is the *other* member of the channel (prevents
sending to yourself or to a non-member).
5. Apply rate limits (50 requests/second per identity, 5 MB payload cap).
6. Enqueue the payload.
### Fetch Authorisation
When a client calls `fetch(auth, channelId, recipientKey)` or
`fetchWait(auth, channelId, recipientKey, timeout)`:
1. Validate the `Auth` token.
2. Verify that the caller's identity matches `recipientKey`.
3. Verify that `recipientKey` is a member of the specified channel.
4. Return messages for `(channelId, recipientKey)`, filtering out expired
messages (TTL check).
---
## Storage Model
### Channels Table
| Column | Type | Description |
|--------|------|-------------|
| `channel_id` | UUID (16 bytes) | Primary key |
| `member_a_key` | Ed25519 public key (32 bytes) | First member |
| `member_b_key` | Ed25519 public key (32 bytes) | Second member |
| `created_at` | Timestamp | Channel creation time |
A unique constraint on `(member_a_key, member_b_key)` (sorted) prevents
duplicate channels between the same pair of identities.
### Delivery Queue
Messages are keyed by `(channelId, recipient_key)`:
| Column | Type | Description |
|--------|------|-------------|
| `channel_id` | UUID (16 bytes) | Channel |
| `recipient_key` | Ed25519 public key (32 bytes) | Intended recipient |
| `payload` | Bytes | MLS ciphertext (opaque to server) |
| `received_at` | Timestamp | Server receive time |
| `sequence_no` | UInt64 | Per-channel, per-recipient monotonic counter |
### TTL Eviction
Messages are evicted in two ways:
1. **Fetch-time check:** When a client fetches messages, the server filters out
any message where `received_at + TTL < now`. This is the primary eviction
path.
2. **Background sweep:** A periodic task (configurable interval, default 1 hour)
scans for and deletes expired messages. This prevents unbounded storage
growth from inactive channels.
Default TTL values:
| Entity | TTL | Configurable |
|--------|-----|-------------|
| Messages | 7 days | Yes |
| KeyPackages | 24 hours | Yes |
---
## Flows
### Create Channel
```
Alice Server Bob
| | |
|-- createChannel(auth, bob_key) | |
| |-- generate channelId |
| |-- store {channelId, |
| | alice_key, bob_key} |
|<- channelId ------------------| |
| | |
```
Alice receives the `channelId` and can now send messages to Bob on this channel.
Bob discovers the channel when he receives the first message (the `channelId` is
included in the delivery metadata).
### Send (with AuthZ)
```
Alice Server
| |
|-- enqueue(auth, channelId, |
| bob_key, mls_ciphertext) |
| |-- validate auth token
| |-- lookup channel membership
| |-- verify alice_key in members
| |-- verify bob_key is recipient
| |-- check rate limits
| |-- store (channelId, bob_key,
| | payload, received_at, seq)
|<- ok (sequence_no) ------------|
| |
```
### Receive (with TTL)
```
Bob Server
| |
|-- fetchWait(auth, channelId, |
| bob_key, timeout) |
| |-- validate auth token
| |-- verify bob_key in channel
| |-- query (channelId, bob_key)
| |-- filter: received_at + 7d > now
| |-- return non-expired messages
|<- messages[] ------------------|
| |
```
---
## Backward Compatibility
### Legacy Mode (channelId = nil)
When `channelId` is empty or absent:
- The server treats the request as a legacy delivery (pre-channel behavior).
- Messages are routed solely by `recipientKey`, without channel-level authz.
- This mode can be disabled in production via server configuration.
### Version Negotiation
The `version` field on delivery messages allows clean rejection of future schema
changes:
| Version | Behavior |
|---------|----------|
| 0 | Legacy mode: no `channelId`, no per-channel authz |
| 1 | Channel-aware: `channelId` required, authz enforced |
The server rejects messages with `version > max_supported`.
---
## Open Items
These items are deferred to future milestones:
- **Persistence backend:** The current `DashMap`-based store must be extended to
SQLite (or SQLCipher) for durable channel and delivery state. See
[Milestones: M6](milestones.md#m6----persistence-planned).
- **Channel discovery API:** A dedicated RPC for Bob to discover channels he is
a member of, rather than relying on first-message discovery.
- **Client UX:** Map peer identity to `channelId` discovery; cache `channelId`
in the client state file.
- **Audit logging:** Log channel creation, authz failures, send/recv events with
redaction of ciphertext. See [Auth, Devices, and Tokens](authz-plan.md) for
the audit logging design.
- **Multi-device:** A single account on multiple devices sharing the same
channel. Requires per-device delivery queues and MLS multi-device support.
---
## Cross-references
- [Milestones](milestones.md) -- M4 (CLI subcommands) and M6 (persistence)
- [Production Readiness WBS](production-readiness.md) -- Phase 4 (Delivery Semantics)
- [Auth, Devices, and Tokens](authz-plan.md) -- token validation and identity binding
- [Wire Format: Delivery Schema](../wire-format/delivery-schema.md) -- current delivery schema
- [Wire Format: NodeService Schema](../wire-format/node-service-schema.md) -- RPC interface
- [Architecture Overview](../architecture/overview.md) -- system diagram and service model

View File

@@ -0,0 +1,406 @@
# Future Research Directions
This page catalogues technologies and research directions that could strengthen
quicnprotochat beyond the current [milestone plan](milestones.md). Each entry
includes a brief description, the problem it solves, relevant crates or
specifications, and how it maps to the project architecture.
For the production readiness work breakdown, see
[Production Readiness WBS](production-readiness.md).
---
## Transport and Networking
### LibP2P / iroh (n0)
**Problem:** The current architecture is strictly client-server. Clients behind
NAT cannot communicate directly, and the server is a single point of failure for
delivery.
**Solution:** [LibP2P](https://libp2p.io/) and [iroh](https://iroh.computer/)
(from n0) provide peer discovery, NAT traversal (hole-punching), and relay
fallback. iroh is particularly interesting because it is Rust-native and built on
QUIC, aligning with quicnprotochat's existing transport layer.
**Architecture impact:** Move from pure client-server to a hybrid topology where
peers communicate directly when possible and fall back to server relay when NAT
traversal fails. The server role shifts from mandatory relay to optional
rendezvous/relay node.
**Crates:** `libp2p`, `iroh`, `iroh-net`
### WebTransport (HTTP/3)
**Problem:** Browser clients cannot use raw QUIC. The current stack requires a
native Rust binary.
**Solution:** [WebTransport](https://w3c.github.io/webtransport/) exposes
QUIC-like semantics (multiplexed bidirectional streams, datagrams) to browsers
over HTTP/3. A WebTransport endpoint alongside the existing QUIC listener would
enable a web client without WebSocket degradation.
**Architecture impact:** Add a second listener (HTTP/3 + WebTransport) that
terminates WebTransport and bridges into the existing `NodeService` RPC layer.
Cap'n Proto serialisation works in WASM via the `capnp` crate.
**Crates:** `h3`, `h3-webtransport`, `wtransport`
### Tor / I2P Integration
**Problem:** MLS protects message content, but connection metadata (who connects
to the server, when, how often) leaks to the server and network observers.
**Solution:** Route client-server connections through
[Tor](https://www.torproject.org/) onion services or
[I2P](https://geti2p.net/) tunnels. This provides metadata resistance at the
network layer.
**Architecture impact:** The server exposes a `.onion` address (Tor) or an I2P
destination. Clients connect through the anonymity network. Latency increases
significantly, so this should be optional.
**Crates:** `arti` (Tor client in Rust), `arti-client`
---
## Storage and Persistence
### SQLCipher / libsql (Turso)
**Problem:** At M6, quicnprotochat needs persistent storage for group state, key
material, and message queues. Storing private keys in a plaintext SQLite database
is insufficient.
**Solution:** [SQLCipher](https://www.zetetic.net/sqlcipher/) provides
transparent, page-level AES-256 encryption for SQLite. Alternatively,
[libsql](https://turso.tech/libsql) (Turso) offers a SQLite fork with
encryption, replication, and embedded server capabilities.
**Architecture impact:** Replace the `sqlx` SQLite backend with SQLCipher.
Encryption key derived from a user-provided passphrase (via Argon2id) or a
hardware-backed key.
**Crates:** `rusqlite` (with `bundled-sqlcipher` feature), `libsql`
### CRDTs (Automerge / Yrs)
**Problem:** Multi-device support requires synchronising state (group membership,
read receipts, settings) across devices without a central authority resolving
conflicts.
**Solution:** Conflict-free replicated data types (CRDTs) allow concurrent edits
to converge without coordination. [Automerge](https://automerge.org/) and
[Yrs](https://docs.rs/yrs/) (Yjs in Rust) provide production-quality CRDT
implementations.
**Architecture impact:** Client-side state (contact list, group membership
cache, read markers) stored as CRDT documents. Synchronisation happens over the
existing MLS-encrypted channel, ensuring the server never sees the state.
**Crates:** `automerge`, `yrs`
### Object Storage (S3-compatible)
**Problem:** Encrypted file and media attachments need a storage backend that
the server can host without seeing the content.
**Solution:** An S3-compatible object store (MinIO, Garage, or a cloud provider)
for encrypted blobs. Clients encrypt attachments client-side (using a key derived
from the MLS group secret) and upload the ciphertext. The server stores and
serves opaque blobs.
**Architecture impact:** Add a media upload/download RPC to `NodeService`. The
server proxies to the object store or returns pre-signed URLs.
**Crates:** `aws-sdk-s3`, `opendal`
---
## Cryptography and Privacy
### ML-KEM + ML-DSA Hybrid (Post-Quantum MLS)
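**Problem:** Quantum computers threaten X25519 and Ed25519. While MLS content is
protected by ephemeral key exchange, the X25519 init keys are vulnerable to
harvest-now-decrypt-later attacks (ciphertext recorded today can be decrypted
once a quantum computer exists), and Ed25519 credential signatures would become
forgeable by a quantum-capable adversary.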
**Solution:** Hybrid X25519 + ML-KEM-768 KEM for MLS init keys, and optionally
hybrid Ed25519 + ML-DSA-65 for credential signatures. The `ml-kem` crate is
already vendored in the workspace.
**Architecture impact:** Custom `OpenMlsCryptoProvider` in `quicnprotochat-core`
implementing the hybrid combiner. This is the M7 milestone -- see
[Milestones](milestones.md#m7----post-quantum-planned) and
[Hybrid KEM](../protocol-layers/hybrid-kem.md).
**Crates:** `ml-kem`, `ml-dsa`
**References:** NIST FIPS 203 (ML-KEM), NIST FIPS 204 (ML-DSA), `draft-ietf-tls-hybrid-design`
### Private Information Retrieval (PIR)
**Problem:** When a client fetches messages or KeyPackages, the server learns
*which* recipient is requesting -- even though it cannot read the content.
**Solution:** Private Information Retrieval (PIR) allows a client to fetch a
record from the server without revealing which record was requested.
[SealPIR](https://github.com/microsoft/SealPIR) and SimplePIR provide practical
constructions.
**Architecture impact:** Replace the `fetch` / `fetchKeyPackage` RPCs with PIR
queries. This is a significant performance trade-off: PIR has high computational
cost. Suitable for KeyPackage fetch (small database) before message fetch (large
database).
### Sealed Sender (Signal-style)
**Problem:** The server sees `(sender, recipient, timestamp)` metadata on every
enqueued message. Even without reading content, this metadata reveals social
graphs.
**Solution:** [Sealed Sender](https://signal.org/blog/sealed-sender/) encrypts
the sender's identity inside the MLS ciphertext. The server routes by
`recipientKey` only and cannot determine who sent the message.
**Architecture impact:** Modify the `enqueue` RPC to omit sender identity from
the server-visible metadata. The sender identity is included only inside the
MLS application message (encrypted).
### Key Transparency (RFC draft)
**Problem:** A compromised server could substitute public keys, performing a
man-in-the-middle attack on MLS group formation.
**Solution:** A verifiable, append-only log of public key bindings (similar to
Certificate Transparency for TLS). Clients verify that the server's response
matches the log before trusting a fetched KeyPackage.
**Architecture impact:** Add a key transparency log (Merkle tree) alongside the
Authentication Service. Clients verify inclusion proofs on every `fetchKeyPackage`
response.
**References:** `draft-ietf-keytrans-protocol`
---
## Identity and Authentication
### DIDs (Decentralized Identifiers)
**Problem:** User identities are currently bound to the server. If the server
goes away, identities are lost.
**Solution:** [Decentralized Identifiers](https://www.w3.org/TR/did-core/)
(`did:key`, `did:web`) provide self-sovereign identity. A user's DID is derived
from their Ed25519 public key and is portable across servers.
**Architecture impact:** Replace raw Ed25519 public keys in MLS credentials with
DID URIs. The server resolves DIDs to public keys for routing.
**Crates:** `did-key`, `ssi`
### OPAQUE (aPAKE)
**Problem:** If quicnprotochat adds password-based account registration, the
server must never see the password -- not even a hash.
**Solution:** [OPAQUE](https://datatracker.ietf.org/doc/rfc9807/) is an
asymmetric password-authenticated key exchange where the server stores only a
one-way transformation of the password and never sees the password itself, not
even during registration. The stored record resists pre-computation attacks: an
offline dictionary attack is only possible after the server's record has been
compromised.
**Architecture impact:** Replace the registration/login flow with OPAQUE. The
server stores an OPAQUE registration record; the client runs the OPAQUE protocol
to authenticate and derive a session key.
**Crates:** `opaque-ke`
**References:** RFC 9807 (OPAQUE); RFC 9497 (OPRF, the primitive OPAQUE builds on)
### WebAuthn / Passkeys
**Problem:** Password-based auth (even with OPAQUE) is vulnerable to phishing.
Hardware-backed authentication provides stronger device binding.
**Solution:** [WebAuthn](https://www.w3.org/TR/webauthn-3/) / Passkeys allow
authentication via hardware tokens (YubiKey), platform authenticators (Touch ID,
Windows Hello), or synced passkeys.
**Architecture impact:** Add a WebAuthn registration/authentication flow to the
account system. Requires a server-side WebAuthn relying party implementation.
**Crates:** `webauthn-rs`
### Verifiable Credentials (W3C VC)
**Problem:** Proving attributes (organization membership, role, age) without
revealing full identity.
**Solution:** [Verifiable Credentials](https://www.w3.org/TR/vc-data-model/)
allow a user to present cryptographic proofs of attributes issued by a trusted
authority.
**Architecture impact:** Extend MLS credentials with VC presentation. A group
admin could require proof of organization membership before allowing join.
---
## Application Layer
### Matrix-style Federation
**Problem:** A single server is a single point of failure and a single point of
trust. Users on different servers cannot communicate.
**Solution:** Federation allows multiple quicnprotochat servers to exchange
messages, similar to [Matrix](https://matrix.org/) homeserver federation. Each
server manages its own users and relays messages to peer servers.
**Architecture impact:** Major. Requires server-to-server protocol, distributed
identity resolution, and cross-server MLS group management.
### WASM Plugin System
**Problem:** Extensibility (bots, bridges, custom message types) currently
requires forking the codebase.
**Solution:** A sandboxed WASM plugin system allows third-party extensions to run
inside the client or server without access to private key material.
**Architecture impact:** Define a plugin API (message hooks, command handlers).
Plugins compiled to WASM and loaded at runtime via `wasmtime` or `wasmer`.
**Crates:** `wasmtime`, `wasmer`, `extism`
### Double-Ratchet DM Layer
**Problem:** MLS is optimised for groups. For efficient 1:1 conversations, the
Signal double ratchet (X3DH + Axolotl) provides better performance
characteristics (no tree overhead for two parties).
**Solution:** Implement a double-ratchet layer for 1:1 DMs, using MLS only for
groups with N > 2. The [1:1 Channel Design](dm-channels.md) currently uses MLS
for DMs; this would be an optimisation.
**References:** [The Double Ratchet Algorithm](https://signal.org/docs/specifications/doubleratchet/),
[X3DH Key Agreement Protocol](https://signal.org/docs/specifications/x3dh/)
---
## Observability and Operations
### OpenTelemetry (Tracing + Metrics)
**Problem:** The current logging is `tracing`-based but lacks distributed
tracing context and structured metrics export.
**Solution:** [OpenTelemetry](https://opentelemetry.io/) provides a unified
framework for distributed tracing, metrics, and log correlation. OTLP export
enables integration with any observability backend.
**Architecture impact:** Add `tracing-opentelemetry` and `opentelemetry-otlp`
to the server. Instrument RPC handlers with spans. Export to Jaeger, Grafana
Tempo, or any OTLP-compatible backend.
**Crates:** `opentelemetry`, `opentelemetry-otlp`, `tracing-opentelemetry`
### Prometheus + Grafana
**Problem:** No quantitative visibility into server performance (throughput,
latency, queue depth, epoch advancement rate).
**Solution:** Export Prometheus metrics from the server. Visualise with Grafana
dashboards.
**Metrics to export:** message throughput (enqueue/fetch per second), RPC
latency histograms, MLS epoch advancement rate, delivery queue depth, KeyPackage
store size, active connections.
**Crates:** `prometheus`, `metrics`, `metrics-exporter-prometheus`
### Testcontainers-rs
**Problem:** Integration tests currently run server and client in the same
process (`tokio::spawn`). This does not test real network conditions, container
startup, or multi-process interactions.
**Solution:** [Testcontainers-rs](https://docs.rs/testcontainers/) runs Docker
containers from Rust tests, enabling true end-to-end CI with real network
boundaries.
**Architecture impact:** Add testcontainers-based integration tests alongside
the existing in-process tests. The Docker image is already maintained.
**Crates:** `testcontainers`, `testcontainers-modules`
---
## Developer Experience
### Tauri / Dioxus (Native GUI)
**Problem:** The current interface is CLI-only. A graphical client would broaden
the user base for testing and demonstration.
**Solution:** [Tauri](https://tauri.app/) or [Dioxus](https://dioxuslabs.com/)
provide native cross-platform GUI frameworks in Rust. The
`quicnprotochat-core` crate can be shared directly with the GUI client.
**Architecture impact:** Add a `quicnprotochat-gui` crate that depends on
`quicnprotochat-core` and `quicnprotochat-proto`. The GUI drives the same
`GroupMember` and RPC logic as the CLI client.
**Crates:** `tauri`, `dioxus`
### uniffi / diplomat (Mobile FFI)
**Problem:** Mobile clients (iOS, Android) cannot use the Rust binary directly.
**Solution:** [uniffi](https://github.com/mozilla/uniffi-rs) (Mozilla) and
[diplomat](https://github.com/rust-diplomat/diplomat) generate idiomatic Swift and
Kotlin bindings from Rust definitions.
**Architecture impact:** Expose `quicnprotochat-core` through a C-compatible FFI
layer. Mobile apps call into the Rust crypto and protocol logic.
**Crates:** `uniffi`, `diplomat`
### Nix Flakes
**Problem:** The development environment requires `capnp` (Cap'n Proto compiler),
a specific Rust toolchain version, and test infrastructure. Setup varies across
developer machines.
**Solution:** [Nix flakes](https://nixos.wiki/wiki/Flakes) provide a
reproducible, declarative development environment. A single `nix develop`
command sets up the toolchain, `capnp`, and all dependencies.
**Architecture impact:** Add `flake.nix` and `flake.lock` to the repository root.
---
## Top 5 Priority Implementations
The following table ranks the most impactful technologies for near-term adoption,
considering the current state of the codebase and the [milestone plan](milestones.md).
| Priority | Technology | Why | Unlocks |
|----------|-----------|-----|---------|
| 1 | **Post-quantum hybrid KEM** | `ml-kem` is already vendored in the workspace. Completing the hybrid `OpenMlsCryptoProvider` makes quicnprotochat one of the first PQ MLS implementations. | M7 |
| 2 | **SQLCipher persistence** | Encrypted-at-rest storage is the prerequisite for multi-device support, offline usage, and server restart survival. | M6 |
| 3 | **OPAQUE auth** | Zero-knowledge password authentication is a massive security uplift for the account system. The server never sees or stores passwords. | Phase 3 (authz) |
| 4 | **iroh / LibP2P** | NAT traversal and an optional P2P mesh make quicnprotochat deployable without centralised infrastructure. Aligns with the existing QUIC transport. | Beyond M7 |
| 5 | **Sealed Sender + PIR** | Content encryption is table stakes. Metadata resistance (hiding who talks to whom) is the frontier of private messaging research. | Beyond M7 |
---
## Cross-references
- [Milestones](milestones.md) -- current milestone tracker
- [Production Readiness WBS](production-readiness.md) -- phased work breakdown
- [Auth, Devices, and Tokens](authz-plan.md) -- OPAQUE integration point
- [1:1 Channel Design](dm-channels.md) -- double-ratchet optimisation context
- [Hybrid KEM](../protocol-layers/hybrid-kem.md) -- existing PQ design
- [ADR-006: PQ Gap in Noise Transport](../design-rationale/adr-006-pq-gap.md) -- accepted PQ risk
- [References](../appendix/references.md) -- standards and crate documentation

View File

@@ -0,0 +1,194 @@
# Milestone Tracker
This page tracks the project milestones for quicnprotochat, from initial transport
layer through post-quantum cryptography. Each milestone produces production-ready,
tested, deployable code -- see [Coding Standards](../contributing/coding-standards.md)
for what that means in practice.
---
## Milestone Summary
| # | Name | Status | What it adds |
|---|------|--------|-------------|
| M1 | QUIC/TLS Transport | **Complete** | QUIC + TLS 1.3 endpoint, length-prefixed framing, Ping/Pong |
| M2 | Authentication Service | **Complete** | Ed25519 identity, KeyPackage generation, AS upload/fetch |
| M3 | Delivery Service + MLS Groups | **Complete** | DS relay, GroupMember create/join/add/send/recv |
| M4 | Group CLI Subcommands | **Next** | Persistent CLI (create-group, invite, join, send, recv); `demo-group` already available |
| M5 | Multi-party Groups | Planned | N > 2 members, Commit fan-out, Proposal handling |
| M6 | Persistence | Planned | SQLite key store, durable group state |
| M7 | Post-quantum | Planned | PQ hybrid for MLS/HPKE (X25519 + ML-KEM-768) |
---
## M1 -- QUIC/TLS Transport (Complete)
**Goal:** Two processes establish a QUIC connection over TLS 1.3 and exchange
typed Cap'n Proto frames.
**Deliverables:**
- `schemas/envelope.capnp`: `Envelope` struct with `MsgType` enum (Ping/Pong at this stage)
- `quicnprotochat-proto`: `build.rs` invoking `capnpc`, generated type re-exports,
canonical serialisation helpers
- `quicnprotochat-core`: static X25519 keypair generation, Noise\_XX initiator and
responder, length-prefixed Cap'n Proto frame codec (Tokio `Encoder`/`Decoder`)
- `quicnprotochat-server`: QUIC listener with TLS 1.3 (quinn/rustls), Ping to Pong
handler, one tokio task per connection
- `quicnprotochat-client`: connects over QUIC, sends Ping, receives Pong, exits 0
- Integration test: server and client in same test binary using `tokio::spawn`
- `docker-compose.yml` running the server
**Tests:** codec (7 unit tests), keypair (3 unit tests), Noise transport integration.
**Branch:** `feat/m1-noise-transport`
---
## M2 -- Authentication Service (Complete)
**Goal:** Clients register an Ed25519 identity and publish/fetch MLS KeyPackages
via Cap'n Proto RPC.
**Deliverables:**
- `schemas/auth.capnp`: `AuthenticationService` interface (`uploadKeyPackage`,
`fetchKeyPackage`)
- `quicnprotochat-core`: Ed25519 identity keypair generation, MLS KeyPackage
generation via `openmls`
- `quicnprotochat-server`: AS RPC server with `DashMap` store, atomic consume-on-fetch
- `quicnprotochat-client`: `register-state` and `fetch-key` CLI subcommands
- Integration test: Alice uploads KeyPackage, Bob fetches it, fingerprints match
**Tests:** auth\_service.rs integration tests (upload, fetch, consume semantics).
---
## M3 -- Delivery Service + MLS Groups (Complete)
**Goal:** Alice creates a group and adds Bob via MLS Welcome. Both exchange
encrypted application messages through the Delivery Service.
**Deliverables:**
- Unified `NodeService` on port 7000 combining Authentication Service and Delivery
Service into a single Cap'n Proto RPC interface
- `GroupMember` struct with full MLS lifecycle: `create_group`, `add_member`,
`join_from_welcome`, `send_message`, `receive_message`
- DS relay with `enqueue`, `fetch`, and `fetchWait` (long-polling) operations
- `demo-group` subcommand exercising the complete Alice/Bob flow in one process
- Channel-aware delivery: messages routed by `(channelId, recipientKey)`
**Tests:** All passing -- codec (5+ tests), keypair (3 tests), group round-trip,
group\_id lifecycle, MLS integration.
**Key design decisions from M3:**
1. **OpenMlsRustCrypto backend holds the HPKE init key in memory.** The same
`GroupMember` instance that generated the KeyPackage must process the
corresponding Welcome. If the process exits in between, the init private key
is lost. This is by design for M3; persistence comes at M6.
2. **KeyPackage wire format: raw TLS-encoded bytes.** KeyPackages are serialised
using `tls_serialize_detached()` rather than wrapped in `MlsMessageOut`. This
avoids an extra layer of indirection and matches what `openmls` expects on the
receive side via `KeyPackageIn::tls_deserialize_exact()`.
3. **openmls 0.5 API gotchas.** Several `openmls` methods changed signatures
between 0.4 and 0.5 (e.g., `MlsGroup::new` vs `MlsGroup::new_with_group_id`,
`BasicCredential::new` taking `Vec<u8>` directly). These differences are
documented inline in `quicnprotochat-core/src/group.rs`.
**Branch:** `feat/m1-noise-transport`
---
## M4 -- Group CLI Subcommands (Next)
**Goal:** Persistent, composable CLI subcommands for group operations, replacing
the monolithic `demo-group` proof-of-concept.
**Planned deliverables:**
- `create-group` -- creates a new MLS group, stores state locally
- `invite <identity>` -- adds a member by fetching their KeyPackage from the AS
- `join` -- processes a Welcome message and joins an existing group
- `send <message>` -- encrypts and enqueues an application message
- `recv` -- fetches and decrypts pending messages (or long-polls with `fetchWait`)
The `demo-group` subcommand remains available as a single-command demonstration
of the full flow.
---
## M5 -- Multi-party Groups (Planned)
**Goal:** Support groups with N > 2 members, including Commit fan-out and
Proposal handling.
**Planned deliverables:**
- Commit fan-out through the DS to all group members
- Proposal handling (Add, Remove, Update)
- Epoch synchronisation across N members
- Criterion benchmarks: key generation, encap/decap, group-add latency
(10/100/1000 members)
---
## M6 -- Persistence (Planned)
**Goal:** Server survives restart. Client state persists across sessions.
**Planned deliverables:**
- `quicnprotochat-server`: SQLite via `sqlx` for AS key store and DS message log,
`migrations/` directory
- `docker/Dockerfile`: multi-stage build (`rust:bookworm` builder, `debian:bookworm-slim` runtime)
- `docker-compose.yml`: server + SQLite volume, healthcheck
- Client reconnect with session resume (re-handshake + rejoin group epoch from
DS log)
See [Future Research: SQLCipher](future-research.md#storage-and-persistence) for
encrypted-at-rest options.
---
## M7 -- Post-quantum (Planned)
**Goal:** Replace the MLS crypto backend with a hybrid X25519 + ML-KEM-768 KEM,
providing post-quantum confidentiality for all group key material.
**Planned deliverables:**
- Custom `OpenMlsCryptoProvider` with hybrid KEM in `quicnprotochat-core`
- Hybrid shared secret derivation:
```
SharedSecret = HKDF-SHA256(
ikm = X25519_ss || ML-KEM-768_ss,
info = "quicnprotochat-hybrid-v1",
len = 32
)
```
- All M3/M4/M5 tests pass unchanged with the new ciphersuite
- Follows the combiner approach from `draft-ietf-tls-hybrid-design`
The `ml-kem` crate is already vendored in the workspace. See
[Hybrid KEM](../protocol-layers/hybrid-kem.md) for the detailed design and
[ADR-006: PQ Gap in Noise Transport](../design-rationale/adr-006-pq-gap.md) for
the accepted residual risk in the transport layer.
---
## Cross-references
- [Production Readiness WBS](production-readiness.md) -- phased work breakdown
for hardening beyond the milestone track
- [Auth, Devices, and Tokens](authz-plan.md) -- authentication and authorisation
design that cuts across M4--M6
- [1:1 Channel Design](dm-channels.md) -- DM channel schema and authz model
- [Future Research](future-research.md) -- technology options for M6+ and beyond
- [Testing Strategy](../contributing/testing.md) -- how tests are structured
across milestones

View File

@@ -0,0 +1,226 @@
# Production Readiness WBS
This page defines the work breakdown structure (WBS) for taking quicnprotochat
from a proof-of-concept to a production-hardened system. It covers feature scope,
security policy, phased delivery, and a planning checklist.
For the milestone-by-milestone tracker, see [Milestones](milestones.md). This
document focuses on the cross-cutting concerns that span multiple milestones.
---
## Feature Scope (Must-Have)
These are the feature areas that must be addressed before quicnprotochat can be
considered production-ready. Each area maps to one or more milestones or phases
in the WBS below.
| Area | Description | Primary Milestone |
|------|-------------|-------------------|
| **Identity / Auth** | Account creation, device registration, token-based RPC authentication, MLS identity binding | M4 + Phase 3 |
| **Key / MLS Lifecycle** | KeyPackage rotation, epoch advancement, member removal, credential updates | M5 + Phase 2 |
| **Transport / Delivery** | QUIC + TLS 1.3 hardening, ALPN enforcement, connection draining, reconnect | M1 (done) + Phase 2 |
| **Private 1:1 Channels** | Channel creation, per-channel authz, TTL eviction, DM-specific flows | Phase 4 |
| **Storage / Persistence** | SQLite (or SQLCipher) for AS, DS, client state; migrations; backup/restore | M6 + Phase 6 |
| **Observability / Ops** | Structured logging, metrics, distributed tracing, healthcheck endpoints | Phase 6 |
| **Client Resilience** | Offline queue, retry with backoff, idempotent message IDs, gap detection | Phase 4 |
| **Compatibility / Protocols** | Wire versioning, N-1 client interoperability, ciphersuite negotiation | Phase 2 + Phase 5 |
---
## Security Plan (By Design)
quicnprotochat follows a security-by-design philosophy. The standards below are
non-negotiable -- see [Coding Standards](../contributing/coding-standards.md) for
how they are enforced in code.
### Governance
- `CODEOWNERS` file mapping each crate to a responsible reviewer.
- All PRs require at least one review from a crate owner.
- Security-sensitive changes (crypto, auth, wire format) require two reviewers.
- GPG-signed commits only.
### Transport Policy
- TLS 1.3 only (`rustls` configured with `TLS13` cipher suites exclusively).
- ALPN token `b"capnp"` required; reject connections with mismatched ALPN.
- Self-signed certificates acceptable for development; production deployments
must use a CA-signed certificate or certificate pinning.
- Connection draining on shutdown (QUIC `CONNECTION_CLOSE`).
### MLS Policy
- Ciphersuite: `MLS_128_DHKEMX25519_AES128GCM_SHA256_Ed25519` (baseline).
- Single-use KeyPackages (consumed on fetch, per RFC 9420).
- KeyPackage TTL: 24 hours; clients must rotate before expiry.
- Ciphersuite allowlist: server rejects KeyPackages with unknown ciphersuites.
- No downgrade: once a group has used a ciphersuite, members cannot rejoin with
a weaker one.
### Input Validation
- All incoming Cap'n Proto messages validated against schema before processing.
- Maximum payload size: 5 MB per RPC call.
- Group ID, identity key, and channel ID fields validated for correct length
(32 bytes, 32 bytes, 16 bytes respectively).
- UTF-8 validation on all string fields.
### Secrets Management
- All private key material wrapped in `Zeroizing<T>` (via the `zeroize` crate).
- No secret material in log output at any level.
- No `unwrap()` on cryptographic operations -- all errors are typed and propagated.
- Constant-time comparison for authentication tokens and key fingerprints.
### Abuse / DoS Controls
- Rate limiting: 50 requests/second per IP, per account, and per device.
- Payload cap: 5 MB per message.
- Connection limit: configurable max concurrent QUIC connections.
- KeyPackage upload limit: configurable per account (prevents store exhaustion).
- Long-poll timeout cap: server-enforced maximum for `fetchWait`.
### Data Protection
- MLS ciphertext is opaque to the server (DS never holds group keys).
- Message retention: 7 days default, configurable.
- KeyPackage retention: 24 hours (TTL eviction).
- At-rest encryption for persistent storage (SQLCipher at M6).
### Logging Safety
- Structured logging via `tracing` with `env-filter`.
- Sensitive fields (keys, tokens, ciphertext) are never logged, even at `TRACE`.
- Audit-level events: auth success/failure, token issuance, keypackage upload,
enqueue/fetch, rate limit hits.
### Testing
- Unit tests for all crypto operations (see [Testing Strategy](../contributing/testing.md)).
- Integration tests for every RPC method.
- Negative tests: malformed input, expired tokens, wrong identity, replay attempts.
- N-1 compatibility tests (old client against new server).
- Fuzzing targets for Cap'n Proto parsers and MLS message handling (Phase 5).
---
## Work Breakdown (6 Phases)
### Phase 1 -- Baselines and Governance
**Goal:** Establish project hygiene before adding features.
| Task | Description |
|------|-------------|
| CODEOWNERS | Map crates to responsible reviewers |
| CI pipeline | GitHub Actions: `cargo test --workspace`, `cargo clippy`, `cargo fmt --check`, `cargo deny check` |
| SBOM generation | `cargo-cyclonedx` or `cargo-about` in CI; publish with each release |
| Threat model | Document assets, adversaries, attack surface, trust boundaries; reference in [Threat Model](../cryptography/threat-model.md) |
| Dependency audit | `cargo audit` in CI; pin all major versions per [Coding Standards](../contributing/coding-standards.md) |
### Phase 2 -- Protocols and Core Hardening
**Goal:** Lock down the wire format and cryptographic policy.
| Task | Description |
|------|-------------|
| Wire versioning | Add `version` field to all Cap'n Proto structs; reject unknown versions |
| Ciphersuite allowlist | Server rejects KeyPackages outside the allowed set |
| Downgrade guards | Prevent epoch rollback; reject Commits with weaker ciphersuites |
| ALPN enforcement | Reject connections without `b"capnp"` ALPN token |
| Connection draining | Graceful QUIC `CONNECTION_CLOSE` on server shutdown |
| KeyPackage rotation | Client-side timer to upload fresh KeyPackages before TTL expiry |
### Phase 3 -- Auth, Device, and Server Hardening
**Goal:** Add account/device identity and token-based authentication.
See [Auth, Devices, and Tokens](authz-plan.md) for the full design.
| Task | Description |
|------|-------------|
| Account + device model | `{account_id, device_id, device_pubkey}` with status lifecycle |
| Token issuance | Access + refresh tokens; configurable expiry |
| RPC auth middleware | Validate token on every RPC; map to account/device |
| Identity binding | Bind MLS identity key to account; reject mismatched uploads |
| Rate limiting | Per-IP, per-account, per-device counters |
| Audit logging | Auth events, token lifecycle, rate limit hits |
### Phase 4 -- Delivery Semantics and Client Resilience
**Goal:** Reliable message delivery and 1:1 channels.
See [1:1 Channel Design](dm-channels.md) for the DM-specific design.
| Task | Description |
|------|-------------|
| Idempotent message IDs | Client-generated UUIDs; server deduplicates |
| Ordering guarantees | Per-channel sequence numbers; client detects gaps |
| Offline queue | Server retains messages for offline recipients (up to TTL) |
| 1:1 channels | Channel creation, membership, per-channel authz |
| TTL eviction | Background sweep + fetch-time check for expired messages |
| Client retry | Exponential backoff with jitter on transient failures |
### Phase 5 -- E2E Harness and Security Tests
**Goal:** Automated end-to-end testing and security validation.
| Task | Description |
|------|-------------|
| docker-compose testnet | Multi-node test environment with configurable topology |
| Positive E2E tests | Full group lifecycle: register, create, invite, join, send, recv, leave |
| Negative E2E tests | Expired tokens, wrong identity, replay, malformed messages |
| Compat matrix | N-1 client/server version testing |
| Fuzz targets | `cargo-fuzz` targets for Cap'n Proto parsers, MLS message handlers |
| Golden-wire fixtures | Serialised test vectors for regression testing across versions |
### Phase 6 -- Reliability, Performance, and Operations
**Goal:** Production-grade operations and performance validation.
| Task | Description |
|------|-------------|
| SQLite/SQLCipher persistence | AS key store, DS message log, client state (M6) |
| Soak testing | 72-hour continuous operation under synthetic load |
| Load testing | Throughput and latency benchmarks (Criterion + custom harness) |
| Chaos testing | Network partitions, process crashes, disk full scenarios |
| Backup / restore | SQLite backup with integrity verification |
| Canary / rollback | Rolling deployment strategy with automatic rollback on failure |
| Metrics + dashboards | Prometheus metrics, Grafana dashboards (see [Future Research](future-research.md)) |
---
## Planning Checklist
Use this checklist when planning a new milestone or phase. Each item should have
a documented decision before implementation begins.
- [ ] **Release criteria / SLOs** -- Define what "done" means. Latency targets,
error rate thresholds, test coverage minimums.
- [ ] **Threat model review** -- Update the [Threat Model](../cryptography/threat-model.md)
for any new attack surface introduced by this phase.
- [ ] **Protocol policy** -- Ciphersuite allowlist, wire version, downgrade rules.
- [ ] **Identity / auth model** -- Who authenticates, how, and what operations
are gated.
- [ ] **Data model** -- Schema changes, migrations, backward compatibility.
- [ ] **Abuse controls** -- Rate limits, size caps, connection limits for this phase.
- [ ] **Observability contracts** -- What new metrics, logs, and traces are needed.
- [ ] **Environments / secrets** -- Dev, staging, production configuration;
secret rotation plan.
- [ ] **Testing matrix** -- Unit, integration, E2E, negative, fuzz, compat tests
for this phase.
- [ ] **Rollout / ops** -- Deployment strategy, rollback plan, monitoring during
rollout.
---
## Cross-references
- [Milestones](milestones.md) -- feature milestone tracker
- [Auth, Devices, and Tokens](authz-plan.md) -- Phase 3 design
- [1:1 Channel Design](dm-channels.md) -- Phase 4 design
- [Future Research](future-research.md) -- technology options for Phase 6+
- [Coding Standards](../contributing/coding-standards.md) -- engineering standards
- [Testing Strategy](../contributing/testing.md) -- test structure and conventions
- [Threat Model](../cryptography/threat-model.md) -- security analysis

View File

@@ -0,0 +1,149 @@
# Auth Schema
**Schema file:** `schemas/auth.capnp`
**File ID:** `@0xb3a8f1c2e4d97650`
The `AuthenticationService` interface defines the RPC contract for uploading and fetching MLS KeyPackages. It is the standalone version of the Authentication Service; in the current architecture, these methods are integrated into the unified [NodeService](node-service-schema.md) interface.
---
## Full schema listing
```capnp
# auth.capnp -- Authentication Service RPC interface.
#
# Clients call uploadKeyPackage before joining any group so that peers can
# fetch their key material to add them. Each KeyPackage is single-use (MLS
# requirement): fetchKeyPackage removes and returns one package atomically.
#
# The server indexes packages by the raw Ed25519 public key bytes (32 bytes),
# not a fingerprint, so callers must know the target's identity public key
# out-of-band (e.g. from a directory or QR code scan).
#
# ID generated with: capnp id
@0xb3a8f1c2e4d97650;
interface AuthenticationService {
# Upload a single-use KeyPackage for later retrieval by peers.
#
# identityKey : Ed25519 public key bytes (exactly 32 bytes).
# package : openmls-serialised KeyPackage blob (TLS encoding).
#
# Returns the SHA-256 fingerprint of `package`. Clients should record this
# and compare it against the fingerprint returned by a peer's fetchKeyPackage
# to detect tampering.
uploadKeyPackage @0 (identityKey :Data, package :Data) -> (fingerprint :Data);
# Fetch and atomically remove one KeyPackage for a given identity key.
#
# Returns empty Data if no KeyPackage is currently stored for this identity.
# Callers should handle the empty case by asking the target to upload more
# packages before retrying.
fetchKeyPackage @1 (identityKey :Data) -> (package :Data);
}
```
---
## Method-by-method analysis
### `uploadKeyPackage @0`
```
uploadKeyPackage (identityKey :Data, package :Data) -> (fingerprint :Data)
```
**Purpose:** A client uploads a single-use MLS KeyPackage so that peers can later fetch it to add the client to a group.
**Parameters:**
| Parameter | Type | Size | Description |
|---|---|---|---|
| `identityKey` | `Data` | Exactly 32 bytes | The uploader's raw Ed25519 public key bytes. This is the index key under which the package is stored. |
| `package` | `Data` | Variable (bounded by transport max) | An openmls-serialised KeyPackage blob in TLS encoding. Contains the client's HPKE init key, credential, and signature. |
**Return value:**
| Field | Type | Size | Description |
|---|---|---|---|
| `fingerprint` | `Data` | 32 bytes | SHA-256 digest of the uploaded `package` bytes. |
**Fingerprint semantics:** The returned fingerprint allows the uploading client to verify that the server stored the package correctly. More importantly, when a peer later fetches a KeyPackage, it can compare the fetched package's SHA-256 hash against the fingerprint (communicated out-of-band) to detect tampering by a malicious server.
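**Idempotency:** `uploadKeyPackage` is not idempotent. Uploading the same package twice appends a second copy to the queue; the server does not deduplicate. Clients should never upload the same KeyPackage twice: each stored copy is handed out by a separate `fetchKeyPackage` call, so a duplicate could be served to two different requesters and break MLS's single-use requirement.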
### `fetchKeyPackage @1`
```
fetchKeyPackage (identityKey :Data) -> (package :Data)
```
**Purpose:** Fetch and atomically remove one KeyPackage for a given identity. This is the mechanism by which a group creator obtains a peer's key material in order to add them to a group via MLS `add_members()`.
**Parameters:**
| Parameter | Type | Size | Description |
|---|---|---|---|
| `identityKey` | `Data` | Exactly 32 bytes | The raw Ed25519 public key of the target peer whose KeyPackage is being requested. |
**Return value:**
| Field | Type | Size | Description |
|---|---|---|---|
| `package` | `Data` | Variable, or 0 bytes | The fetched KeyPackage blob, or empty `Data` if no packages are stored for this identity. |
**Atomic removal:** The fetch operation is destructive: it removes the returned KeyPackage from the server's store in the same operation that returns it. This guarantees MLS's single-use requirement -- a KeyPackage is never served to two different requesters.
**Empty response handling:** Callers must check for an empty response. An empty `package` means the target has no KeyPackages available. The caller should either:
1. Retry after a delay, hoping the target uploads more packages.
2. Signal the user that the target is unreachable for group addition.
---
## Indexing by raw Ed25519 public key
The Authentication Service indexes KeyPackages by the **raw 32-byte Ed25519 public key**, not by a fingerprint or any higher-level identifier. This design choice has several implications:
1. **No directory service required for lookup.** The caller must already know the target's Ed25519 public key (obtained out-of-band via QR code scan, manual exchange, or a future directory service).
2. **Consistent with DS indexing.** The [Delivery Service](delivery-schema.md) uses the same 32-byte Ed25519 key as its queue index, so a single key serves as the universal identifier across both services.
3. **No ambiguity.** Unlike fingerprints (which could collide if truncated) or human-readable names (which require a mapping layer), the raw public key is the canonical, collision-resistant identifier.
---
## Single-use semantics
MLS requires that each KeyPackage be used at most once to preserve the forward secrecy of the initial key exchange. The Authentication Service enforces this by atomically removing the KeyPackage on fetch.
**Consequences for clients:**
- Clients should **pre-upload multiple KeyPackages** after generating their identity, so that several peers can add them to groups concurrently without exhausting the supply.
- Clients should **monitor their KeyPackage count** on the server (via a future monitoring endpoint or periodic re-upload) and replenish when the supply runs low.
- If a client has zero KeyPackages stored, it is effectively unreachable for new group invitations until it uploads more.
For the design rationale behind single-use KeyPackages, see [ADR-005: Single-Use KeyPackages](../design-rationale/adr-005-single-use-keypackages.md).
---
## Relationship to NodeService
In the current unified architecture, the Authentication Service methods are exposed as part of the [NodeService interface](node-service-schema.md):
| AuthenticationService Method | NodeService Method | Additional Parameters |
|---|---|---|
| `uploadKeyPackage @0` | `uploadKeyPackage @0` | `auth :Auth` |
| `fetchKeyPackage @1` | `fetchKeyPackage @1` | `auth :Auth` |
The standalone `AuthenticationService` interface remains in the schema for documentation purposes and for use in contexts where the full NodeService is not needed.
---
## Further reading
- [Wire Format Overview](overview.md) -- serialisation pipeline context
- [NodeService Schema](node-service-schema.md) -- unified interface that subsumes AuthenticationService
- [Delivery Schema](delivery-schema.md) -- the companion service for message routing
- [Envelope Schema](envelope-schema.md) -- legacy framing that used `keyPackageUpload`/`keyPackageFetch` message types
- [ADR-005: Single-Use KeyPackages](../design-rationale/adr-005-single-use-keypackages.md) -- design rationale for atomic removal on fetch
- [ADR-004: MLS-Unaware Delivery Service](../design-rationale/adr-004-mls-unaware-ds.md) -- why the server does not inspect MLS content

View File

@@ -0,0 +1,193 @@
# Delivery Schema
**Schema file:** `schemas/delivery.capnp`
**File ID:** `@0xc5d9e2b4f1a83076`
The `DeliveryService` interface defines the RPC contract for the store-and-forward message relay. The DS is intentionally MLS-unaware: it routes opaque byte strings by recipient key and optional channel ID without parsing or inspecting the content.
---
## Full schema listing
```capnp
# delivery.capnp -- Delivery Service RPC interface.
#
# The Delivery Service is a simple store-and-forward relay. It does not parse
# MLS messages -- all payloads are opaque byte strings routed by recipient key.
#
# Callers are responsible for:
# - Routing Welcome messages to the correct new member after add_members().
# - Routing Commit messages to any existing group members (other than self).
# - Routing Application messages to the intended recipient(s).
#
# The DS indexes queues by the recipient's raw Ed25519 public key (32 bytes),
# matching the indexing scheme used by the Authentication Service.
#
# ID generated with: capnp id
@0xc5d9e2b4f1a83076;
interface DeliveryService {
# Enqueue an opaque payload for delivery to a recipient.
#
# recipientKey : Ed25519 public key of the intended recipient (exactly 32 bytes).
# payload : Opaque byte string -- a TLS-encoded MlsMessageOut blob or any
# other framed data the application layer wants to deliver.
# channelId : Optional channel identifier (empty for legacy). A 16-byte UUID
# is recommended for 1:1 channels.
# version : Schema/wire version. Must be 0 (legacy) or 1 (this spec).
#
# The payload is appended to the recipient's FIFO queue. Returns immediately;
# the recipient retrieves it via `fetch`.
enqueue @0 (recipientKey :Data, payload :Data, channelId :Data, version :UInt16) -> ();
# Fetch and atomically drain all queued payloads for a given recipient.
#
# recipientKey : Ed25519 public key of the caller (exactly 32 bytes).
# channelId : Optional channel identifier (empty for legacy).
# version : Schema/wire version. Must be 0 (legacy) or 1 (this spec).
#
# Returns the complete queue in FIFO order and clears it. Returns an empty
# list if there are no pending messages.
fetch @1 (recipientKey :Data, channelId :Data, version :UInt16) -> (payloads :List(Data));
}
```
---
## Method-by-method analysis
### `enqueue @0`
```
enqueue (recipientKey :Data, payload :Data, channelId :Data, version :UInt16) -> ()
```
**Purpose:** Append an opaque payload to a recipient's delivery queue. The DS stores the payload until the recipient fetches it. The call returns immediately after the payload is enqueued; it does not block until delivery.
**Parameters:**
| Parameter | Type | Size | Description |
|---|---|---|---|
| `recipientKey` | `Data` | Exactly 32 bytes | Ed25519 public key of the intended recipient. Used as the primary queue index. |
| `payload` | `Data` | Variable (bounded by transport max) | Opaque byte string. Typically a TLS-encoded `MlsMessageOut` blob, but the DS does not inspect it. |
| `channelId` | `Data` | 0 bytes (legacy) or 16 bytes (UUID) | Channel identifier for channel-aware routing. Empty `Data` is treated as the legacy default channel. |
| `version` | `UInt16` | 2 bytes | Schema/wire version. `0` = legacy (no channel routing), `1` = current spec (channel-aware). |
**Return value:** Void. The method returns `()` on success. Errors are surfaced as Cap'n Proto RPC exceptions.
**Queue semantics:** Payloads are appended in FIFO order. The DS does not deduplicate, reorder, or inspect payloads. Multiple enqueue calls for the same recipient and channel ID are simply appended to the queue in the order they arrive.
### `fetch @1`
```
fetch (recipientKey :Data, channelId :Data, version :UInt16) -> (payloads :List(Data))
```
**Purpose:** Fetch and atomically drain all queued payloads for a given recipient on a given channel. This is the "pull" side of the store-and-forward relay.
**Parameters:**
| Parameter | Type | Size | Description |
|---|---|---|---|
| `recipientKey` | `Data` | Exactly 32 bytes | Ed25519 public key of the caller. Must match the key used in the enqueue calls. |
| `channelId` | `Data` | 0 bytes (legacy) or 16 bytes (UUID) | Channel identifier. Must match the `channelId` used during enqueue. |
| `version` | `UInt16` | 2 bytes | Schema/wire version. Must match the version used during enqueue. |
**Return value:**
| Field | Type | Description |
|---|---|---|
| `payloads` | `List(Data)` | All queued payloads in FIFO order. Empty list if no messages are pending. |
**Atomic drain:** The fetch operation returns the entire queue and clears it in a single atomic operation. There is no "peek" or partial fetch. This simplifies the concurrency model: the client processes all returned payloads and does not need to track which ones it has already seen.
---
## Channel-aware routing
The `channelId` field enables per-channel queue separation. Each unique `(recipientKey, channelId)` pair maps to an independent FIFO queue on the server.
### Compound key structure
```text
Queue Key = recipientKey (32 bytes) || channelId (0 or 16 bytes)
```
When `channelId` is empty (0 bytes), the queue key degenerates to just the `recipientKey`, preserving backward compatibility with legacy clients that do not use channels.
### Channel ID format
The recommended format for `channelId` is a 16-byte UUID (128-bit, typically UUID v4). The DS treats the channel ID as an opaque byte string and does not parse its structure. Using UUIDs provides:
1. **Collision resistance** -- 122 random bits (for UUID v4), i.e. 2^122 possible values, make accidental collision negligible.
2. **Privacy** -- The channel ID reveals no information about the channel's participants or purpose.
3. **Fixed size** -- 16 bytes is compact and predictable for indexing.
### Use cases
| Scenario | channelId | recipientKey | Result |
|---|---|---|---|
| Legacy client, no channels | Empty (0 bytes) | Alice's Ed25519 key | Single queue for all of Alice's messages |
| 1:1 channel between Alice and Bob | UUID of the 1:1 channel | Alice's Ed25519 key | Separate queue for this specific channel |
| Group channel | UUID of the group channel | Alice's Ed25519 key | Separate queue for this group's messages to Alice |
---
## Version field
The `version` field provides a mechanism for wire-level schema evolution without breaking existing clients.
| Version | Semantics |
|---|---|
| `0` | Legacy mode. `channelId` is ignored (treated as empty). Behaves like the pre-channel DeliveryService. |
| `1` | Current specification. `channelId` is used for channel-aware routing. |
The server validates the version field and rejects unknown versions as protocol errors. Clients must set the version field to match the schema revision they implement.
---
## FIFO queue semantics
The Delivery Service provides strict FIFO ordering within each `(recipientKey, channelId)` queue:
1. **Enqueue order is preserved.** Payloads are returned by `fetch` in the exact order they were enqueued.
2. **Atomic drain.** Each `fetch` call returns all pending payloads and clears the queue. There is no risk of partial reads or interleaving.
3. **No persistence guarantees (current implementation).** The in-memory queue is lost on server restart. Persistent storage is planned for a future milestone.
4. **No redelivery.** Once a payload is returned by `fetch`, it is permanently removed. If the client crashes before processing it, the payload is lost. Reliable delivery with acknowledgments is a future enhancement.
---
## MLS-unaware design
The DS intentionally does not parse, validate, or inspect MLS messages. All payloads are opaque `Data` blobs. This design has several consequences:
- **Security:** The server cannot extract plaintext from MLS ciphertext, even if compromised.
- **Simplicity:** The DS has no dependency on openmls or any MLS library.
- **Flexibility:** The same DS can carry non-MLS payloads (e.g., signaling, metadata) without modification.
- **No server-side optimization:** The DS cannot optimize delivery based on MLS message type (e.g., fanning out a Commit to all group members). The client must enqueue separately for each recipient.
For the full design rationale, see [ADR-004: MLS-Unaware Delivery Service](../design-rationale/adr-004-mls-unaware-ds.md).
---
## Relationship to NodeService
In the current unified architecture, the Delivery Service methods are exposed as part of the [NodeService interface](node-service-schema.md) with additional methods:
| DeliveryService Method | NodeService Method | Additional Parameters |
|---|---|---|
| `enqueue @0` | `enqueue @2` | `auth :Auth` |
| `fetch @1` | `fetch @3` | `auth :Auth` |
| *(none)* | `fetchWait @4` | `auth :Auth`, `timeoutMs :UInt64` |
The `fetchWait` method is a NodeService extension that provides long-polling semantics: it blocks until either new payloads arrive or the timeout expires. This avoids the latency and bandwidth overhead of repeated `fetch` polling.
---
## Further reading
- [Wire Format Overview](overview.md) -- serialisation pipeline context
- [NodeService Schema](node-service-schema.md) -- unified interface that subsumes DeliveryService
- [Auth Schema](auth-schema.md) -- the companion service for KeyPackage management
- [Envelope Schema](envelope-schema.md) -- legacy framing that used `mlsWelcome`/`mlsCommit`/`mlsApplication` message types
- [ADR-004: MLS-Unaware Delivery Service](../design-rationale/adr-004-mls-unaware-ds.md) -- why the DS does not inspect MLS content

View File

@@ -0,0 +1,151 @@
# Envelope Schema
**Schema file:** `schemas/envelope.capnp`
**File ID:** `@0xe4a7f2c8b1d63509`
The Envelope is the legacy top-level wire message used in M1 for all quicnprotochat traffic over the Noise channel. Every frame exchanged between peers was serialised as an Envelope, with the Delivery Service routing by `(groupId, msgType)` without inspecting the payload.
> **Note:** The Envelope is the M1-era framing format. The current M3+ architecture uses Cap'n Proto RPC directly via the [NodeService](node-service-schema.md) interface. The Envelope schema remains in the codebase for backward compatibility and for use in integration tests that exercise the Noise transport path.
---
## Full schema listing
```capnp
# envelope.capnp -- top-level wire message for all quicnprotochat traffic.
#
# Every frame exchanged over the Noise channel is serialised as an Envelope.
# The Delivery Service routes by (groupId, msgType) without inspecting payload.
#
# Field sizing rationale:
# groupId / senderId : 32 bytes -- SHA-256 digest
# payload : opaque -- MLS blob or control data; size bounded by
# the Noise transport max message size (65535 B)
# timestampMs : UInt64 -- unix epoch milliseconds; sufficient until year 292M
#
# ID generated with: capnp id
@0xe4a7f2c8b1d63509;
struct Envelope {
# Message type discriminant -- determines how payload is interpreted.
msgType @0 :MsgType;
# 32-byte SHA-256 digest of the group name.
# The Delivery Service uses this as its routing key.
# Zero-filled for point-to-point control messages (ping, keyPackageUpload, etc.).
groupId @1 :Data;
# 32-byte SHA-256 digest of the sender's Ed25519 identity public key.
senderId @2 :Data;
# Opaque payload. Interpretation is determined by msgType.
payload @3 :Data;
# Unix timestamp in milliseconds at the time of send.
timestampMs @4 :UInt64;
enum MsgType {
ping @0;
pong @1;
keyPackageUpload @2;
keyPackageFetch @3;
keyPackageResponse @4;
mlsWelcome @5;
mlsCommit @6;
mlsApplication @7;
error @8;
}
}
```
---
## Field-by-field analysis
### `msgType @0 :MsgType`
A 16-bit enum discriminant (Cap'n Proto enums are encoded as UInt16). Determines how the `payload` field should be interpreted. The discriminant is the first field in the struct for efficient dispatch: a router can read the first two bytes of the struct's data section to decide how to handle the message without following any pointer fields.
### `groupId @1 :Data`
A 32-byte `Data` field containing the SHA-256 digest of the group name. The Delivery Service uses this as its primary routing key when the Envelope-based protocol is active.
**Sizing rationale:** SHA-256 produces a 32-byte (256-bit) digest. This is stored as a variable-length `Data` field rather than a fixed-size blob because Cap'n Proto does not have a fixed-size array type. Implementations must validate that the field contains exactly 32 bytes.
**Special case:** For point-to-point control messages (`ping`, `pong`, `keyPackageUpload`, `keyPackageFetch`), the `groupId` is zero-filled (32 zero bytes) because these messages are not associated with any group.
### `senderId @2 :Data`
A 32-byte `Data` field containing the SHA-256 digest of the sender's Ed25519 identity public key. This allows the receiver to identify the sender without inspecting the MLS-layer credentials.
**Sizing rationale:** Same as `groupId` -- SHA-256 digest, 32 bytes.
### `payload @3 :Data`
An opaque byte string whose interpretation depends on `msgType`. The payload is bounded by the Noise transport maximum message size of 65,535 bytes (see [Framing Codec](framing-codec.md)).
### `timestampMs @4 :UInt64`
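Unix epoch timestamp in milliseconds, set by the sender at the time of send. Encoded as a `UInt64`, which will not overflow for hundreds of millions of years (roughly 584 million years as an unsigned value, or about 292 million years if a receiver reads it as a signed 64-bit integer) -- effectively unlimited for practical purposes.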
The timestamp is sender-asserted and **not** authenticated by the server. Receivers should treat it as advisory (for display ordering) rather than authoritative.
---
## MsgType enum
The `MsgType` enum defines nine message types. Each variant determines how the `payload` field is interpreted:
| Ordinal | Variant | Payload Contents | Direction |
|---|---|---|---|
| 0 | `ping` | Empty | Client -> Server or Peer -> Peer |
| 1 | `pong` | Empty | Server -> Client or Peer -> Peer |
| 2 | `keyPackageUpload` | openmls-serialised KeyPackage blob (TLS encoding) | Client -> Server |
| 3 | `keyPackageFetch` | Target identity key (32 bytes, raw Ed25519 public key) | Client -> Server |
| 4 | `keyPackageResponse` | openmls-serialised KeyPackage blob, or empty if none stored | Server -> Client |
| 5 | `mlsWelcome` | `MLSMessage` blob (Welcome variant) | Peer -> Peer (via DS) |
| 6 | `mlsCommit` | `MLSMessage` blob (PublicMessage / Commit variant) | Peer -> Group (via DS) |
| 7 | `mlsApplication` | `MLSMessage` blob (PrivateMessage / Application variant) | Peer -> Group (via DS) |
| 8 | `error` | UTF-8 error description string | Any direction |
### Control messages (0-1)
`ping` and `pong` are keepalive probes with empty payloads. They serve as health checks over long-lived Noise connections.
### Authentication messages (2-4)
`keyPackageUpload`, `keyPackageFetch`, and `keyPackageResponse` implement the Authentication Service protocol over the Envelope format. In the current architecture, these operations are handled by the [NodeService RPC](node-service-schema.md) methods `uploadKeyPackage` and `fetchKeyPackage` instead.
### MLS messages (5-7)
`mlsWelcome`, `mlsCommit`, and `mlsApplication` carry MLS protocol messages as opaque blobs. The Envelope does not inspect or validate the MLS content; it simply transports the bytes between peers via the Delivery Service.
### Error messages (8)
`error` carries a UTF-8 string describing an error condition. Used for protocol-level error reporting (e.g., "no KeyPackage found for identity").
---
## Relationship to NodeService
The Envelope schema was the original M1 wire format, where all communication was multiplexed over a single Noise-encrypted TCP stream. With the transition to QUIC + TLS 1.3 and Cap'n Proto RPC in M3, the Envelope's role has been superseded by the [NodeService interface](node-service-schema.md), which provides typed RPC methods for each operation.
The key differences:
| Aspect | Envelope (M1) | NodeService RPC (M3+) |
|---|---|---|
| Dispatch | Manual, based on `msgType` enum | Automatic, Cap'n Proto RPC method dispatch |
| Type safety | Payload is opaque `Data` | Each method has typed parameters and return values |
| Transport | Noise\_XX over TCP | QUIC + TLS 1.3 |
| Auth | Implicit (Noise handshake authenticates peers) | Explicit `Auth` struct per method call |
---
## Further reading
- [Wire Format Overview](overview.md) -- serialisation pipeline context
- [NodeService Schema](node-service-schema.md) -- the current RPC interface that replaced Envelope-based dispatch
- [Auth Schema](auth-schema.md) -- standalone Authentication Service interface
- [Delivery Schema](delivery-schema.md) -- standalone Delivery Service interface
- [Framing Codec](framing-codec.md) -- length-prefixed framing that wraps serialised Envelopes
- [ADR-002: Cap'n Proto over MessagePack](../design-rationale/adr-002-capnproto.md) -- why Cap'n Proto was chosen for the wire format

View File

@@ -0,0 +1,221 @@
# Length-Prefixed Framing Codec
**Source file:** `crates/quicnprotochat-core/src/codec.rs`
The `LengthPrefixedCodec` is a stateless Tokio codec that frames byte payloads with a 4-byte little-endian length prefix. It is the bridge between Cap'n Proto serialisation (which produces a byte buffer of variable length) and the Noise transport (which needs discrete message boundaries over a TCP byte stream).
---
## Wire format
```text
+----------------------------+--------------------------------------+
| length (4 bytes, LE u32) | payload (length bytes) |
+----------------------------+--------------------------------------+
```
Each frame consists of:
1. A **4-byte length field** encoded as a little-endian unsigned 32-bit integer (`u32`). This gives a theoretical maximum payload size of 4,294,967,295 bytes, but the actual limit is much lower (see below).
2. A **payload** of exactly `length` bytes. The codec treats the payload as opaque -- it does not inspect or interpret the bytes.
### Byte order: little-endian
The length prefix uses **little-endian** byte order. This was a deliberate choice for consistency with Cap'n Proto's segment table encoding, which also uses little-endian 32-bit integers. Benefits of this choice:
- **No endianness confusion.** A developer inspecting a raw byte dump sees uniform little-endian encoding throughout the entire frame (length header + Cap'n Proto header + Cap'n Proto data).
- **Native performance on common architectures.** x86-64 and AArch64 (in its default little-endian mode) can read the length field without byte-swapping.
- **Alignment with Cap'n Proto conventions.** Cap'n Proto defines its canonical byte order as little-endian (segment count and segment sizes are LE u32).
### Example encoding
For the ASCII payload `"le-check"` (8 bytes), the encoded frame is:
```text
Offset Hex Meaning
------ ------------------ -------
0x00 08 00 00 00 Length = 8 (little-endian)
0x04 6C 65 2D 63 68 65 Payload: "le-che"
0x0A 63 6B Payload: "ck"
```
Total frame size: 4 (header) + 8 (payload) = 12 bytes.
---
## Frame size limit
```rust
/// Maximum Noise protocol message size in bytes (per RFC / Noise spec S3).
pub const NOISE_MAX_MSG: usize = 65_535;
```
The maximum payload size is **65,535 bytes** (64 KiB - 1), matching the Noise protocol specification's maximum message size. This constant is defined as `NOISE_MAX_MSG` in the codec module.
Any frame with a payload exceeding this limit is rejected as a protocol violation:
- **On encode:** `Encoder::encode()` returns `CodecError::FrameTooLarge` before writing any bytes to the buffer.
- **On decode:** `Decoder::decode()` returns `CodecError::FrameTooLarge` upon reading a length field that exceeds the limit, without attempting to read the payload bytes.
In both cases, the error is **unrecoverable**. The connection should be closed rather than retried, because an oversized frame indicates either a bug or a malicious peer.
### Relationship to Noise plaintext limit
The `NOISE_MAX_MSG` constant (65,535 bytes) represents the maximum Noise *message* size, which includes the Poly1305 authentication tag (16 bytes). The maximum *plaintext* per Noise transport frame is therefore:
```rust
/// Maximum plaintext bytes per Noise transport frame.
pub const MAX_PLAINTEXT_LEN: usize = 65_519; // 65,535 - 16
```
This constant is defined in `crates/quicnprotochat-core/src/error.rs`. The codec operates at the ciphertext level (framing Noise messages, not plaintext), so it uses `NOISE_MAX_MSG` as its limit.
---
## Implementation
The codec implements the `Encoder<Bytes>` and `Decoder` traits from `tokio_util::codec`, making it compatible with `tokio_util::codec::Framed` for use with any `AsyncRead + AsyncWrite` stream.
### Struct
```rust
#[derive(Debug, Clone, Copy, Default)]
pub struct LengthPrefixedCodec;
```
The codec is **stateless** -- it holds no internal buffering state. This means it is `Clone`, `Copy`, and `Default`, and multiple codec instances are interchangeable.
### Encoder
```rust
impl Encoder<Bytes> for LengthPrefixedCodec {
type Error = CodecError;
fn encode(&mut self, item: Bytes, dst: &mut BytesMut) -> Result<(), Self::Error> {
let len = item.len();
if len > NOISE_MAX_MSG {
return Err(CodecError::FrameTooLarge {
len,
max: NOISE_MAX_MSG,
});
}
dst.reserve(4 + len);
dst.put_u32_le(len as u32);
dst.extend_from_slice(&item);
Ok(())
}
}
```
**Steps:**
1. Check payload size against `NOISE_MAX_MSG`. Reject if oversized.
2. Reserve capacity for at least `4 + len` more bytes in the output buffer, so that writing the header and payload does not trigger a further reallocation.
3. Write the 4-byte little-endian length prefix.
4. Copy the payload bytes.
### Decoder
```rust
impl Decoder for LengthPrefixedCodec {
type Item = BytesMut;
type Error = CodecError;
fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
if src.len() < 4 {
src.reserve(4_usize.saturating_sub(src.len()));
return Ok(None);
}
let frame_len = u32::from_le_bytes([src[0], src[1], src[2], src[3]]) as usize;
if frame_len > NOISE_MAX_MSG {
return Err(CodecError::FrameTooLarge {
len: frame_len,
max: NOISE_MAX_MSG,
});
}
let total = 4 + frame_len;
if src.len() < total {
src.reserve(total - src.len());
return Ok(None);
}
src.advance(4);
Ok(Some(src.split_to(frame_len)))
}
}
```
**Steps:**
1. **Check for header completeness.** If fewer than 4 bytes are available, reserve room for the missing header bytes and return `Ok(None)` (the standard Tokio Decoder contract for "need more data").
2. **Peek at the length field** without advancing the cursor. This avoids mutating buffer state when the full frame is not yet available.
3. **Validate the length.** If it exceeds `NOISE_MAX_MSG`, return an error immediately.
4. **Check for payload completeness.** If fewer than `4 + frame_len` bytes are available, reserve the difference and return `Ok(None)`.
5. **Consume the frame.** Advance past the 4-byte header, then split the payload from the front of the buffer.
The `reserve()` calls in steps 1 and 4 are a performance optimization: they hint to Tokio how many additional bytes the decoder needs, avoiding O(n) polling behavior where the decoder is called once per incoming byte.
---
## Error handling
```rust
#[derive(Debug, Error)]
pub enum CodecError {
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
#[error("frame length {len} exceeds maximum {max} bytes")]
FrameTooLarge { len: usize, max: usize },
}
```
The codec produces two error variants:
| Variant | Cause | Recovery |
|---|---|---|
| `Io` | The underlying TCP stream returned an I/O error. Auto-converted from `std::io::Error` via the `From` impl required by `tokio-util`. | Depends on the I/O error. Typically the connection is broken and should be dropped. |
| `FrameTooLarge` | A frame's length field exceeds `NOISE_MAX_MSG` (65,535 bytes). | **Unrecoverable.** The connection should be closed. An oversized frame indicates a protocol violation -- either a bug or a malicious peer. |
---
## Transport context
The `LengthPrefixedCodec` is used in the **Noise transport path** (M1 stack), where Cap'n Proto messages and Noise handshake messages are sent over a raw TCP stream that has no built-in message boundaries.
In the **QUIC transport path** (M3+ stack), the codec is **not used**. QUIC provides native stream framing through its stream abstraction, and the `capnp-rpc` crate handles message delimitation internally. The QUIC path also does not need the 65,535-byte frame limit because QUIC flow control operates at a different level.
```text
Noise path: App -> Cap'n Proto -> LengthPrefixedCodec -> Noise encrypt -> TCP
QUIC path: App -> Cap'n Proto RPC -> capnp-rpc stream adapter -> QUIC stream -> UDP
```
---
## Test coverage
The codec module includes comprehensive tests that verify:
| Test | What it validates |
|---|---|
| `round_trip_empty_payload` | Empty payloads encode and decode correctly (0-length frame) |
| `round_trip_small_payload` | Small payloads survive a round trip without corruption |
| `round_trip_max_size_payload` | A payload of exactly `NOISE_MAX_MSG` bytes (the maximum) encodes and decodes correctly |
| `oversized_encode_returns_error` | Encoding a payload of `NOISE_MAX_MSG + 1` bytes returns `FrameTooLarge` |
| `oversized_length_field_decode_returns_error` | Decoding a frame with a length field exceeding `NOISE_MAX_MSG` returns `FrameTooLarge` |
| `partial_payload_returns_none` | A frame with a valid header but incomplete payload returns `None` (need more data) |
| `partial_header_returns_none` | A buffer with fewer than 4 bytes returns `None` (need more data) |
| `length_field_is_little_endian` | The encoded length of `"le-check"` (8 bytes) produces `[0x08, 0x00, 0x00, 0x00]` |
---
## Further reading
- [Wire Format Overview](overview.md) -- where the codec fits in the serialisation pipeline
- [Envelope Schema](envelope-schema.md) -- the Cap'n Proto messages that the codec frames (M1 path)
- [NodeService Schema](node-service-schema.md) -- the RPC messages carried over QUIC (M3+ path, does not use this codec)
- [ADR-003: RPC Inside the Noise Tunnel](../design-rationale/adr-003-rpc-inside-noise.md) -- why the codec sits between Cap'n Proto and Noise
- [Protocol Layers Overview](../protocol-layers/overview.md) -- how all the layers stack

View File

@@ -0,0 +1,258 @@
# NodeService Schema
**Schema file:** `schemas/node.capnp`
**File ID:** `@0xd5ca5648a9cc1c28`
The `NodeService` interface is the unified Cap'n Proto RPC surface that every quicnprotochat client talks to. It combines the Authentication Service and Delivery Service into a single interface and adds long-polling support (`fetchWait`), a health probe (`health`), and hybrid KEM key management (`uploadHybridKey`, `fetchHybridKey`). The KeyPackage and delivery methods (`@0`-`@4`) accept an `Auth` struct for versioned authentication; `health` and the two hybrid-key methods currently take no `auth` parameter.
---
## Full schema listing
```capnp
# node.capnp -- Unified quicnprotochat node RPC interface.
#
# Combines Authentication and Delivery operations into a single service.
#
# ID generated with: capnp id
@0xd5ca5648a9cc1c28;
interface NodeService {
# Upload a single-use KeyPackage for later retrieval by peers.
# identityKey : Ed25519 public key bytes (32 bytes)
# package : TLS-encoded openmls KeyPackage
# auth : Auth context (versioned). For legacy clients, pass an empty
# struct or version=0.
uploadKeyPackage @0 (identityKey :Data, package :Data, auth :Auth)
-> (fingerprint :Data);
# Fetch and atomically remove one KeyPackage for a given identity key.
# Returns empty Data if none are stored.
fetchKeyPackage @1 (identityKey :Data, auth :Auth) -> (package :Data);
# Enqueue an opaque payload for delivery to a recipient.
# channelId : Optional channel identifier (empty for legacy). A 16-byte UUID
# is recommended for 1:1 channels.
# version : Schema/wire version. Must be 0 (legacy) or 1 (this spec).
enqueue @2 (recipientKey :Data, payload :Data, channelId :Data,
version :UInt16, auth :Auth) -> ();
# Fetch and drain all queued payloads for the recipient.
fetch @3 (recipientKey :Data, channelId :Data, version :UInt16, auth :Auth)
-> (payloads :List(Data));
# Long-poll: wait up to timeoutMs for new payloads, then drain queue.
fetchWait @4 (recipientKey :Data, channelId :Data, version :UInt16,
timeoutMs :UInt64, auth :Auth) -> (payloads :List(Data));
# Health probe for readiness/liveness.
health @5 () -> (status :Text);
# Upload the hybrid (X25519 + ML-KEM-768) public key for sealed envelope
# encryption.
uploadHybridKey @6 (identityKey :Data, hybridPublicKey :Data) -> ();
# Fetch a peer's hybrid public key (for post-quantum envelope encryption).
fetchHybridKey @7 (identityKey :Data) -> (hybridPublicKey :Data);
}
struct Auth {
version @0 :UInt16; # 0 = legacy/none, 1 = token-based auth
accessToken @1 :Data; # opaque bearer token issued at login
deviceId @2 :Data; # optional UUID bytes for auditing/rate limiting
}
```
---
## Interface methods
### Authentication methods
#### `uploadKeyPackage @0`
```
uploadKeyPackage (identityKey :Data, package :Data, auth :Auth) -> (fingerprint :Data)
```
Uploads a single-use MLS KeyPackage. The semantics are identical to the standalone [AuthenticationService](auth-schema.md) method, with the addition of the `auth` parameter for access control.
| Parameter | Type | Size | Description |
|---|---|---|---|
| `identityKey` | `Data` | 32 bytes | Uploader's raw Ed25519 public key |
| `package` | `Data` | Variable | TLS-encoded openmls KeyPackage blob |
| `auth` | `Auth` | Struct | Authentication context (see [Auth struct](#auth-struct) below) |
**Returns:** `fingerprint :Data` -- 32-byte SHA-256 digest of the stored package.
#### `fetchKeyPackage @1`
```
fetchKeyPackage (identityKey :Data, auth :Auth) -> (package :Data)
```
Fetches and atomically removes one KeyPackage for the specified identity key. Returns empty `Data` if no packages are stored. See [Auth Schema](auth-schema.md) for full single-use semantics and [ADR-005](../design-rationale/adr-005-single-use-keypackages.md) for the design rationale.
### Delivery methods
#### `enqueue @2`
```
enqueue (recipientKey :Data, payload :Data, channelId :Data, version :UInt16, auth :Auth) -> ()
```
Enqueues an opaque payload for delivery. The semantics are identical to the standalone [DeliveryService](delivery-schema.md) `enqueue` method, with the addition of the `auth` parameter.
| Parameter | Type | Size | Description |
|---|---|---|---|
| `recipientKey` | `Data` | 32 bytes | Recipient's raw Ed25519 public key |
| `payload` | `Data` | Variable | Opaque byte string (typically MLS ciphertext) |
| `channelId` | `Data` | 0 or 16 bytes | Channel identifier (empty for legacy, UUID recommended) |
| `version` | `UInt16` | 2 bytes | Wire version: `0` = legacy, `1` = current |
| `auth` | `Auth` | Struct | Authentication context |
#### `fetch @3`
```
fetch (recipientKey :Data, channelId :Data, version :UInt16, auth :Auth) -> (payloads :List(Data))
```
Fetches and atomically drains all queued payloads for the specified recipient and channel. Returns an empty list if no messages are pending. See [Delivery Schema](delivery-schema.md) for full queue semantics.
#### `fetchWait @4`
```
fetchWait (recipientKey :Data, channelId :Data, version :UInt16, timeoutMs :UInt64, auth :Auth)
-> (payloads :List(Data))
```
**Long-polling variant of `fetch`.** This method blocks on the server side until either:
1. One or more payloads become available in the queue, **or**
2. The `timeoutMs` duration expires.
In case (1), the method returns all available payloads and drains the queue, identical to `fetch`. In case (2), the method returns an empty list.
| Parameter | Type | Description |
|---|---|---|
| `timeoutMs` | `UInt64` | Maximum wait time in milliseconds. A value of `0` means return immediately (equivalent to `fetch`). |
**Why long-polling?** Without `fetchWait`, clients must poll the server at a fixed interval, which wastes bandwidth when no messages are pending and introduces latency equal to half the polling interval on average. Long-polling provides near-real-time delivery while avoiding busy-wait overhead.
**Server implementation:** The server holds the RPC response open until a payload is enqueued for the recipient or the timeout fires. The underlying mechanism is a `tokio::sync::Notify` per recipient, which is woken by `enqueue`.
### Infrastructure methods
#### `health @5`
```
health () -> (status :Text)
```
A readiness/liveness probe that takes no parameters and returns a human-readable status string (e.g., `"ok"`). This method:
- Does not require authentication (`auth` is not a parameter).
- Is suitable for use as a Kubernetes or Docker health check endpoint.
- Can be extended in future versions to report more detailed status (e.g., queue depth, uptime).
### Hybrid KEM methods
#### `uploadHybridKey @6`
```
uploadHybridKey (identityKey :Data, hybridPublicKey :Data) -> ()
```
Uploads the client's hybrid (X25519 + ML-KEM-768) public key for post-quantum sealed envelope encryption. Peers fetch this key to encrypt payloads with post-quantum protection before enqueuing them.
| Parameter | Type | Description |
|---|---|---|
| `identityKey` | `Data` | Uploader's 32-byte Ed25519 public key (index key) |
| `hybridPublicKey` | `Data` | Concatenated X25519 public key (32 bytes) + ML-KEM-768 encapsulation key |
#### `fetchHybridKey @7`
```
fetchHybridKey (identityKey :Data) -> (hybridPublicKey :Data)
```
Fetches a peer's hybrid public key. Unlike `fetchKeyPackage`, this is **not** a destructive operation -- the hybrid key persists across fetches because it is a long-lived public key, not a single-use package.
---
## Auth struct
```capnp
struct Auth {
version @0 :UInt16;
accessToken @1 :Data;
deviceId @2 :Data;
}
```
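The `Auth` struct is passed to the KeyPackage and delivery methods (`uploadKeyPackage`, `fetchKeyPackage`, `enqueue`, `fetch`, `fetchWait`); `health`, `uploadHybridKey`, and `fetchHybridKey` do not take it. It provides a versioned authentication context that supports clean schema evolution.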
### Fields
| Field | Type | Description |
|---|---|---|
| `version` | `UInt16` | Authentication protocol version. Determines how `accessToken` and `deviceId` are interpreted. |
| `accessToken` | `Data` | Opaque bearer token issued at login. The server validates this token against its auth backend. |
| `deviceId` | `Data` | Optional device identifier (UUID bytes). Used for auditing, rate limiting, and per-device session management. |
### Version semantics
| Version | Behavior |
|---|---|
| `0` | **Legacy / no authentication.** The server ignores `accessToken` and `deviceId`. All requests are accepted unconditionally. This is the default for M1-M3 development. |
| `1` | **Token-based authentication.** The server validates `accessToken` and rejects requests with missing or invalid tokens. `deviceId` is used for audit logging. |
### Backward compatibility
The `version` field enables a clean migration path:
1. **Existing clients** that do not set the `Auth` struct (or set `version=0`) continue to work with servers running in legacy mode.
2. **New clients** set `version=1` and provide a valid `accessToken`.
3. **The server** inspects `version` to decide which validation path to use. When the migration is complete, the server can reject `version=0` requests.
This pattern avoids the need for a breaking schema change when authentication is introduced.
---
## Method ordinal summary
| Ordinal | Method | Origin | Category |
|---|---|---|---|
| `@0` | `uploadKeyPackage` | AuthenticationService | Auth |
| `@1` | `fetchKeyPackage` | AuthenticationService | Auth |
| `@2` | `enqueue` | DeliveryService | Delivery |
| `@3` | `fetch` | DeliveryService | Delivery |
| `@4` | `fetchWait` | NodeService (new) | Delivery |
| `@5` | `health` | NodeService (new) | Infrastructure |
| `@6` | `uploadHybridKey` | NodeService (new) | Auth / PQ |
| `@7` | `fetchHybridKey` | NodeService (new) | Auth / PQ |
Ordinals are stable and must not be reused. New methods are appended with the next available ordinal. This is a fundamental Cap'n Proto schema evolution rule: removing a method does not free its ordinal.
---
## Schema evolution
Cap'n Proto supports forward-compatible schema evolution through several mechanisms, all of which are used in the NodeService interface:
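1. **New methods can be added** by appending with a new ordinal. Old clients never call them and are unaffected; a new client that calls a method an old server does not implement receives an `unimplemented` RPC error and can fall back accordingly.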
2. **New struct fields can be added** to `Auth` (or any other struct) by appending with a new field number. A reader that receives an older message lacking the new field sees that field's default value.
3. **The `version` field** provides application-level versioning on top of Cap'n Proto's structural versioning, allowing the server to change validation behavior without changing the schema.
---
## Further reading
- [Wire Format Overview](overview.md) -- serialisation pipeline context
- [Auth Schema](auth-schema.md) -- standalone Authentication Service interface (subset of NodeService)
- [Delivery Schema](delivery-schema.md) -- standalone Delivery Service interface (subset of NodeService)
- [Envelope Schema](envelope-schema.md) -- legacy M1 framing that NodeService replaced
- [Framing Codec](framing-codec.md) -- length-prefixed framing used in the Noise transport path
- [Architecture Overview](../architecture/overview.md) -- system-level view showing NodeService in context
- [ADR-005: Single-Use KeyPackages](../design-rationale/adr-005-single-use-keypackages.md) -- why fetchKeyPackage is destructive
- [ADR-004: MLS-Unaware DS](../design-rationale/adr-004-mls-unaware-ds.md) -- why payloads are opaque

View File

@@ -0,0 +1,107 @@
# Wire Format Overview
This section documents the serialisation pipeline that transforms application-level data structures into encrypted bytes on the wire. Every byte exchanged between quicnprotochat clients and the server passes through this pipeline, so understanding it is prerequisite to reading the protocol deep dives or the server/client source code.
---
## Serialisation pipeline
Data flows through four stages on the send path. The receive path reverses the order.
```text
Stage 1 Stage 2 Stage 3 Stage 4
-------- -------- -------- --------
Application Cap'n Proto Length-prefixed Transport
data serialisation framing encryption
ParsedEnvelope capnp::serialize [u32 LE len][payload] Noise ChaCha20-Poly1305
or RPC call (zero-copy bytes) or QUIC/TLS 1.3
| | | |
v v v v
Rust structs Canonical byte Framed byte stream Encrypted
& method representation ready for transport ciphertext
invocations (no deserialization on the wire
needed on receive)
```
### Stage 1: Application creates a message or RPC call
At the application layer, the client or server constructs a typed Cap'n Proto message. In the legacy Envelope path (M1), this means building an `Envelope` struct with a `MsgType` discriminant, group ID, sender ID, and opaque payload. In the current NodeService path (M3+), this means invoking a Cap'n Proto RPC method such as `enqueue()` or `fetchKeyPackage()`.
- **Envelope** (legacy): see [Envelope Schema](envelope-schema.md)
- **NodeService** (current): see [NodeService Schema](node-service-schema.md)
- **AuthenticationService** (standalone): see [Auth Schema](auth-schema.md)
- **DeliveryService** (standalone): see [Delivery Schema](delivery-schema.md)
### Stage 2: Cap'n Proto serialises to bytes
Cap'n Proto emits the in-memory message in its standard wire representation. This is a **zero-copy** format: the byte layout in memory is identical to the byte layout on the wire. No separate encoding or decoding pass over the data is required; readers can traverse the bytes in-place using pointer arithmetic.
The wire representation consists of:
1. A **segment table** -- the segment count (minus one) followed by the size of each segment in 8-byte words, all encoded as little-endian 32-bit integers and padded to an 8-byte boundary.
2. One or more **segments** -- contiguous runs of 8-byte aligned words containing struct data, list data, and far pointers.
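Cap'n Proto also defines a **canonical form** that is deterministic for a given message, which makes it suitable for signing: two implementations that canonicalise the same logical message will produce identical bytes. Ordinary serialisation is not guaranteed to be canonical (segment layout and object ordering can differ between builders), so a message must be explicitly canonicalised before it is hashed or signed.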
### Stage 3: Length-prefixed framing
Before the serialised bytes enter the transport, they are wrapped in a length-prefixed frame:
```text
+----------------------------+--------------------------------------+
| length (4 bytes, LE u32) | payload (length bytes) |
+----------------------------+--------------------------------------+
```
The length prefix is encoded as a **little-endian** 32-bit unsigned integer. Little-endian was chosen for consistency with Cap'n Proto's own segment table encoding, which also uses little-endian integers. This avoids byte-order confusion when the same buffer contains both framing headers and Cap'n Proto data.
The maximum payload size is **65,535 bytes**, matching the Noise protocol's maximum message size. Frames exceeding this limit are rejected as protocol violations. See [Framing Codec](framing-codec.md) for the full `LengthPrefixedCodec` implementation.
> **Note:** This framing stage applies only to the Noise transport path. The QUIC transport uses native QUIC stream framing, which provides its own length delimitation. Cap'n Proto RPC over QUIC relies on the `capnp-rpc` crate's built-in stream adapter rather than `LengthPrefixedCodec`.
### Stage 4: Transport encryption
The framed byte stream is encrypted by the transport layer:
| Transport | Encryption | Authentication | When Used |
|---|---|---|---|
| **Noise\_XX over TCP** | ChaCha20-Poly1305 (per-session key from XX handshake) | Mutual, via static X25519 keys | M1 stack, peer-to-peer, integration tests |
| **QUIC + TLS 1.3** | AES-128-GCM or ChaCha20-Poly1305 (negotiated by TLS) | Server cert (rustls/quinn) | M3+ primary transport |
In both cases, the transport layer treats the payload as opaque bytes. It does not inspect or interpret the Cap'n Proto content. This clean separation means the serialisation format can evolve independently of the transport.
---
## Little-endian framing rationale
Cap'n Proto uses little-endian encoding for its segment table (the header that precedes each serialised message). The `LengthPrefixedCodec` uses the same byte order for its 4-byte length field. This consistency means:
1. A developer inspecting a raw byte dump sees uniform endianness throughout.
2. On little-endian architectures (x86-64, AArch64 in LE mode), both the framing header and the Cap'n Proto header can be read without byte-swapping.
3. There is no risk of accidentally mixing big-endian and little-endian headers in the same stream.
---
## Schema index
The Cap'n Proto schemas that define the wire-level messages are documented on dedicated pages:
| Schema File | Documentation Page | Purpose |
|---|---|---|
| `schemas/envelope.capnp` | [Envelope Schema](envelope-schema.md) | Legacy message envelope (M1) |
| `schemas/auth.capnp` | [Auth Schema](auth-schema.md) | Authentication Service RPC interface |
| `schemas/delivery.capnp` | [Delivery Schema](delivery-schema.md) | Delivery Service RPC interface |
| `schemas/node.capnp` | [NodeService Schema](node-service-schema.md) | Unified node RPC (current) |
The length-prefixed framing codec that wraps serialised messages is documented at [Framing Codec](framing-codec.md).
---
## Further reading
- [Architecture Overview](../architecture/overview.md) -- system-level view of how services compose
- [Protocol Layers Overview](../protocol-layers/overview.md) -- how transport, framing, and E2E encryption stack
- [ADR-002: Cap'n Proto over MessagePack](../design-rationale/adr-002-capnproto.md) -- why Cap'n Proto was chosen
- [ADR-003: RPC Inside the Noise Tunnel](../design-rationale/adr-003-rpc-inside-noise.md) -- why RPC runs inside the encrypted channel

View File

@@ -9,24 +9,38 @@ interface NodeService {
# Upload a single-use KeyPackage for later retrieval by peers. # Upload a single-use KeyPackage for later retrieval by peers.
# identityKey : Ed25519 public key bytes (32 bytes) # identityKey : Ed25519 public key bytes (32 bytes)
# package : TLS-encoded openmls KeyPackage # package : TLS-encoded openmls KeyPackage
uploadKeyPackage @0 (identityKey :Data, package :Data) -> (fingerprint :Data); # auth : Auth context (versioned). For legacy clients, pass an empty
# struct or version=0.
uploadKeyPackage @0 (identityKey :Data, package :Data, auth :Auth) -> (fingerprint :Data);
# Fetch and atomically remove one KeyPackage for a given identity key. # Fetch and atomically remove one KeyPackage for a given identity key.
# Returns empty Data if none are stored. # Returns empty Data if none are stored.
fetchKeyPackage @1 (identityKey :Data) -> (package :Data); fetchKeyPackage @1 (identityKey :Data, auth :Auth) -> (package :Data);
# Enqueue an opaque payload for delivery to a recipient. # Enqueue an opaque payload for delivery to a recipient.
# channelId : Optional channel identifier (empty for legacy). A 16-byte UUID # channelId : Optional channel identifier (empty for legacy). A 16-byte UUID
# is recommended for 1:1 channels. # is recommended for 1:1 channels.
# version : Schema/wire version. Must be 0 (legacy) or 1 (this spec). # version : Schema/wire version. Must be 0 (legacy) or 1 (this spec).
enqueue @2 (recipientKey :Data, payload :Data, channelId :Data, version :UInt16) -> (); enqueue @2 (recipientKey :Data, payload :Data, channelId :Data, version :UInt16, auth :Auth) -> ();
# Fetch and drain all queued payloads for the recipient. # Fetch and drain all queued payloads for the recipient.
fetch @3 (recipientKey :Data, channelId :Data, version :UInt16) -> (payloads :List(Data)); fetch @3 (recipientKey :Data, channelId :Data, version :UInt16, auth :Auth) -> (payloads :List(Data));
# Long-poll: wait up to timeoutMs for new payloads, then drain queue. # Long-poll: wait up to timeoutMs for new payloads, then drain queue.
fetchWait @4 (recipientKey :Data, channelId :Data, version :UInt16, timeoutMs :UInt64) -> (payloads :List(Data)); fetchWait @4 (recipientKey :Data, channelId :Data, version :UInt16, timeoutMs :UInt64, auth :Auth) -> (payloads :List(Data));
# Health probe for readiness/liveness. # Health probe for readiness/liveness.
health @5 () -> (status :Text); health @5 () -> (status :Text);
# Upload the hybrid (X25519 + ML-KEM-768) public key for sealed envelope encryption.
uploadHybridKey @6 (identityKey :Data, hybridPublicKey :Data) -> ();
# Fetch a peer's hybrid public key (for post-quantum envelope encryption).
fetchHybridKey @7 (identityKey :Data) -> (hybridPublicKey :Data);
}
struct Auth {
version @0 :UInt16; # 0 = legacy/none, 1 = token-based auth
accessToken @1 :Data; # opaque bearer token issued at login
deviceId @2 :Data; # optional UUID bytes for auditing/rate limiting
} }