//! Append-only Merkle log backed by a flat `Vec` of all leaf hashes. //! //! The tree structure is virtual — roots and paths are computed on-demand from the //! leaf array. This keeps the storage footprint to `32 * n` bytes for `n` leaves. use serde::{Deserialize, Serialize}; use crate::{leaf_hash, node_hash, KtError}; use crate::proof::{InclusionProof, PathStep}; /// An append-only Merkle log of `(username, identity_key)` leaf entries. /// /// Internally stores only the 32-byte SHA-256 leaf hashes. Roots and inclusion /// proofs are recomputed from the flat list on demand. /// /// Persistence: the caller serialises the whole struct with `bincode` and stores /// the bytes in the DB (`kt_log` table). The log is load-on-startup, append-on-write. #[derive(Serialize, Deserialize, Default, Clone)] pub struct MerkleLog { /// All leaf hashes in append order. leaves: Vec<[u8; 32]>, } impl MerkleLog { /// Create an empty log. pub fn new() -> Self { Self::default() } /// Number of leaves in the log. pub fn len(&self) -> u64 { self.leaves.len() as u64 } /// Return `true` if the log has no leaves. pub fn is_empty(&self) -> bool { self.leaves.is_empty() } /// Append a `(username, identity_key)` binding and return the leaf's index. /// /// The leaf hash is computed using the canonical formula: /// `SHA-256(0x00 || SHA-256(username || 0x00 || identity_key))`. pub fn append(&mut self, username: &str, identity_key: &[u8]) -> u64 { let h = leaf_hash(username, identity_key); let idx = self.leaves.len() as u64; self.leaves.push(h); idx } /// Return the current Merkle root hash, or `None` if the log is empty. pub fn root(&self) -> Option<[u8; 32]> { if self.leaves.is_empty() { return None; } Some(merkle_root(&self.leaves)) } /// Generate an inclusion proof for the leaf at `index`. /// /// Returns `Err` if `index >= self.len()`. pub fn inclusion_proof(&self, index: u64) -> Result { let n = self.len(); if index >= n { return Err(KtError::IndexOutOfRange { index, tree_size: n }); } let raw_path = compute_path(&self.leaves, index as usize, self.leaves.len()); let path: Vec = raw_path .into_iter() .map(|(hash, sibling_is_left)| PathStep { hash, sibling_is_left }) .collect(); let root = merkle_root(&self.leaves); Ok(InclusionProof { leaf_index: index, tree_size: n, leaf_hash: self.leaves[index as usize], path, root, }) } /// Find the leaf index for a `(username, identity_key)` pair, if present. /// /// O(n) scan — suitable for small logs. For large-scale deployments a /// username→index index would be maintained separately. pub fn find(&self, username: &str, identity_key: &[u8]) -> Option { let target = leaf_hash(username, identity_key); self.leaves .iter() .position(|h| h == &target) .map(|i| i as u64) } /// Append a pre-computed leaf hash directly (used by revocation entries). /// /// Returns the leaf index. pub fn append_raw(&mut self, hash: [u8; 32]) -> u64 { let idx = self.leaves.len() as u64; self.leaves.push(hash); idx } /// Return log entries in the range `[start, end)` as `(index, leaf_hash)` pairs. /// /// Used for KT audit — clients download the full log and verify inclusion proofs. /// Returns an empty vec if `start >= self.len()`. pub fn audit_log(&self, start: u64, end: u64) -> Vec<(u64, [u8; 32])> { let n = self.len(); let start = start.min(n) as usize; let end = end.min(n) as usize; if start >= end { return Vec::new(); } self.leaves[start..end] .iter() .enumerate() .map(|(i, &h)| ((start + i) as u64, h)) .collect() } /// Serialise the log to bytes (bincode). pub fn to_bytes(&self) -> Result, KtError> { bincode::serialize(self) .map_err(|e| KtError::Serialisation(e.to_string())) } /// Deserialise a log from bytes (bincode). pub fn from_bytes(bytes: &[u8]) -> Result { bincode::deserialize(bytes) .map_err(|e| KtError::Serialisation(e.to_string())) } } /// Compute the Merkle root over a non-empty slice of leaf hashes. /// /// Uses RFC 9162 §2.1 balanced tree construction: when the number of leaves is /// odd, the rightmost leaf is promoted (not duplicated — that's vulnerable to /// second-preimage attacks). Specifically: /// /// - `MTH({d[0]}) = H(0x00 || d[0])` (already computed as `leaf_hash`) /// - `MTH(D[n]) = H(0x01 || MTH(D[0..k]) || MTH(D[k..n]))` where `k` is the /// largest power of two strictly less than `n`. /// /// This is a standard SHA-256 Merkle tree — the leaves are already hashed /// so the recursion just applies the internal-node formula. pub(crate) fn merkle_root(leaves: &[[u8; 32]]) -> [u8; 32] { match leaves.len() { 0 => unreachable!("merkle_root called on empty slice"), 1 => leaves[0], n => { let k = largest_power_of_two_less_than(n); let left = merkle_root(&leaves[..k]); let right = merkle_root(&leaves[k..]); node_hash(&left, &right) } } } /// Compute the path (list of `(sibling_hash, sibling_is_on_left)`) from /// `leaf_idx` to the root, in leaf-to-root order. /// /// `sibling_is_on_left` is `true` when the sibling is the LEFT child of their /// common parent, i.e., the current node being proved is on the RIGHT. pub(crate) fn compute_path( leaves: &[[u8; 32]], leaf_idx: usize, n: usize, ) -> Vec<([u8; 32], bool)> { let mut path = Vec::new(); collect_path(&leaves[..n], leaf_idx, &mut path); path } /// Recurse into the subtree `leaves` (already sub-sliced to the right window). fn collect_path( leaves: &[[u8; 32]], leaf_idx: usize, path: &mut Vec<([u8; 32], bool)>, ) { let n = leaves.len(); if n <= 1 { return; } let k = largest_power_of_two_less_than(n); if leaf_idx < k { // Leaf is in the left subtree; sibling is the right subtree. collect_path(&leaves[..k], leaf_idx, path); let right_root = merkle_root(&leaves[k..]); path.push((right_root, false)); // sibling is on the RIGHT } else { // Leaf is in the right subtree; sibling is the left subtree. collect_path(&leaves[k..], leaf_idx - k, path); let left_root = merkle_root(&leaves[..k]); path.push((left_root, true)); // sibling is on the LEFT } } /// Largest power of two strictly less than `n`. /// Panics if `n < 2`. fn largest_power_of_two_less_than(n: usize) -> usize { assert!(n >= 2, "n must be >= 2"); let mut k = 1usize; while k * 2 < n { k *= 2; } k } #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { use super::*; #[test] fn empty_log_has_no_root() { let log = MerkleLog::new(); assert_eq!(log.root(), None); assert_eq!(log.len(), 0); } #[test] fn single_leaf_root_equals_leaf_hash() { let mut log = MerkleLog::new(); log.append("alice", b"A" as &[u8]); let lh = leaf_hash("alice", b"A"); assert_eq!(log.root(), Some(lh)); } #[test] fn append_returns_correct_index() { let mut log = MerkleLog::new(); assert_eq!(log.append("a", b"k1"), 0); assert_eq!(log.append("b", b"k2"), 1); assert_eq!(log.append("c", b"k3"), 2); assert_eq!(log.len(), 3); } #[test] fn root_changes_on_append() { let mut log = MerkleLog::new(); log.append("alice", b"K1"); let root1 = log.root(); log.append("bob", b"K2"); let root2 = log.root(); assert_ne!(root1, root2); } #[test] fn find_returns_correct_index() { let mut log = MerkleLog::new(); log.append("alice", b"K1"); log.append("bob", b"K2"); log.append("charlie", b"K3"); assert_eq!(log.find("bob", b"K2"), Some(1)); assert_eq!(log.find("missing", b""), None); } #[test] fn inclusion_proof_out_of_range() { let mut log = MerkleLog::new(); log.append("alice", b"K"); assert!(matches!( log.inclusion_proof(1), Err(KtError::IndexOutOfRange { .. }) )); } #[test] fn serialise_roundtrip() { let mut log = MerkleLog::new(); log.append("alice", b"K1"); log.append("bob", b"K2"); let bytes = log.to_bytes().unwrap(); let log2 = MerkleLog::from_bytes(&bytes).unwrap(); assert_eq!(log2.root(), log.root()); assert_eq!(log2.len(), log.len()); } #[test] fn largest_power_of_two_less_than_values() { assert_eq!(largest_power_of_two_less_than(2), 1); assert_eq!(largest_power_of_two_less_than(3), 2); assert_eq!(largest_power_of_two_less_than(4), 2); assert_eq!(largest_power_of_two_less_than(5), 4); assert_eq!(largest_power_of_two_less_than(8), 4); assert_eq!(largest_power_of_two_less_than(9), 8); } }