quicproquo/crates/quicproquo-core/src/transcript.rs

//! Encrypted, tamper-evident message transcript archive.
//!
//! # File format
//!
//! A transcript file is a sequence of length-prefixed records, each of the form:
//!
//! ```text
//! [ u32 len (BE) ][ ChaCha20-Poly1305 ciphertext ]
//! ```
//!
//! Each record contains a CBOR-encoded [`RecordPlain`] as the plaintext:
//!
//! ```text
//! {
//!   "epoch":           u64,   // monotonically increasing record index (0-based)
//!   "sender_identity": bytes, // 32-byte Ed25519 public key (or empty)
//!   "seq":             u64,   // message sequence number
//!   "timestamp_ms":    u64,   // wall-clock timestamp
//!   "plaintext":       text,  // UTF-8 message body
//!   "prev_hash":       bytes, // SHA-256 of the previous ciphertext (all zeros for epoch 0)
//! }
//! ```
//!
//! The AEAD nonce is `epoch` encoded as 12 bytes (big-endian u64 + 4 zero bytes).
//!
//! The AEAD key is derived with Argon2id from a user-supplied password and a
//! random 16-byte salt that is stored unencrypted in the file header:
//!
//! ```text
//! [ b"QPQT" (4) ][ version u8 = 1 ][ salt (16) ][ records... ]
//! ```
//!
//! # Tamper evidence
//!
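//! Each record's plaintext contains the SHA-256 hash of the **ciphertext** of
//! the previous record, forming a hash chain. Because `prev_hash` is stored
//! inside the encrypted plaintext, checking the chain requires the password:
//! [`read_transcript`] decrypts each record and compares its `prev_hash` with
//! the SHA-256 of the preceding ciphertext blob. [`verify_transcript_chain`]
//! runs without the password and therefore only validates the header and the
//! length-prefixed record framing.
//!
//! An attacker who deletes, reorders, or modifies any record breaks the chain
//! (or makes decryption fail). Note that the format carries no end marker or
//! record count, so dropping records from the *end* of the file cannot be
//! detected from the file alone.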

use std::io::Write;

use argon2::{Algorithm, Argon2, Params, Version};
use chacha20poly1305::{
    aead::{Aead, KeyInit, Payload},
    ChaCha20Poly1305, Key, Nonce,
};
use rand::RngCore;
use sha2::{Digest, Sha256};
use zeroize::Zeroizing;

use crate::error::CoreError;

// ── Constants ────────────────────────────────────────────────────────────────

/// Four-byte file signature written at the very start of every transcript.
const MAGIC: &[u8; 4] = b"QPQT";
/// File format version written right after the magic bytes; the header parser
/// rejects any other value.
const VERSION: u8 = 1;
/// Length in bytes of the random Argon2 salt stored in the file header.
const SALT_LEN: usize = 16;
/// Length in bytes of the derived AEAD key.
const KEY_LEN: usize = 32;
/// Length in bytes of the AEAD nonce built from the record index.
const NONCE_LEN: usize = 12;

/// Argon2 memory cost, passed as `m_cost` to `Params::new`.
// NOTE(review): presumably expressed in KiB blocks (i.e. 19 MiB) — confirm
// against the `argon2` crate documentation.
const ARGON2_M_COST: u32 = 19 * 1024;
/// Argon2 time cost (`t_cost`), i.e. the number of passes.
const ARGON2_T_COST: u32 = 2;
/// Argon2 parallelism (`p_cost`).
const ARGON2_P_COST: u32 = 1;

// ── Public types ─────────────────────────────────────────────────────────────

/// A single message record to be written into the transcript.
///
/// All fields are borrowed or `Copy`; the writer serialises them into the
/// record's CBOR plaintext together with the writer-assigned record index
/// (`epoch`) and the chain hash.
pub struct TranscriptRecord<'a> {
    /// Application-level message sequence number within the conversation.
    ///
    /// This is stored as `seq` and is distinct from the record index (`epoch`)
    /// that the writer assigns itself.
    pub seq: u64,
    /// 32-byte Ed25519 sender public key (use `[0u8; 32]` if unknown).
    ///
    /// The writer stores these bytes as given; it does not check the length.
    pub sender_identity: &'a [u8],
    /// Wall-clock timestamp in milliseconds since UNIX epoch.
    pub timestamp_ms: u64,
    /// Plaintext message body.
    pub plaintext: &'a str,
}

/// Writes an encrypted, chained transcript to any [`Write`] sink.
///
/// The writer does not own the sink; the same sink must be passed to `new`
/// and to every subsequent `write_record` call for the output to form one
/// valid transcript.
pub struct TranscriptWriter {
    /// AEAD instance keyed with the password-derived key.
    cipher: ChaCha20Poly1305,
    /// Index of the next record to be written; also the source of its nonce.
    epoch: u64,
    /// SHA-256 of the most recently produced ciphertext (all zeros before the
    /// first record).
    prev_hash: [u8; 32],
}

impl TranscriptWriter {
    /// Create a new transcript, writing the header (magic + version + salt) to `out`.
    ///
    /// A fresh random salt is drawn from the OS RNG for every transcript.
    /// `password` is stretched with Argon2id before use; it is never stored.
    /// The returned writer starts at record index 0 with an all-zero chain hash.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::Mls` if the header cannot be written to `out` or if
    /// key derivation fails.
    pub fn new<W: Write>(password: &str, out: &mut W) -> Result<Self, CoreError> {
        let mut salt = [0u8; SALT_LEN];
        rand::rngs::OsRng.fill_bytes(&mut salt);

        out.write_all(MAGIC).map_err(io_err)?;
        out.write_all(&[VERSION]).map_err(io_err)?;
        out.write_all(&salt).map_err(io_err)?;

        let key = derive_key(password, &salt)?;
        let cipher = ChaCha20Poly1305::new(Key::from_slice(&*key));

        Ok(Self {
            cipher,
            epoch: 0,
            prev_hash: [0u8; 32],
        })
    }

    /// Encrypt and append one record.
    ///
    /// The record is CBOR-encoded together with the current record index and
    /// the hash of the previous ciphertext, encrypted under a nonce derived
    /// from the record index, and written to `out` as
    /// `[u32 length (BE)][ciphertext]`.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::Mls` if encoding or encryption fails, if the
    /// ciphertext is too large for the `u32` length prefix, or if writing to
    /// `out` fails. After an I/O failure the sink may contain a partial
    /// record and should be considered corrupt.
    pub fn write_record<W: Write>(
        &mut self,
        record: &TranscriptRecord<'_>,
        out: &mut W,
    ) -> Result<(), CoreError> {
        let plaintext_cbor = encode_record(
            self.epoch,
            record.sender_identity,
            record.seq,
            record.timestamp_ms,
            record.plaintext,
            &self.prev_hash,
        )?;

        let nonce = epoch_nonce(self.epoch);
        let ct = self
            .cipher
            .encrypt(
                Nonce::from_slice(&nonce),
                Payload {
                    msg: &plaintext_cbor,
                    aad: b"",
                },
            )
            .map_err(|_| CoreError::Mls("transcript encrypt failed".into()))?;

        // The on-disk length prefix is a u32. Reject anything that does not
        // fit instead of silently truncating the prefix and corrupting the
        // file. Nothing has been emitted yet, so bailing out here leaves both
        // the writer state and the sink untouched.
        let len = u32::try_from(ct.len()).map_err(|_| {
            CoreError::Mls("transcript: record too large for u32 length prefix".into())
        })?;

        // Advance the chain state before touching the sink: the record index
        // (and thus the nonce) is then never reused for a different record,
        // even if the write below fails part-way.
        self.prev_hash = Sha256::digest(&ct).into();
        self.epoch += 1;

        // Write length-prefixed ciphertext.
        out.write_all(&len.to_be_bytes()).map_err(io_err)?;
        out.write_all(&ct).map_err(io_err)?;

        Ok(())
    }
}

/// Decrypt and decode every record of a transcript produced by [`TranscriptWriter`].
///
/// The key is re-derived from `password` and the salt found in the file
/// header. Records are returned oldest first, together with a verdict on the
/// hash chain: each record's `prev_hash` is compared against the SHA-256 of
/// the ciphertext that precedes it (all zeros for the first record).
///
/// # Errors
///
/// Returns `CoreError::Mls` when the header is invalid, the record framing is
/// truncated, a record fails to decrypt (e.g. wrong password), or a decrypted
/// record cannot be decoded. A chain mismatch is *not* an error; it is
/// reported as [`ChainVerdict::Broken`].
pub fn read_transcript(
    password: &str,
    data: &[u8],
) -> Result<(Vec<DecodedRecord>, ChainVerdict), CoreError> {
    let (salt, body) = parse_header(data)?;
    let key = derive_key(password, salt)?;
    let cipher = ChaCha20Poly1305::new(Key::from_slice(&*key));

    let mut decoded = Vec::new();
    let mut index: u64 = 0;
    let mut link = [0u8; 32];
    let mut intact = true;
    let mut cursor = body;

    while !cursor.is_empty() {
        // Peel off the 4-byte big-endian length prefix.
        if cursor.len() < 4 {
            return Err(CoreError::Mls("transcript: truncated length prefix".into()));
        }
        let (prefix, tail) = cursor.split_at(4);
        let blob_len = u32::from_be_bytes(prefix.try_into().expect("4 bytes")) as usize;

        // Peel off the ciphertext blob itself.
        if tail.len() < blob_len {
            return Err(CoreError::Mls("transcript: truncated record".into()));
        }
        let (blob, remaining) = tail.split_at(blob_len);
        cursor = remaining;

        // The nonce is derived from the record's position in the file.
        let nonce = epoch_nonce(index);
        let plain = cipher
            .decrypt(
                Nonce::from_slice(&nonce),
                Payload { msg: blob, aad: b"" },
            )
            .map_err(|_| CoreError::Mls("transcript: decryption failed (wrong password?)".into()))?;
        let record = decode_record(&plain)?;

        // A mismatch is remembered but does not stop the read.
        intact &= record.prev_hash == link;

        // The next record must point at this ciphertext.
        link = Sha256::digest(blob).into();
        index += 1;
        decoded.push(record);
    }

    let verdict = if intact {
        ChainVerdict::Ok { records: index }
    } else {
        ChainVerdict::Broken
    };
    Ok((decoded, verdict))
}

/// Check the structural integrity of a transcript without the password.
///
/// Validates the file header and walks the length-prefixed record framing,
/// counting the records.
///
/// This function does **not** verify the hash chain: each record's
/// `prev_hash` lives inside the encrypted plaintext and cannot be read
/// without the key, so modified, reordered, or removed records that keep the
/// framing intact are not detected here. Consequently it never returns
/// [`ChainVerdict::Broken`]; use [`read_transcript`] with the password for a
/// full chain check.
///
/// # Errors
///
/// Returns `CoreError::Mls` if the header is invalid or if a length prefix or
/// record body is truncated.
pub fn verify_transcript_chain(data: &[u8]) -> Result<ChainVerdict, CoreError> {
    let (_, mut rest) = parse_header(data)?;
    let mut count: u64 = 0;

    while !rest.is_empty() {
        if rest.len() < 4 {
            return Err(CoreError::Mls("transcript: truncated length prefix".into()));
        }
        let len = u32::from_be_bytes(rest[..4].try_into().expect("4 bytes")) as usize;
        rest = &rest[4..];

        if rest.len() < len {
            return Err(CoreError::Mls("transcript: truncated record".into()));
        }
        // Skip over the ciphertext blob; its contents cannot be checked
        // without the key.
        rest = &rest[len..];
        count += 1;
    }

    Ok(ChainVerdict::Ok { records: count })
}

/// Result of hash-chain verification.
///
/// Produced by [`read_transcript`] and [`verify_transcript_chain`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChainVerdict {
    /// All records are present and the chain is intact.
    ///
    /// `records` is the number of records that were processed.
    Ok { records: u64 },
    /// At least one hash in the chain did not match.
    ///
    /// [`read_transcript`] reports this when a record's `prev_hash` differs
    /// from the SHA-256 of the preceding ciphertext.
    Broken,
}

/// A decrypted and decoded transcript record.
///
/// Mirrors the fields of the CBOR map stored inside each encrypted record.
#[derive(Debug, Clone)]
pub struct DecodedRecord {
    /// Record index as stored in the record (assigned by the writer, 0-based).
    pub epoch: u64,
    /// Sender identity bytes as stored; empty if the field was absent or not a
    /// byte string.
    pub sender_identity: Vec<u8>,
    /// Application-level message sequence number.
    pub seq: u64,
    /// Wall-clock timestamp in milliseconds.
    pub timestamp_ms: u64,
    /// Message body.
    pub plaintext: String,
    /// SHA-256 of the previous record's ciphertext, as stored in this record.
    pub prev_hash: [u8; 32],
}

// ── Internal helpers ─────────────────────────────────────────────────────────

/// Stretch `password` with Argon2id and `salt` into a `KEY_LEN`-byte AEAD key.
///
/// The key is returned inside [`Zeroizing`] so its bytes are wiped on drop.
/// Parameter or hashing failures are reported as `CoreError::Mls`.
fn derive_key(password: &str, salt: &[u8]) -> Result<Zeroizing<[u8; KEY_LEN]>, CoreError> {
    let hasher = Params::new(ARGON2_M_COST, ARGON2_T_COST, ARGON2_P_COST, Some(KEY_LEN))
        .map(|params| Argon2::new(Algorithm::Argon2id, Version::default(), params))
        .map_err(|e| CoreError::Mls(format!("argon2 params: {e}")))?;

    // Hash straight into the zeroizing buffer so no unprotected copy exists.
    let mut okm = Zeroizing::new([0u8; KEY_LEN]);
    hasher
        .hash_password_into(password.as_bytes(), salt, &mut okm[..])
        .map_err(|e| CoreError::Mls(format!("transcript key derivation: {e}")))?;

    Ok(okm)
}

/// Build the AEAD nonce for record index `epoch`.
///
/// The first eight bytes hold `epoch` in big-endian order; the remaining
/// bytes stay zero.
fn epoch_nonce(epoch: u64) -> [u8; NONCE_LEN] {
    let mut out = [0u8; NONCE_LEN];
    for (slot, byte) in out.iter_mut().zip(epoch.to_be_bytes()) {
        *slot = byte;
    }
    out
}

/// Wrap an I/O failure from the transcript sink into a `CoreError::Mls`.
fn io_err(e: std::io::Error) -> CoreError {
    let message = format!("transcript I/O: {e}");
    CoreError::Mls(message)
}

/// Validate the transcript header and split it off the input.
///
/// The header is `magic (4) | version (1) | salt (SALT_LEN)`. On success the
/// result is `(salt, rest_of_data)`, both borrowed from `data`. A short
/// input, wrong magic, or unsupported version yields `CoreError::Mls`.
fn parse_header(data: &[u8]) -> Result<(&[u8], &[u8]), CoreError> {
    if data.len() < 4 + 1 + SALT_LEN {
        return Err(CoreError::Mls("transcript: file too short".into()));
    }

    let (magic, after_magic) = data.split_at(4);
    if magic != MAGIC {
        return Err(CoreError::Mls("transcript: invalid magic bytes".into()));
    }

    let version = after_magic[0];
    if version != VERSION {
        return Err(CoreError::Mls(format!(
            "transcript: unsupported version {}",
            version
        )));
    }

    // The length check above guarantees the salt is fully present.
    let (salt, records) = after_magic[1..].split_at(SALT_LEN);
    Ok((salt, records))
}

/// Serialise one record into its CBOR plaintext form (via ciborium).
///
/// The result is a CBOR map with the text keys `epoch`, `sender_identity`,
/// `seq`, `timestamp_ms`, `plaintext` and `prev_hash`, emitted in that order.
/// Encoding failures are reported as `CoreError::Mls`.
fn encode_record(
    epoch: u64,
    sender_identity: &[u8],
    seq: u64,
    timestamp_ms: u64,
    plaintext: &str,
    prev_hash: &[u8; 32],
) -> Result<Vec<u8>, CoreError> {
    use ciborium::value::Value;

    // Small constructors keep the entry table below readable.
    let key = |name: &str| Value::Text(name.to_owned());
    let uint = |n: u64| Value::Integer(n.into());

    let entries = vec![
        (key("epoch"), uint(epoch)),
        (key("sender_identity"), Value::Bytes(sender_identity.to_vec())),
        (key("seq"), uint(seq)),
        (key("timestamp_ms"), uint(timestamp_ms)),
        (key("plaintext"), Value::Text(plaintext.to_owned())),
        (key("prev_hash"), Value::Bytes(prev_hash.to_vec())),
    ];

    let mut encoded = Vec::new();
    ciborium::into_writer(&Value::Map(entries), &mut encoded)
        .map_err(|e| CoreError::Mls(format!("transcript CBOR encode: {e}")))?;
    Ok(encoded)
}

/// Parse the CBOR plaintext of one record into a [`DecodedRecord`].
///
/// The input must be a CBOR map. Entries with non-text keys or unknown names
/// are ignored; if a key occurs more than once the last occurrence wins.
/// `epoch`, `seq`, `timestamp_ms`, `plaintext` and `prev_hash` are required
/// (a value of the wrong type counts as missing), and `prev_hash` must be
/// exactly 32 bytes. `sender_identity` is optional and defaults to empty.
fn decode_record(data: &[u8]) -> Result<DecodedRecord, CoreError> {
    use ciborium::value::Value;

    let root: Value = ciborium::from_reader(data)
        .map_err(|e| CoreError::Mls(format!("transcript CBOR decode: {e}")))?;

    let entries = if let Value::Map(entries) = root {
        entries
    } else {
        return Err(CoreError::Mls("transcript: record is not a CBOR map".into()));
    };

    let mut epoch: Option<u64> = None;
    let mut seq: Option<u64> = None;
    let mut timestamp_ms: Option<u64> = None;
    let mut plaintext: Option<String> = None;
    let mut prev_hash_raw: Option<Vec<u8>> = None;
    let mut sender_identity: Vec<u8> = Vec::new();

    for (k, v) in entries {
        let name = match k {
            Value::Text(name) => name,
            _ => continue,
        };
        // Dispatch on field name and, where relevant, on the value's type.
        match (name.as_str(), v) {
            ("epoch", n) => epoch = integer_as_u64(n),
            ("seq", n) => seq = integer_as_u64(n),
            ("timestamp_ms", n) => timestamp_ms = integer_as_u64(n),
            ("sender_identity", Value::Bytes(b)) => sender_identity = b,
            ("plaintext", Value::Text(s)) => plaintext = Some(s),
            ("prev_hash", Value::Bytes(b)) => prev_hash_raw = Some(b),
            _ => {}
        }
    }

    let epoch = epoch.ok_or_else(|| CoreError::Mls("transcript: missing epoch".into()))?;
    let seq = seq.ok_or_else(|| CoreError::Mls("transcript: missing seq".into()))?;
    let timestamp_ms = timestamp_ms
        .ok_or_else(|| CoreError::Mls("transcript: missing timestamp_ms".into()))?;
    let plaintext = plaintext
        .ok_or_else(|| CoreError::Mls("transcript: missing plaintext".into()))?;
    let prev_hash_raw = prev_hash_raw
        .ok_or_else(|| CoreError::Mls("transcript: missing prev_hash".into()))?;

    // The conversion succeeds only for a slice of exactly 32 bytes.
    let prev_hash = <[u8; 32]>::try_from(prev_hash_raw.as_slice())
        .map_err(|_| CoreError::Mls("transcript: prev_hash must be 32 bytes".into()))?;

    Ok(DecodedRecord {
        epoch,
        sender_identity,
        seq,
        timestamp_ms,
        plaintext,
        prev_hash,
    })
}

/// Extract a non-negative CBOR integer as `u64`.
///
/// Returns `None` when `v` is not an integer or when it is negative.
fn integer_as_u64(v: ciborium::value::Value) -> Option<u64> {
    if let ciborium::value::Value::Integer(raw) = v {
        // Widen to i128 first so the sign can be inspected losslessly.
        u64::try_from(i128::from(raw)).ok()
    } else {
        None
    }
}

// ── Tests ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    /// Placeholder sender key shared by all test records.
    const NO_SENDER: [u8; 32] = [0u8; 32];

    /// Start a transcript protected by `password` in a fresh in-memory buffer.
    fn start(password: &str) -> (TranscriptWriter, Vec<u8>) {
        let mut sink = Vec::new();
        let writer = TranscriptWriter::new(password, &mut sink).expect("new writer");
        (writer, sink)
    }

    /// Append one record carrying the placeholder sender key to `sink`.
    fn append(
        writer: &mut TranscriptWriter,
        sink: &mut Vec<u8>,
        seq: u64,
        timestamp_ms: u64,
        body: &str,
    ) -> Result<(), CoreError> {
        let record = TranscriptRecord {
            seq,
            sender_identity: &NO_SENDER,
            timestamp_ms,
            plaintext: body,
        };
        writer.write_record(&record, sink)
    }

    /// A header-only transcript reads back as zero records with an intact chain.
    #[test]
    fn round_trip_empty() {
        let password = "test-password";
        let (_writer, buf) = start(password);

        let (records, verdict) = read_transcript(password, &buf).expect("read");
        assert!(records.is_empty());
        assert_eq!(verdict, ChainVerdict::Ok { records: 0 });
    }

    /// Written records come back in order with sequential epochs.
    #[test]
    fn round_trip_records() {
        let password = "hunter2";
        let (mut writer, mut buf) = start(password);

        let bodies = ["Hello", "Hi there", "How are you?"];
        for (timestamp_ms, body) in (1u64..).map(|i| i * 1000).zip(bodies) {
            append(&mut writer, &mut buf, timestamp_ms / 1000, timestamp_ms, body)
                .expect("write record");
        }

        let (records, verdict) = read_transcript(password, &buf).expect("read");
        assert_eq!(verdict, ChainVerdict::Ok { records: 3 });
        assert_eq!(records.len(), 3);

        let got: Vec<&str> = records.iter().map(|r| r.plaintext.as_str()).collect();
        assert_eq!(got, bodies);

        let epochs: Vec<u64> = records.iter().map(|r| r.epoch).collect();
        assert_eq!(epochs, [0, 1, 2]);
    }

    /// Reading with a different password must fail rather than return garbage.
    #[test]
    fn wrong_password_fails() {
        let (mut writer, mut buf) = start("correct");
        append(&mut writer, &mut buf, 0, 0, "secret").expect("write");

        let result = read_transcript("wrong-password", &buf);
        assert!(result.is_err(), "wrong password should fail decryption");
    }

    /// The password-less check accepts a well-formed file and counts its records.
    #[test]
    fn chain_verify_valid() {
        let (mut writer, mut buf) = start("pw");
        for i in 0..5u64 {
            append(&mut writer, &mut buf, i, i * 1000, "msg").expect("write");
        }

        let verdict = verify_transcript_chain(&buf).expect("verify");
        assert_eq!(verdict, ChainVerdict::Ok { records: 5 });
    }

    /// Cutting bytes off the final record is reported as a parse error.
    #[test]
    fn chain_verify_truncated_record_detected() {
        let (mut writer, mut buf) = start("pw");
        append(&mut writer, &mut buf, 0, 0, "first").expect("write");

        // Truncate the last few bytes — should fail parsing.
        let truncated = &buf[..buf.len() - 5];
        let result = verify_transcript_chain(truncated);
        assert!(result.is_err(), "truncated file must be detected");
    }
}