Files
quicproquo/crates/quicprochat-server/src/metrics.rs
Christian Nennemann 66eca065e0 feat: add in-flight RPC tracking, plugin shutdown hooks, and graceful drain
Replace the fixed 30s sleep-based shutdown drain with actual in-flight RPC
tracking using an Arc<AtomicUsize> counter and RAII InFlightGuard. On
SIGTERM/SIGINT the server now:

1. Stops accepting new client and federation connections
2. Sends QUIC CONNECTION_CLOSE with reason "server shutting down"
3. Polls the in-flight counter until it reaches 0 (or drain timeout)
4. Logs drain progress as RPCs complete
5. Calls plugin on_shutdown hooks before exit

Also adds:
- on_shutdown hook to HookVTable (C-ABI plugin API) and ServerHooks trait
- server_in_flight_rpcs Prometheus gauge metric
- Federation connection tracking via shared in-flight counter
2026-03-21 19:14:06 +01:00

72 lines
2.8 KiB
Rust

//! Prometheus metrics for the server.
//!
//! All counters, histograms, and gauges are recorded through the `metrics` crate
//! and exported by metrics-exporter-prometheus over HTTP on a configurable
//! port (e.g. at the `/metrics` path).
/// Increment the `enqueue_total` counter by one.
///
/// Intended to be called once after a message has been enqueued successfully.
pub fn record_enqueue_total() {
    let enqueues = metrics::counter!("enqueue_total");
    enqueues.increment(1);
}
/// Add an enqueued payload's size to the `enqueue_bytes_total` counter.
///
/// `bytes` is the payload size in bytes; the counter grows by exactly that amount.
pub fn record_enqueue_bytes(bytes: u64) {
    let enqueued_bytes = metrics::counter!("enqueue_bytes_total");
    enqueued_bytes.increment(bytes);
}
/// Increment the `fetch_total` counter by one.
///
/// Intended to be called when a fetch returns successfully.
pub fn record_fetch_total() {
    let fetches = metrics::counter!("fetch_total");
    fetches.increment(1);
}
/// Increment the `fetch_wait_total` counter by one.
///
/// Intended to be called when a `fetch_wait` returns successfully.
pub fn record_fetch_wait_total() {
    let fetch_waits = metrics::counter!("fetch_wait_total");
    fetch_waits.increment(1);
}
/// Set the `delivery_queue_depth` gauge to the sampled queue depth.
///
/// The gauge is a point-in-time sample, meant to be refreshed at enqueue/fetch
/// time. `depth` is cast to `f64` before being stored in the gauge.
pub fn record_delivery_queue_depth(depth: usize) {
    let sampled_depth = depth as f64;
    metrics::gauge!("delivery_queue_depth").set(sampled_depth);
}
/// Increment the `key_package_upload_total` counter by one.
///
/// Intended to be called once per successful KeyPackage upload.
pub fn record_key_package_upload_total() {
    let uploads = metrics::counter!("key_package_upload_total");
    uploads.increment(1);
}
/// Increment the `auth_login_success_total` counter by one.
///
/// Intended to be called when an auth login succeeds, i.e. a session token
/// has been issued.
pub fn record_auth_login_success_total() {
    let successful_logins = metrics::counter!("auth_login_success_total");
    successful_logins.increment(1);
}
/// Increment the `auth_login_failure_total` counter by one.
///
/// Intended to be called once per failed auth login attempt.
pub fn record_auth_login_failure_total() {
    let failed_logins = metrics::counter!("auth_login_failure_total");
    failed_logins.increment(1);
}
/// Increment the `rate_limit_hit_total` counter by one.
///
/// Intended to be called when the rate limiter rejects an enqueue.
pub fn record_rate_limit_hit_total() {
    let rate_limit_hits = metrics::counter!("rate_limit_hit_total");
    rate_limit_hits.increment(1);
}
// ── Storage operation latency ───────────────────────────────────────────────
/// Record how long a storage operation took.
///
/// The sample goes into the `storage_operation_duration_seconds` histogram,
/// labelled with `op = operation`. `duration` is converted to fractional
/// seconds before being recorded. Meant to be called by instrumented Store
/// wrappers.
pub fn record_storage_latency(operation: &'static str, duration: std::time::Duration) {
    let elapsed_seconds = duration.as_secs_f64();
    let latency = metrics::histogram!("storage_operation_duration_seconds", "op" => operation);
    latency.record(elapsed_seconds);
}
// ── In-flight RPCs ────────────────────────────────────────────────────────
/// Set the `server_in_flight_rpcs` gauge to the current in-flight RPC count.
///
/// `count` is the number of RPCs (connections being served) currently in
/// flight; it is cast to `f64` before being stored in the gauge.
pub fn record_in_flight_rpcs(count: usize) {
    let in_flight = count as f64;
    metrics::gauge!("server_in_flight_rpcs").set(in_flight);
}
// ── Server info ────────────────────────────────────────────────────────────
/// Set the `server_uptime_seconds` gauge to `secs`.
///
/// `secs` is the server uptime in seconds; the gauge is expected to be
/// refreshed periodically by the caller.
pub fn record_uptime_seconds(secs: f64) {
    let uptime = metrics::gauge!("server_uptime_seconds");
    uptime.set(secs);
}