feat: wire up storage latency metrics, uptime gauge, and config timeouts
Instrument DeliveryService (enqueue, fetch) and KeyService (key_package_upload, key_package_fetch) with storage latency histogram recording. Add periodic uptime gauge task (every 15s). Log effective rpc_timeout_secs, storage_timeout_secs, and webtransport_listen (together with drain_timeout_secs) at startup to eliminate dead_code warnings on EffectiveConfig fields.
This commit is contained in:
@@ -80,12 +80,14 @@ impl DeliveryService {
|
|||||||
let mut first_seq = 0;
|
let mut first_seq = 0;
|
||||||
|
|
||||||
for (i, dk) in device_keys.iter().enumerate() {
|
for (i, dk) in device_keys.iter().enumerate() {
|
||||||
|
let start = std::time::Instant::now();
|
||||||
let seq = self.store.enqueue(
|
let seq = self.store.enqueue(
|
||||||
dk,
|
dk,
|
||||||
&req.channel_id,
|
&req.channel_id,
|
||||||
req.payload.clone(),
|
req.payload.clone(),
|
||||||
ttl,
|
ttl,
|
||||||
)?;
|
)?;
|
||||||
|
crate::metrics::record_storage_latency("enqueue", start.elapsed());
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
first_seq = seq;
|
first_seq = seq;
|
||||||
}
|
}
|
||||||
@@ -106,12 +108,14 @@ impl DeliveryService {
|
|||||||
/// The `recipient_key` should be the device-scoped composite key
|
/// The `recipient_key` should be the device-scoped composite key
|
||||||
/// (`identity_key + device_id`) or bare `identity_key` for single-device.
|
/// (`identity_key + device_id`) or bare `identity_key` for single-device.
|
||||||
pub fn fetch(&self, req: FetchReq) -> Result<FetchResp, crate::storage::StorageError> {
|
pub fn fetch(&self, req: FetchReq) -> Result<FetchResp, crate::storage::StorageError> {
|
||||||
|
let start = std::time::Instant::now();
|
||||||
let messages = if req.limit > 0 {
|
let messages = if req.limit > 0 {
|
||||||
self.store
|
self.store
|
||||||
.fetch_limited(&req.recipient_key, &req.channel_id, req.limit as usize)?
|
.fetch_limited(&req.recipient_key, &req.channel_id, req.limit as usize)?
|
||||||
} else {
|
} else {
|
||||||
self.store.fetch(&req.recipient_key, &req.channel_id)?
|
self.store.fetch(&req.recipient_key, &req.channel_id)?
|
||||||
};
|
};
|
||||||
|
crate::metrics::record_storage_latency("fetch", start.elapsed());
|
||||||
|
|
||||||
Ok(FetchResp {
|
Ok(FetchResp {
|
||||||
payloads: messages
|
payloads: messages
|
||||||
|
|||||||
@@ -32,8 +32,10 @@ impl KeyService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let fingerprint: Vec<u8> = Sha256::digest(&req.package).to_vec();
|
let fingerprint: Vec<u8> = Sha256::digest(&req.package).to_vec();
|
||||||
|
let start = std::time::Instant::now();
|
||||||
self.store
|
self.store
|
||||||
.upload_key_package(&req.identity_key, req.package)?;
|
.upload_key_package(&req.identity_key, req.package)?;
|
||||||
|
crate::metrics::record_storage_latency("key_package_upload", start.elapsed());
|
||||||
|
|
||||||
Ok(UploadKeyPackageResp { fingerprint })
|
Ok(UploadKeyPackageResp { fingerprint })
|
||||||
}
|
}
|
||||||
@@ -43,7 +45,9 @@ impl KeyService {
|
|||||||
req: FetchKeyPackageReq,
|
req: FetchKeyPackageReq,
|
||||||
_auth: &CallerAuth,
|
_auth: &CallerAuth,
|
||||||
) -> Result<FetchKeyPackageResp, DomainError> {
|
) -> Result<FetchKeyPackageResp, DomainError> {
|
||||||
|
let start = std::time::Instant::now();
|
||||||
let package = self.store.fetch_key_package(&req.identity_key)?;
|
let package = self.store.fetch_key_package(&req.identity_key)?;
|
||||||
|
crate::metrics::record_storage_latency("key_package_fetch", start.elapsed());
|
||||||
Ok(FetchKeyPackageResp {
|
Ok(FetchKeyPackageResp {
|
||||||
package: package.unwrap_or_default(),
|
package: package.unwrap_or_default(),
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -585,6 +585,27 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Log effective timeout and listener configuration for operator visibility.
|
||||||
|
tracing::info!(
|
||||||
|
rpc_timeout_secs = effective.rpc_timeout_secs,
|
||||||
|
storage_timeout_secs = effective.storage_timeout_secs,
|
||||||
|
drain_timeout_secs = effective.drain_timeout_secs,
|
||||||
|
webtransport_listen = effective.webtransport_listen.as_deref().unwrap_or("disabled"),
|
||||||
|
"effective timeouts and listeners"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Periodic uptime gauge: record server uptime every 15 seconds.
|
||||||
|
{
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let mut interval = tokio::time::interval(std::time::Duration::from_secs(15));
|
||||||
|
loop {
|
||||||
|
interval.tick().await;
|
||||||
|
metrics::record_uptime_seconds(start.elapsed().as_secs_f64());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// capnp-rpc is !Send (Rc internals), so all RPC tasks must stay on a LocalSet.
|
// capnp-rpc is !Send (Rc internals), so all RPC tasks must stay on a LocalSet.
|
||||||
let local = LocalSet::new();
|
let local = LocalSet::new();
|
||||||
local
|
local
|
||||||
|
|||||||
Reference in New Issue
Block a user