feat: add graceful shutdown with drain timeout and per-RPC timeouts
Graceful shutdown (Phase 6.4):
- Listen for SIGTERM + SIGINT via tokio::signal
- Configurable drain timeout (--drain-timeout / QPQ_DRAIN_TIMEOUT, default 30s)
- Health endpoint returns "draining" during shutdown for load balancer awareness
- ServerState carries atomic draining flag
- Add RpcStatus::Unavailable (9) for shutdown-related rejections

Per-RPC timeouts (Phase 6.5):
- Add RpcStatus::DeadlineExceeded (8) for server-side timeouts
- MethodRegistry supports default_timeout and per-method timeout overrides
- RPC dispatch wraps handler invocation with tokio::time::timeout
- RequestContext carries optional deadline (Instant) for handlers
- Health: 5s timeout, blob upload/download: 120s timeout, default: 30s
- Config: --rpc-timeout / QPQ_RPC_TIMEOUT, --storage-timeout / QPQ_STORAGE_TIMEOUT
This commit is contained in:
@@ -38,6 +38,12 @@ pub struct FileConfig {
|
||||
pub redact_logs: Option<bool>,
|
||||
/// WebSocket JSON-RPC bridge listen address (e.g. "0.0.0.0:9000").
|
||||
pub ws_listen: Option<String>,
|
||||
/// Graceful shutdown drain timeout in seconds.
|
||||
pub drain_timeout_secs: Option<u64>,
|
||||
/// Default per-RPC timeout in seconds.
|
||||
pub rpc_timeout_secs: Option<u64>,
|
||||
/// Storage/database operation timeout in seconds.
|
||||
pub storage_timeout_secs: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -64,8 +70,18 @@ pub struct EffectiveConfig {
|
||||
pub redact_logs: bool,
|
||||
/// WebSocket JSON-RPC bridge listen address. If set, the bridge is started.
|
||||
pub ws_listen: Option<String>,
|
||||
/// Graceful shutdown drain timeout in seconds.
|
||||
pub drain_timeout_secs: u64,
|
||||
/// Default per-RPC timeout in seconds.
|
||||
pub rpc_timeout_secs: u64,
|
||||
/// Storage/database operation timeout in seconds.
|
||||
pub storage_timeout_secs: u64,
|
||||
}
|
||||
|
||||
/// Graceful shutdown drain timeout, in seconds, used by `merge_config` when the config file sets none.
pub const DEFAULT_DRAIN_TIMEOUT_SECS: u64 = 30;
|
||||
/// Per-RPC timeout, in seconds, used by `merge_config` when the config file sets none.
pub const DEFAULT_RPC_TIMEOUT_SECS: u64 = 30;
|
||||
/// Storage/database operation timeout, in seconds, used by `merge_config` when the config file sets none.
pub const DEFAULT_STORAGE_TIMEOUT_SECS: u64 = 10;
|
||||
|
||||
#[derive(Debug, Default, Deserialize)]
|
||||
pub struct FederationFileConfig {
|
||||
pub enabled: Option<bool>,
|
||||
@@ -234,6 +250,22 @@ pub fn merge_config(args: &crate::Args, file: &FileConfig) -> EffectiveConfig {
|
||||
.clone()
|
||||
.or_else(|| file.ws_listen.clone());
|
||||
|
||||
let drain_timeout_secs = if args.drain_timeout == DEFAULT_DRAIN_TIMEOUT_SECS {
|
||||
file.drain_timeout_secs.unwrap_or(DEFAULT_DRAIN_TIMEOUT_SECS)
|
||||
} else {
|
||||
args.drain_timeout
|
||||
};
|
||||
let rpc_timeout_secs = if args.rpc_timeout == DEFAULT_RPC_TIMEOUT_SECS {
|
||||
file.rpc_timeout_secs.unwrap_or(DEFAULT_RPC_TIMEOUT_SECS)
|
||||
} else {
|
||||
args.rpc_timeout
|
||||
};
|
||||
let storage_timeout_secs = if args.storage_timeout == DEFAULT_STORAGE_TIMEOUT_SECS {
|
||||
file.storage_timeout_secs.unwrap_or(DEFAULT_STORAGE_TIMEOUT_SECS)
|
||||
} else {
|
||||
args.storage_timeout
|
||||
};
|
||||
|
||||
EffectiveConfig {
|
||||
listen,
|
||||
data_dir,
|
||||
@@ -251,6 +283,9 @@ pub fn merge_config(args: &crate::Args, file: &FileConfig) -> EffectiveConfig {
|
||||
plugin_dir,
|
||||
redact_logs,
|
||||
ws_listen,
|
||||
drain_timeout_secs,
|
||||
rpc_timeout_secs,
|
||||
storage_timeout_secs,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user