fix(stop): honour per-app graceful-stop grace in orchestrator stop path
package.stop left slow-to-SIGTERM apps (fedimint/electrumx/bitcoin/btcpay/immich) running: the orchestrator path hardcoded podman API ?t=10 / CLI -t 30 and the CLI wrapper deadline (30s) equalled the -t grace, so the await fired exactly as podman SIGKILLed -> stop reported failed -> state reverted to running. Reproduced live on clean .198 (fedimint). - container/runtime.rs: add ContainerRuntime::stop_container_with_grace (defaulted so mock/dev impls are unchanged); PodmanRuntime honours grace for API + CLI with deadline = grace + 15s buffer; AutoRuntime delegates. New canonical per-app table stop_grace_secs_for() + DEFAULT_STOP_GRACE_SECS / STOP_GRACE_DEADLINE_BUFFER_SECS. - podman_client.rs: stop_container_with_grace uses ?t=<grace> + longer HTTP deadline. - prod_orchestrator::stop: resolve grace = manifest stop_grace_secs (north-star) else the table; pass to quadlet::stop_service_with_timeout AND stop_container_with_grace. - quadlet.rs: stop_service_with_timeout so slow apps aren't SIGKILLed at 45s. - rpc/package/runtime.rs: doc-note its &str stop_timeout_secs mirrors the canonical table. - tests: resolve_stop_grace_secs (manifest field wins / table fallback / default 30). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
470e3c649a
commit
2dad64b2ee
@ -22,6 +22,11 @@ const PODMAN_LOG_TIMEOUT: Duration = Duration::from_secs(15);
|
||||
/// Per-container graceful shutdown timeout in seconds.
|
||||
/// Bitcoin Core needs 600s to flush UTXO set, LND 330s for channel state,
|
||||
/// indexers 300s for index flush, databases 120s for WAL/transaction commit.
|
||||
///
|
||||
/// MIRRORS `archipelago_container::runtime::stop_grace_secs_for` (which returns
|
||||
/// `u64` and is the canonical table used by the orchestrator stop path). This
|
||||
/// `&str` variant exists for the legacy `podman stop -t <s>` call sites here —
|
||||
/// keep the two tables in sync until those are migrated to the orchestrator.
|
||||
pub fn stop_timeout_secs(container_name: &str) -> &'static str {
|
||||
let id = container_name
|
||||
.strip_prefix("archy-")
|
||||
|
||||
@ -171,6 +171,22 @@ pub fn compute_container_name(manifest: &AppManifest) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the graceful-stop grace (seconds) for an app: the manifest
|
||||
/// `stop_grace_secs` extension if declared (manifest-driven, north-star), else
|
||||
/// the historical per-app `stop_timeout_secs` table keyed by container name.
|
||||
pub fn resolve_stop_grace_secs(manifest: &AppManifest, container_name: &str) -> u64 {
|
||||
if let Some(v) = manifest.app.extensions.get("stop_grace_secs") {
|
||||
// Accept either a YAML integer or a numeric string.
|
||||
if let Some(n) = v.as_u64() {
|
||||
return n;
|
||||
}
|
||||
if let Some(n) = v.as_str().and_then(|s| s.trim().parse::<u64>().ok()) {
|
||||
return n;
|
||||
}
|
||||
}
|
||||
archipelago_container::runtime::stop_grace_secs_for(container_name)
|
||||
}
|
||||
|
||||
/// Fingerprint a local build context so a changed source tree (e.g. a rebuilt
|
||||
/// `neode-ui` dist copied into `docker/<ui>/`) forces an image rebuild even
|
||||
/// when the image tag already exists (#34). Walks the context directory and
|
||||
@ -2896,13 +2912,25 @@ impl ContainerOrchestrator for ProdContainerOrchestrator {
|
||||
let lock = self.app_lock(app_id).await;
|
||||
let _guard = lock.lock().await;
|
||||
let name = compute_container_name(&lm.manifest);
|
||||
// Per-app graceful-stop grace: manifest `stop_grace_secs` if declared,
|
||||
// else the historical per-app table. Slow-to-SIGTERM apps (bitcoin-core
|
||||
// 600s, lnd 330s, electrumx 300s, fedimint 60s…) otherwise get a too-short
|
||||
// `podman stop -t` and the stop is reported failed while the container
|
||||
// keeps running. See PRODUCTION-MASTER-PLAN §8b.
|
||||
let grace_secs = resolve_stop_grace_secs(&lm.manifest, &name);
|
||||
// Quadlet-owned containers are restarted by systemd if only `podman stop`
|
||||
// is used. Stop the user service first, then stop the container as a
|
||||
// defensive fallback for legacy/non-Quadlet installs.
|
||||
if let Err(err) = quadlet::stop_service(&format!("{name}.service")).await {
|
||||
// defensive fallback for legacy/non-Quadlet installs. Give systemd the
|
||||
// per-app grace before it force-kills the app-scoped unit.
|
||||
let quadlet_timeout = std::time::Duration::from_secs(
|
||||
grace_secs + archipelago_container::runtime::STOP_GRACE_DEADLINE_BUFFER_SECS,
|
||||
);
|
||||
if let Err(err) =
|
||||
quadlet::stop_service_with_timeout(&format!("{name}.service"), quadlet_timeout).await
|
||||
{
|
||||
tracing::debug!(container = %name, error = %err, "quadlet stop skipped/failed");
|
||||
}
|
||||
match self.runtime.stop_container(&name).await {
|
||||
match self.runtime.stop_container_with_grace(&name, grace_secs).await {
|
||||
Ok(()) => Ok(()),
|
||||
Err(err) => {
|
||||
let stuck_stopping = self
|
||||
@ -3467,6 +3495,37 @@ app:
|
||||
assert_eq!(compute_container_name(&m), "legacy-bitcoin-ui");
|
||||
}
|
||||
|
||||
fn manifest_with_stop_grace(id: &str, grace: &str) -> AppManifest {
|
||||
let yaml = format!(
|
||||
"app:\n id: {id}\n name: {id}\n version: 1.0.0\n stop_grace_secs: {grace}\n container:\n image: foo:1\n"
|
||||
);
|
||||
AppManifest::parse(&yaml).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stop_grace_manifest_field_wins() {
|
||||
// An explicit stop_grace_secs overrides the per-app table (fedimint=60).
|
||||
let m = manifest_with_stop_grace("fedimint", "180");
|
||||
assert_eq!(resolve_stop_grace_secs(&m, "fedimint"), 180);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stop_grace_falls_back_to_table() {
|
||||
// No manifest field → the historical per-app table by container name.
|
||||
let m = pull_manifest("fedimint", "foo:1");
|
||||
assert_eq!(resolve_stop_grace_secs(&m, "fedimint"), 60);
|
||||
let m = pull_manifest("bitcoin-knots", "foo:1");
|
||||
assert_eq!(resolve_stop_grace_secs(&m, "bitcoin-knots"), 600);
|
||||
let m = pull_manifest("electrumx", "foo:1");
|
||||
assert_eq!(resolve_stop_grace_secs(&m, "electrumx"), 300);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stop_grace_unknown_app_defaults_to_30() {
|
||||
let m = pull_manifest("some-unknown-app", "foo:1");
|
||||
assert_eq!(resolve_stop_grace_secs(&m, "some-unknown-app"), 30);
|
||||
}
|
||||
|
||||
async fn orch_with(runtime: Arc<MockRuntime>) -> ProdContainerOrchestrator {
|
||||
let mut orch = ProdContainerOrchestrator::with_runtime(
|
||||
runtime,
|
||||
|
||||
@ -642,7 +642,17 @@ pub async fn restart_service(service: &str) -> Result<()> {
|
||||
|
||||
/// Stop a generated Quadlet service without removing its unit file.
|
||||
pub async fn stop_service(service: &str) -> Result<()> {
|
||||
match systemctl_user_status(&["stop", service], QUADLET_STOP_TIMEOUT).await {
|
||||
stop_service_with_timeout(service, QUADLET_STOP_TIMEOUT).await
|
||||
}
|
||||
|
||||
/// Stop a user service, waiting up to `timeout` for a graceful stop before
|
||||
/// force-killing the app-scoped unit. Slow-to-SIGTERM apps (bitcoin-core ~600s,
|
||||
/// lnd ~330s) must not be SIGKILLed at the default 45s — that risks data
|
||||
/// corruption — so the orchestrator passes the per-app grace here. Never waits
|
||||
/// less than `QUADLET_STOP_TIMEOUT`.
|
||||
pub async fn stop_service_with_timeout(service: &str, timeout: Duration) -> Result<()> {
|
||||
let timeout = timeout.max(QUADLET_STOP_TIMEOUT);
|
||||
match systemctl_user_status(&["stop", service], timeout).await {
|
||||
Ok(status) if status.success() => Ok(()),
|
||||
Ok(status) => Err(anyhow!("systemctl --user stop {service} exited {status}")),
|
||||
Err(err) => {
|
||||
|
||||
@ -422,11 +422,22 @@ impl PodmanClient {
|
||||
}
|
||||
|
||||
pub async fn stop_container(&self, name: &str) -> Result<()> {
|
||||
self.stop_container_with_grace(name, 10).await
|
||||
}
|
||||
|
||||
/// Stop via libpod honouring a per-app grace (seconds). The HTTP deadline is
|
||||
/// kept above the grace so the post-grace SIGKILL lands before we give up —
|
||||
/// otherwise slow-to-SIGTERM apps (fedimint, bitcoin-core, electrumx…) time
|
||||
/// out at exactly the grace boundary and the stop is reported as failed.
|
||||
pub async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> {
|
||||
let deadline = std::time::Duration::from_secs(
|
||||
grace_secs + crate::runtime::STOP_GRACE_DEADLINE_BUFFER_SECS,
|
||||
);
|
||||
self.api_request(
|
||||
"POST",
|
||||
&format!("libpod/containers/{}/stop?t=10", name),
|
||||
&format!("libpod/containers/{}/stop?t={}", name, grace_secs),
|
||||
None,
|
||||
DEFAULT_TIMEOUT,
|
||||
deadline,
|
||||
)
|
||||
.await
|
||||
.map(|_| ())
|
||||
|
||||
@ -10,6 +10,35 @@ const PODMAN_CLI_DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
const PODMAN_CLI_IMAGE_CHECK_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
const PODMAN_CLI_BUILD_TIMEOUT: Duration = Duration::from_secs(900);
|
||||
|
||||
/// Default graceful-stop grace (seconds) when a caller doesn't supply a per-app
|
||||
/// value. Mirrors the historical `podman stop -t 30`.
|
||||
pub const DEFAULT_STOP_GRACE_SECS: u64 = 30;
|
||||
/// Headroom added to a stop grace to form the await/HTTP deadline, so podman's
|
||||
/// post-grace SIGKILL completes before the wrapper times out.
|
||||
pub const STOP_GRACE_DEADLINE_BUFFER_SECS: u64 = 15;
|
||||
|
||||
/// Canonical per-app graceful-stop grace (seconds), keyed by container name.
|
||||
/// Slow-to-SIGTERM apps need far longer than the 30s default: bitcoin-core
|
||||
/// flushes its chainstate, lnd closes channels, electrumx finishes indexing,
|
||||
/// stack DBs checkpoint. Used as the fallback when a manifest doesn't declare
|
||||
/// `stop_grace_secs`. NOTE: the RPC layer's `stop_timeout_secs` mirrors this
|
||||
/// (returns the same values as `&str` for legacy `podman stop -t` call sites) —
|
||||
/// keep the two in sync until that path is retired.
|
||||
pub fn stop_grace_secs_for(container_name: &str) -> u64 {
|
||||
let id = container_name
|
||||
.strip_prefix("archy-")
|
||||
.unwrap_or(container_name);
|
||||
match id {
|
||||
"bitcoin-knots" | "bitcoin-core" | "bitcoin" => 600,
|
||||
"lnd" => 330,
|
||||
"electrumx" | "electrs" | "mempool-electrs" => 300,
|
||||
"btcpay-db" | "mempool-db" | "penpot-postgres" | "immich_postgres" | "nextcloud-db"
|
||||
| "endurain-db" => 120,
|
||||
"btcpay-server" | "nbxplorer" | "fedimint" | "fedimint-gateway" => 60,
|
||||
_ => DEFAULT_STOP_GRACE_SECS,
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait ContainerRuntime: Send + Sync {
|
||||
async fn pull_image(&self, image: &str, signature: Option<&str>) -> Result<()>;
|
||||
@ -21,6 +50,19 @@ pub trait ContainerRuntime: Send + Sync {
|
||||
) -> Result<String>;
|
||||
async fn start_container(&self, name: &str) -> Result<()>;
|
||||
async fn stop_container(&self, name: &str) -> Result<()>;
|
||||
/// Stop a container honouring a per-app graceful-shutdown grace (seconds).
|
||||
///
|
||||
/// Slow-to-SIGTERM apps (bitcoin-core, lnd, electrumx, fedimint, immich…)
|
||||
/// need a longer `podman stop -t` than the default 30s, or `podman stop`
|
||||
/// returns before the container exits and the orchestrator treats the stop
|
||||
/// as failed (the container keeps running). The wrapping deadline is always
|
||||
/// kept strictly greater than `grace_secs` so podman's post-grace SIGKILL
|
||||
/// lands inside the await. The default impl ignores the grace and calls
|
||||
/// `stop_container` — only the real podman runtime honours it.
|
||||
async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> {
|
||||
let _ = grace_secs;
|
||||
self.stop_container(name).await
|
||||
}
|
||||
async fn remove_container(&self, name: &str) -> Result<()>;
|
||||
async fn get_container_status(&self, name: &str) -> Result<ContainerStatus>;
|
||||
async fn get_container_logs(&self, name: &str, lines: u32) -> Result<Vec<String>>;
|
||||
@ -122,10 +164,23 @@ impl ContainerRuntime for PodmanRuntime {
|
||||
}
|
||||
|
||||
async fn stop_container(&self, name: &str) -> Result<()> {
|
||||
match self.client.stop_container(name).await {
|
||||
self.stop_container_with_grace(name, DEFAULT_STOP_GRACE_SECS)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> {
|
||||
match self.client.stop_container_with_grace(name, grace_secs).await {
|
||||
Ok(()) => Ok(()),
|
||||
Err(api_err) => {
|
||||
let output = self.podman_cli(&["stop", "-t", "30", name]).await?;
|
||||
// CLI fallback. Keep the wrapper deadline strictly above the
|
||||
// `-t` grace so podman's post-grace SIGKILL completes before the
|
||||
// await gives up (otherwise a deadline == grace races the kill
|
||||
// and reports a spurious timeout).
|
||||
let grace = grace_secs.to_string();
|
||||
let deadline = Duration::from_secs(grace_secs + STOP_GRACE_DEADLINE_BUFFER_SECS);
|
||||
let output = self
|
||||
.podman_cli_timeout(&["stop", "-t", &grace, name], deadline)
|
||||
.await?;
|
||||
if output.status.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
@ -841,6 +896,10 @@ impl ContainerRuntime for AutoRuntime {
|
||||
self.runtime.stop_container(name).await
|
||||
}
|
||||
|
||||
async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> {
|
||||
self.runtime.stop_container_with_grace(name, grace_secs).await
|
||||
}
|
||||
|
||||
async fn remove_container(&self, name: &str) -> Result<()> {
|
||||
self.runtime.remove_container(name).await
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user