diff --git a/core/archipelago/src/api/rpc/package/runtime.rs b/core/archipelago/src/api/rpc/package/runtime.rs index 3f2b1d54..9598f053 100644 --- a/core/archipelago/src/api/rpc/package/runtime.rs +++ b/core/archipelago/src/api/rpc/package/runtime.rs @@ -22,6 +22,11 @@ const PODMAN_LOG_TIMEOUT: Duration = Duration::from_secs(15); /// Per-container graceful shutdown timeout in seconds. /// Bitcoin Core needs 600s to flush UTXO set, LND 330s for channel state, /// indexers 300s for index flush, databases 120s for WAL/transaction commit. +/// +/// MIRRORS `archipelago_container::runtime::stop_grace_secs_for` (which returns +/// `u64` and is the canonical table used by the orchestrator stop path). This +/// `&str` variant exists for the legacy `podman stop -t ` call sites here — +/// keep the two tables in sync until those are migrated to the orchestrator. pub fn stop_timeout_secs(container_name: &str) -> &'static str { let id = container_name .strip_prefix("archy-") diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs index a18c3489..1dac2496 100644 --- a/core/archipelago/src/container/prod_orchestrator.rs +++ b/core/archipelago/src/container/prod_orchestrator.rs @@ -171,6 +171,22 @@ pub fn compute_container_name(manifest: &AppManifest) -> String { } } +/// Resolve the graceful-stop grace (seconds) for an app: the manifest +/// `stop_grace_secs` extension if declared (manifest-driven, north-star), else +/// the historical per-app `stop_timeout_secs` table keyed by container name. +pub fn resolve_stop_grace_secs(manifest: &AppManifest, container_name: &str) -> u64 { + if let Some(v) = manifest.app.extensions.get("stop_grace_secs") { + // Accept either a YAML integer or a numeric string. + if let Some(n) = v.as_u64() { + return n; + } + if let Some(n) = v.as_str().and_then(|s| s.trim().parse::().ok()) { + return n; + } + } + archipelago_container::runtime::stop_grace_secs_for(container_name) +} + /// Fingerprint a local build context so a changed source tree (e.g. a rebuilt /// `neode-ui` dist copied into `docker//`) forces an image rebuild even /// when the image tag already exists (#34). Walks the context directory and @@ -2896,13 +2912,25 @@ impl ContainerOrchestrator for ProdContainerOrchestrator { let lock = self.app_lock(app_id).await; let _guard = lock.lock().await; let name = compute_container_name(&lm.manifest); + // Per-app graceful-stop grace: manifest `stop_grace_secs` if declared, + // else the historical per-app table. Slow-to-SIGTERM apps (bitcoin-core + // 600s, lnd 330s, electrumx 300s, fedimint 60s…) otherwise get a too-short + // `podman stop -t` and the stop is reported failed while the container + // keeps running. See PRODUCTION-MASTER-PLAN §8b. + let grace_secs = resolve_stop_grace_secs(&lm.manifest, &name); // Quadlet-owned containers are restarted by systemd if only `podman stop` // is used. Stop the user service first, then stop the container as a - // defensive fallback for legacy/non-Quadlet installs. - if let Err(err) = quadlet::stop_service(&format!("{name}.service")).await { + // defensive fallback for legacy/non-Quadlet installs. Give systemd the + // per-app grace before it force-kills the app-scoped unit. + let quadlet_timeout = std::time::Duration::from_secs( + grace_secs + archipelago_container::runtime::STOP_GRACE_DEADLINE_BUFFER_SECS, + ); + if let Err(err) = + quadlet::stop_service_with_timeout(&format!("{name}.service"), quadlet_timeout).await + { tracing::debug!(container = %name, error = %err, "quadlet stop skipped/failed"); } - match self.runtime.stop_container(&name).await { + match self.runtime.stop_container_with_grace(&name, grace_secs).await { Ok(()) => Ok(()), Err(err) => { let stuck_stopping = self @@ -3467,6 +3495,37 @@ app: assert_eq!(compute_container_name(&m), "legacy-bitcoin-ui"); } + fn manifest_with_stop_grace(id: &str, grace: &str) -> AppManifest { + let yaml = format!( + "app:\n id: {id}\n name: {id}\n version: 1.0.0\n stop_grace_secs: {grace}\n container:\n image: foo:1\n" + ); + AppManifest::parse(&yaml).unwrap() + } + + #[test] + fn stop_grace_manifest_field_wins() { + // An explicit stop_grace_secs overrides the per-app table (fedimint=60). + let m = manifest_with_stop_grace("fedimint", "180"); + assert_eq!(resolve_stop_grace_secs(&m, "fedimint"), 180); + } + + #[test] + fn stop_grace_falls_back_to_table() { + // No manifest field → the historical per-app table by container name. + let m = pull_manifest("fedimint", "foo:1"); + assert_eq!(resolve_stop_grace_secs(&m, "fedimint"), 60); + let m = pull_manifest("bitcoin-knots", "foo:1"); + assert_eq!(resolve_stop_grace_secs(&m, "bitcoin-knots"), 600); + let m = pull_manifest("electrumx", "foo:1"); + assert_eq!(resolve_stop_grace_secs(&m, "electrumx"), 300); + } + + #[test] + fn stop_grace_unknown_app_defaults_to_30() { + let m = pull_manifest("some-unknown-app", "foo:1"); + assert_eq!(resolve_stop_grace_secs(&m, "some-unknown-app"), 30); + } + async fn orch_with(runtime: Arc) -> ProdContainerOrchestrator { let mut orch = ProdContainerOrchestrator::with_runtime( runtime, diff --git a/core/archipelago/src/container/quadlet.rs b/core/archipelago/src/container/quadlet.rs index 680fa039..2e76cf80 100644 --- a/core/archipelago/src/container/quadlet.rs +++ b/core/archipelago/src/container/quadlet.rs @@ -642,7 +642,17 @@ pub async fn restart_service(service: &str) -> Result<()> { /// Stop a generated Quadlet service without removing its unit file. pub async fn stop_service(service: &str) -> Result<()> { - match systemctl_user_status(&["stop", service], QUADLET_STOP_TIMEOUT).await { + stop_service_with_timeout(service, QUADLET_STOP_TIMEOUT).await +} + +/// Stop a user service, waiting up to `timeout` for a graceful stop before +/// force-killing the app-scoped unit. Slow-to-SIGTERM apps (bitcoin-core ~600s, +/// lnd ~330s) must not be SIGKILLed at the default 45s — that risks data +/// corruption — so the orchestrator passes the per-app grace here. Never waits +/// less than `QUADLET_STOP_TIMEOUT`. +pub async fn stop_service_with_timeout(service: &str, timeout: Duration) -> Result<()> { + let timeout = timeout.max(QUADLET_STOP_TIMEOUT); + match systemctl_user_status(&["stop", service], timeout).await { Ok(status) if status.success() => Ok(()), Ok(status) => Err(anyhow!("systemctl --user stop {service} exited {status}")), Err(err) => { diff --git a/core/container/src/podman_client.rs b/core/container/src/podman_client.rs index 1d4c06c0..fb208e71 100644 --- a/core/container/src/podman_client.rs +++ b/core/container/src/podman_client.rs @@ -422,11 +422,22 @@ impl PodmanClient { } pub async fn stop_container(&self, name: &str) -> Result<()> { + self.stop_container_with_grace(name, 10).await + } + + /// Stop via libpod honouring a per-app grace (seconds). The HTTP deadline is + /// kept above the grace so the post-grace SIGKILL lands before we give up — + /// otherwise slow-to-SIGTERM apps (fedimint, bitcoin-core, electrumx…) time + /// out at exactly the grace boundary and the stop is reported as failed. + pub async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> { + let deadline = std::time::Duration::from_secs( + grace_secs + crate::runtime::STOP_GRACE_DEADLINE_BUFFER_SECS, + ); self.api_request( "POST", - &format!("libpod/containers/{}/stop?t=10", name), + &format!("libpod/containers/{}/stop?t={}", name, grace_secs), None, - DEFAULT_TIMEOUT, + deadline, ) .await .map(|_| ()) diff --git a/core/container/src/runtime.rs b/core/container/src/runtime.rs index e92626ea..ca84775c 100644 --- a/core/container/src/runtime.rs +++ b/core/container/src/runtime.rs @@ -10,6 +10,35 @@ const PODMAN_CLI_DEFAULT_TIMEOUT: Duration = Duration::from_secs(30); const PODMAN_CLI_IMAGE_CHECK_TIMEOUT: Duration = Duration::from_secs(10); const PODMAN_CLI_BUILD_TIMEOUT: Duration = Duration::from_secs(900); +/// Default graceful-stop grace (seconds) when a caller doesn't supply a per-app +/// value. Mirrors the historical `podman stop -t 30`. +pub const DEFAULT_STOP_GRACE_SECS: u64 = 30; +/// Headroom added to a stop grace to form the await/HTTP deadline, so podman's +/// post-grace SIGKILL completes before the wrapper times out. +pub const STOP_GRACE_DEADLINE_BUFFER_SECS: u64 = 15; + +/// Canonical per-app graceful-stop grace (seconds), keyed by container name. +/// Slow-to-SIGTERM apps need far longer than the 30s default: bitcoin-core +/// flushes its chainstate, lnd closes channels, electrumx finishes indexing, +/// stack DBs checkpoint. Used as the fallback when a manifest doesn't declare +/// `stop_grace_secs`. NOTE: the RPC layer's `stop_timeout_secs` mirrors this +/// (returns the same values as `&str` for legacy `podman stop -t` call sites) — +/// keep the two in sync until that path is retired. +pub fn stop_grace_secs_for(container_name: &str) -> u64 { + let id = container_name + .strip_prefix("archy-") + .unwrap_or(container_name); + match id { + "bitcoin-knots" | "bitcoin-core" | "bitcoin" => 600, + "lnd" => 330, + "electrumx" | "electrs" | "mempool-electrs" => 300, + "btcpay-db" | "mempool-db" | "penpot-postgres" | "immich_postgres" | "nextcloud-db" + | "endurain-db" => 120, + "btcpay-server" | "nbxplorer" | "fedimint" | "fedimint-gateway" => 60, + _ => DEFAULT_STOP_GRACE_SECS, + } +} + #[async_trait] pub trait ContainerRuntime: Send + Sync { async fn pull_image(&self, image: &str, signature: Option<&str>) -> Result<()>; @@ -21,6 +50,19 @@ pub trait ContainerRuntime: Send + Sync { ) -> Result; async fn start_container(&self, name: &str) -> Result<()>; async fn stop_container(&self, name: &str) -> Result<()>; + /// Stop a container honouring a per-app graceful-shutdown grace (seconds). + /// + /// Slow-to-SIGTERM apps (bitcoin-core, lnd, electrumx, fedimint, immich…) + /// need a longer `podman stop -t` than the default 30s, or `podman stop` + /// returns before the container exits and the orchestrator treats the stop + /// as failed (the container keeps running). The wrapping deadline is always + /// kept strictly greater than `grace_secs` so podman's post-grace SIGKILL + /// lands inside the await. The default impl ignores the grace and calls + /// `stop_container` — only the real podman runtime honours it. + async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> { + let _ = grace_secs; + self.stop_container(name).await + } async fn remove_container(&self, name: &str) -> Result<()>; async fn get_container_status(&self, name: &str) -> Result; async fn get_container_logs(&self, name: &str, lines: u32) -> Result>; @@ -122,10 +164,23 @@ impl ContainerRuntime for PodmanRuntime { } async fn stop_container(&self, name: &str) -> Result<()> { - match self.client.stop_container(name).await { + self.stop_container_with_grace(name, DEFAULT_STOP_GRACE_SECS) + .await + } + + async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> { + match self.client.stop_container_with_grace(name, grace_secs).await { Ok(()) => Ok(()), Err(api_err) => { - let output = self.podman_cli(&["stop", "-t", "30", name]).await?; + // CLI fallback. Keep the wrapper deadline strictly above the + // `-t` grace so podman's post-grace SIGKILL completes before the + // await gives up (otherwise a deadline == grace races the kill + // and reports a spurious timeout). + let grace = grace_secs.to_string(); + let deadline = Duration::from_secs(grace_secs + STOP_GRACE_DEADLINE_BUFFER_SECS); + let output = self + .podman_cli_timeout(&["stop", "-t", &grace, name], deadline) + .await?; if output.status.success() { Ok(()) } else { @@ -841,6 +896,10 @@ impl ContainerRuntime for AutoRuntime { self.runtime.stop_container(name).await } + async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> { + self.runtime.stop_container_with_grace(name, grace_secs).await + } + async fn remove_container(&self, name: &str) -> Result<()> { self.runtime.remove_container(name).await }