fix(stop): honour per-app graceful-stop grace in orchestrator stop path
package.stop left slow-to-SIGTERM apps (fedimint/electrumx/bitcoin/btcpay/immich) running: the orchestrator path hardcoded podman API ?t=10 / CLI -t 30 and the CLI wrapper deadline (30s) equalled the -t grace, so the await fired exactly as podman SIGKILLed -> stop reported failed -> state reverted to running. Reproduced live on clean .198 (fedimint). - container/runtime.rs: add ContainerRuntime::stop_container_with_grace (defaulted so mock/dev impls are unchanged); PodmanRuntime honours grace for API + CLI with deadline = grace + 15s buffer; AutoRuntime delegates. New canonical per-app table stop_grace_secs_for() + DEFAULT_STOP_GRACE_SECS / STOP_GRACE_DEADLINE_BUFFER_SECS. - podman_client.rs: stop_container_with_grace uses ?t=<grace> + longer HTTP deadline. - prod_orchestrator::stop: resolve grace = manifest stop_grace_secs (north-star) else the table; pass to quadlet::stop_service_with_timeout AND stop_container_with_grace. - quadlet.rs: stop_service_with_timeout so slow apps aren't SIGKILLed at 45s. - rpc/package/runtime.rs: doc-note its &str stop_timeout_secs mirrors the canonical table. - tests: resolve_stop_grace_secs (manifest field wins / table fallback / default 30). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
470e3c649a
commit
2dad64b2ee
@ -22,6 +22,11 @@ const PODMAN_LOG_TIMEOUT: Duration = Duration::from_secs(15);
|
|||||||
/// Per-container graceful shutdown timeout in seconds.
|
/// Per-container graceful shutdown timeout in seconds.
|
||||||
/// Bitcoin Core needs 600s to flush UTXO set, LND 330s for channel state,
|
/// Bitcoin Core needs 600s to flush UTXO set, LND 330s for channel state,
|
||||||
/// indexers 300s for index flush, databases 120s for WAL/transaction commit.
|
/// indexers 300s for index flush, databases 120s for WAL/transaction commit.
|
||||||
|
///
|
||||||
|
/// MIRRORS `archipelago_container::runtime::stop_grace_secs_for` (which returns
|
||||||
|
/// `u64` and is the canonical table used by the orchestrator stop path). This
|
||||||
|
/// `&str` variant exists for the legacy `podman stop -t <s>` call sites here —
|
||||||
|
/// keep the two tables in sync until those are migrated to the orchestrator.
|
||||||
pub fn stop_timeout_secs(container_name: &str) -> &'static str {
|
pub fn stop_timeout_secs(container_name: &str) -> &'static str {
|
||||||
let id = container_name
|
let id = container_name
|
||||||
.strip_prefix("archy-")
|
.strip_prefix("archy-")
|
||||||
|
|||||||
@ -171,6 +171,22 @@ pub fn compute_container_name(manifest: &AppManifest) -> String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve the graceful-stop grace (seconds) for an app: the manifest
|
||||||
|
/// `stop_grace_secs` extension if declared (manifest-driven, north-star), else
|
||||||
|
/// the historical per-app `stop_timeout_secs` table keyed by container name.
|
||||||
|
pub fn resolve_stop_grace_secs(manifest: &AppManifest, container_name: &str) -> u64 {
|
||||||
|
if let Some(v) = manifest.app.extensions.get("stop_grace_secs") {
|
||||||
|
// Accept either a YAML integer or a numeric string.
|
||||||
|
if let Some(n) = v.as_u64() {
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
if let Some(n) = v.as_str().and_then(|s| s.trim().parse::<u64>().ok()) {
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
archipelago_container::runtime::stop_grace_secs_for(container_name)
|
||||||
|
}
|
||||||
|
|
||||||
/// Fingerprint a local build context so a changed source tree (e.g. a rebuilt
|
/// Fingerprint a local build context so a changed source tree (e.g. a rebuilt
|
||||||
/// `neode-ui` dist copied into `docker/<ui>/`) forces an image rebuild even
|
/// `neode-ui` dist copied into `docker/<ui>/`) forces an image rebuild even
|
||||||
/// when the image tag already exists (#34). Walks the context directory and
|
/// when the image tag already exists (#34). Walks the context directory and
|
||||||
@ -2896,13 +2912,25 @@ impl ContainerOrchestrator for ProdContainerOrchestrator {
|
|||||||
let lock = self.app_lock(app_id).await;
|
let lock = self.app_lock(app_id).await;
|
||||||
let _guard = lock.lock().await;
|
let _guard = lock.lock().await;
|
||||||
let name = compute_container_name(&lm.manifest);
|
let name = compute_container_name(&lm.manifest);
|
||||||
|
// Per-app graceful-stop grace: manifest `stop_grace_secs` if declared,
|
||||||
|
// else the historical per-app table. Slow-to-SIGTERM apps (bitcoin-core
|
||||||
|
// 600s, lnd 330s, electrumx 300s, fedimint 60s…) otherwise get a too-short
|
||||||
|
// `podman stop -t` and the stop is reported failed while the container
|
||||||
|
// keeps running. See PRODUCTION-MASTER-PLAN §8b.
|
||||||
|
let grace_secs = resolve_stop_grace_secs(&lm.manifest, &name);
|
||||||
// Quadlet-owned containers are restarted by systemd if only `podman stop`
|
// Quadlet-owned containers are restarted by systemd if only `podman stop`
|
||||||
// is used. Stop the user service first, then stop the container as a
|
// is used. Stop the user service first, then stop the container as a
|
||||||
// defensive fallback for legacy/non-Quadlet installs.
|
// defensive fallback for legacy/non-Quadlet installs. Give systemd the
|
||||||
if let Err(err) = quadlet::stop_service(&format!("{name}.service")).await {
|
// per-app grace before it force-kills the app-scoped unit.
|
||||||
|
let quadlet_timeout = std::time::Duration::from_secs(
|
||||||
|
grace_secs + archipelago_container::runtime::STOP_GRACE_DEADLINE_BUFFER_SECS,
|
||||||
|
);
|
||||||
|
if let Err(err) =
|
||||||
|
quadlet::stop_service_with_timeout(&format!("{name}.service"), quadlet_timeout).await
|
||||||
|
{
|
||||||
tracing::debug!(container = %name, error = %err, "quadlet stop skipped/failed");
|
tracing::debug!(container = %name, error = %err, "quadlet stop skipped/failed");
|
||||||
}
|
}
|
||||||
match self.runtime.stop_container(&name).await {
|
match self.runtime.stop_container_with_grace(&name, grace_secs).await {
|
||||||
Ok(()) => Ok(()),
|
Ok(()) => Ok(()),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
let stuck_stopping = self
|
let stuck_stopping = self
|
||||||
@ -3467,6 +3495,37 @@ app:
|
|||||||
assert_eq!(compute_container_name(&m), "legacy-bitcoin-ui");
|
assert_eq!(compute_container_name(&m), "legacy-bitcoin-ui");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn manifest_with_stop_grace(id: &str, grace: &str) -> AppManifest {
|
||||||
|
let yaml = format!(
|
||||||
|
"app:\n id: {id}\n name: {id}\n version: 1.0.0\n stop_grace_secs: {grace}\n container:\n image: foo:1\n"
|
||||||
|
);
|
||||||
|
AppManifest::parse(&yaml).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stop_grace_manifest_field_wins() {
|
||||||
|
// An explicit stop_grace_secs overrides the per-app table (fedimint=60).
|
||||||
|
let m = manifest_with_stop_grace("fedimint", "180");
|
||||||
|
assert_eq!(resolve_stop_grace_secs(&m, "fedimint"), 180);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stop_grace_falls_back_to_table() {
|
||||||
|
// No manifest field → the historical per-app table by container name.
|
||||||
|
let m = pull_manifest("fedimint", "foo:1");
|
||||||
|
assert_eq!(resolve_stop_grace_secs(&m, "fedimint"), 60);
|
||||||
|
let m = pull_manifest("bitcoin-knots", "foo:1");
|
||||||
|
assert_eq!(resolve_stop_grace_secs(&m, "bitcoin-knots"), 600);
|
||||||
|
let m = pull_manifest("electrumx", "foo:1");
|
||||||
|
assert_eq!(resolve_stop_grace_secs(&m, "electrumx"), 300);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn stop_grace_unknown_app_defaults_to_30() {
|
||||||
|
let m = pull_manifest("some-unknown-app", "foo:1");
|
||||||
|
assert_eq!(resolve_stop_grace_secs(&m, "some-unknown-app"), 30);
|
||||||
|
}
|
||||||
|
|
||||||
async fn orch_with(runtime: Arc<MockRuntime>) -> ProdContainerOrchestrator {
|
async fn orch_with(runtime: Arc<MockRuntime>) -> ProdContainerOrchestrator {
|
||||||
let mut orch = ProdContainerOrchestrator::with_runtime(
|
let mut orch = ProdContainerOrchestrator::with_runtime(
|
||||||
runtime,
|
runtime,
|
||||||
|
|||||||
@ -642,7 +642,17 @@ pub async fn restart_service(service: &str) -> Result<()> {
|
|||||||
|
|
||||||
/// Stop a generated Quadlet service without removing its unit file.
|
/// Stop a generated Quadlet service without removing its unit file.
|
||||||
pub async fn stop_service(service: &str) -> Result<()> {
|
pub async fn stop_service(service: &str) -> Result<()> {
|
||||||
match systemctl_user_status(&["stop", service], QUADLET_STOP_TIMEOUT).await {
|
stop_service_with_timeout(service, QUADLET_STOP_TIMEOUT).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stop a user service, waiting up to `timeout` for a graceful stop before
|
||||||
|
/// force-killing the app-scoped unit. Slow-to-SIGTERM apps (bitcoin-core ~600s,
|
||||||
|
/// lnd ~330s) must not be SIGKILLed at the default 45s — that risks data
|
||||||
|
/// corruption — so the orchestrator passes the per-app grace here. Never waits
|
||||||
|
/// less than `QUADLET_STOP_TIMEOUT`.
|
||||||
|
pub async fn stop_service_with_timeout(service: &str, timeout: Duration) -> Result<()> {
|
||||||
|
let timeout = timeout.max(QUADLET_STOP_TIMEOUT);
|
||||||
|
match systemctl_user_status(&["stop", service], timeout).await {
|
||||||
Ok(status) if status.success() => Ok(()),
|
Ok(status) if status.success() => Ok(()),
|
||||||
Ok(status) => Err(anyhow!("systemctl --user stop {service} exited {status}")),
|
Ok(status) => Err(anyhow!("systemctl --user stop {service} exited {status}")),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
|
|||||||
@ -422,11 +422,22 @@ impl PodmanClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn stop_container(&self, name: &str) -> Result<()> {
|
pub async fn stop_container(&self, name: &str) -> Result<()> {
|
||||||
|
self.stop_container_with_grace(name, 10).await
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stop via libpod honouring a per-app grace (seconds). The HTTP deadline is
|
||||||
|
/// kept above the grace so the post-grace SIGKILL lands before we give up —
|
||||||
|
/// otherwise slow-to-SIGTERM apps (fedimint, bitcoin-core, electrumx…) time
|
||||||
|
/// out at exactly the grace boundary and the stop is reported as failed.
|
||||||
|
pub async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> {
|
||||||
|
let deadline = std::time::Duration::from_secs(
|
||||||
|
grace_secs + crate::runtime::STOP_GRACE_DEADLINE_BUFFER_SECS,
|
||||||
|
);
|
||||||
self.api_request(
|
self.api_request(
|
||||||
"POST",
|
"POST",
|
||||||
&format!("libpod/containers/{}/stop?t=10", name),
|
&format!("libpod/containers/{}/stop?t={}", name, grace_secs),
|
||||||
None,
|
None,
|
||||||
DEFAULT_TIMEOUT,
|
deadline,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.map(|_| ())
|
.map(|_| ())
|
||||||
|
|||||||
@ -10,6 +10,35 @@ const PODMAN_CLI_DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
|
|||||||
const PODMAN_CLI_IMAGE_CHECK_TIMEOUT: Duration = Duration::from_secs(10);
|
const PODMAN_CLI_IMAGE_CHECK_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
const PODMAN_CLI_BUILD_TIMEOUT: Duration = Duration::from_secs(900);
|
const PODMAN_CLI_BUILD_TIMEOUT: Duration = Duration::from_secs(900);
|
||||||
|
|
||||||
|
/// Default graceful-stop grace (seconds) when a caller doesn't supply a per-app
|
||||||
|
/// value. Mirrors the historical `podman stop -t 30`.
|
||||||
|
pub const DEFAULT_STOP_GRACE_SECS: u64 = 30;
|
||||||
|
/// Headroom added to a stop grace to form the await/HTTP deadline, so podman's
|
||||||
|
/// post-grace SIGKILL completes before the wrapper times out.
|
||||||
|
pub const STOP_GRACE_DEADLINE_BUFFER_SECS: u64 = 15;
|
||||||
|
|
||||||
|
/// Canonical per-app graceful-stop grace (seconds), keyed by container name.
|
||||||
|
/// Slow-to-SIGTERM apps need far longer than the 30s default: bitcoin-core
|
||||||
|
/// flushes its chainstate, lnd closes channels, electrumx finishes indexing,
|
||||||
|
/// stack DBs checkpoint. Used as the fallback when a manifest doesn't declare
|
||||||
|
/// `stop_grace_secs`. NOTE: the RPC layer's `stop_timeout_secs` mirrors this
|
||||||
|
/// (returns the same values as `&str` for legacy `podman stop -t` call sites) —
|
||||||
|
/// keep the two in sync until that path is retired.
|
||||||
|
pub fn stop_grace_secs_for(container_name: &str) -> u64 {
|
||||||
|
let id = container_name
|
||||||
|
.strip_prefix("archy-")
|
||||||
|
.unwrap_or(container_name);
|
||||||
|
match id {
|
||||||
|
"bitcoin-knots" | "bitcoin-core" | "bitcoin" => 600,
|
||||||
|
"lnd" => 330,
|
||||||
|
"electrumx" | "electrs" | "mempool-electrs" => 300,
|
||||||
|
"btcpay-db" | "mempool-db" | "penpot-postgres" | "immich_postgres" | "nextcloud-db"
|
||||||
|
| "endurain-db" => 120,
|
||||||
|
"btcpay-server" | "nbxplorer" | "fedimint" | "fedimint-gateway" => 60,
|
||||||
|
_ => DEFAULT_STOP_GRACE_SECS,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait ContainerRuntime: Send + Sync {
|
pub trait ContainerRuntime: Send + Sync {
|
||||||
async fn pull_image(&self, image: &str, signature: Option<&str>) -> Result<()>;
|
async fn pull_image(&self, image: &str, signature: Option<&str>) -> Result<()>;
|
||||||
@ -21,6 +50,19 @@ pub trait ContainerRuntime: Send + Sync {
|
|||||||
) -> Result<String>;
|
) -> Result<String>;
|
||||||
async fn start_container(&self, name: &str) -> Result<()>;
|
async fn start_container(&self, name: &str) -> Result<()>;
|
||||||
async fn stop_container(&self, name: &str) -> Result<()>;
|
async fn stop_container(&self, name: &str) -> Result<()>;
|
||||||
|
/// Stop a container honouring a per-app graceful-shutdown grace (seconds).
|
||||||
|
///
|
||||||
|
/// Slow-to-SIGTERM apps (bitcoin-core, lnd, electrumx, fedimint, immich…)
|
||||||
|
/// need a longer `podman stop -t` than the default 30s, or `podman stop`
|
||||||
|
/// returns before the container exits and the orchestrator treats the stop
|
||||||
|
/// as failed (the container keeps running). The wrapping deadline is always
|
||||||
|
/// kept strictly greater than `grace_secs` so podman's post-grace SIGKILL
|
||||||
|
/// lands inside the await. The default impl ignores the grace and calls
|
||||||
|
/// `stop_container` — only the real podman runtime honours it.
|
||||||
|
async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> {
|
||||||
|
let _ = grace_secs;
|
||||||
|
self.stop_container(name).await
|
||||||
|
}
|
||||||
async fn remove_container(&self, name: &str) -> Result<()>;
|
async fn remove_container(&self, name: &str) -> Result<()>;
|
||||||
async fn get_container_status(&self, name: &str) -> Result<ContainerStatus>;
|
async fn get_container_status(&self, name: &str) -> Result<ContainerStatus>;
|
||||||
async fn get_container_logs(&self, name: &str, lines: u32) -> Result<Vec<String>>;
|
async fn get_container_logs(&self, name: &str, lines: u32) -> Result<Vec<String>>;
|
||||||
@ -122,10 +164,23 @@ impl ContainerRuntime for PodmanRuntime {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn stop_container(&self, name: &str) -> Result<()> {
|
async fn stop_container(&self, name: &str) -> Result<()> {
|
||||||
match self.client.stop_container(name).await {
|
self.stop_container_with_grace(name, DEFAULT_STOP_GRACE_SECS)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> {
|
||||||
|
match self.client.stop_container_with_grace(name, grace_secs).await {
|
||||||
Ok(()) => Ok(()),
|
Ok(()) => Ok(()),
|
||||||
Err(api_err) => {
|
Err(api_err) => {
|
||||||
let output = self.podman_cli(&["stop", "-t", "30", name]).await?;
|
// CLI fallback. Keep the wrapper deadline strictly above the
|
||||||
|
// `-t` grace so podman's post-grace SIGKILL completes before the
|
||||||
|
// await gives up (otherwise a deadline == grace races the kill
|
||||||
|
// and reports a spurious timeout).
|
||||||
|
let grace = grace_secs.to_string();
|
||||||
|
let deadline = Duration::from_secs(grace_secs + STOP_GRACE_DEADLINE_BUFFER_SECS);
|
||||||
|
let output = self
|
||||||
|
.podman_cli_timeout(&["stop", "-t", &grace, name], deadline)
|
||||||
|
.await?;
|
||||||
if output.status.success() {
|
if output.status.success() {
|
||||||
Ok(())
|
Ok(())
|
||||||
} else {
|
} else {
|
||||||
@ -841,6 +896,10 @@ impl ContainerRuntime for AutoRuntime {
|
|||||||
self.runtime.stop_container(name).await
|
self.runtime.stop_container(name).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn stop_container_with_grace(&self, name: &str, grace_secs: u64) -> Result<()> {
|
||||||
|
self.runtime.stop_container_with_grace(name, grace_secs).await
|
||||||
|
}
|
||||||
|
|
||||||
async fn remove_container(&self, name: &str) -> Result<()> {
|
async fn remove_container(&self, name: &str) -> Result<()> {
|
||||||
self.runtime.remove_container(name).await
|
self.runtime.remove_container(name).await
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user