From d1cd42c82170cae69e8ec3381b0fbed8b237dd71 Mon Sep 17 00:00:00 2001 From: archipelago Date: Wed, 24 Jun 2026 04:58:57 -0400 Subject: [PATCH] fix(orchestrator): stop retrying unrepairable volume chowns every reconcile ensure_running_container_ownership re-probed and re-attempted the in-container chown on every reconcile pass. For a mount that can't be re-owned from inside the userns (observed: mempool-api /data -> 'Operation not permitted'), this burned CPU and logged a WARN on every pass, forever (~6x/30min on .228/.116). Remember hard chown failures in a process-lifetime set keyed by (container-id, dest) and skip the probe+chown for known-unrepairable mounts. Keyed by Id (not name) so a recreated container gets a fresh repair attempt. Verified on .116: one recorded failure at startup, then silent across subsequent reconciles. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/container/prod_orchestrator.rs | 54 +++++++++++++++++-- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs index 73c0ffa6..308fa868 100644 --- a/core/archipelago/src/container/prod_orchestrator.rs +++ b/core/archipelago/src/container/prod_orchestrator.rs @@ -294,6 +294,20 @@ async fn chown_for_rootless_container(uid_gid: &str, path: &str) -> Result<()> { )) } +/// `(container-id, mount-dest)` pairs whose in-container chown returned a hard, +/// permanent failure (e.g. "Operation not permitted" on a mount that can't be +/// re-owned from inside the userns). Remembered for the life of the process so +/// the per-reconcile repair stops re-attempting them — otherwise a single +/// unrepairable mount (observed: mempool-api `/data`) burns CPU + floods the +/// journal on every pass. Keyed by Id so a recreated container retries afresh. 
+fn unrepairable_ownership() -> &'static std::sync::Mutex<std::collections::HashSet<(String, String)>> +{ + static SET: std::sync::OnceLock< + std::sync::Mutex<std::collections::HashSet<(String, String)>>, + > = std::sync::OnceLock::new(); + SET.get_or_init(|| std::sync::Mutex::new(std::collections::HashSet::new())) +} + /// App-agnostic, userns-mapping-proof volume-ownership repair for a RUNNING /// container. /// @@ -332,6 +346,13 @@ async fn ensure_running_container_ownership(name: &str) -> bool { .filter(|g| !g.is_empty()) .unwrap_or_else(|| uid.clone()); + // Stable identity of THIS container instance — used to remember mounts whose + // chown is hard-unrepairable so we stop hammering them every reconcile. Keyed + // by Id (not name) so a recreated container gets a fresh repair attempt. + let cid = podman_stdout(&["inspect", name, "--format", "{{.Id}}"]) + .await + .unwrap_or_default(); + // Writable bind-mount destinations only. let dests = match podman_stdout(&[ "inspect", @@ -359,6 +380,19 @@ async fn ensure_running_container_ownership(name: &str) -> bool { continue; } + // Known hard-unrepairable for this container instance (a previous chown + // returned a permanent error like "Operation not permitted"). Skip the + // probe+chown entirely — retrying every reconcile only burns CPU and + // floods the journal; it will never succeed for this instance. + if !cid.is_empty() + && unrepairable_ownership() + .lock() + .map(|s| s.contains(&(cid.clone(), dest.to_string()))) + .unwrap_or(false) + { + continue; + } + // Drift check: can the service user write here already? let probe = format!( "t=\"{dest}/.archy-wtest.$$\"; touch \"$t\" 2>/dev/null && rm -f \"$t\" 2>/dev/null" @@ -395,11 +429,21 @@ async fn ensure_running_container_ownership(name: &str) -> bool { "repaired unwritable volume ownership (in-container chown)" ); } - Ok(o) => tracing::warn!( - container = %name, dest, - "volume ownership repair failed: {}", - String::from_utf8_lossy(&o.stderr).trim() - ), + Ok(o) => { + // Permanent failure (e.g. 
"Operation not permitted" on a mount + // that simply can't be re-owned from inside the userns). Record + // it so we don't re-attempt every reconcile — log once, loudly. + if !cid.is_empty() { + if let Ok(mut s) = unrepairable_ownership().lock() { + s.insert((cid.clone(), dest.to_string())); + } + } + tracing::warn!( + container = %name, dest, + "volume ownership repair failed (won't retry for this container instance): {}", + String::from_utf8_lossy(&o.stderr).trim() + ) + } Err(e) => { tracing::warn!(container = %name, dest, "volume ownership repair errored: {e}") }