From ff78b31212df6269202dd6fff7a626f95376adbc Mon Sep 17 00:00:00 2001 From: archipelago Date: Sun, 21 Jun 2026 17:38:23 -0400 Subject: [PATCH] fix(hooks): run post_install `exec` in a transient user scope (fixes cgroup denial) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live on .228 the post_install `exec` steps failed with "crun: write cgroup.procs: Permission denied / OCI permission denied": a `podman exec` launched from archipelago.service can't place its child in the container's cgroup (under the service's own slice). Wrap `exec` in `systemd-run --user --scope --quiet --collect podman exec …` so it gets its own delegated cgroup — same trick as `podman_user_scope` for pasta starts. `copy_from_host` (a host-side `cp`, no in-container process) stays direct. Without this only copy_from_host worked; indeedhub happened to be unaffected (its image pre-bakes the nginx config so the exec steps were no-ops), but the hook capability is only generally useful with exec working. hooks unit tests pass; live verify on .228 next. Co-Authored-By: Claude Opus 4.8 (1M context) --- core/archipelago/src/container/hooks.rs | 38 ++++++++++++++++++------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/core/archipelago/src/container/hooks.rs b/core/archipelago/src/container/hooks.rs index f75cf36b..771b6f4e 100644 --- a/core/archipelago/src/container/hooks.rs +++ b/core/archipelago/src/container/hooks.rs @@ -97,26 +97,44 @@ async fn run_step( args.push("exec"); args.push(container); args.extend(exec.iter().map(String::as_str)); - run_podman(&args).await + // `exec` spawns a process INSIDE the container's cgroup. When the + // container was started by archipelago.service, that cgroup is under + // the service's slice and a bare `podman exec` from the service can't + // write its `cgroup.procs` ("crun: ... Permission denied / OCI + // permission denied"). 
Run it in a transient user scope (its own + // delegated cgroup) — mirrors `podman_user_scope` for pasta starts. + run_podman(&args, /* scoped */ true).await } HookStep::CopyFromHost { copy_from_host } => { let abs = resolve_copy_src(&copy_from_host.src, app_id, data_dir)?; let abs = abs.to_string_lossy().into_owned(); let dest = format!("{container}:{}", copy_from_host.dest); - run_podman(&["cp", &abs, &dest]).await + // `cp` is a host-side copy (no in-container process), so no scope needed. + run_podman(&["cp", &abs, &dest], /* scoped */ false).await } } } -async fn run_podman(args: &[&str]) -> Result<()> { +/// Run a podman command, optionally inside a transient systemd user scope. The +/// scope gives the invocation its own delegated cgroup so `podman exec` can +/// place its child process — without it, an exec launched from the service's +/// own cgroup is denied write to the container's `cgroup.procs`. +async fn run_podman(args: &[&str], scoped: bool) -> Result<()> { let rendered = args.join(" "); - let out = tokio::time::timeout( - HOOK_TIMEOUT, - tokio::process::Command::new("podman").args(args).output(), - ) - .await - .map_err(|_| anyhow::anyhow!("podman {rendered} timed out after {:?}", HOOK_TIMEOUT))? - .map_err(|e| anyhow::anyhow!("podman {rendered}: {e}"))?; + let mut cmd = if scoped { + let mut c = tokio::process::Command::new("systemd-run"); + c.args(["--user", "--scope", "--quiet", "--collect", "podman"]); + c.args(args); + c + } else { + let mut c = tokio::process::Command::new("podman"); + c.args(args); + c + }; + let out = tokio::time::timeout(HOOK_TIMEOUT, cmd.output()) + .await + .map_err(|_| anyhow::anyhow!("podman {rendered} timed out after {:?}", HOOK_TIMEOUT))? + .map_err(|e| anyhow::anyhow!("podman {rendered}: {e}"))?; if !out.status.success() { bail!(