fix(hooks): run post_install exec in a transient user scope (fixes cgroup denial)

Live on .228, the post_install `exec` steps failed with "crun: write
cgroup.procs: Permission denied / OCI permission denied". The container's
cgroup sits under archipelago.service's own slice, so a `podman exec`
launched from the service can't place its child process in it. Wrap `exec` in
`systemd-run --user --scope --quiet --collect podman exec …` so it gets its own
delegated cgroup — the same trick `podman_user_scope` uses for pasta starts.
`copy_from_host` (a host-side `cp`, no in-container process) stays direct.

Without this, only copy_from_host worked. indeedhub happened to be unaffected
(its image pre-bakes the nginx config, so the exec steps were no-ops), but the
hook capability is only generally useful once exec works. The hooks unit tests
pass; live verification on .228 is next.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
archipelago 2026-06-21 17:38:23 -04:00
parent fdb465f8ac
commit ff78b31212

View File

@ -97,26 +97,44 @@ async fn run_step(
args.push("exec"); args.push("exec");
args.push(container); args.push(container);
args.extend(exec.iter().map(String::as_str)); args.extend(exec.iter().map(String::as_str));
run_podman(&args).await // `exec` spawns a process INSIDE the container's cgroup. When the
// container was started by archipelago.service, that cgroup is under
// the service's slice and a bare `podman exec` from the service can't
// write its `cgroup.procs` ("crun: ... Permission denied / OCI
// permission denied"). Run it in a transient user scope (its own
// delegated cgroup) — mirrors `podman_user_scope` for pasta starts.
run_podman(&args, /* scoped */ true).await
} }
HookStep::CopyFromHost { copy_from_host } => { HookStep::CopyFromHost { copy_from_host } => {
let abs = resolve_copy_src(&copy_from_host.src, app_id, data_dir)?; let abs = resolve_copy_src(&copy_from_host.src, app_id, data_dir)?;
let abs = abs.to_string_lossy().into_owned(); let abs = abs.to_string_lossy().into_owned();
let dest = format!("{container}:{}", copy_from_host.dest); let dest = format!("{container}:{}", copy_from_host.dest);
run_podman(&["cp", &abs, &dest]).await // `cp` is a host-side copy (no in-container process), so no scope needed.
run_podman(&["cp", &abs, &dest], /* scoped */ false).await
} }
} }
} }
async fn run_podman(args: &[&str]) -> Result<()> { /// Run a podman command, optionally inside a transient systemd user scope. The
/// scope gives the invocation its own delegated cgroup so `podman exec` can
/// place its child process — without it, an exec launched from the service's
/// own cgroup is denied write to the container's `cgroup.procs`.
async fn run_podman(args: &[&str], scoped: bool) -> Result<()> {
let rendered = args.join(" "); let rendered = args.join(" ");
let out = tokio::time::timeout( let mut cmd = if scoped {
HOOK_TIMEOUT, let mut c = tokio::process::Command::new("systemd-run");
tokio::process::Command::new("podman").args(args).output(), c.args(["--user", "--scope", "--quiet", "--collect", "podman"]);
) c.args(args);
.await c
.map_err(|_| anyhow::anyhow!("podman {rendered} timed out after {:?}", HOOK_TIMEOUT))? } else {
.map_err(|e| anyhow::anyhow!("podman {rendered}: {e}"))?; let mut c = tokio::process::Command::new("podman");
c.args(args);
c
};
let out = tokio::time::timeout(HOOK_TIMEOUT, cmd.output())
.await
.map_err(|_| anyhow::anyhow!("podman {rendered} timed out after {:?}", HOOK_TIMEOUT))?
.map_err(|e| anyhow::anyhow!("podman {rendered}: {e}"))?;
if !out.status.success() { if !out.status.success() {
bail!( bail!(