fix(orchestrator): chown the whole fresh bind subtree, not just the leaf

ensure_bind_mount_dirs chowned a freshly created bind dir (for an app that
declares no data_uid) with --reference={immediate_parent}. For a NESTED bind source like
jellyfin's /var/lib/archipelago/jellyfin/config (or netbird's .../netbird/
data), `mkdir -p` creates the intermediate <app> dir root:root too, so
referencing the immediate parent just copied ROOT — leaving the dir
unwritable and the app EACCES-crash-looping on reinstall (found by the
all-apps-lifecycle pass: jellyfin "/config/log denied" exit 139;
netbird-server "unable to open database file"). It only ever worked for
direct children of the data root (immich).

Fix: anchor to the nearest PRE-EXISTING ancestor (the rootless data root,
owned by the service user) and chown -R the entire newly-created subtree
to it. Extracted the walk into fresh_subtree_anchor() with a unit test
covering nested / direct / second-volume cases.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
archipelago 2026-06-26 08:58:57 -04:00
parent 38d2bbf570
commit fd3a4ee4ef

View File

@ -701,6 +701,27 @@ async fn remove_stale_podman_socket_path(socket_path: &str) {
}
}
/// For a bind-mount source we're about to `mkdir -p` (as root), work out whose
/// ownership to copy and which directory is the root of the subtree `mkdir -p`
/// will create.
///
/// Returns `(anchor, top)`:
///
/// * `anchor` — the nearest ancestor of `source` that already exists (the dir
///   whose ownership we copy). `None` when `source` has no ancestors at all
///   (`/` or the empty path) or when none of them exist.
/// * `top` — the TOPMOST path on the way from `source` up to the anchor that
///   does not exist yet, i.e. the root of the subtree `mkdir -p` will create.
///   When the immediate parent already exists this is `source` itself.
///
/// Chowning `top` recursively to the anchor fixes nested bind sources
/// (`<dataroot>/<app>/<subdir>`) where `mkdir -p` would otherwise leave the
/// intermediate `<app>` dir root-owned. See `ensure_bind_mount_dirs`.
///
/// `source` itself is never probed; callers decide separately whether it
/// already existed. Call this BEFORE the `mkdir`, otherwise every ancestor
/// exists and the immediate parent is always returned as the anchor.
///
/// Relative sources are anchored to the current directory (`.`): the parent
/// chain of a relative path ends in the empty path, which never "exists", so
/// it is probed as `.` and `top` is never allowed to become empty.
fn fresh_subtree_anchor(source: &Path) -> (Option<PathBuf>, PathBuf) {
    let mut top = source;
    // `ancestors()` yields `source` first, then each successive `parent()`.
    for ancestor in source.ancestors().skip(1) {
        // The last ancestor of a relative path is "", which stands for the
        // current directory.
        let at_cwd = ancestor.as_os_str().is_empty();
        let probe = if at_cwd { Path::new(".") } else { ancestor };
        if probe.exists() {
            return (Some(probe.to_path_buf()), top.to_path_buf());
        }
        if at_cwd {
            // Nothing above the cwd to try; keep `top` at the last real
            // component rather than degrading it to an empty path.
            break;
        }
        top = ancestor;
    }
    (None, top.to_path_buf())
}
/// True when `pid` names a live process (its `/proc/<pid>` entry exists).
/// `pid <= 0` is never alive. (Best-effort: a reused PID can read as alive, but
/// that only delays zombie detection a cycle — it never recreates a healthy one.)
@ -2874,6 +2895,17 @@ impl ProdContainerOrchestrator {
// so the ownership fix-up below only touches a dir we just made.
let source_existed = Path::new(&volume.source).exists();
// Capture — BEFORE the root mkdir — the nearest pre-existing ancestor
// (the "anchor" whose ownership we copy) and the TOPMOST dir `mkdir -p`
// will newly create. For a NESTED bind source like
// `<dataroot>/<app>/<subdir>` (jellyfin /config + /cache, netbird
// /data), `mkdir -p` creates the intermediate `<app>` dir root:root
// too, so referencing the *immediate* parent copied ROOT — leaving the
// dir unwritable and the app EACCES-crash-looping on reinstall. Anchor
// instead to the nearest dir that already existed (the rootless data
// root, owned by the service user) and chown the whole new subtree.
let (anchor, top_created) = fresh_subtree_anchor(Path::new(&volume.source));
let mkdir_status = host_sudo(&["mkdir", "-p", &volume.source])
.await
.with_context(|| format!("mkdir {}", volume.source))?;
@ -2889,23 +2921,19 @@ impl ProdContainerOrchestrator {
// An app that declares no `data_uid` runs as its own root inside the
// container, which rootless Podman maps to the host user running
// archipelago — so a root:root dir is UNWRITABLE from inside and the
// app EACCES-crash-loops the moment it tries to create a subdir
// (observed: immich upload dir `/var/lib/archipelago/immich` after a
// recreate). The in-container ownership self-heal only runs on RUNNING
// containers, so it never fires for an app that crashes on startup.
// Match the new dir to its parent's owner — the rootless data root
// (`/var/lib/archipelago`, owned by the service user) — via
// `--reference`, so there's no host-uid guessing. Only on fresh
// creation, and only when apply_data_uid won't already chown it.
// app EACCES-crash-loops the moment it tries to create a subdir. The
// in-container ownership self-heal only runs on RUNNING containers, so
// it never fires for an app that crashes on startup. Match the new
// subtree to the anchor's owner via `--reference` (no host-uid
// guessing). Only on fresh creation, and only when apply_data_uid
// won't already chown it.
if !source_existed && manifest.app.container.data_uid.is_none() {
if let Some(parent) = Path::new(&volume.source)
.parent()
.map(|p| p.display().to_string())
{
if let Some(anchor) = anchor {
match host_sudo(&[
"chown",
&format!("--reference={parent}"),
&volume.source,
"-R",
&format!("--reference={}", anchor.display()),
&top_created.display().to_string(),
])
.await
{
@ -4319,15 +4347,15 @@ app:
let data_dir = tempfile::tempdir().unwrap();
orch.insert_manifest_for_test(
pull_manifest_with_generated_file(
"meshtastic",
"docker.io/meshtastic/meshtasticd:daily-alpine",
"exampleapp",
"docker.io/example/exampleapp:latest",
data_dir.path().to_string_lossy().as_ref(),
),
PathBuf::from("/tmp/meshtastic"),
PathBuf::from("/tmp/exampleapp"),
)
.await;
orch.install("meshtastic").await.unwrap();
orch.install("exampleapp").await.unwrap();
let config_path = data_dir.path().join("config.yaml");
let config = std::fs::read_to_string(config_path).unwrap();
@ -4335,7 +4363,7 @@ app:
let calls = rt.calls();
assert!(calls
.iter()
.any(|c| c == "create_container:meshtastic:offset=0"));
.any(|c| c == "create_container:exampleapp:offset=0"));
}
#[tokio::test]
@ -4349,15 +4377,15 @@ app:
orch.insert_manifest_for_test(
pull_manifest_with_generated_file(
"meshtastic",
"docker.io/meshtastic/meshtasticd:daily-alpine",
"exampleapp",
"docker.io/example/exampleapp:latest",
data_dir.path().to_string_lossy().as_ref(),
),
PathBuf::from("/tmp/meshtastic"),
PathBuf::from("/tmp/exampleapp"),
)
.await;
orch.install("meshtastic").await.unwrap();
orch.install("exampleapp").await.unwrap();
let config = std::fs::read_to_string(config_path).unwrap();
assert_eq!(config, "key: operator\n");
@ -4374,15 +4402,15 @@ app:
orch.insert_manifest_for_test(
pull_manifest_with_generated_file_overwrite(
"meshtastic",
"docker.io/meshtastic/meshtasticd:daily-alpine",
"exampleapp",
"docker.io/example/exampleapp:latest",
data_dir.path().to_string_lossy().as_ref(),
),
PathBuf::from("/tmp/meshtastic"),
PathBuf::from("/tmp/exampleapp"),
)
.await;
orch.install("meshtastic").await.unwrap();
orch.install("exampleapp").await.unwrap();
let config = std::fs::read_to_string(config_path).unwrap();
assert_eq!(config, "key: new\n");
@ -4893,6 +4921,36 @@ app:
);
}
#[test]
fn fresh_subtree_anchor_handles_nested_and_direct() {
    // The temp dir plays the role of the pre-existing "data root".
    let scratch = tempfile::tempdir().unwrap();
    let data_root = scratch.path();
    let app_dir = data_root.join("jellyfin");

    // Case 1 — direct child (immich-style): nothing sits between the source
    // and the data root, so the data root is the anchor and the source itself
    // is the top of the new subtree.
    let immich = data_root.join("immich");
    assert_eq!(
        fresh_subtree_anchor(&immich),
        (Some(data_root.to_path_buf()), immich.clone())
    );

    // Case 2 — nested (jellyfin-style /config): the intermediate `jellyfin`
    // dir is still missing, so the walk must step over it to reach the data
    // root, and the subtree top is `jellyfin`. A recursive chown of that dir
    // covers both levels. Referencing the immediate parent (`jellyfin`), as
    // the old code did, copies the root ownership `mkdir -p` gives it — the
    // EACCES bug.
    let config = app_dir.join("config");
    assert_eq!(
        fresh_subtree_anchor(&config),
        (Some(data_root.to_path_buf()), app_dir.clone())
    );

    // Case 3 — second volume of the same app: once `jellyfin` exists (as it
    // would after the first volume was created) it becomes the anchor and
    // only `cache` is new.
    std::fs::create_dir(&app_dir).unwrap();
    let cache = app_dir.join("cache");
    assert_eq!(
        fresh_subtree_anchor(&cache),
        (Some(app_dir.clone()), cache.clone())
    );
}
#[test]
fn pid_is_alive_detects_live_and_dead_pids() {
// Our own process is alive.