diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs index 308fa868..31f521be 100644 --- a/core/archipelago/src/container/prod_orchestrator.rs +++ b/core/archipelago/src/container/prod_orchestrator.rs @@ -1262,6 +1262,11 @@ impl ProdContainerOrchestrator { async fn reconcile_all_with_mode(&self, mode: ReconcileMode) -> ReconcileReport { let user_stopped = crate::crash_recovery::load_user_stopped(&self.data_dir).await; + // Durable desired-state signal: the container names that were running at + // the last periodic snapshot. Used below to recreate a previously-running + // app whose container vanished (e.g. a wedged teardown cleared by a + // reboot) instead of leaving it down. See the immich .198 incident. + let was_running = crate::crash_recovery::load_last_running_names(&self.data_dir).await; let manifests: Vec = { let state = self.state.read().await; let dependency_required = dependency_manifests_required_by_active_apps( @@ -1295,6 +1300,34 @@ impl ProdContainerOrchestrator { continue; } match self.ensure_running_with_mode(&lm, mode).await { + // Desired-state recovery: the app has no container and was left + // "absent" by boot reconcile, BUT it was running at the last + // snapshot — so its container vanished unexpectedly (a wedged + // teardown cleared by a reboot, a lost container record after a + // crash). It isn't user-stopped (those are filtered out of + // `manifests` above) and it's still installed (manifest present), + // so recreate it rather than leave a previously-running app down. + // Match is exact: compute_container_name == the snapshot's podman + // name (incl. each stack member), so no false positives. The only + // "absent" Left reason is the optional-missing case, so this never + // fires for paused/unknown states. + Ok(ReconcileAction::Left(reason)) + if mode == ReconcileMode::ExistingOnly + && reason == "absent" + && was_running.contains(&compute_container_name(&lm.manifest)) => + { + tracing::warn!( + app_id = %app_id, + "previously-running app has no container after boot — recreating (desired-state recovery)" + ); + match self.install_fresh(&lm).await { + Ok(()) => report.record(&app_id, ReconcileAction::Installed), + Err(e) => { + tracing::error!(app_id = %app_id, error = %e, "desired-state recovery (recreate) failed"); + report.failures.push((app_id, e.to_string())); + } + } + } Ok(action) => report.record(&app_id, action), Err(e) => { tracing::error!(app_id = %app_id, error = %e, "reconcile failed"); @@ -2774,6 +2807,10 @@ impl ProdContainerOrchestrator { continue; } + // Whether the bind source already existed BEFORE we (root) create it, + // so the ownership fix-up below only touches a dir we just made. + let source_existed = Path::new(&volume.source).exists(); + let mkdir_status = host_sudo(&["mkdir", "-p", &volume.source]) .await .with_context(|| format!("mkdir {}", volume.source))?; @@ -2784,6 +2821,43 @@ impl ProdContainerOrchestrator { mkdir_status.code() )); } + + // A bind dir we JUST created is owned root:root (mkdir ran via sudo). + // An app that declares no `data_uid` runs as its own root inside the + // container, which rootless Podman maps to the host user running + // archipelago — so a root:root dir is UNWRITABLE from inside and the + // app EACCES-crash-loops the moment it tries to create a subdir + // (observed: immich upload dir `/var/lib/archipelago/immich` after a + // recreate). The in-container ownership self-heal only runs on RUNNING + // containers, so it never fires for an app that crashes on startup. + // Match the new dir to its parent's owner — the rootless data root + // (`/var/lib/archipelago`, owned by the service user) — via + // `--reference`, so there's no host-uid guessing. Only on fresh + // creation, and only when apply_data_uid won't already chown it. + if !source_existed && manifest.app.container.data_uid.is_none() { + if let Some(parent) = Path::new(&volume.source) + .parent() + .map(|p| p.display().to_string()) + { + match host_sudo(&[ + "chown", + &format!("--reference={parent}"), + &volume.source, + ]) + .await + { + Ok(s) if s.success() => {} + Ok(s) => tracing::warn!( + app_id = %manifest.app.id, dir = %volume.source, + "bind-dir ownership match exited {:?} (app may EACCES)", s.code() + ), + Err(e) => tracing::warn!( + app_id = %manifest.app.id, dir = %volume.source, + "bind-dir ownership match failed (non-fatal): {e}" + ), + } + } + } } Ok(()) } diff --git a/core/archipelago/src/crash_recovery.rs b/core/archipelago/src/crash_recovery.rs index 3f2cbe1a..62a1fbda 100644 --- a/core/archipelago/src/crash_recovery.rs +++ b/core/archipelago/src/crash_recovery.rs @@ -61,6 +61,22 @@ pub async fn load_user_stopped(data_dir: &Path) -> std::collections::HashSet std::collections::HashSet { + let path = data_dir.join(CONTAINER_STATE_FILE); + match fs::read_to_string(&path).await { + Ok(content) => match serde_json::from_str::(&content) { + Ok(snapshot) => snapshot.containers.into_iter().map(|c| c.name).collect(), + Err(_) => std::collections::HashSet::new(), + }, + Err(_) => std::collections::HashSet::new(), + } +} + /// Save the set of user-stopped containers to disk. pub async fn save_user_stopped(data_dir: &Path, stopped: &std::collections::HashSet) { let path = data_dir.join(USER_STOPPED_FILE); @@ -898,6 +914,43 @@ mod tests { assert_eq!(containers[1].name, "archy-mempool-web"); } + #[tokio::test] + async fn test_load_last_running_names_reads_snapshot_without_pid_gate() { + let tmp = TempDir::new().unwrap(); + // No PID file written — load_last_running_names must NOT require a crash. + let snapshot = ContainerSnapshot { + timestamp: 1000, + containers: vec![ + RunningContainerRecord { + name: "immich_server".to_string(), + image: "immich:2.7".to_string(), + }, + RunningContainerRecord { + name: "immich_postgres".to_string(), + image: "postgres:16".to_string(), + }, + ], + }; + fs::write( + tmp.path().join(CONTAINER_STATE_FILE), + serde_json::to_string(&snapshot).unwrap(), + ) + .await + .unwrap(); + + let names = load_last_running_names(tmp.path()).await; + assert_eq!(names.len(), 2); + assert!(names.contains("immich_server")); + assert!(names.contains("immich_postgres")); + assert!(!names.contains("immich_redis")); + } + + #[tokio::test] + async fn test_load_last_running_names_empty_when_absent() { + let tmp = TempDir::new().unwrap(); + assert!(load_last_running_names(tmp.path()).await.is_empty()); + } + #[tokio::test] async fn test_write_and_remove_pid_marker() { let tmp = TempDir::new().unwrap();