// archy/core/archipelago/src/crash_recovery.rs

// Crash Recovery Module
// Detects unexpected shutdowns and restarts containers that were running before the crash.
//
// How it works:
// 1. On startup, write a PID file as a "running" marker
// 2. Periodically snapshot which containers are running to a state file
// 3. On clean shutdown, remove the PID file
// 4. On next startup, if the PID file exists → previous instance crashed
// 5. On crash recovery: read the saved container list, restart them, log actions

use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::process::Output;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::{Duration, Instant};
use tokio::fs;
use tracing::{info, warn};

/// File name (joined onto the data dir) of the PID marker written while an instance runs.
const PID_FILE: &str = "archipelago.pid";
/// File name (joined onto the data dir) of the periodic running-container snapshot.
const CONTAINER_STATE_FILE: &str = "running-containers.json";
/// File name (joined onto the data dir) of the persisted set of user-stopped container names.
const USER_STOPPED_FILE: &str = "user-stopped.json";

/// Shared flag: true once boot recovery is complete. Health monitor should wait for this.
pub static RECOVERY_COMPLETE: AtomicBool = AtomicBool::new(false);

/// Process start time for uptime calculation.
static START_TIME: std::sync::OnceLock<Instant> = std::sync::OnceLock::new();

/// Initialize the start time. Call once at startup.
///
/// Records `Instant::now()` in `START_TIME` on the first call. Because this goes
/// through `OnceLock::get_or_init`, later calls leave the stored instant untouched.
pub fn init_start_time() {
    START_TIME.get_or_init(Instant::now);
}

/// Whole seconds elapsed since the start instant was recorded in `START_TIME`.
///
/// Returns 0 when the start time has not been initialized yet.
pub fn uptime_seconds() -> u64 {
    match START_TIME.get() {
        Some(started_at) => started_at.elapsed().as_secs(),
        None => 0,
    }
}

/// Mark boot recovery as complete. Call after crash recovery + start_stopped_containers finish.
///
/// Stores `true` into `RECOVERY_COMPLETE` with `SeqCst` ordering and logs that the
/// health monitor may proceed.
pub fn mark_recovery_complete() {
    RECOVERY_COMPLETE.store(true, Ordering::SeqCst);
    info!("Boot recovery complete — health monitor may proceed");
}

/// Check if boot recovery is done.
///
/// Returns the current value of `RECOVERY_COMPLETE`, loaded with `SeqCst` ordering.
pub fn is_recovery_complete() -> bool {
    RECOVERY_COMPLETE.load(Ordering::SeqCst)
}

// ── User-stopped tracking ───────────────────────────────────────────────
// When a user explicitly stops a container via the UI, we record it here
// so crash recovery and health monitor don't auto-restart it.

/// Load the set of user-stopped containers from disk.
pub async fn load_user_stopped(data_dir: &Path) -> std::collections::HashSet<String> {
    let path = data_dir.join(USER_STOPPED_FILE);
    match fs::read_to_string(&path).await {
        Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
        Err(_) => std::collections::HashSet::new(),
    }
}

/// Save the set of user-stopped containers to disk.
pub async fn save_user_stopped(data_dir: &Path, stopped: &std::collections::HashSet<String>) {
    let path = data_dir.join(USER_STOPPED_FILE);
    if let Ok(json) = serde_json::to_string_pretty(stopped) {
        let _ = fs::write(&path, json).await;
    }
}

/// Record `name` as stopped by the user so it is not auto-restarted.
///
/// Loads the persisted set, adds the name, and always writes the set back.
pub async fn mark_user_stopped(data_dir: &Path, name: &str) {
    let mut current = load_user_stopped(data_dir).await;
    current.insert(name.to_owned());
    save_user_stopped(data_dir, &current).await;
}

/// Drop the user-stopped flag for `name` (the user started the container again).
///
/// The persisted set is only rewritten when the name was actually present.
pub async fn clear_user_stopped(data_dir: &Path, name: &str) {
    let mut current = load_user_stopped(data_dir).await;
    let was_flagged = current.remove(name);
    if !was_flagged {
        return;
    }
    save_user_stopped(data_dir, &current).await;
}

/// One container entry in the crash-recovery snapshot.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunningContainerRecord {
    /// Container name; this is the value passed to `podman start` during recovery.
    pub name: String,
    /// Image reference, used for logging only. The snapshot writer stores "unknown"
    /// when podman reports no image; boot recovery builds records with an empty string.
    pub image: String,
}

/// On-disk format of the running-container snapshot (`CONTAINER_STATE_FILE`).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ContainerSnapshot {
    /// When the snapshot was taken, in seconds since the Unix epoch.
    pub timestamp: u64,
    /// Containers that were running at snapshot time.
    pub containers: Vec<RunningContainerRecord>,
}

/// Check if the previous instance crashed (PID file exists without a clean shutdown).
/// Returns the list of containers that were running before the crash, if any.
pub async fn check_for_crash(data_dir: &Path) -> Result<Option<Vec<RunningContainerRecord>>> {
    let pid_path = data_dir.join(PID_FILE);

    if !pid_path.exists() {
        return Ok(None);
    }

    // PID file exists — previous instance didn't shut down cleanly
    let old_pid = fs::read_to_string(&pid_path)
        .await
        .unwrap_or_default()
        .trim()
        .to_string();
    warn!(
        "Crash detected: previous instance (PID {}) did not shut down cleanly",
        old_pid
    );

    // Check if that PID is actually still running (zombie/stuck process)
    if !old_pid.is_empty() {
        if let Ok(pid) = old_pid.parse::<u32>() {
            if is_process_running(pid) {
                warn!(
                    "Previous process (PID {}) is still running — not a crash, skipping recovery",
                    pid
                );
                // Remove stale PID file and skip recovery
                let _ = fs::remove_file(&pid_path).await;
                return Ok(None);
            }
        }
    }

    // Load the saved container snapshot
    let state_path = data_dir.join(CONTAINER_STATE_FILE);
    let containers = if state_path.exists() {
        match fs::read_to_string(&state_path).await {
            Ok(content) => match serde_json::from_str::<ContainerSnapshot>(&content) {
                Ok(snapshot) => {
                    info!(
                        "Found {} containers from pre-crash snapshot (saved at {})",
                        snapshot.containers.len(),
                        snapshot.timestamp
                    );
                    snapshot.containers
                }
                Err(e) => {
                    warn!("Failed to parse container snapshot: {}", e);
                    Vec::new()
                }
            },
            Err(e) => {
                warn!("Failed to read container snapshot: {}", e);
                Vec::new()
            }
        }
    } else {
        info!("No container snapshot found — cannot determine which containers were running");
        Vec::new()
    };

    // Clean up the stale PID file
    let _ = fs::remove_file(&pid_path).await;

    if containers.is_empty() {
        Ok(None)
    } else {
        Ok(Some(containers))
    }
}

/// Create the "instance is running" marker: write our PID into `PID_FILE` in `data_dir`.
///
/// # Errors
/// Returns an error (with context "Failed to write PID marker") when the file
/// cannot be written.
pub async fn write_pid_marker(data_dir: &Path) -> Result<()> {
    let marker = data_dir.join(PID_FILE);
    let contents = std::process::id().to_string();
    fs::write(&marker, contents)
        .await
        .context("Failed to write PID marker")
}

/// Delete the PID marker as part of a clean shutdown.
///
/// A failed removal is only logged as a warning; nothing is returned to the caller.
pub async fn remove_pid_marker(data_dir: &Path) {
    let marker = data_dir.join(PID_FILE);
    match fs::remove_file(&marker).await {
        Ok(()) => {}
        Err(err) => warn!("Failed to remove PID marker: {}", err),
    }
}

/// Save a snapshot of currently running containers to disk.
/// Called periodically so we know what to restart after a crash.
///
/// Runs `podman ps --format json` (30s timeout), extracts each container's first
/// name and its image, and stores them together with the current Unix timestamp in
/// `CONTAINER_STATE_FILE` inside `data_dir`.
///
/// # Errors
/// Returns an error — and leaves any existing snapshot untouched — when podman
/// fails or times out, when its output cannot be parsed, or when the snapshot file
/// cannot be written. Keeping the previous snapshot is deliberate: a stale list is
/// more useful for recovery than an empty or truncated one.
pub async fn save_container_snapshot(data_dir: &Path) -> Result<()> {
    let mut cmd = tokio::process::Command::new("podman");
    cmd.args(["ps", "--format", "json"]);
    let output = command_with_timeout(cmd, Duration::from_secs(30), "podman ps").await?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        anyhow::bail!("podman ps failed: {}", stderr);
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let listing = stdout.trim();
    // Empty output and JSON `null` both mean "no running containers". Anything else
    // that fails to parse is an error: silently mapping it to an empty list would
    // overwrite a good snapshot with nothing.
    let containers: Vec<serde_json::Value> = if listing.is_empty() {
        Vec::new()
    } else {
        serde_json::from_str::<Option<Vec<serde_json::Value>>>(listing)
            .context("Failed to parse podman ps output")?
            .unwrap_or_default()
    };

    let records: Vec<RunningContainerRecord> = containers
        .iter()
        .filter_map(|c| {
            let names = c.get("Names")?;
            // Podman returns Names as an array; tolerate a plain string as well.
            let name = match names.as_array() {
                Some(arr) => arr.first().and_then(|n| n.as_str()),
                None => names.as_str(),
            }?
            .to_string();
            let image = c
                .get("Image")
                .and_then(|v| v.as_str())
                .unwrap_or("unknown")
                .to_string();
            Some(RunningContainerRecord { name, image })
        })
        .collect();

    let snapshot = ContainerSnapshot {
        timestamp: std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs(),
        containers: records,
    };

    let json = serde_json::to_string_pretty(&snapshot)
        .context("Failed to serialize container snapshot")?;

    // Write to a sibling temp file and rename it into place, so a crash in the
    // middle of the write cannot leave a truncated snapshot behind — this file is
    // read precisely after crashes.
    let state_path = data_dir.join(CONTAINER_STATE_FILE);
    let tmp_path = data_dir.join(format!("{}.tmp", CONTAINER_STATE_FILE));
    fs::write(&tmp_path, json)
        .await
        .context("Failed to write container snapshot")?;
    fs::rename(&tmp_path, &state_path)
        .await
        .context("Failed to replace container snapshot")?;

    Ok(())
}

/// Restart every container in `containers` with `podman start`, one after another.
///
/// Starts are spaced 3 seconds apart. Each container gets two attempts: the first
/// with a 120s timeout, then — after a 10s pause — a retry with a 180s timeout.
/// Successes and failures are logged; names that never started end up in
/// `RecoveryReport::failed`.
pub async fn recover_containers(containers: &[RunningContainerRecord]) -> RecoveryReport {
    // Per-attempt timeouts in seconds: first try, then the single retry.
    const ATTEMPT_TIMEOUTS_SECS: [u64; 2] = [120, 180];

    let mut recovered = 0usize;
    let mut failed: Vec<String> = Vec::new();

    for (index, record) in containers.iter().enumerate() {
        info!(
            "Recovering container: {} (image: {})",
            record.name, record.image
        );

        // Space out starts so podman is not overwhelmed on low-resource systems.
        if index != 0 {
            tokio::time::sleep(Duration::from_secs(3)).await;
        }

        let mut started = false;
        for (attempt_idx, &timeout_secs) in ATTEMPT_TIMEOUTS_SECS.iter().enumerate() {
            let attempt_no = attempt_idx + 1;
            if attempt_idx > 0 {
                info!(
                    "Retrying container {} (attempt {})",
                    record.name, attempt_no
                );
                tokio::time::sleep(Duration::from_secs(10)).await;
            }

            let description = format!("podman start {}", record.name);
            let mut start_cmd = tokio::process::Command::new("podman");
            start_cmd.args(["start", &record.name]);
            let outcome =
                command_with_timeout(start_cmd, Duration::from_secs(timeout_secs), &description)
                    .await;

            match outcome {
                Ok(output) => {
                    if output.status.success() {
                        info!("Successfully restarted container: {}", record.name);
                        recovered += 1;
                        started = true;
                        break;
                    }
                    let stderr = String::from_utf8_lossy(&output.stderr);
                    warn!(
                        "Failed to restart container {} (attempt {}): {}",
                        record.name,
                        attempt_no,
                        stderr.trim()
                    );
                }
                Err(e) => warn!(
                    "Failed to start container {} ({}s, attempt {}): {}",
                    record.name, timeout_secs, attempt_no, e
                ),
            }
        }

        if !started {
            failed.push(record.name.clone());
        }
    }

    RecoveryReport {
        total: containers.len(),
        recovered,
        failed,
    }
}

/// Outcome of a recovery pass over a set of containers.
#[derive(Debug)]
pub struct RecoveryReport {
    /// Number of containers a start was attempted for.
    pub total: usize,
    /// Number of containers that were started successfully.
    pub recovered: usize,
    /// Names of the containers that could not be started.
    pub failed: Vec<String>,
}

/// Check if a process with the given PID is still running *and* plausibly is a
/// previous instance of this program.
///
/// Linux-only: liveness is read from `/proc/{pid}`; on systems without procfs this
/// always returns `false`.
///
/// PIDs are recycled (especially across reboots), so the mere existence of
/// `/proc/{pid}` does not prove the old instance is alive. When both command names
/// are readable, `/proc/{pid}/comm` is compared with `/proc/self/comm` and a
/// mismatch is reported as "not running". If either name cannot be read, this falls
/// back to the plain existence check.
fn is_process_running(pid: u32) -> bool {
    let proc_dir = std::path::PathBuf::from(format!("/proc/{}", pid));
    if !proc_dir.exists() {
        return false;
    }

    match (
        std::fs::read_to_string(proc_dir.join("comm")),
        std::fs::read_to_string("/proc/self/comm"),
    ) {
        (Ok(theirs), Ok(ours)) => theirs.trim_end() == ours.trim_end(),
        // Cannot tell the processes apart: keep the conservative "it is alive" answer.
        _ => true,
    }
}

/// Start all stopped containers that were previously installed.
/// Runs on every startup to ensure containers come back after clean reboots.
/// The crash recovery (PID-based) handles dirty shutdowns; this handles clean ones.
/// Skips containers that the user intentionally stopped via the UI.
///
/// Delegates to `start_stopped_containers_for` with `include_stack_members = false`,
/// so multi-container stack members are left out of this pass.
pub async fn start_stopped_containers(data_dir: &Path) -> RecoveryReport {
    start_stopped_containers_for(data_dir, false).await
}

/// Start stopped multi-container stack members after the backend is already
/// ready. These can take minutes after a reboot, so they must not block
/// systemd readiness.
///
/// Public entry point; the work is done by `start_stopped_app_stacks`.
pub async fn start_stopped_stack_containers(data_dir: &Path) -> RecoveryReport {
    start_stopped_app_stacks(data_dir).await
}

/// Walk every known app stack and start its members that exist but are not running.
///
/// A stack is skipped entirely when none of its containers exist. For the others,
/// network aliases are repaired first, then members are handled in spec order:
/// user-stopped, already-running and non-existent members are skipped; the rest are
/// started via `start_existing_container` and counted in the returned report.
async fn start_stopped_app_stacks(data_dir: &Path) -> RecoveryReport {
    let skip_set = load_user_stopped(data_dir).await;
    let mut attempted = 0usize;
    let mut recovered = 0usize;
    let mut failed: Vec<String> = Vec::new();

    for stack in stack_recovery_specs() {
        let stack_installed = stack_has_any_container(stack).await;
        if !stack_installed {
            continue;
        }

        info!(
            "Recovering stopped {} stack containers after boot",
            stack.name
        );
        repair_stack_network_aliases(stack).await;

        for &member in stack.containers {
            if skip_set.contains(member) {
                info!("Skipping user-stopped container: {}", member);
                continue;
            }

            // Only containers that exist and are not already running need a start.
            let Some(state) = container_state(member).await else {
                continue;
            };
            if state == "running" {
                continue;
            }

            // Aliases are re-checked right before each start, then any
            // member-specific dependency wait runs.
            repair_stack_network_aliases(stack).await;
            wait_before_stack_container_recovery(stack, member).await;

            attempted += 1;
            if start_existing_container(member).await {
                recovered += 1;
            } else {
                failed.push(member.to_string());
            }
        }
    }

    RecoveryReport {
        total: attempted,
        recovered,
        failed,
    }
}

/// Hold back the start of the `indeedhub` container until its dependencies are usable.
///
/// A no-op for every other stack/container pair. For `indeedhub` it polls in two
/// phases, each at most 60 times with a 2s pause:
/// 1. until redis, minio and the API container are running (then aliases are repaired);
/// 2. until `getent hosts minio` succeeds inside `indeedhub-api`.
/// When a phase runs out of polls the function simply moves on.
async fn wait_before_stack_container_recovery(stack: &StackRecoverySpec, container: &str) {
    let is_indeedhub_frontend = stack.name == "indeedhub" && container == "indeedhub";
    if !is_indeedhub_frontend {
        return;
    }

    let poll_pause = Duration::from_secs(2);

    // Phase 1: wait for the dependency containers to be running.
    let mut polls_left = 60;
    while polls_left > 0 {
        polls_left -= 1;
        if indeedhub_recovery_dependencies_running().await {
            repair_stack_network_aliases(stack).await;
            break;
        }
        tokio::time::sleep(poll_pause).await;
    }

    // Phase 2: wait until the API container can resolve the minio alias.
    for _poll in 0..60 {
        let lookup = podman_output(
            &["exec", "indeedhub-api", "getent", "hosts", "minio"],
            Duration::from_secs(5),
        )
        .await;
        let resolved = match lookup {
            Ok(output) => output.status.success(),
            Err(_) => false,
        };
        if resolved {
            return;
        }
        tokio::time::sleep(poll_pause).await;
    }
}

/// True when redis, minio and the API container of the indeedhub stack all report
/// the state "running". Stops checking at the first one that does not.
async fn indeedhub_recovery_dependencies_running() -> bool {
    const DEPENDENCIES: [&str; 3] = ["indeedhub-redis", "indeedhub-minio", "indeedhub-api"];

    for dependency in DEPENDENCIES {
        match container_state(dependency).await {
            Some(state) if state == "running" => {}
            _ => return false,
        }
    }
    true
}

/// List exited/created containers and start the ones eligible for boot recovery.
///
/// Steps:
/// 1. `podman ps -a` filtered to `exited` and `created` containers (60s timeout).
/// 2. Drop containers the user stopped on purpose.
/// 3. Drop everything `should_auto_start_stopped_container` rejects
///    (`include_stack_members` widens that allow-list to stack members).
/// 4. Sort by `container_boot_tier` and hand the list to `recover_containers`.
///
/// Any failure to list containers is logged and yields an empty report; this
/// function never returns an error.
async fn start_stopped_containers_for(
    data_dir: &Path,
    include_stack_members: bool,
) -> RecoveryReport {
    let empty_report = || RecoveryReport {
        total: 0,
        recovered: 0,
        failed: Vec::new(),
    };

    let mut cmd = tokio::process::Command::new("podman");
    cmd.args([
        "ps",
        "-a",
        "--filter",
        "status=exited",
        "--filter",
        "status=created",
        "--format",
        "{{.Names}}",
    ]);
    let output = match command_with_timeout(cmd, Duration::from_secs(60), "podman ps stopped").await
    {
        Ok(output) => output,
        Err(e) => {
            warn!("Failed listing stopped containers: {}", e);
            return empty_report();
        }
    };

    // A non-zero exit used to be indistinguishable from "nothing is stopped".
    if !output.status.success() {
        warn!(
            "Failed listing stopped containers: {}",
            String::from_utf8_lossy(&output.stderr).trim()
        );
        return empty_report();
    }

    let all_names: Vec<String> = String::from_utf8_lossy(&output.stdout)
        .lines()
        .filter(|l| !l.is_empty())
        .map(|s| s.to_string())
        .collect();

    if all_names.is_empty() {
        return empty_report();
    }

    // Filter out user-stopped containers, then anything outside the allow-list.
    let user_stopped = load_user_stopped(data_dir).await;
    let mut skipped_user_stopped = 0usize;
    let mut records: Vec<RunningContainerRecord> = all_names
        .into_iter()
        .filter(|name| {
            if user_stopped.contains(name) {
                info!("Skipping user-stopped container: {}", name);
                skipped_user_stopped += 1;
                false
            } else {
                true
            }
        })
        .filter(|name| should_auto_start_stopped_container(name, include_stack_members))
        .map(|name| RunningContainerRecord {
            name,
            image: String::new(),
        })
        .collect();

    if records.is_empty() {
        return empty_report();
    }

    // Sort by startup tier: databases first, then core, then dependent services, then apps
    records.sort_by_key(|r| container_boot_tier(&r.name));

    // Report how many containers were actually skipped in this pass, not the size
    // of the whole persisted user-stopped set.
    info!(
        "Starting {} stopped containers after boot (skipped {} user-stopped)...",
        records.len(),
        skipped_user_stopped
    );
    recover_containers(&records).await
}

/// Decide whether generic boot recovery may start the stopped container `name`.
///
/// Two standalone containers are always eligible. The members of the multi-container
/// stacks are eligible only when `include_stack_members` is set. Every other name is
/// rejected.
fn should_auto_start_stopped_container(name: &str, include_stack_members: bool) -> bool {
    // Keep generic boot recovery narrow. The Rust manifest reconciler owns
    // managed app stacks; starting every exited Podman container here races
    // it and resurrects legacy/orphan helper containers.
    const ALWAYS_STARTED: &[&str] = &["filebrowser", "nostr-rs-relay"];
    const STACK_MEMBERS: &[&str] = &[
        "immich_postgres",
        "immich_redis",
        "immich_server",
        "indeedhub-postgres",
        "indeedhub-redis",
        "indeedhub-minio",
        "indeedhub-relay",
        "indeedhub-api",
        "indeedhub-ffmpeg",
        "indeedhub",
        "netbird-server",
        "netbird-dashboard",
        "netbird",
        "saleor-db",
        "saleor-cache",
        "saleor-jaeger",
        "saleor-mailpit",
        "saleor-api",
        "saleor-worker",
        "saleor",
        "saleor-storefront",
        "saleor-storefront-app",
    ];

    let listed_in = |names: &[&str]| names.iter().any(|candidate| *candidate == name);

    if listed_in(ALWAYS_STARTED) {
        return true;
    }
    if !include_stack_members {
        return false;
    }
    listed_in(STACK_MEMBERS)
}

/// Static description of one multi-container app stack used by boot recovery.
struct StackRecoverySpec {
    /// Stack name, used in log messages and for stack-specific handling.
    name: &'static str,
    /// Podman network the stack's containers are connected to.
    network: &'static str,
    /// `(container name, network alias)` pairs that must be present on `network`.
    aliases: &'static [(&'static str, &'static str)],
    /// Member containers, in the order recovery iterates (and starts) them.
    containers: &'static [&'static str],
}

/// The fixed table of app stacks that boot recovery knows how to bring back.
///
/// Each entry names the stack, its podman network, the `(container, alias)` pairs
/// expected on that network, and the member containers in start order.
fn stack_recovery_specs() -> &'static [StackRecoverySpec] {
    const IMMICH: StackRecoverySpec = StackRecoverySpec {
        name: "immich",
        network: "immich-net",
        aliases: &[
            ("immich_postgres", "immich_postgres"),
            ("immich_redis", "immich_redis"),
            ("immich_server", "immich_server"),
        ],
        containers: &["immich_postgres", "immich_redis", "immich_server"],
    };

    const INDEEDHUB: StackRecoverySpec = StackRecoverySpec {
        name: "indeedhub",
        network: "indeedhub-net",
        aliases: &[
            ("indeedhub-postgres", "postgres"),
            ("indeedhub-redis", "redis"),
            ("indeedhub-minio", "minio"),
            ("indeedhub-relay", "relay"),
            ("indeedhub-api", "api"),
            ("indeedhub", "indeedhub"),
        ],
        containers: &[
            "indeedhub-postgres",
            "indeedhub-redis",
            "indeedhub-minio",
            "indeedhub-relay",
            "indeedhub-api",
            "indeedhub-ffmpeg",
            "indeedhub",
        ],
    };

    const NETBIRD: StackRecoverySpec = StackRecoverySpec {
        name: "netbird",
        network: "netbird-net",
        aliases: &[
            ("netbird-server", "netbird-server"),
            ("netbird-dashboard", "netbird-dashboard"),
            ("netbird", "netbird"),
        ],
        containers: &["netbird-server", "netbird-dashboard", "netbird"],
    };

    const SALEOR: StackRecoverySpec = StackRecoverySpec {
        name: "saleor",
        network: "saleor-net",
        aliases: &[
            ("saleor-db", "db"),
            ("saleor-cache", "cache"),
            ("saleor-jaeger", "jaeger"),
            ("saleor-mailpit", "mailpit"),
            ("saleor-api", "api"),
            ("saleor-worker", "worker"),
            ("saleor", "saleor"),
            ("saleor-storefront", "storefront"),
            ("saleor-storefront-app", "storefront-app"),
        ],
        containers: &[
            "saleor-db",
            "saleor-cache",
            "saleor-jaeger",
            "saleor-mailpit",
            "saleor-api",
            "saleor-worker",
            "saleor",
            "saleor-storefront",
            "saleor-storefront-app",
        ],
    };

    const SPECS: &[StackRecoverySpec] = &[IMMICH, INDEEDHUB, NETBIRD, SALEOR];
    SPECS
}

/// True as soon as one member of `stack` can be inspected, i.e. exists in podman.
/// Members are probed in spec order and probing stops at the first hit.
async fn stack_has_any_container(stack: &StackRecoverySpec) -> bool {
    for &member in stack.containers {
        if let Some(_state) = container_state(member).await {
            return true;
        }
    }
    false
}

/// Make sure the stack's network exists and every existing member carries its alias.
///
/// All podman calls are best-effort (15s timeout each, results ignored). The network
/// is created first. Then, for each `(container, alias)` pair whose container exists
/// and whose alias is missing, the container is force-disconnected from the network
/// and reconnected with `--alias`.
async fn repair_stack_network_aliases(stack: &StackRecoverySpec) {
    let op_timeout = Duration::from_secs(15);
    let net = stack.network;

    let _ = podman_status(&["network", "create", net], op_timeout).await;

    for &(member, alias) in stack.aliases {
        let member_exists = container_state(member).await.is_some();
        // Nothing to do for absent containers or for aliases already in place.
        if !member_exists || network_alias_present(net, member, alias).await {
            continue;
        }

        let _ = podman_status(&["network", "disconnect", "-f", net, member], op_timeout).await;
        let _ = podman_status(
            &["network", "connect", "--alias", alias, net, member],
            op_timeout,
        )
        .await;
    }
}

/// Report whether `container` is attached to `network_name` with the alias `alias`.
///
/// Inspects the container's `.NetworkSettings.Networks` JSON (10s timeout) and looks
/// for `alias` in that network's `Aliases` array. Any failure along the way — podman
/// error, non-zero exit, invalid JSON, missing network or `Aliases` — yields `false`.
async fn network_alias_present(network_name: &str, container: &str, alias: &str) -> bool {
    let inspect_args = [
        "inspect",
        container,
        "--format",
        "{{json .NetworkSettings.Networks}}",
    ];
    let Ok(inspected) = podman_output(&inspect_args, Duration::from_secs(10)).await else {
        return false;
    };
    if !inspected.status.success() {
        return false;
    }

    let networks: serde_json::Value = match serde_json::from_slice(&inspected.stdout) {
        Ok(value) => value,
        Err(_) => return false,
    };

    let Some(alias_list) = networks
        .get(network_name)
        .and_then(|network| network.get("Aliases"))
        .and_then(|aliases| aliases.as_array())
    else {
        return false;
    };
    alias_list.iter().any(|entry| entry.as_str() == Some(alias))
}

/// Ask podman for the container's `.State.Status` (10s timeout).
///
/// Returns the trimmed status string on success, or `None` when the inspect command
/// fails to run, times out, or exits non-zero.
async fn container_state(container: &str) -> Option<String> {
    let inspect_args = ["inspect", container, "--format", "{{.State.Status}}"];
    match podman_output(&inspect_args, Duration::from_secs(10)).await {
        Ok(output) if output.status.success() => {
            Some(String::from_utf8_lossy(&output.stdout).trim().to_string())
        }
        _ => None,
    }
}

/// Start one existing stack container and report whether it came up.
///
/// Returns `true` when `podman start` succeeded (and, on the first attempt, the
/// container had not already exited 3 seconds later); `false` otherwise. All
/// failures are logged as warnings.
async fn start_existing_container(container: &str) -> bool {
    info!("Recovering stack container: {}", container);
    // Two containers get a longer start timeout than the rest.
    let timeout = match container {
        "immich_server" | "netbird-server" => Duration::from_secs(120),
        _ => Duration::from_secs(90),
    };
    // A container reported as "initialized" gets `podman container cleanup` before
    // the start attempt.
    if container_state(container).await.as_deref() == Some("initialized") {
        cleanup_container_runtime_state(container).await;
    }
    match podman_output(&["start", container], timeout).await {
        Ok(output) if output.status.success() => {
            // Give the container a moment, then make sure it did not exit right away.
            tokio::time::sleep(Duration::from_secs(3)).await;
            if container_state(container).await.as_deref() == Some("exited") {
                warn!("Stack container {} exited shortly after start", container);
                false
            } else {
                info!("Successfully recovered stack container: {}", container);
                true
            }
        }
        Ok(output) => {
            let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
            // These two stderr markers trigger one cleanup + retry.
            if stderr.contains("exec.fifo") || stderr.contains("failed to start container") {
                cleanup_container_runtime_state(container).await;
                if let Ok(retry) = podman_output(&["start", container], timeout).await {
                    if retry.status.success() {
                        // NOTE(review): unlike the first attempt, the retry path does not
                        // re-check for an early exit — confirm this is intended.
                        info!(
                            "Successfully recovered stack container after cleanup: {}",
                            container
                        );
                        return true;
                    }
                    warn!(
                        "Failed to recover stack container {} after cleanup: {}",
                        container,
                        String::from_utf8_lossy(&retry.stderr).trim()
                    );
                    return false;
                }
                // The retry itself could not be run (error/timeout): fall through and
                // report the original stderr below.
            }
            warn!(
                "Failed to recover stack container {}: {}",
                container, stderr
            );
            false
        }
        Err(e) => {
            warn!("Failed to recover stack container {}: {}", container, e);
            false
        }
    }
}

/// Run `podman container cleanup <container>` (30s timeout), ignoring the outcome.
async fn cleanup_container_runtime_state(container: &str) {
    let _ = podman_output(
        &["container", "cleanup", container],
        Duration::from_secs(30),
    )
    .await;
}

/// Run podman with `args` and return only its exit status.
///
/// Yields `None` when the command could not be run or timed out.
async fn podman_status(args: &[&str], timeout: Duration) -> Option<std::process::ExitStatus> {
    match podman_output(args, timeout).await {
        Ok(output) => Some(output.status),
        Err(_) => None,
    }
}

/// Run `podman <args...>` under `timeout` and return the captured output.
///
/// The description used in error messages is `podman` followed by the
/// space-joined arguments.
async fn podman_output(args: &[&str], timeout: Duration) -> Result<Output> {
    let description = format!("podman {}", args.join(" "));
    let mut command = tokio::process::Command::new("podman");
    command.args(args);
    command_with_timeout(command, timeout, &description).await
}

/// Boot-order tier for a container; lower tiers start first
/// (mirrors health_monitor tiers).
///
/// An optional `archy-` prefix is ignored. Names not listed in any tier get tier 3.
fn container_boot_tier(name: &str) -> u8 {
    // Tier 3 is the fallback for everything not listed here.
    const TIERS: &[(u8, &[&str])] = &[
        // Tier 0: Databases and data stores
        (
            0,
            &[
                "btcpay-db",
                "mempool-db",
                "mysql-mempool",
                "penpot-postgres",
                "immich_postgres",
                "immich_redis",
                "penpot-valkey",
                "endurain-db",
                "nextcloud-db",
                "indeedhub-postgres",
                "indeedhub-redis",
                "indeedhub-minio",
            ],
        ),
        // Tier 1: Core infrastructure
        (1, &["bitcoin-knots", "bitcoin-core", "bitcoin"]),
        // Tier 2: Dependent services
        (
            2,
            &[
                "lnd",
                "electrumx",
                "mempool-electrs",
                "electrs",
                "nbxplorer",
                "mempool-api",
                "indeedhub-api",
            ],
        ),
        // Tier 4: Frontend/UI
        (
            4,
            &[
                "mempool-web",
                "bitcoin-ui",
                "lnd-ui",
                "electrs-ui",
                "penpot-frontend",
                "penpot-exporter",
                "indeedhub",
            ],
        ),
    ];

    let id = match name.strip_prefix("archy-") {
        Some(stripped) => stripped,
        None => name,
    };

    TIERS
        .iter()
        .find(|(_, members)| members.iter().any(|member| *member == id))
        .map_or(3, |(tier, _)| *tier)
}

/// Run the container reconciliation script once after boot to fix config drift.
///
/// Skipped (with an info log) when the script is not present. Otherwise the script
/// runs with a 300s timeout; success, a non-zero exit (first 500 characters of
/// stderr) and run/timeout errors are each logged. Nothing is returned.
#[allow(dead_code)]
pub async fn run_boot_reconciliation() {
    const SCRIPT: &str = "/home/archipelago/archy/scripts/reconcile-containers.sh";

    let script_missing = !std::path::Path::new(SCRIPT).exists();
    if script_missing {
        info!("Reconciliation script not found (dev mode?) — skipping boot reconciliation");
        return;
    }

    info!("Running boot reconciliation...");
    let outcome = command_with_timeout(
        tokio::process::Command::new(SCRIPT),
        Duration::from_secs(300),
        SCRIPT,
    )
    .await;

    let output = match outcome {
        Ok(output) => output,
        Err(e) => {
            warn!("Boot reconciliation failed: {}", e);
            return;
        }
    };

    if output.status.success() {
        info!("Boot reconciliation complete");
        return;
    }

    // Keep the log line bounded: only the first 500 characters of stderr.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let excerpt: String = stderr.chars().take(500).collect();
    warn!("Boot reconciliation had failures: {}", excerpt);
}

/// Run `cmd` to completion, capturing its output, but give up after `timeout`.
///
/// `kill_on_drop` is set so the child is killed when the timed-out future is dropped.
///
/// # Errors
/// * `"<description> timed out after <n>s"` when the timeout elapses.
/// * `"Failed to run <description>"` when the command itself cannot be run.
async fn command_with_timeout(
    mut cmd: tokio::process::Command,
    timeout: Duration,
    description: &str,
) -> Result<Output> {
    cmd.kill_on_drop(true);

    let waited = tokio::time::timeout(timeout, cmd.output()).await;
    let finished = waited
        .with_context(|| format!("{} timed out after {}s", description, timeout.as_secs()))?;
    finished.with_context(|| format!("Failed to run {}", description))
}

/// Spawn a background task that periodically saves the container snapshot.
pub fn spawn_snapshot_task(data_dir: PathBuf) {
    tokio::spawn(async move {
        // Wait 2 minutes before first snapshot (let crash recovery finish and containers stabilize)
        tokio::time::sleep(std::time::Duration::from_secs(120)).await;

        let mut interval = tokio::time::interval(std::time::Duration::from_secs(120));
        interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
        loop {
            interval.tick().await;
            if let Err(e) = save_container_snapshot(&data_dir).await {
                tracing::debug!("Container snapshot (non-fatal): {}", e);
            }
        }
    });
}

// Unit tests for the PID-marker / snapshot logic and the boot-recovery allow-list.
// Filesystem tests run against a fresh temporary directory each.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // No PID file at all means the previous run shut down cleanly.
    #[tokio::test]
    async fn test_no_crash_without_pid_file() {
        let tmp = TempDir::new().unwrap();
        let result = check_for_crash(tmp.path()).await.unwrap();
        assert!(result.is_none());
    }

    // A stale PID file without a snapshot is a crash with nothing to recover.
    #[tokio::test]
    async fn test_crash_detected_with_pid_file() {
        let tmp = TempDir::new().unwrap();
        // Write a PID file with a non-existent PID
        fs::write(tmp.path().join(PID_FILE), "999999999")
            .await
            .unwrap();
        let result = check_for_crash(tmp.path()).await.unwrap();
        // No snapshot file → crash detected but no containers to recover
        assert!(result.is_none());
    }

    // A stale PID file plus a valid snapshot returns the snapshot's containers in order.
    #[tokio::test]
    async fn test_crash_with_snapshot() {
        let tmp = TempDir::new().unwrap();
        // Write PID file
        fs::write(tmp.path().join(PID_FILE), "999999999")
            .await
            .unwrap();
        // Write container snapshot
        let snapshot = ContainerSnapshot {
            timestamp: 1000,
            containers: vec![
                RunningContainerRecord {
                    name: "archy-bitcoin-knots".to_string(),
                    image: "bitcoin-knots:27.1".to_string(),
                },
                RunningContainerRecord {
                    name: "archy-mempool-web".to_string(),
                    image: "mempool/frontend:3.0".to_string(),
                },
            ],
        };
        let json = serde_json::to_string(&snapshot).unwrap();
        fs::write(tmp.path().join(CONTAINER_STATE_FILE), json)
            .await
            .unwrap();

        let result = check_for_crash(tmp.path()).await.unwrap();
        assert!(result.is_some());
        let containers = result.unwrap();
        assert_eq!(containers.len(), 2);
        assert_eq!(containers[0].name, "archy-bitcoin-knots");
        assert_eq!(containers[1].name, "archy-mempool-web");
    }

    // The marker file appears after writing and disappears after removal.
    #[tokio::test]
    async fn test_write_and_remove_pid_marker() {
        let tmp = TempDir::new().unwrap();
        write_pid_marker(tmp.path()).await.unwrap();
        assert!(tmp.path().join(PID_FILE).exists());

        remove_pid_marker(tmp.path()).await;
        assert!(!tmp.path().join(PID_FILE).exists());
    }

    // The marker content is exactly the current process ID.
    #[tokio::test]
    async fn test_pid_marker_contains_current_pid() {
        let tmp = TempDir::new().unwrap();
        write_pid_marker(tmp.path()).await.unwrap();
        let content = fs::read_to_string(tmp.path().join(PID_FILE)).await.unwrap();
        let saved_pid: u32 = content.parse().unwrap();
        assert_eq!(saved_pid, std::process::id());
    }

    // Full clean lifecycle: write marker, remove marker, next start sees no crash.
    #[tokio::test]
    async fn test_clean_shutdown_no_crash_on_restart() {
        let tmp = TempDir::new().unwrap();

        // Simulate: startup → write PID → clean shutdown → remove PID
        write_pid_marker(tmp.path()).await.unwrap();
        remove_pid_marker(tmp.path()).await;

        // Next startup should detect no crash
        let result = check_for_crash(tmp.path()).await.unwrap();
        assert!(result.is_none());
    }

    // An unparsable snapshot must not fail the check; it just yields nothing to recover.
    #[tokio::test]
    async fn test_corrupt_snapshot_handled() {
        let tmp = TempDir::new().unwrap();
        fs::write(tmp.path().join(PID_FILE), "999999999")
            .await
            .unwrap();
        fs::write(tmp.path().join(CONTAINER_STATE_FILE), "not valid json")
            .await
            .unwrap();

        // Should not crash, returns None (no recoverable containers)
        let result = check_for_crash(tmp.path()).await.unwrap();
        assert!(result.is_none());
    }

    // Allow-list check: standalone containers always start, stack members only when
    // explicitly included, everything else never.
    #[test]
    fn generic_boot_recovery_skips_manifest_owned_and_legacy_stacks() {
        assert!(should_auto_start_stopped_container("filebrowser", false));
        assert!(should_auto_start_stopped_container("nostr-rs-relay", false));
        assert!(!should_auto_start_stopped_container("bitcoin-knots", false));
        assert!(!should_auto_start_stopped_container("lnd", false));
        assert!(!should_auto_start_stopped_container(
            "indeedhub-postgres",
            false
        ));
        assert!(should_auto_start_stopped_container(
            "indeedhub-postgres",
            true
        ));
    }
}