diff --git a/core/archipelago/src/api/rpc/package/install.rs b/core/archipelago/src/api/rpc/package/install.rs index 3fd8547a..927e796b 100644 --- a/core/archipelago/src/api/rpc/package/install.rs +++ b/core/archipelago/src/api/rpc/package/install.rs @@ -222,12 +222,12 @@ impl RpcHandler { // Pre-install: bitcoin.conf with rpcauth if matches!(package_id, "bitcoin" | "bitcoin-core" | "bitcoin-knots") { - self.write_bitcoin_conf(&rpc_user, &rpc_pass).await; + self.write_bitcoin_conf(&rpc_user, &rpc_pass).await?; } // Pre-install: lnd.conf with Bitcoin RPC credentials if package_id == "lnd" { - self.write_lnd_conf(&rpc_user, &rpc_pass).await; + self.write_lnd_conf(&rpc_user, &rpc_pass).await?; } // Pre-install: SearXNG settings.yml (required or container exits immediately) @@ -241,7 +241,12 @@ impl RpcHandler { "use_default_settings: true\ngeneral:\n instance_name: Archipelago Search\nserver:\n secret_key: \"{}\"\n bind_address: \"0.0.0.0\"\n port: 8080\n limiter: false\nui:\n default_theme: simple\n", secret_hex ); - let _ = tokio::fs::write(&settings_path, settings).await; + tokio::fs::create_dir_all(searx_dir) + .await + .context("Failed to create SearXNG config directory")?; + tokio::fs::write(&settings_path, settings) + .await + .context("Failed to write SearXNG settings.yml")?; info!("Created SearXNG settings.yml"); } } @@ -580,7 +585,7 @@ impl RpcHandler { } /// Write bitcoin.conf with rpcauth (salted HMAC hash, no plaintext password). 
- async fn write_bitcoin_conf(&self, rpc_user: &str, rpc_pass: &str) { + async fn write_bitcoin_conf(&self, rpc_user: &str, rpc_pass: &str) -> Result<()> { let bitcoin_dir = "/var/lib/archipelago/bitcoin"; let conf_path = format!("{}/bitcoin.conf", bitcoin_dir); @@ -607,20 +612,25 @@ listen=1\n\ printtoconsole=1\n", rpcauth_line ); - let _ = tokio::fs::create_dir_all(bitcoin_dir).await; - let _ = tokio::fs::write(&conf_path, bitcoin_conf).await; + tokio::fs::create_dir_all(bitcoin_dir) + .await + .context("Failed to create bitcoin data directory")?; + tokio::fs::write(&conf_path, bitcoin_conf) + .await + .context("Failed to write bitcoin.conf")?; info!("Created bitcoin.conf with rpcauth (no plaintext credentials)"); + Ok(()) } /// Write LND config file with Bitcoin RPC credentials. - async fn write_lnd_conf(&self, rpc_user: &str, rpc_pass: &str) { + async fn write_lnd_conf(&self, rpc_user: &str, rpc_pass: &str) -> Result<()> { let lnd_dir = "/var/lib/archipelago/lnd"; let conf_path = format!("{}/lnd.conf", lnd_dir); // Don't overwrite existing config (user may have customized it) if tokio::fs::try_exists(&conf_path).await.unwrap_or(false) { info!("lnd.conf already exists, skipping write"); - return; + return Ok(()); } let lnd_conf = format!( @@ -648,24 +658,25 @@ autopilot.active=false\n", user = rpc_user, pass = rpc_pass, ); - let _ = tokio::fs::create_dir_all(lnd_dir).await; - let _ = tokio::fs::write(&conf_path, lnd_conf).await; + tokio::fs::create_dir_all(lnd_dir) + .await + .context("Failed to create LND data directory")?; + tokio::fs::write(&conf_path, lnd_conf) + .await + .context("Failed to write lnd.conf")?; info!("Created lnd.conf with Bitcoin RPC credentials"); + Ok(()) } /// Run post-install hooks (Nextcloud trusted domains, Bitcoin UI container). /// Critical hooks (credential setup, config) are awaited; UI container builds are background. 
async fn run_post_install_hooks(&self, package_id: &str) { if package_id == "filebrowser" { - // Wait for filebrowser to start and initialize its database - tokio::time::sleep(std::time::Duration::from_secs(5)).await; - // Generate a random password (32 bytes, hex-encoded) let mut buf = [0u8; 32]; rand::RngCore::fill_bytes(&mut rand::rngs::OsRng, &mut buf); let password = hex::encode(buf); - // Get a JWT token with default credentials let client = match reqwest::Client::builder() .timeout(std::time::Duration::from_secs(10)) .build() @@ -677,53 +688,72 @@ autopilot.active=false\n", } }; - let login_res = client - .post("http://127.0.0.1:8083/api/login") - .json(&serde_json::json!({"username": "admin", "password": "admin"})) - .send() - .await; + // Retry loop: FileBrowser may take time to initialize its SQLite database + let mut password_changed = false; + for attempt in 0..6u32 { + let delay = if attempt == 0 { 5 } else { 10 }; + tokio::time::sleep(std::time::Duration::from_secs(delay)).await; - let token = match login_res { - Ok(resp) if resp.status().is_success() => { - match resp.text().await { - Ok(t) => t.trim_matches('"').to_string(), - Err(e) => { - tracing::warn!("FileBrowser login response parse failed: {}", e); - return; + // Try to log in with default credentials + let login_res = client + .post("http://127.0.0.1:8083/api/login") + .json(&serde_json::json!({"username": "admin", "password": "admin"})) + .send() + .await; + + let token = match login_res { + Ok(resp) if resp.status().is_success() => { + match resp.text().await { + Ok(t) => t.trim_matches('"').to_string(), + Err(_) => continue, } } - } - _ => { - tracing::warn!("FileBrowser not ready for password change — keeping default"); - return; - } - }; + _ => { + debug!("FileBrowser not ready (attempt {}/6)", attempt + 1); + continue; + } + }; - // Change admin password via filebrowser API - let change_res = client - .put("http://127.0.0.1:8083/api/users/1") - .header("X-Auth", &token) - 
.json(&serde_json::json!({"password": password})) - .send() - .await; + // Change admin password + let change_res = client + .put("http://127.0.0.1:8083/api/users/1") + .header("X-Auth", &token) + .json(&serde_json::json!({"password": password})) + .send() + .await; - match change_res { - Ok(resp) if resp.status().is_success() => { - let secret_dir = "/var/lib/archipelago/secrets/filebrowser"; - let _ = tokio::fs::create_dir_all(secret_dir).await; - let _ = tokio::fs::write( - format!("{}/password", secret_dir), - &password, - ).await; - info!("FileBrowser admin password secured (default credentials replaced)"); - } - Ok(resp) => { - tracing::warn!("FileBrowser password change failed: {}", resp.status()); - } - Err(e) => { - tracing::warn!("FileBrowser password change error: {}", e); + match change_res { + Ok(resp) if resp.status().is_success() => { + let secret_dir = "/var/lib/archipelago/secrets/filebrowser"; + if let Err(e) = tokio::fs::create_dir_all(secret_dir).await { + tracing::warn!("Failed to create filebrowser secrets dir: {}", e); + } + let pw_path = format!("{}/password", secret_dir); + if let Err(e) = tokio::fs::write(&pw_path, &password).await { + tracing::warn!("Failed to write filebrowser password: {}", e); + } + // Set restrictive permissions on the password file + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = std::fs::set_permissions( + &pw_path, + std::fs::Permissions::from_mode(0o600), + ); + } + info!("FileBrowser admin password secured (default credentials replaced)"); + password_changed = true; + break; + } + _ => continue, } } + if !password_changed { + tracing::warn!( + "FileBrowser password could not be changed after 6 attempts — \ + default credentials (admin/admin) remain active" + ); + } } if package_id == "nextcloud" { diff --git a/core/archipelago/src/api/rpc/package/runtime.rs b/core/archipelago/src/api/rpc/package/runtime.rs index 4206f950..438d0143 100644 --- a/core/archipelago/src/api/rpc/package/runtime.rs 
+++ b/core/archipelago/src/api/rpc/package/runtime.rs @@ -48,7 +48,11 @@ impl RpcHandler { install_log(&format!("START: {} (containers: {:?})", package_id, to_start)).await; let mut errors = Vec::new(); - for name in &to_start { + for (i, name) in to_start.iter().enumerate() { + // Brief delay between dependent containers to allow initialization + if i > 0 { + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } tracing::info!("Starting container: {}", name); let out = tokio::process::Command::new("podman") .args(["start", name]) @@ -66,6 +70,45 @@ impl RpcHandler { if !errors.is_empty() { return Err(anyhow::anyhow!("Start failed: {}", errors.join("; "))); } + + // Verify containers actually reached running state (podman start can + // succeed even if the container exits immediately after) + tokio::time::sleep(std::time::Duration::from_secs(3)).await; + for name in &to_start { + let status = tokio::process::Command::new("podman") + .args(["inspect", name, "--format", "{{.State.Status}}"]) + .output() + .await; + if let Ok(o) = status { + let state = String::from_utf8_lossy(&o.stdout).trim().to_string(); + if state == "exited" { + let logs = tokio::process::Command::new("podman") + .args(["logs", "--tail", "5", name]) + .output() + .await; + let log_text = logs + .map(|o| { + let combined = format!( + "{}{}", + String::from_utf8_lossy(&o.stdout), + String::from_utf8_lossy(&o.stderr) + ); + combined.chars().take(200).collect::<String>() + }) + .unwrap_or_default(); + tracing::error!("Container {} exited after start: {}", name, log_text); + install_log(&format!("START EXITED: {} — {}", name, log_text)).await; + errors.push(format!("{}: exited after start", name)); + } + } + } + + if !errors.is_empty() { + return Err(anyhow::anyhow!( + "Containers exited after start: {}", + errors.join("; ") + )); + } Ok(serde_json::Value::Null) } diff --git a/image-recipe/build-auto-installer-iso.sh b/image-recipe/build-auto-installer-iso.sh index 440b73e2..1d901af0 100755 --- 
a/image-recipe/build-auto-installer-iso.sh +++ b/image-recipe/build-auto-installer-iso.sh @@ -1368,12 +1368,14 @@ FBUNBUNDLED cat > "$WORK_DIR/archipelago-first-boot-containers.service" <<'FBCSERVICE' [Unit] Description=Create core Archipelago containers on first boot -After=archipelago-setup-tor.service network-online.target podman.service +After=archipelago-load-images.service archipelago-setup-tor.service network-online.target podman.service +Wants=archipelago-load-images.service ConditionPathExists=/opt/archipelago/scripts/first-boot-containers.sh ConditionPathExists=!/var/lib/archipelago/.first-boot-containers-done [Service] Type=oneshot +TimeoutStartSec=900 ExecStart=/opt/archipelago/scripts/first-boot-containers.sh ExecStartPost=/usr/bin/touch /var/lib/archipelago/.first-boot-containers-done RemainAfterExit=yes @@ -1395,12 +1397,14 @@ else cat > "$WORK_DIR/archipelago-first-boot-containers.service" <<'FBCSERVICE' [Unit] Description=Create core Archipelago containers on first boot -After=archipelago-setup-tor.service network-online.target podman.service +After=archipelago-load-images.service archipelago-setup-tor.service network-online.target podman.service +Wants=archipelago-load-images.service ConditionPathExists=/opt/archipelago/scripts/first-boot-containers.sh ConditionPathExists=!/var/lib/archipelago/.first-boot-containers-done [Service] Type=oneshot +TimeoutStartSec=900 ExecStart=/opt/archipelago/scripts/first-boot-containers.sh ExecStartPost=/usr/bin/touch /var/lib/archipelago/.first-boot-containers-done RemainAfterExit=yes diff --git a/scripts/container-specs.sh b/scripts/container-specs.sh index 77907582..ae9ffa17 100755 --- a/scripts/container-specs.sh +++ b/scripts/container-specs.sh @@ -412,11 +412,13 @@ load_spec_searxng() { SPEC_IMAGE="${SEARXNG_IMAGE}" SPEC_PORTS="8888:8080" SPEC_MEMORY="$(mem_limit searxng)" + SPEC_VOLUMES="/var/lib/archipelago/searxng:/etc/searxng" SPEC_HEALTH_CMD="curl -sf http://localhost:8080/ || exit 1" 
SPEC_READONLY="true" - SPEC_TMPFS="/tmp:rw,noexec,nosuid,size=256m /run:rw,noexec,nosuid,size=64m /etc/searxng:rw,noexec,nosuid,size=16m" + SPEC_TMPFS="/tmp:rw,noexec,nosuid,size=256m /run:rw,noexec,nosuid,size=64m" SPEC_TIER="3" SPEC_CAPS="" + SPEC_DATA_DIR="/var/lib/archipelago/searxng" } load_spec_onlyoffice() { diff --git a/scripts/first-boot-containers.sh b/scripts/first-boot-containers.sh index 8f7ba09d..1ce46f65 100644 --- a/scripts/first-boot-containers.sh +++ b/scripts/first-boot-containers.sh @@ -233,8 +233,19 @@ chmod 700 /run/user/1000 runuser -u archipelago -- env XDG_RUNTIME_DIR=/run/user/1000 \ systemctl --user start podman.socket 2>/dev/null || true -# Ensure network exists (matches deploy) +# Ensure archy-net exists — critical for inter-container DNS (mempool→bitcoin, etc.) $DOCKER network create archy-net 2>/dev/null || true +if ! $DOCKER network exists archy-net 2>/dev/null; then + log "WARNING: archy-net creation failed, retrying in 5s..." + sleep 5 + $DOCKER network create archy-net 2>>"$LOG" + if ! $DOCKER network exists archy-net 2>/dev/null; then + log "FATAL: Cannot create archy-net — inter-container DNS will not work." + log " All containers requiring archy-net will fail. Exiting." + exit 1 + fi +fi +log "archy-net network ready" # Rootless podman UID mapping: fix data dir ownership so container processes # can write. Rootless podman maps container UIDs via subuid (container UID N @@ -299,6 +310,43 @@ mem_limit() { esac } +# ── Verify critical images are loaded ────────────────────────────────── +# archipelago-load-images.service should have loaded these from tarballs. +# If any are missing (corrupt tarball, disk full, etc.), try re-loading. +log "Verifying container images..." 
+MISSING_IMAGES="" +for img_var in BITCOIN_KNOTS_IMAGE MARIADB_IMAGE ELECTRUMX_IMAGE \ + MEMPOOL_BACKEND_IMAGE MEMPOOL_WEB_IMAGE BTCPAY_POSTGRES_IMAGE \ + NBXPLORER_IMAGE BTCPAY_IMAGE LND_IMAGE FEDIMINT_IMAGE \ + FEDIMINT_GATEWAY_IMAGE HOMEASSISTANT_IMAGE GRAFANA_IMAGE \ + UPTIME_KUMA_IMAGE JELLYFIN_IMAGE VAULTWARDEN_IMAGE \ + NEXTCLOUD_IMAGE SEARXNG_IMAGE FILEBROWSER_IMAGE; do + img="${!img_var}" + if [ -z "$img" ]; then + continue # Variable not defined in image-versions.sh + fi + if ! $DOCKER images --format '{{.Repository}}:{{.Tag}}' 2>/dev/null | grep -qF "$img"; then + MISSING_IMAGES="$MISSING_IMAGES $img_var" + fi +done +if [ -n "$MISSING_IMAGES" ]; then + log "WARNING: Missing images:$MISSING_IMAGES" + log "Attempting to re-load from /opt/archipelago/container-images/..." + RELOAD_COUNT=0 + for tarfile in /opt/archipelago/container-images/*.tar; do + if [ -f "$tarfile" ]; then + if $DOCKER load -i "$tarfile" 2>>"$LOG"; then + RELOAD_COUNT=$((RELOAD_COUNT + 1)) + else + log " Failed to load: $tarfile" + fi + fi + done + log "Re-loaded $RELOAD_COUNT image tarballs" +else + log "All critical images verified" +fi + # ── Tier 1: Databases & Core Infrastructure ────────────────────────────── log "=== Tier 1: Databases & Core Infrastructure ===" @@ -337,13 +385,16 @@ else $DOCKER network connect archy-net bitcoin-knots 2>/dev/null || true log "Bitcoin Knots already running" fi -# Wait for Bitcoin Knots RPC to be responsive +# Check Bitcoin Knots RPC (informational — containers created regardless) +# Dependent containers use --restart=unless-stopped and the health monitor +# will auto-restart them once Bitcoin becomes responsive. 
if wait_for_container "Bitcoin Knots RPC" "$DOCKER exec bitcoin-knots bitcoin-cli -rpcuser='$BITCOIN_RPC_USER' -rpcpassword='$BITCOIN_RPC_PASS' getblockchaininfo" 60; then BITCOIN_READY=true - log "Bitcoin Knots is ready — dependent containers will proceed" + log "Bitcoin Knots is ready" else BITCOIN_READY=false - log "WARNING: Bitcoin Knots NOT ready — skipping dependent containers (electrumx, lnd, mempool, btcpay, fedimint)" + log "Bitcoin Knots not yet responsive (normal during IBD) — creating dependent containers anyway" + log " They will auto-restart via health monitor once Bitcoin is ready" fi track_container "bitcoin-knots" @@ -355,7 +406,8 @@ if ! $DOCKER exec bitcoin-knots bitcoin-cli "-rpcuser=$BITCOIN_RPC_USER" "-rpcpa fi # 2. Mempool stack (matches deploy) — depends on Bitcoin -if [ "$BITCOIN_READY" = "true" ]; then +# Note: containers created regardless of BITCOIN_READY — they will restart +# automatically once Bitcoin becomes responsive (--restart=unless-stopped). if ! $DOCKER ps -a --format '{{.Names}}' 2>/dev/null | grep -qE 'archy-mempool-db|mysql-mempool'; then log "Creating mysql-mempool..." mkdir -p /var/lib/archipelago/mysql-mempool @@ -624,9 +676,7 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q fedimint-gateway; th fi track_container "fedimint-gateway" -else - log "SKIPPED: mempool stack, electrumx, btcpay stack, lnd, fedimint (Bitcoin not ready)" -fi # end BITCOIN_READY +# (Bitcoin-dependent containers created above regardless of BITCOIN_READY) # ── Tier 3: Applications (independent — always attempt) ─────────────────── log "=== Tier 3: Applications ===" @@ -742,12 +792,33 @@ fi track_container "nextcloud" if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q searxng; then log "Creating SearXNG..." + # SearXNG requires settings.yml or it exits immediately + SEARXNG_CONF="/var/lib/archipelago/searxng" + if [ ! 
-f "$SEARXNG_CONF/settings.yml" ]; then + mkdir -p "$SEARXNG_CONF" + SEARX_SECRET=$(openssl rand -hex 32) + cat > "$SEARXNG_CONF/settings.yml" </dev/null + log " Created SearXNG settings.yml" + fi $DOCKER run -d --name searxng --restart unless-stopped \ --health-cmd="curl -sf http://localhost:8080/ || exit 1" --health-interval=120s --health-timeout=5s --health-retries=3 \ --memory=$(mem_limit searxng) \ --cap-drop ALL --security-opt no-new-privileges:true \ --read-only --tmpfs /tmp:rw,noexec,nosuid,size=256m --tmpfs /run:rw,noexec,nosuid,size=64m \ -p 8888:8080 \ + -v /var/lib/archipelago/searxng:/etc/searxng \ "${SEARXNG_IMAGE}" 2>>"$LOG" || true fi track_container "searxng" @@ -979,8 +1050,29 @@ elif [ -x "/opt/archipelago/scripts/container-doctor.sh" ]; then bash "/opt/archipelago/scripts/container-doctor.sh" --local 2>&1 | tee -a "$LOG" fi -# 12. Final summary +# 11b. If any containers failed, run the reconciler to attempt recovery FAILED=$((TOTAL - SUCCESS)) +if [ "$FAILED" -gt 0 ]; then + log "Attempting to recover $FAILED failed container(s) via reconciler..." + RECONCILE_SCRIPT="" + if [ -x "$SCRIPT_DIR/reconcile-containers.sh" ]; then + RECONCILE_SCRIPT="$SCRIPT_DIR/reconcile-containers.sh" + elif [ -x "/opt/archipelago/scripts/reconcile-containers.sh" ]; then + RECONCILE_SCRIPT="/opt/archipelago/scripts/reconcile-containers.sh" + fi + if [ -n "$RECONCILE_SCRIPT" ]; then + runuser -u archipelago -- bash "$RECONCILE_SCRIPT" 2>&1 | tee -a "$LOG" + # Recount after reconciliation + SUCCESS=0 + for name in $($DOCKER ps --format '{{.Names}}' 2>/dev/null); do + SUCCESS=$((SUCCESS + 1)) + done + FAILED=$((TOTAL - SUCCESS)) + log "After reconciliation: $SUCCESS running, $FAILED still failed" + fi +fi + +# 12. 
Final summary log "=============================================" log " FIRST-BOOT CONTAINER SUMMARY" log "=============================================" @@ -988,7 +1080,7 @@ log " Total tracked: $TOTAL" log " Running: $SUCCESS" log " Failed: $FAILED" if [ "$BITCOIN_READY" != "true" ]; then - log " Bitcoin: NOT READY (dependent containers skipped)" + log " Bitcoin: NOT READY (dependent containers will auto-restart when ready)" fi if [ -n "$FAILED_LIST" ]; then log " Failed list: $FAILED_LIST"