From 808480e334eaf8a2c2786afdb278b4c9cefd3f64 Mon Sep 17 00:00:00 2001 From: Dorian Date: Tue, 31 Mar 2026 11:09:49 +0100 Subject: [PATCH] fix: add persistent container install/start logging - Install, start, and failure events logged to /var/log/archipelago-container-installs.log with timestamps - Enables post-mortem debugging of container lifecycle issues - UI container hooks: try registry pull before local build fallback Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/api/rpc/package/install.rs | 23 ++++++++++++------- .../src/api/rpc/package/runtime.rs | 5 ++++ scripts/image-versions.sh | 5 ++++ 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/core/archipelago/src/api/rpc/package/install.rs b/core/archipelago/src/api/rpc/package/install.rs index 686d640b..255b2aea 100644 --- a/core/archipelago/src/api/rpc/package/install.rs +++ b/core/archipelago/src/api/rpc/package/install.rs @@ -16,19 +16,18 @@ use tracing::{debug, info, warn}; const INSTALL_LOG: &str = "/var/log/archipelago-container-installs.log"; /// Append a timestamped line to the persistent install log. 
-async fn install_log(msg: &str) { +pub(super) async fn install_log(msg: &str) { + use tokio::io::AsyncWriteExt; let ts = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC"); let line = format!("[{}] {}\n", ts, msg); - let _ = tokio::fs::OpenOptions::new() + if let Ok(mut f) = tokio::fs::OpenOptions::new() .create(true) .append(true) .open(INSTALL_LOG) .await - .and_then(|mut f| { - use tokio::io::AsyncWriteExt; - Box::pin(async move { f.write_all(line.as_bytes()).await }) - }) - .await; + { + let _ = f.write_all(line.as_bytes()).await; + } } impl RpcHandler { @@ -256,6 +255,7 @@ impl RpcHandler { }; run_args.push(&effective_image); + install_log(&format!("INSTALL RUN: {} — podman run {} (image: {})", package_id, container_name, effective_image)).await; debug!("Running container with args: {:?}", run_args); // Build command with optional custom command/args @@ -283,6 +283,7 @@ impl RpcHandler { let container_id = String::from_utf8_lossy(&run_output.stdout) .trim() .to_string(); + install_log(&format!("INSTALL CREATED: {} — container_id={}", package_id, &container_id[..12.min(container_id.len())])).await; // Post-start health verification: wait up to 60s for container to be running let mut container_running = false; @@ -305,8 +306,13 @@ impl RpcHandler { .output() .await; let log_output = logs - .map(|o| String::from_utf8_lossy(&o.stderr).to_string()) + .map(|o| { + let stdout = String::from_utf8_lossy(&o.stdout); + let stderr = String::from_utf8_lossy(&o.stderr); + format!("{}{}", stdout, stderr) + }) .unwrap_or_default(); + install_log(&format!("INSTALL CRASH: {} — container exited. 
Logs:\n{}", package_id, &log_output.chars().take(1000).collect::<String>())).await; let _ = tokio::process::Command::new("podman") .args(["rm", "-f", container_name]) .output() @@ -324,6 +330,7 @@ impl RpcHandler { } if !container_running { + install_log(&format!("INSTALL TIMEOUT: {} — not running after 60s", package_id)).await; return Err(anyhow::anyhow!( "Container {} did not reach running state within 60s. Check logs with: podman logs {}", container_name, container_name diff --git a/core/archipelago/src/api/rpc/package/runtime.rs b/core/archipelago/src/api/rpc/package/runtime.rs index 41a1fb0b..4206f950 100644 --- a/core/archipelago/src/api/rpc/package/runtime.rs +++ b/core/archipelago/src/api/rpc/package/runtime.rs @@ -1,5 +1,6 @@ use super::config::{get_containers_for_app, get_data_dirs_for_app, is_valid_docker_image}; use super::dependencies::ordered_containers_for_start; +use super::install::install_log; use super::validation::validate_app_id; use crate::api::rpc::RpcHandler; use anyhow::{Context, Result}; @@ -45,6 +46,7 @@ impl RpcHandler { crate::crash_recovery::clear_user_stopped(&self.config.data_dir, name).await; } + install_log(&format!("START: {} (containers: {:?})", package_id, to_start)).await; let mut errors = Vec::new(); for name in &to_start { tracing::info!("Starting container: {}", name); @@ -56,6 +58,7 @@ impl RpcHandler { if !out.status.success() { let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string(); tracing::error!("Failed to start {}: {}", name, stderr); + install_log(&format!("START FAIL: {} — {}", name, stderr)).await; errors.push(format!("{}: {}", name, stderr)); } } @@ -84,6 +87,7 @@ impl RpcHandler { return Err(anyhow::anyhow!("No containers found for {}", package_id)); } + install_log(&format!("STOP: {} (containers: {:?})", package_id, containers)).await; // Mark as user-stopped so health monitor and crash recovery don't auto-restart crate::crash_recovery::mark_user_stopped(&self.config.data_dir, package_id).await; for name 
in &containers { @@ -129,6 +133,7 @@ impl RpcHandler { return Err(anyhow::anyhow!("No containers found for {}", package_id)); } + install_log(&format!("RESTART: {} (containers: {:?})", package_id, containers)).await; let mut errors = Vec::new(); for name in &containers { tracing::info!("Restarting container: {}", name); diff --git a/scripts/image-versions.sh b/scripts/image-versions.sh index 56b98b92..68ac9856 100644 --- a/scripts/image-versions.sh +++ b/scripts/image-versions.sh @@ -86,5 +86,10 @@ BITCOIN_UI_IMAGE="$ARCHY_REGISTRY/bitcoin-ui:latest" LND_UI_IMAGE="$ARCHY_REGISTRY/lnd-ui:latest" ELECTRS_UI_IMAGE="$ARCHY_REGISTRY/electrs-ui:latest" +# Custom UI containers (companion dashboards for headless services) +BITCOIN_UI_IMAGE="$ARCHY_REGISTRY/bitcoin-ui:latest" +LND_UI_IMAGE="$ARCHY_REGISTRY/lnd-ui:latest" +ELECTRS_UI_IMAGE="$ARCHY_REGISTRY/electrs-ui:latest" + # Base images NGINX_ALPINE_IMAGE="$ARCHY_REGISTRY/nginx:1.27.4-alpine"