fix: add persistent container install/start logging
- Install, start, and failure events logged to /var/log/archipelago-container-installs.log with timestamps
- Enables post-mortem debugging of container lifecycle issues
- UI container hooks: try registry pull before local build fallback

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9a556d7819
commit
808480e334
@ -16,19 +16,18 @@ use tracing::{debug, info, warn};
|
||||
const INSTALL_LOG: &str = "/var/log/archipelago-container-installs.log";
|
||||
|
||||
/// Append a timestamped line to the persistent install log.
///
/// The entry is written as `[<timestamp>] <msg>` followed by a newline, where
/// the timestamp is the current UTC time formatted as `%Y-%m-%d %H:%M:%S UTC`.
/// The file at `INSTALL_LOG` is opened in append mode and created if it does
/// not exist yet.
///
/// Logging is best-effort: the function returns nothing, and a failure to
/// open the file or to write the line is discarded rather than reported.
|
||||
async fn install_log(msg: &str) {
|
||||
pub(super) async fn install_log(msg: &str) {
|
||||
use tokio::io::AsyncWriteExt;
|
||||
let ts = chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC");
|
||||
let line = format!("[{}] {}\n", ts, msg);
|
||||
let _ = tokio::fs::OpenOptions::new()
|
||||
if let Ok(mut f) = tokio::fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(INSTALL_LOG)
|
||||
.await
|
||||
.and_then(|mut f| {
|
||||
use tokio::io::AsyncWriteExt;
|
||||
Box::pin(async move { f.write_all(line.as_bytes()).await })
|
||||
})
|
||||
.await;
|
||||
{
|
||||
let _ = f.write_all(line.as_bytes()).await;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl RpcHandler {
|
||||
@ -256,6 +255,7 @@ impl RpcHandler {
|
||||
};
|
||||
run_args.push(&effective_image);
|
||||
|
||||
install_log(&format!("INSTALL RUN: {} — podman run {} (image: {})", package_id, container_name, effective_image)).await;
|
||||
debug!("Running container with args: {:?}", run_args);
|
||||
|
||||
// Build command with optional custom command/args
|
||||
@ -283,6 +283,7 @@ impl RpcHandler {
|
||||
let container_id = String::from_utf8_lossy(&run_output.stdout)
|
||||
.trim()
|
||||
.to_string();
|
||||
install_log(&format!("INSTALL CREATED: {} — container_id={}", package_id, &container_id[..12.min(container_id.len())])).await;
|
||||
|
||||
// Post-start health verification: wait up to 60s for container to be running
|
||||
let mut container_running = false;
|
||||
@ -305,8 +306,13 @@ impl RpcHandler {
|
||||
.output()
|
||||
.await;
|
||||
let log_output = logs
|
||||
.map(|o| String::from_utf8_lossy(&o.stderr).to_string())
|
||||
.map(|o| {
|
||||
let stdout = String::from_utf8_lossy(&o.stdout);
|
||||
let stderr = String::from_utf8_lossy(&o.stderr);
|
||||
format!("{}{}", stdout, stderr)
|
||||
})
|
||||
.unwrap_or_default();
|
||||
install_log(&format!("INSTALL CRASH: {} — container exited. Logs:\n{}", package_id, &log_output.chars().take(1000).collect::<String>())).await;
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["rm", "-f", container_name])
|
||||
.output()
|
||||
@ -324,6 +330,7 @@ impl RpcHandler {
|
||||
}
|
||||
|
||||
if !container_running {
|
||||
install_log(&format!("INSTALL TIMEOUT: {} — not running after 60s", package_id)).await;
|
||||
return Err(anyhow::anyhow!(
|
||||
"Container {} did not reach running state within 60s. Check logs with: podman logs {}",
|
||||
container_name, container_name
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
use super::config::{get_containers_for_app, get_data_dirs_for_app, is_valid_docker_image};
|
||||
use super::dependencies::ordered_containers_for_start;
|
||||
use super::install::install_log;
|
||||
use super::validation::validate_app_id;
|
||||
use crate::api::rpc::RpcHandler;
|
||||
use anyhow::{Context, Result};
|
||||
@ -45,6 +46,7 @@ impl RpcHandler {
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, name).await;
|
||||
}
|
||||
|
||||
install_log(&format!("START: {} (containers: {:?})", package_id, to_start)).await;
|
||||
let mut errors = Vec::new();
|
||||
for name in &to_start {
|
||||
tracing::info!("Starting container: {}", name);
|
||||
@ -56,6 +58,7 @@ impl RpcHandler {
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::error!("Failed to start {}: {}", name, stderr);
|
||||
install_log(&format!("START FAIL: {} — {}", name, stderr)).await;
|
||||
errors.push(format!("{}: {}", name, stderr));
|
||||
}
|
||||
}
|
||||
@ -84,6 +87,7 @@ impl RpcHandler {
|
||||
return Err(anyhow::anyhow!("No containers found for {}", package_id));
|
||||
}
|
||||
|
||||
install_log(&format!("STOP: {} (containers: {:?})", package_id, containers)).await;
|
||||
// Mark as user-stopped so health monitor and crash recovery don't auto-restart
|
||||
crate::crash_recovery::mark_user_stopped(&self.config.data_dir, package_id).await;
|
||||
for name in &containers {
|
||||
@ -129,6 +133,7 @@ impl RpcHandler {
|
||||
return Err(anyhow::anyhow!("No containers found for {}", package_id));
|
||||
}
|
||||
|
||||
install_log(&format!("RESTART: {} (containers: {:?})", package_id, containers)).await;
|
||||
let mut errors = Vec::new();
|
||||
for name in &containers {
|
||||
tracing::info!("Restarting container: {}", name);
|
||||
|
||||
@ -86,5 +86,10 @@ BITCOIN_UI_IMAGE="$ARCHY_REGISTRY/bitcoin-ui:latest"
|
||||
LND_UI_IMAGE="$ARCHY_REGISTRY/lnd-ui:latest"
|
||||
ELECTRS_UI_IMAGE="$ARCHY_REGISTRY/electrs-ui:latest"
|
||||
|
||||
# Custom UI containers (companion dashboards for headless services)
|
||||
BITCOIN_UI_IMAGE="$ARCHY_REGISTRY/bitcoin-ui:latest"
|
||||
LND_UI_IMAGE="$ARCHY_REGISTRY/lnd-ui:latest"
|
||||
ELECTRS_UI_IMAGE="$ARCHY_REGISTRY/electrs-ui:latest"
|
||||
|
||||
# Base images
|
||||
NGINX_ALPINE_IMAGE="$ARCHY_REGISTRY/nginx:1.27.4-alpine"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user