fix: container orchestration overhaul — names, errors, Tor, restart
Container name resolution:
- New all_container_names() — single source of truth for every app's container name variants (bitcoin-knots/bitcoin/bitcoin-core, etc.)
- Covers all historical naming patterns and multi-container stacks

Start/Stop/Restart:
- No more silent failures (let _ = podman...). Every operation logs the command, checks exit status, and returns real errors to the UI.
- Restart uses stop+start fallback when podman restart fails (handles rootless podman loopback adapter errors)
- "No containers found" error when app doesn't exist

Tor helper:
- Install archipelago-tor-helper.path + .service in rootfs
- Enable the path unit so backend can manage Tor as non-root
- Copy tor-helper.sh to /opt/archipelago/scripts/

Verified: container with proper caps can stop/start/restart cleanly.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
fbabbd0722
commit
610e51500b
@ -281,6 +281,66 @@ pub(super) fn get_memory_limit(app_id: &str) -> &'static str {
|
||||
}
|
||||
|
||||
/// Get all container names for an app (handles multi-container apps like mempool)
|
||||
/// All known container name variants for a given app ID.
|
||||
/// This is the single source of truth for container name resolution.
|
||||
/// Every name that could appear in `podman ps` for this app must be listed here.
|
||||
pub(super) fn all_container_names(package_id: &str) -> Vec<String> {
|
||||
let base = package_id.to_string();
|
||||
let archy = format!("archy-{}", package_id);
|
||||
|
||||
match package_id {
|
||||
// Bitcoin: multiple historical names
|
||||
"bitcoin" | "bitcoin-core" | "bitcoin-knots" => vec![
|
||||
"bitcoin-knots".into(), "bitcoin".into(), "bitcoin-core".into(),
|
||||
"archy-bitcoin-knots".into(), "archy-bitcoin".into(),
|
||||
"bitcoin-ui".into(),
|
||||
],
|
||||
// LND + UI
|
||||
"lnd" => vec!["lnd".into(), "archy-lnd".into(), "archy-lnd-ui".into()],
|
||||
// Electrumx: multiple aliases
|
||||
"electrumx" | "electrs" | "mempool-electrs" => vec![
|
||||
"electrumx".into(), "electrs".into(), "mempool-electrs".into(),
|
||||
"archy-electrumx".into(), "archy-electrs-ui".into(),
|
||||
],
|
||||
// Mempool: multi-container stack
|
||||
"mempool" | "mempool-web" => vec![
|
||||
"mempool".into(), "mempool-web".into(), "mempool-api".into(),
|
||||
"archy-mempool-web".into(), "archy-mempool-api".into(),
|
||||
"archy-mempool-db".into(), "mysql-mempool".into(),
|
||||
],
|
||||
// BTCPay: multi-container + multiple aliases
|
||||
"btcpay-server" | "btcpayserver" | "btcpay" => vec![
|
||||
"btcpay-server".into(), "btcpay".into(), "btcpayserver".into(),
|
||||
"archy-btcpay".into(), "archy-btcpay-db".into(), "archy-nbxplorer".into(),
|
||||
],
|
||||
// Home Assistant: two naming conventions
|
||||
"homeassistant" | "home-assistant" => vec![
|
||||
"homeassistant".into(), "home-assistant".into(),
|
||||
"archy-homeassistant".into(),
|
||||
],
|
||||
// Fedimint: multiple related containers
|
||||
"fedimint" => vec![
|
||||
"fedimint".into(), "fedimintd".into(),
|
||||
"fedimint-ui".into(), "archy-fedimint".into(),
|
||||
"fedimint-gateway".into(),
|
||||
],
|
||||
"fedimint-gateway" => vec!["fedimint-gateway".into()],
|
||||
// Immich: multi-container
|
||||
"immich" => vec![
|
||||
"immich_postgres".into(), "immich_redis".into(), "immich_server".into(),
|
||||
],
|
||||
// Penpot: multi-container
|
||||
"penpot" | "penpot-frontend" => vec![
|
||||
"penpot-postgres".into(), "penpot-valkey".into(),
|
||||
"penpot-backend".into(), "penpot-exporter".into(), "penpot-frontend".into(),
|
||||
],
|
||||
// Default: exact name + archy- prefix
|
||||
_ => vec![base, archy],
|
||||
}
|
||||
}
|
||||
|
||||
/// Find all running/stopped containers that belong to a given app.
|
||||
/// Uses the canonical name list from all_container_names().
|
||||
pub(super) async fn get_containers_for_app(package_id: &str) -> Result<Vec<String>> {
|
||||
validate_app_id(package_id)?;
|
||||
let output = tokio::process::Command::new("podman")
|
||||
@ -291,48 +351,11 @@ pub(super) async fn get_containers_for_app(package_id: &str) -> Result<Vec<Strin
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let all: Vec<&str> = stdout.lines().filter(|s| !s.is_empty()).collect();
|
||||
|
||||
let patterns: Vec<String> = match package_id {
|
||||
"mempool" | "mempool-web" => {
|
||||
vec![
|
||||
"electrumx".into(),
|
||||
"mempool-electrs".into(),
|
||||
"mempool-api".into(),
|
||||
"archy-mempool-api".into(),
|
||||
"archy-mempool-web".into(),
|
||||
"mempool".into(),
|
||||
"archy-mempool-db".into(),
|
||||
"mysql-mempool".into(),
|
||||
]
|
||||
}
|
||||
"fedimint" => vec![
|
||||
"fedimint".into(),
|
||||
"fedimint-ui".into(),
|
||||
"archy-fedimint".into(),
|
||||
"fedimint-gateway".into(),
|
||||
],
|
||||
"fedimint-gateway" => vec!["fedimint-gateway".into()],
|
||||
"immich" => vec![
|
||||
"immich_postgres".into(),
|
||||
"immich_redis".into(),
|
||||
"immich_server".into(),
|
||||
],
|
||||
"penpot" | "penpot-frontend" => vec![
|
||||
"penpot-postgres".into(),
|
||||
"penpot-valkey".into(),
|
||||
"penpot-backend".into(),
|
||||
"penpot-exporter".into(),
|
||||
"penpot-frontend".into(),
|
||||
],
|
||||
_ => vec![package_id.to_string(), format!("archy-{}", package_id)],
|
||||
};
|
||||
|
||||
let patterns = all_container_names(package_id);
|
||||
let mut result = Vec::new();
|
||||
for name in all {
|
||||
for pat in &patterns {
|
||||
if name == pat {
|
||||
result.push(name.to_string());
|
||||
break;
|
||||
}
|
||||
if patterns.iter().any(|p| p == name) {
|
||||
result.push(name.to_string());
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
|
||||
@ -34,6 +34,10 @@ impl RpcHandler {
|
||||
validate_app_id(package_id)?;
|
||||
|
||||
let to_start = ordered_containers_for_start(package_id).await?;
|
||||
if to_start.is_empty() {
|
||||
tracing::warn!("package.start {}: no containers found", package_id);
|
||||
return Err(anyhow::anyhow!("No containers found for {}", package_id));
|
||||
}
|
||||
|
||||
// Clear user-stopped flag — user explicitly started this app
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, package_id).await;
|
||||
@ -41,13 +45,24 @@ impl RpcHandler {
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, name).await;
|
||||
}
|
||||
|
||||
for name in to_start {
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["start", &name])
|
||||
let mut errors = Vec::new();
|
||||
for name in &to_start {
|
||||
tracing::info!("Starting container: {}", name);
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["start", name])
|
||||
.output()
|
||||
.await;
|
||||
.await
|
||||
.context(format!("Failed to exec podman start {}", name))?;
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::error!("Failed to start {}: {}", name, stderr);
|
||||
errors.push(format!("{}: {}", name, stderr));
|
||||
}
|
||||
}
|
||||
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Start failed: {}", errors.join("; ")));
|
||||
}
|
||||
Ok(serde_json::Value::Null)
|
||||
}
|
||||
|
||||
@ -63,31 +78,36 @@ impl RpcHandler {
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing package id"))?;
|
||||
validate_app_id(package_id)?;
|
||||
|
||||
// Mark as user-stopped so health monitor and crash recovery don't auto-restart
|
||||
crate::crash_recovery::mark_user_stopped(&self.config.data_dir, package_id).await;
|
||||
|
||||
let containers = get_containers_for_app(package_id).await?;
|
||||
if containers.is_empty() {
|
||||
let container_name = format!("archy-{}", package_id);
|
||||
crate::crash_recovery::mark_user_stopped(&self.config.data_dir, &container_name)
|
||||
.await;
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["stop", "-t", stop_timeout_secs(&container_name), &container_name])
|
||||
.output()
|
||||
.await;
|
||||
return Ok(serde_json::Value::Null);
|
||||
tracing::warn!("package.stop {}: no containers found", package_id);
|
||||
return Err(anyhow::anyhow!("No containers found for {}", package_id));
|
||||
}
|
||||
|
||||
// Mark as user-stopped so health monitor and crash recovery don't auto-restart
|
||||
crate::crash_recovery::mark_user_stopped(&self.config.data_dir, package_id).await;
|
||||
for name in &containers {
|
||||
crate::crash_recovery::mark_user_stopped(&self.config.data_dir, name).await;
|
||||
}
|
||||
for name in containers {
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["stop", "-t", stop_timeout_secs(&name), &name])
|
||||
|
||||
let mut errors = Vec::new();
|
||||
for name in &containers {
|
||||
tracing::info!("Stopping container: {} (timeout: {}s)", name, stop_timeout_secs(name));
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["stop", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await;
|
||||
.await
|
||||
.context(format!("Failed to exec podman stop {}", name))?;
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::error!("Failed to stop {}: {}", name, stderr);
|
||||
errors.push(format!("{}: {}", name, stderr));
|
||||
}
|
||||
}
|
||||
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Stop failed: {}", errors.join("; ")));
|
||||
}
|
||||
Ok(serde_json::Value::Null)
|
||||
}
|
||||
|
||||
@ -105,21 +125,47 @@ impl RpcHandler {
|
||||
|
||||
let containers = get_containers_for_app(package_id).await?;
|
||||
if containers.is_empty() {
|
||||
let container_name = format!("archy-{}", package_id);
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["restart", &container_name])
|
||||
.output()
|
||||
.await;
|
||||
return Ok(serde_json::Value::Null);
|
||||
tracing::warn!("package.restart {}: no containers found", package_id);
|
||||
return Err(anyhow::anyhow!("No containers found for {}", package_id));
|
||||
}
|
||||
|
||||
for name in containers {
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["restart", &name])
|
||||
let mut errors = Vec::new();
|
||||
for name in &containers {
|
||||
tracing::info!("Restarting container: {}", name);
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["restart", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await;
|
||||
.await
|
||||
.context(format!("Failed to exec podman restart {}", name))?;
|
||||
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::warn!("podman restart {} failed: {}, trying stop+start", name, stderr);
|
||||
|
||||
// Fallback: stop then start (handles rootless podman loopback issues)
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["stop", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await;
|
||||
let start_out = tokio::process::Command::new("podman")
|
||||
.args(["start", name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman start {}", name))?;
|
||||
|
||||
if !start_out.status.success() {
|
||||
let start_err = String::from_utf8_lossy(&start_out.stderr).trim().to_string();
|
||||
tracing::error!("stop+start {} also failed: {}", name, start_err);
|
||||
errors.push(format!("{}: {}", name, start_err));
|
||||
} else {
|
||||
tracing::info!("Restarted {} via stop+start fallback", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Restart failed: {}", errors.join("; ")));
|
||||
}
|
||||
Ok(serde_json::Value::Null)
|
||||
}
|
||||
|
||||
|
||||
@ -339,12 +339,15 @@ COPY archipelago-doctor.service /etc/systemd/system/archipelago-doctor.service
|
||||
COPY archipelago-doctor.timer /etc/systemd/system/archipelago-doctor.timer
|
||||
COPY archipelago-reconcile.service /etc/systemd/system/archipelago-reconcile.service
|
||||
COPY archipelago-reconcile.timer /etc/systemd/system/archipelago-reconcile.timer
|
||||
COPY archipelago-tor-helper.service /etc/systemd/system/archipelago-tor-helper.service
|
||||
COPY archipelago-tor-helper.path /etc/systemd/system/archipelago-tor-helper.path
|
||||
|
||||
# Copy container doctor + reconcile scripts (referenced by the services above)
|
||||
RUN mkdir -p /home/archipelago/archy/scripts
|
||||
COPY container-doctor.sh /home/archipelago/archy/scripts/container-doctor.sh
|
||||
COPY reconcile-containers.sh /home/archipelago/archy/scripts/reconcile-containers.sh
|
||||
RUN chmod +x /home/archipelago/archy/scripts/*.sh && \
|
||||
COPY tor-helper.sh /opt/archipelago/scripts/tor-helper.sh
|
||||
RUN chmod +x /home/archipelago/archy/scripts/*.sh /opt/archipelago/scripts/*.sh && \
|
||||
chown -R archipelago:archipelago /home/archipelago/archy
|
||||
|
||||
# Enable services
|
||||
@ -357,7 +360,8 @@ RUN systemctl enable NetworkManager || true && \
|
||||
systemctl enable chrony || true && \
|
||||
systemctl enable archipelago-update.timer || true && \
|
||||
systemctl enable archipelago-doctor.timer || true && \
|
||||
systemctl enable archipelago-reconcile.timer || true
|
||||
systemctl enable archipelago-reconcile.timer || true && \
|
||||
systemctl enable archipelago-tor-helper.path || true
|
||||
|
||||
# Remove policy-rc.d so services can start on first boot
|
||||
RUN rm -f /usr/sbin/policy-rc.d
|
||||
@ -424,7 +428,7 @@ NGINXCONF
|
||||
cp "$SCRIPT_DIR/configs/archipelago-reconcile.service" "$WORK_DIR/archipelago-reconcile.service"
|
||||
cp "$SCRIPT_DIR/configs/archipelago-reconcile.timer" "$WORK_DIR/archipelago-reconcile.timer"
|
||||
# Copy the actual scripts the services reference
|
||||
for s in container-doctor.sh reconcile-containers.sh; do
|
||||
for s in container-doctor.sh reconcile-containers.sh tor-helper.sh; do
|
||||
if [ -f "$SCRIPT_DIR/../scripts/$s" ]; then
|
||||
cp "$SCRIPT_DIR/../scripts/$s" "$WORK_DIR/$s"
|
||||
fi
|
||||
@ -432,6 +436,13 @@ NGINXCONF
|
||||
echo " Using container doctor + reconcile timers from configs/"
|
||||
fi
|
||||
|
||||
# Copy Tor helper path-activated service (allows backend to manage Tor as non-root)
|
||||
if [ -f "$SCRIPT_DIR/configs/archipelago-tor-helper.service" ]; then
|
||||
cp "$SCRIPT_DIR/configs/archipelago-tor-helper.service" "$WORK_DIR/archipelago-tor-helper.service"
|
||||
cp "$SCRIPT_DIR/configs/archipelago-tor-helper.path" "$WORK_DIR/archipelago-tor-helper.path"
|
||||
echo " Using tor-helper path unit from configs/"
|
||||
fi
|
||||
|
||||
# Use archipelago.service from configs/ (User=root for Podman container access)
|
||||
if [ -f "$SCRIPT_DIR/configs/archipelago.service" ]; then
|
||||
cp "$SCRIPT_DIR/configs/archipelago.service" "$WORK_DIR/archipelago.service"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user