archy/core/container/src/podman_client.rs
Dorian 1e283daf13 fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy

UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry

Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)

Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0

Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00

551 lines
20 KiB
Rust

//! Podman container management via the REST API unix socket.
//!
//! Connects to the rootless Podman API at /run/user/{UID}/podman/podman.sock.
//! All operations are non-blocking async via tokio + hyper.
//! Falls back to the `podman` CLI for image pulls (long-running streaming operations)
//! and for log retrieval (the logs endpoint returns raw text, not JSON).
use crate::manifest::AppManifest;
use anyhow::{Context, Result};
use hyper::{Body, Request, Uri};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use thiserror::Error;
use tokio::net::UnixStream;
/// Version segment prefixed to every Podman API request path.
const API_VERSION: &str = "v4.0.0";
/// Request timeout for ordinary API calls; also bounds the `podman logs` CLI call.
const DEFAULT_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
/// Request timeout for slower API calls (used for container creation).
const LONG_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(120);
/// Typed errors for Podman operations.
///
/// NOTE(review): the methods visible in this file report failures as `anyhow` errors and
/// never construct these variants — confirm whether other modules rely on them.
#[derive(Debug, Error)]
pub enum PodmanError {
/// A failure reported by the Podman API; the payload is interpolated into the message.
#[error("Podman API error: {0}")]
ApiError(String),
/// A container could not be found; the payload presumably identifies it (name or id).
#[error("Container not found: {0}")]
NotFound(String),
/// The Podman socket could not be used; the payload carries the detail text.
#[error("Podman socket not available: {0}")]
SocketUnavailable(String),
/// Wrapper around `std::io::Error`, convertible with `?` via `#[from]`.
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
}
/// Snapshot of one container as produced by `get_container_status` or `list_containers`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContainerStatus {
/// Value of the API's `Id` field; empty when the field is absent.
pub id: String,
/// Container name.
pub name: String,
/// Lifecycle state parsed from the API's state string.
pub state: ContainerState,
/// Health-check status, when one is reported.
pub health: Option<String>,
/// Exit code reported by the API, if any.
pub exit_code: Option<i32>,
/// Start time exactly as the API returned it (only populated when it is a string).
pub started_at: Option<String>,
/// Image name; empty when the API did not report one.
pub image: String,
/// Value of the API's `Created` field when it is a string; empty otherwise.
pub created: String,
/// Published ports rendered as `host_ip:host_port->container_port` strings.
pub ports: Vec<String>,
/// UI launch URL from `PodmanClient::lan_address_for`, when the name is a known app.
pub lan_address: Option<String>,
}
/// Container lifecycle state, converted from Podman's state string via `From<&str>`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ContainerState {
/// State string `"created"`.
Created,
/// State string `"running"`.
Running,
/// State string `"stopped"`.
Stopped,
/// State string `"exited"`.
Exited,
/// State string `"paused"`.
Paused,
/// Any other state string; the `From<&str>` conversion stores it lowercased.
Unknown(String),
}
impl From<&str> for ContainerState {
fn from(s: &str) -> Self {
match s.to_lowercase().as_str() {
"created" => ContainerState::Created,
"running" => ContainerState::Running,
"stopped" => ContainerState::Stopped,
"exited" => ContainerState::Exited,
"paused" => ContainerState::Paused,
other => ContainerState::Unknown(other.to_string()),
}
}
}
/// Extracts the health-check state from Podman's human-readable status string.
///
/// Looks at the last parenthesised group, e.g. `"Up 5 minutes (healthy)"` yields
/// `Some("healthy")`. Only the health states `healthy`, `unhealthy` and `starting` are
/// accepted (case-insensitively, with an optional `health:` prefix); the result is
/// lowercased. Other parenthesised text — notably the exit code in
/// `"Exited (0) 5 minutes ago"` — is not a health state and yields `None`.
fn parse_health_from_status(status: &str) -> Option<String> {
    let open = status.rfind('(')?;
    let after_open = &status[open + 1..];
    let close = after_open.find(')')?;

    let label = after_open[..close].trim().to_ascii_lowercase();
    // Docker-style status strings spell the group as "(health: starting)".
    let label = label
        .strip_prefix("health:")
        .map_or(label.as_str(), str::trim);

    matches!(label, "healthy" | "unhealthy" | "starting").then(|| label.to_string())
}
/// Client for the Podman REST API served over a unix socket.
pub struct PodmanClient {
/// Filesystem path of the Podman API socket this client connects to.
socket_path: PathBuf,
}
impl PodmanClient {
/// Builds a client for the rootless Podman socket belonging to `user`.
///
/// The socket path is `/run/user/{uid}/podman/podman.sock`, where the UID comes from
/// `get_uid` (which falls back to 1000 when the user cannot be resolved).
pub fn new(user: String) -> Self {
    let owner_uid = Self::get_uid(&user);
    Self {
        socket_path: PathBuf::from(format!("/run/user/{}/podman/podman.sock", owner_uid)),
    }
}
/// Resolves `user` to a numeric UID by scanning `/etc/passwd`.
///
/// Returns the UID of the first entry whose login name equals `user` and whose third
/// field parses as a `u32`. Falls back to 1000 (standard first user) when the file is
/// unreadable or no such entry exists.
fn get_uid(user: &str) -> u32 {
    std::fs::read_to_string("/etc/passwd")
        .ok()
        .and_then(|passwd| {
            passwd.lines().find_map(|entry| {
                // passwd layout: login:password:uid:...
                let mut fields = entry.split(':');
                let login = fields.next()?;
                let uid_field = fields.nth(1)?;
                if login == user {
                    uid_field.parse::<u32>().ok()
                } else {
                    None
                }
            })
        })
        .unwrap_or(1000)
}
/// Maps a container name to its UI launch URL.
///
/// Returns `None` for names that have no known UI. The table is static: every URL points
/// at `localhost` with the host port the app is expected to publish.
pub fn lan_address_for(name: &str) -> Option<String> {
    let url = match name {
        "bitcoin-knots" | "bitcoin-ui" => "http://localhost:8334",
        "lnd" | "archy-lnd-ui" => "http://localhost:8081",
        "homeassistant" => "http://localhost:8123",
        "archy-mempool-web" | "mempool" => "http://localhost:4080",
        "btcpay-server" => "http://localhost:23000",
        "grafana" => "http://localhost:3000",
        "searxng" => "http://localhost:8888",
        "ollama" => "http://localhost:11434",
        "onlyoffice" => "http://localhost:8044",
        "penpot" => "http://localhost:9001",
        "nextcloud" => "http://localhost:8085",
        "vaultwarden" => "http://localhost:8082",
        "jellyfin" => "http://localhost:8096",
        "photoprism" => "http://localhost:2342",
        "immich_server" | "immich" => "http://localhost:2283",
        "filebrowser" => "http://localhost:8083",
        "nginx-proxy-manager" => "http://localhost:8181",
        "portainer" => "http://localhost:9000",
        "uptime-kuma" => "http://localhost:3001",
        "fedimint" | "fedimintd" => "http://localhost:8175",
        "fedimint-gateway" => "http://localhost:8176",
        "nostr-rs-relay" => "http://localhost:18081",
        // IndeedHub was moved off 7777 (conflicted with strfry) to 8190.
        // NOTE(review): confirm this matches the port in the IndeedHub app config.
        "indeedhub" => "http://localhost:8190",
        "dwn" => "http://localhost:3100",
        "endurain" => "http://localhost:8080",
        "electrs" | "archy-electrs-ui" => "http://localhost:50002",
        _ => return None,
    };
    Some(url.to_string())
}
// ─── API Client ──────────────────────────────────────────────
/// Send a request to the Podman API via unix socket.
///
/// Opens a fresh connection per call, issues `method` on `/{API_VERSION}/{path}` and
/// returns the parsed JSON response.
///
/// * `method` — `"POST"` and `"DELETE"` are sent as such; any other value is sent as `GET`.
/// * `path` — API path relative to the version prefix; a leading `/` is ignored.
/// * `body` — JSON payload, only used for `POST` (an empty body is sent when `None`).
/// * `timeout` — applied separately to waiting for the response head and to reading the
///   response body. Connecting to the socket has its own fixed 30s limit.
///
/// A successful response with an empty body is reported as `{"ok": true}`.
///
/// # Errors
/// Fails when the socket cannot be reached, a phase times out, the API answers 404
/// (`"Not found"`) or any other non-success status, or the body is not valid JSON.
async fn api_request(
    &self,
    method: &str,
    path: &str,
    body: Option<serde_json::Value>,
    timeout: std::time::Duration,
) -> Result<serde_json::Value> {
    // Connect to the unix socket (30s timeout — podman can be slow under load on boot)
    let stream = tokio::time::timeout(
        std::time::Duration::from_secs(30),
        UnixStream::connect(&self.socket_path),
    )
    .await
    .map_err(|_| anyhow::anyhow!("Podman socket connection timed out (30s)"))?
    .with_context(|| {
        format!(
            "Cannot connect to Podman socket at {}",
            self.socket_path.display()
        )
    })?;

    // Build the hyper client with the unix stream
    let (mut sender, conn) = hyper::client::conn::Builder::new()
        .handshake::<_, Body>(stream)
        .await
        .context("Podman API handshake failed")?;

    // The connection future drives the socket I/O; it must be polled for the request
    // below to make progress, so it runs as its own task.
    tokio::spawn(async move {
        if let Err(e) = conn.await {
            tracing::debug!("Podman API connection ended: {}", e);
        }
    });

    // Build the request
    let uri: Uri = format!("/{}/{}", API_VERSION, path.trim_start_matches('/'))
        .parse()
        .context("Invalid API path")?;
    let builder = Request::builder().uri(uri).header("Host", "localhost");
    let req = match method {
        "POST" => {
            let payload = match body {
                Some(b) => serde_json::to_string(&b)
                    .context("Failed to serialize request body to JSON")?,
                None => String::new(),
            };
            builder
                .method("POST")
                .header("Content-Type", "application/json")
                .body(Body::from(payload))
                .context("Failed to build POST request")?
        }
        "DELETE" => builder
            .method("DELETE")
            .body(Body::empty())
            .context("Failed to build DELETE request")?,
        _ => builder
            .method("GET")
            .body(Body::empty())
            .context("Failed to build GET request")?,
    };

    // Wait for the response head with timeout
    let resp = tokio::time::timeout(timeout, sender.send_request(req))
        .await
        .map_err(|_| {
            anyhow::anyhow!("Podman API request timed out after {}s", timeout.as_secs())
        })?
        .context("Podman API request failed")?;
    let status = resp.status();

    // The body read needs its own bound: a stalled daemon could otherwise send the
    // headers and then leave this call hanging forever.
    let body_bytes = tokio::time::timeout(timeout, hyper::body::to_bytes(resp.into_body()))
        .await
        .map_err(|_| {
            anyhow::anyhow!("Podman API response timed out after {}s", timeout.as_secs())
        })?
        .context("Failed to read Podman API response")?;

    if status == hyper::StatusCode::NOT_FOUND {
        return Err(anyhow::anyhow!("Not found"));
    }
    if !status.is_success() {
        let error_text = String::from_utf8_lossy(&body_bytes);
        return Err(anyhow::anyhow!(
            "Podman API {} {}: {}",
            status.as_u16(),
            status.canonical_reason().unwrap_or(""),
            error_text
        ));
    }

    // Some endpoints return empty body on success (start/stop/restart)
    if body_bytes.is_empty() {
        return Ok(serde_json::json!({"ok": true}));
    }
    serde_json::from_slice(&body_bytes).context("Failed to parse Podman API JSON response")
}
/// Issues a body-less POST to `path` with the default timeout (start/stop/restart style
/// actions), discarding whatever JSON the API returns.
async fn api_post_action(&self, path: &str) -> Result<()> {
    self.api_request("POST", path, None, DEFAULT_TIMEOUT)
        .await
        .map(|_| ())
}
// ─── Container Operations ────────────────────────────────────
/// Pulls `image` by running `podman pull`, waiting at most 10 minutes.
///
/// `_signature` is accepted for interface compatibility but is not used.
///
/// # Errors
/// Fails when the command cannot be started, exceeds the time limit, or exits with a
/// non-zero status (the command's stderr is included in the error).
pub async fn pull_image(&self, image: &str, _signature: Option<&str>) -> Result<()> {
    // Image pull uses CLI — it's a streaming operation that the API handles differently
    let mut cmd = tokio::process::Command::new("podman");
    cmd.arg("pull").arg(image);
    // Tokio leaves the child running when its future is dropped. Without this, a pull
    // that hits the timeout would keep going in the background after we report failure.
    cmd.kill_on_drop(true);

    let output = tokio::time::timeout(
        std::time::Duration::from_secs(600), // 10 min for large images
        cmd.output(),
    )
    .await
    .map_err(|_| anyhow::anyhow!("Image pull timed out after 10 minutes"))?
    .context("Failed to execute podman pull")?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!("Failed to pull image: {}", stderr));
    }
    Ok(())
}
pub async fn create_container(
&self,
manifest: &AppManifest,
name: &str,
) -> Result<String> {
// Build the container spec for the API
let mut port_mappings = Vec::new();
for port in &manifest.app.ports {
port_mappings.push(serde_json::json!({
"container_port": port.container,
"host_port": port.host,
"protocol": "tcp",
}));
}
let mut mounts = Vec::new();
for volume in &manifest.app.volumes {
mounts.push(serde_json::json!({
"destination": volume.target,
"source": volume.source,
"type": "bind",
"options": volume.options,
}));
}
let mut env_map = serde_json::Map::new();
for env in &manifest.app.environment {
if let Some((k, v)) = env.split_once('=') {
env_map.insert(k.to_string(), serde_json::Value::String(v.to_string()));
}
}
let cap_add: Vec<String> = manifest.app.security.capabilities.clone();
let cap_drop = vec!["ALL".to_string()];
let body = serde_json::json!({
"name": name,
"image": manifest.app.container.image,
"portmappings": port_mappings,
"mounts": mounts,
"env": env_map,
"devices": manifest.app.devices.iter().map(|d| {
serde_json::json!({"path": d})
}).collect::<Vec<_>>(),
"resource_limits": {
"memory": {
"limit": manifest.app.resources.memory_limit.as_ref()
.and_then(|m| parse_memory_limit(m))
.unwrap_or(0),
},
"cpu": {
"quota": manifest.app.resources.cpu_limit
.map(|c| (c as i64) * 100000)
.unwrap_or(0),
"period": 100000u64,
}
},
"cap_add": cap_add,
"cap_drop": cap_drop,
"read_only_filesystem": manifest.app.security.readonly_root,
"no_new_privileges": true,
"restart_policy": "unless-stopped",
"restart_tries": 5,
"netns": {
"nsmode": match manifest.app.security.network_policy.as_str() {
"host" => "host",
_ => "bridge",
}
},
});
let result = self.api_request(
"POST",
"libpod/containers/create",
Some(body),
LONG_TIMEOUT,
).await?;
let id = result["Id"].as_str()
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
.context("Podman API returned no container ID — creation may have failed")?;
Ok(id)
}
/// Starts the container called `name` via `libpod/containers/{name}/start`.
pub async fn start_container(&self, name: &str) -> Result<()> {
    let endpoint = format!("libpod/containers/{}/start", name);
    self.api_post_action(&endpoint).await
}
/// Stops the container called `name`, passing `t=10` to the stop endpoint.
pub async fn stop_container(&self, name: &str) -> Result<()> {
    let endpoint = format!("libpod/containers/{}/stop?t=10", name);
    self.api_request("POST", &endpoint, None, DEFAULT_TIMEOUT)
        .await?;
    Ok(())
}
/// Restarts the container called `name`, passing `t=10` to the restart endpoint.
pub async fn restart_container(&self, name: &str) -> Result<()> {
    let endpoint = format!("libpod/containers/{}/restart?t=10", name);
    self.api_request("POST", &endpoint, None, DEFAULT_TIMEOUT)
        .await?;
    Ok(())
}
/// Removes the container called `name` with `force=true`.
pub async fn remove_container(&self, name: &str) -> Result<()> {
    let endpoint = format!("libpod/containers/{}?force=true", name);
    self.api_request("DELETE", &endpoint, None, DEFAULT_TIMEOUT)
        .await?;
    Ok(())
}
/// Inspects the container called `name` and summarises it as a [`ContainerStatus`].
///
/// Fields missing from the inspect document fall back to empty strings / `None`; a
/// missing state string becomes `ContainerState::Unknown("unknown")`.
///
/// # Errors
/// Fails when the inspect request fails (including when the container does not exist).
pub async fn get_container_status(&self, name: &str) -> Result<ContainerStatus> {
    let endpoint = format!("libpod/containers/{}/json", name);
    let inspect = self
        .api_request("GET", &endpoint, None, DEFAULT_TIMEOUT)
        .await?;

    let state = &inspect["State"];
    let container_name = inspect["Name"].as_str().unwrap_or(name).to_string();
    // Prefer the top-level image name, then the configured image.
    let image = inspect["ImageName"]
        .as_str()
        .or_else(|| inspect["Config"]["Image"].as_str())
        .unwrap_or("")
        .to_string();

    Ok(ContainerStatus {
        id: inspect["Id"].as_str().unwrap_or("").to_string(),
        state: ContainerState::from(state["Status"].as_str().unwrap_or("unknown")),
        health: state["Health"]["Status"].as_str().map(String::from),
        exit_code: state["ExitCode"].as_i64().map(|code| code as i32),
        started_at: state["StartedAt"].as_str().map(String::from),
        image,
        created: inspect["Created"].as_str().unwrap_or("").to_string(),
        // Parse port bindings
        ports: parse_port_bindings(&inspect["HostConfig"]["PortBindings"]),
        lan_address: Self::lan_address_for(&container_name),
        name: container_name,
    })
}
/// Returns the last `lines` log lines of the container called `name`.
///
/// Runs `podman logs --tail <lines> <name>` and returns the command's stdout lines
/// followed by its stderr lines (the two streams are not interleaved).
///
/// # Errors
/// Fails when the command cannot be started, exceeds `DEFAULT_TIMEOUT`, or exits with a
/// non-zero status.
pub async fn get_container_logs(&self, name: &str, lines: u32) -> Result<Vec<String>> {
    // Logs endpoint returns raw text, not JSON — use CLI for this
    let mut cmd = tokio::process::Command::new("podman");
    cmd.arg("logs")
        .arg("--tail")
        .arg(lines.to_string())
        .arg(name);
    // Tokio leaves the child running when its future is dropped; make sure a hung
    // `podman logs` is killed when the timeout below fires.
    cmd.kill_on_drop(true);

    let output = tokio::time::timeout(DEFAULT_TIMEOUT, cmd.output())
        .await
        .map_err(|_| anyhow::anyhow!("Container logs timed out"))?
        .context("Failed to get container logs")?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(anyhow::anyhow!("Failed to get logs: {}", stderr));
    }

    // Podman logs go to both stdout and stderr. Split each stream into lines on its own
    // so a stdout that lacks a trailing newline is not fused with the first stderr line.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);
    Ok(stdout
        .lines()
        .chain(stderr.lines())
        .map(str::to_owned)
        .collect())
}
/// Lists every container known to Podman (`all=true`, so stopped ones are included).
///
/// Published ports are rendered as `host_ip:host_port->container_port/protocol`. The
/// mapping's `host_ip` is used when present; an absent or empty one is shown as
/// `0.0.0.0`. Mappings without a numeric host or container port are skipped.
///
/// # Errors
/// Fails when the API request fails or the response is not a JSON array.
pub async fn list_containers(&self) -> Result<Vec<ContainerStatus>> {
    let data = self
        .api_request(
            "GET",
            "libpod/containers/json?all=true",
            None,
            DEFAULT_TIMEOUT,
        )
        .await?;
    let containers = data
        .as_array()
        .ok_or_else(|| anyhow::anyhow!("Expected array from containers/json"))?;

    let mut result = Vec::with_capacity(containers.len());
    for c in containers {
        // `Names` is read as an array (first entry wins) or, failing that, a bare string.
        let name = match c["Names"].as_array() {
            Some(names) => names.first().and_then(|v| v.as_str()).unwrap_or(""),
            None => c["Names"].as_str().unwrap_or(""),
        }
        .to_string();

        let ports: Vec<String> = c["Ports"]
            .as_array()
            .map(|mappings| {
                mappings
                    .iter()
                    .filter_map(|port| {
                        let host_port = port["host_port"].as_u64()?;
                        let container_port = port["container_port"].as_u64()?;
                        let protocol = port["protocol"].as_str().unwrap_or("tcp");
                        // Report the real bind address; empty means "all interfaces".
                        let host_ip = port["host_ip"]
                            .as_str()
                            .filter(|ip| !ip.is_empty())
                            .unwrap_or("0.0.0.0");
                        Some(format!(
                            "{}:{}->{}/{}",
                            host_ip, host_port, container_port, protocol
                        ))
                    })
                    .collect()
            })
            .unwrap_or_default();

        // Health is taken from the status text first, then from a `Health` string field.
        let status_str = c["Status"].as_str().unwrap_or("");
        let health = parse_health_from_status(status_str)
            .or_else(|| c["Health"].as_str().map(|s| s.to_string()));

        let started_at = c["StartedAt"]
            .as_str()
            .or_else(|| c["Started"].as_str())
            .map(|s| s.to_string());

        let exit_code = c["ExitCode"]
            .as_i64()
            .or_else(|| c["State"]["ExitCode"].as_i64())
            .map(|code| code as i32);

        let lan_address = Self::lan_address_for(&name);

        result.push(ContainerStatus {
            id: c["Id"].as_str().unwrap_or("").to_string(),
            name,
            state: ContainerState::from(c["State"].as_str().unwrap_or("unknown")),
            health,
            exit_code,
            started_at,
            image: c["Image"].as_str().unwrap_or("").to_string(),
            created: c["Created"].as_str().unwrap_or("").to_string(),
            ports,
            lan_address,
        });
    }
    Ok(result)
}
/// Reports whether the Podman API answers a `libpod/info` request.
///
/// The request itself is given 5 seconds; any failure (connection, timeout, error
/// status, bad JSON) is reported as `false`.
pub async fn health_check(&self) -> bool {
    let probe_timeout = std::time::Duration::from_secs(5);
    let probe = self
        .api_request("GET", "libpod/info", None, probe_timeout)
        .await;
    probe.is_ok()
}
}
// ─── Helpers ─────────────────────────────────────────────────────
/// Renders an inspect-style `PortBindings` object as `host_ip:host_port->container_port`
/// strings.
///
/// `bindings` is expected to be a JSON object mapping a container port key to an array of
/// `{ "HostIp", "HostPort" }` objects; anything else yields an empty list. Bindings with
/// no `HostPort` are skipped, and a missing or empty `HostIp` is shown as `0.0.0.0`.
fn parse_port_bindings(bindings: &serde_json::Value) -> Vec<String> {
    let mut ports = Vec::new();
    if let Some(obj) = bindings.as_object() {
        for (container_port, host_bindings) in obj {
            if let Some(arr) = host_bindings.as_array() {
                for binding in arr {
                    // An empty HostIp would otherwise render as ":8080->80/tcp".
                    let host_ip = binding["HostIp"]
                        .as_str()
                        .filter(|ip| !ip.is_empty())
                        .unwrap_or("0.0.0.0");
                    let host_port = binding["HostPort"].as_str().unwrap_or("");
                    if !host_port.is_empty() {
                        ports.push(format!("{}:{}->{}", host_ip, host_port, container_port));
                    }
                }
            }
        }
    }
    ports
}
/// Parses a human-readable memory size into a byte count.
///
/// Accepted forms (case-insensitive, surrounding whitespace ignored):
/// - a plain integer byte count, optionally suffixed with `b` (`"1024"`, `"1024b"`);
/// - a decimal number followed by a unit `k`, `m`, `g` or `t`, which may also be spelled
///   `kb`/`kib`, `mb`/`mib`, and so on (`"512m"`, `"1.5g"`, `"2GB"`, `"256 MiB"`).
///
/// All units are powers of 1024. Returns `None` for anything else, including negative or
/// non-finite values, unknown units, and fractional values without a unit.
fn parse_memory_limit(limit: &str) -> Option<i64> {
    const KIB: f64 = 1024.0;

    let limit = limit.trim().to_lowercase();
    // Everything up to the last digit or '.' is the number; the remainder is the unit.
    let split = limit
        .rfind(|c: char| c.is_ascii_digit() || c == '.')
        .map_or(0, |idx| idx + 1);
    let (number, unit) = limit.split_at(split);

    let factor = match unit.trim() {
        // Plain byte counts are parsed as integers so large values stay exact.
        "" | "b" => return number.parse::<i64>().ok().filter(|bytes| *bytes >= 0),
        "k" | "kb" | "kib" => KIB,
        "m" | "mb" | "mib" => KIB * KIB,
        "g" | "gb" | "gib" => KIB * KIB * KIB,
        "t" | "tb" | "tib" => KIB * KIB * KIB * KIB,
        _ => return None,
    };

    number
        .parse::<f64>()
        .ok()
        .filter(|value| value.is_finite() && *value >= 0.0)
        // Float-to-int `as` saturates at i64::MAX rather than wrapping.
        .map(|value| (value * factor) as i64)
}