backend: harden rootless app lifecycle orchestration

This commit is contained in:
archipelago 2026-06-11 00:24:32 -04:00
parent 09ec64932f
commit c393b96da3
56 changed files with 7543 additions and 1994 deletions

View File

@ -128,6 +128,22 @@ impl ApiHandler {
hyper::Body::from(r#"{"ok":true,"handled":"connection_accepted"}"#),
));
}
if let Some(handled) =
crate::api::rpc::bitcoin_relay::record_incoming_relay_message(
std::path::Path::new("/var/lib/archipelago"),
from,
incoming.from_name.as_deref(),
&val,
)
.await?
{
return Ok(build_response(
StatusCode::OK,
"application/json",
hyper::Body::from(format!(r#"{{"ok":true,"handled":"{}"}}"#, handled)),
));
}
}
let safe_from = sanitize_log_string(from);

View File

@ -189,6 +189,27 @@ impl RpcHandler {
.map(|f| f as u64)
.unwrap_or(0);
let latest = self.metrics_store.latest().await;
let (cpu_pct, mem_pct, disk_pct): (f64, f64, f64) = latest
.map(|s| {
let mem_total = s.system.mem_total_bytes as f64;
let disk_total = s.system.disk_total_bytes as f64;
(
s.system.cpu_percent,
if mem_total > 0.0 {
(s.system.mem_used_bytes as f64 / mem_total) * 100.0
} else {
0.0
},
if disk_total > 0.0 {
(s.system.disk_used_bytes as f64 / disk_total) * 100.0
} else {
0.0
},
)
})
.unwrap_or((0.0, 0.0, 0.0));
// Recent alerts from metrics store
let recent_alerts: Vec<serde_json::Value> = self
.metrics_store
@ -210,6 +231,9 @@ impl RpcHandler {
"uptime_secs": uptime_secs,
"cpu_cores": cpu_cores,
"ram_mb": total_ram_mb,
"cpu_pct": (cpu_pct * 10.0).round() / 10.0,
"mem_pct": (mem_pct * 10.0).round() / 10.0,
"disk_pct": (disk_pct * 10.0).round() / 10.0,
"containers": containers,
"container_count": data.package_data.len(),
"running_count": data.package_data.values()

View File

@ -79,7 +79,8 @@ impl RpcHandler {
.and_then(|v| v.as_bool())
.unwrap_or(true);
self.auth_manager
let outcome = self
.auth_manager
.change_password(current_password, new_password, also_change_ssh)
.await?;
@ -88,7 +89,12 @@ impl RpcHandler {
self.session_store.invalidate_all_except(token).await;
}
Ok(serde_json::json!({ "success": true, "session_rotated": true }))
Ok(serde_json::json!({
"success": true,
"session_rotated": true,
"ssh_updated": outcome.ssh_updated,
"ssh_error": outcome.ssh_error,
}))
}
pub(super) async fn handle_auth_is_setup(&self) -> Result<serde_json::Value> {

View File

@ -0,0 +1,900 @@
use super::RpcHandler;
use crate::container::docker_packages;
use crate::data_model::{Notification, NotificationLevel};
use crate::{bitcoin_status, identity, peers};
use anyhow::{Context, Result};
use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
use hmac::{Hmac, Mac};
use rand::RngCore;
use serde::{Deserialize, Serialize};
use serde_json::json;
use sha2::Sha256;
use std::path::{Path, PathBuf};
use tokio::fs;
/// Directory (relative to the data dir) that holds the persisted relay state.
const RELAY_DIR: &str = "bitcoin-relay";
/// File name of the relay state document inside `RELAY_DIR`.
const RELAY_STATE_FILE: &str = "state.json";
/// RPC user name reported in the credential status and used when building the rpcauth line.
const TXRELAY_USER: &str = "txrelay";
/// File under `<data_dir>/secrets` that stores the txrelay password.
const TXRELAY_PASSWORD_FILE: &str = "bitcoin-rpc-txrelay-password";
/// File under `<data_dir>/secrets` that stores the generated rpcauth line.
const TXRELAY_RPCAUTH_FILE: &str = "bitcoin-rpc-txrelay-rpcauth";
/// File under `<data_dir>/secrets` that stores user, password and rpcauth as env assignments.
const TXRELAY_CLIENT_ENV_FILE: &str = "bitcoin-rpc-txrelay-client.env";
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
struct BitcoinRelayState {
settings: BitcoinRelaySettings,
requests: Vec<BitcoinRelayRequest>,
updated_at: Option<String>,
}
impl Default for BitcoinRelayState {
fn default() -> Self {
Self {
settings: BitcoinRelaySettings::default(),
requests: Vec::new(),
updated_at: None,
}
}
}
/// Relay sharing preferences stored inside the relay state.
///
/// `#[serde(default)]` fills any field missing from the stored JSON from
/// the `Default` implementation below.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
struct BitcoinRelaySettings {
// When true, saving the settings also makes sure txrelay credentials exist.
enabled_for_peers: bool,
// When false, incoming `bitcoin_relay_request` messages are not recorded.
allow_peer_requests: bool,
// Transport switches consulted when choosing the endpoint handed to a peer.
allow_http: bool,
allow_https: bool,
allow_tor: bool,
// Pubkey of the peer selected in the settings; must match a known peer when set.
selected_peer_pubkey: Option<String>,
// Endpoint URLs per transport; validated to start with http:// or https://.
http_endpoint: Option<String>,
https_endpoint: Option<String>,
tor_endpoint: Option<String>,
}
/// Defaults: everything disabled or unset except `allow_https`, which is on.
impl Default for BitcoinRelaySettings {
fn default() -> Self {
Self {
enabled_for_peers: false,
allow_peer_requests: false,
allow_http: false,
allow_https: true,
allow_tor: false,
selected_peer_pubkey: None,
http_endpoint: None,
https_endpoint: None,
tor_endpoint: None,
}
}
}
/// One relay access request, either received from a peer or sent to one.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct BitcoinRelayRequest {
// Request identifier; a fresh UUID, or the id supplied by the requesting peer.
id: String,
// Whether the request was received (`Incoming`) or sent (`Outbound`).
direction: RelayRequestDirection,
// Current lifecycle state of the request.
status: RelayRequestStatus,
// Identity of the other party.
peer_pubkey: String,
peer_onion: String,
peer_name: Option<String>,
// Optional free-text note attached to the request.
message: Option<String>,
// Endpoint recorded when the request is approved.
approved_endpoint: Option<String>,
// Path of the env file holding credentials received from the peer, if any.
credential_secret_path: Option<String>,
// Timestamp strings produced by `now()`.
created_at: String,
updated_at: String,
}
/// Direction of a relay request; serialized as lowercase strings
/// (`"incoming"` / `"outbound"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
enum RelayRequestDirection {
// Recorded from a peer's `bitcoin_relay_request` message.
Incoming,
// Created locally when asking a peer for relay access.
Outbound,
}
/// Lifecycle state of a relay request; serialized as lowercase strings
/// (`"pending"`, `"approved"`, `"rejected"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
enum RelayRequestStatus {
// Initial state of every newly recorded request.
Pending,
Approved,
Rejected,
}
/// Serializable view of a known peer as reported in the relay status.
#[derive(Debug, Serialize)]
struct TrustedRelayPeer {
pubkey: String,
onion: String,
name: Option<String>,
// True when any recorded request for this peer has status `Approved`.
relay_approved: bool,
}
/// Username/password pair for the txrelay RPC user.
///
/// `Debug` is implemented by hand so that the password can never end up in
/// logs or error output through a `{:?}` formatter; only the username is
/// printed and the password is replaced by a fixed placeholder.
#[derive(Clone)]
struct TxRelayCredentials {
    username: String,
    password: String,
}

impl std::fmt::Debug for TxRelayCredentials {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("TxRelayCredentials")
            .field("username", &self.username)
            .field("password", &"<redacted>")
            .finish()
    }
}
impl RpcHandler {
/// Builds the relay status document returned to the caller.
///
/// Loads the persisted relay state (filling in the Tor endpoint when it is
/// missing), then combines it with the known peers, the local node's sync
/// status and the availability of the txrelay credential files.
///
/// # Errors
/// Fails when the relay state cannot be loaded. A failure to load the peer
/// list is treated as "no peers".
pub(super) async fn handle_bitcoin_relay_status(&self) -> Result<serde_json::Value> {
    let data_dir = &self.config.data_dir;

    let mut relay_state = load_relay_state(data_dir).await?;
    hydrate_tor_endpoint(data_dir, &mut relay_state).await;

    let peer_list = peers::load_peers(data_dir).await.unwrap_or_default();
    let trusted = trusted_relay_peers(&peer_list, &relay_state);
    let node_status = local_sync_status().await;
    let credentials = txrelay_credential_status(data_dir).await;

    let report = json!({
        "settings": relay_state.settings,
        "trusted_nodes": trusted,
        "requests": relay_state.requests,
        "local_node": node_status,
        "credentials": credentials,
    });
    Ok(report)
}
/// Applies a partial settings update and returns the refreshed relay status.
///
/// Only keys present in `params` are touched: boolean flags are replaced when
/// the value is a JSON bool, endpoints are validated (or cleared when
/// empty/non-string) and `selected_peer_pubkey` must name a known peer.
///
/// All input is validated before anything is written: the txrelay credentials
/// are generated only after the whole request has been accepted, so a rejected
/// update leaves no new secret files behind.
///
/// # Errors
/// Fails when the state cannot be loaded or saved, an endpoint is invalid, the
/// selected peer is unknown, or credential generation fails.
pub(super) async fn handle_bitcoin_relay_update_settings(
    &self,
    params: Option<serde_json::Value>,
) -> Result<serde_json::Value> {
    let params = params.unwrap_or_default();
    let mut state = load_relay_state(&self.config.data_dir).await?;

    {
        let settings = &mut state.settings;
        update_bool(&params, "enabled_for_peers", &mut settings.enabled_for_peers);
        update_bool(
            &params,
            "allow_peer_requests",
            &mut settings.allow_peer_requests,
        );
        update_bool(&params, "allow_http", &mut settings.allow_http);
        update_bool(&params, "allow_https", &mut settings.allow_https);
        update_bool(&params, "allow_tor", &mut settings.allow_tor);
        update_endpoint(&params, "http_endpoint", &mut settings.http_endpoint)?;
        update_endpoint(&params, "https_endpoint", &mut settings.https_endpoint)?;
        update_endpoint(&params, "tor_endpoint", &mut settings.tor_endpoint)?;
    }

    if let Some(raw) = params.get("selected_peer_pubkey") {
        let selected = raw.as_str().map(str::trim).filter(|s| !s.is_empty());
        state.settings.selected_peer_pubkey = match selected {
            Some(pubkey) => {
                // The peer list is only needed when a peer is actually selected.
                let known_peers = peers::load_peers(&self.config.data_dir)
                    .await
                    .unwrap_or_default();
                if !known_peers.iter().any(|p| p.pubkey == pubkey) {
                    anyhow::bail!("Selected relay peer is not in trusted nodes");
                }
                Some(pubkey.to_string())
            }
            // Present but empty or non-string: clear the selection.
            None => None,
        };
    }

    // Side effects on disk start here, after every validation has passed.
    if state.settings.enabled_for_peers {
        ensure_txrelay_credentials(&self.config.data_dir).await?;
    }

    state.updated_at = Some(now());
    save_relay_state(&self.config.data_dir, &state).await?;
    self.notify(
        "Bitcoin relay settings updated",
        "Transaction relay sharing preferences were saved.",
    )
    .await;
    self.handle_bitcoin_relay_status().await
}
/// Records an outbound relay request for a known peer and sends it to that peer.
///
/// Parameters (in `params`): `peer_pubkey` (required) and `message` (optional).
/// Returns `{ "ok": true, "request_id": ... }`.
///
/// # Errors
/// Fails when `peer_pubkey` is missing, the message is rejected by
/// `sanitize_optional_text`, the peer is not in the known peer list, or the
/// relay state cannot be loaded or saved.
pub(super) async fn handle_bitcoin_relay_request_peer(
&self,
params: Option<serde_json::Value>,
) -> Result<serde_json::Value> {
let params = params.unwrap_or_default();
let peer_pubkey = params
.get("peer_pubkey")
.and_then(|v| v.as_str())
.ok_or_else(|| anyhow::anyhow!("Missing required parameter: peer_pubkey"))?;
let message = params
.get("message")
.and_then(|v| v.as_str())
.map(sanitize_optional_text)
.transpose()?;
// A failure to load the peer list is treated as an empty list, which then
// surfaces as "Peer is not in trusted nodes".
let peer = peers::load_peers(&self.config.data_dir)
.await
.unwrap_or_default()
.into_iter()
.find(|p| p.pubkey == peer_pubkey)
.ok_or_else(|| anyhow::anyhow!("Peer is not in trusted nodes"))?;
let mut state = load_relay_state(&self.config.data_dir).await?;
// Reuse an already pending outbound request to the same peer instead of
// creating a duplicate.
let existing = state.requests.iter_mut().find(|r| {
r.direction == RelayRequestDirection::Outbound
&& r.peer_pubkey == peer.pubkey
&& r.status == RelayRequestStatus::Pending
});
let request_id = if let Some(req) = existing {
req.message = message.clone();
req.updated_at = now();
req.id.clone()
} else {
let timestamp = now();
let req = BitcoinRelayRequest {
id: uuid::Uuid::new_v4().to_string(),
direction: RelayRequestDirection::Outbound,
status: RelayRequestStatus::Pending,
peer_pubkey: peer.pubkey.clone(),
peer_onion: peer.onion.clone(),
peer_name: peer.name.clone(),
message: message.clone(),
approved_endpoint: None,
credential_secret_path: None,
created_at: timestamp.clone(),
updated_at: timestamp,
};
let id = req.id.clone();
state.requests.push(req);
id
};
state.updated_at = Some(now());
// The request is persisted before the message is sent.
save_relay_state(&self.config.data_dir, &state).await?;
// Delivery is best-effort: a send failure is logged and does not fail the call.
if let Err(e) = self
.send_relay_peer_message(
&peer,
json!({
"type": "bitcoin_relay_request",
"request_id": request_id,
"message": message,
}),
)
.await
{
tracing::warn!(peer = %peer.onion, error = %e, "Failed to send Bitcoin relay request");
}
// NOTE(review): this notification is emitted even when the send above failed.
self.notify(
"Bitcoin relay request sent",
"A trusted peer was asked to approve transaction relay access.",
)
.await;
Ok(json!({ "ok": true, "request_id": request_id }))
}
/// Marks the relay request identified in `params` as approved.
///
/// Delegates to `update_relay_request_status` with `RelayRequestStatus::Approved`
/// and returns its result unchanged.
pub(super) async fn handle_bitcoin_relay_approve_request(
&self,
params: Option<serde_json::Value>,
) -> Result<serde_json::Value> {
self.update_relay_request_status(params, RelayRequestStatus::Approved)
.await
}
/// Marks the relay request identified in `params` as rejected.
///
/// Delegates to `update_relay_request_status` with `RelayRequestStatus::Rejected`
/// and returns its result unchanged.
pub(super) async fn handle_bitcoin_relay_reject_request(
&self,
params: Option<serde_json::Value>,
) -> Result<serde_json::Value> {
self.update_relay_request_status(params, RelayRequestStatus::Rejected)
.await
}
/// Creates (or looks up) the `bitcoin-rpc` Tor service and records its
/// address as the relay's Tor endpoint.
///
/// When service creation fails with an error whose text contains
/// "already exists", the existing onion address is queried instead. If the
/// response carries a non-empty `onion_address`, Tor is switched on in the
/// relay settings and the endpoint is stored as `http://<onion>/`.
///
/// Returns the JSON value produced by the Tor service call.
///
/// # Errors
/// Propagates any other service-creation error, a failed address lookup, and
/// failures to load or save the relay state.
pub(super) async fn handle_bitcoin_relay_create_tor_service(
    &self,
) -> Result<serde_json::Value> {
    let service_params = json!({
        "name": "bitcoin-rpc",
        "local_port": 80,
        "remote_port": 80,
    });

    let service = match self.handle_tor_create_service(Some(service_params)).await {
        Ok(value) => value,
        Err(err) => {
            if !err.to_string().contains("already exists") {
                return Err(err);
            }
            let lookup = json!({ "name": "bitcoin-rpc" });
            self.handle_tor_get_onion_address(Some(lookup)).await?
        }
    };

    let onion_address = service
        .get("onion_address")
        .and_then(|v| v.as_str())
        .map(str::trim)
        .filter(|addr| !addr.is_empty())
        .map(str::to_string);

    if let Some(onion) = onion_address {
        let mut relay_state = load_relay_state(&self.config.data_dir).await?;
        relay_state.settings.allow_tor = true;
        relay_state.settings.tor_endpoint = Some(format!("http://{onion}/"));
        relay_state.updated_at = Some(now());
        save_relay_state(&self.config.data_dir, &relay_state).await?;
    }

    self.notify(
        "Bitcoin relay Tor service enabled",
        "A Tor endpoint was created for Bitcoin transaction relay access.",
    )
    .await;

    Ok(service)
}
/// Sets the status of a stored relay request and informs the peer.
///
/// The request is identified by `id` (or `request_id`) in `params`.
///
/// Relay access is only granted when an *incoming* request is approved. Only
/// in that case is the serving endpoint resolved, the txrelay credential set
/// ensured, and the endpoint recorded on the request. Any other transition
/// (rejecting, or changing an outbound request) leaves `approved_endpoint`
/// untouched, so an endpoint received from a peer is never replaced by the
/// local one, and no local credentials are generated needlessly.
///
/// The updated state is saved before the response message is sent; sending is
/// best-effort and a delivery failure is only logged.
///
/// # Errors
/// Fails when the id parameter is missing, the request does not exist, an
/// incoming request is approved without any configured endpoint, credentials
/// cannot be ensured, or the state cannot be loaded or saved.
async fn update_relay_request_status(
    &self,
    params: Option<serde_json::Value>,
    status: RelayRequestStatus,
) -> Result<serde_json::Value> {
    let params = params.unwrap_or_default();
    let request_id = params
        .get("id")
        .or_else(|| params.get("request_id"))
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow::anyhow!("Missing required parameter: id"))?;

    let mut state = load_relay_state(&self.config.data_dir).await?;

    // Single lookup; the index stays valid because `requests` is not
    // resized anywhere below.
    let index = state
        .requests
        .iter()
        .position(|r| r.id == request_id)
        .ok_or_else(|| anyhow::anyhow!("Request not found: {}", request_id))?;
    let direction = state.requests[index].direction;

    let grants_access =
        status == RelayRequestStatus::Approved && direction == RelayRequestDirection::Incoming;

    let serving_endpoint = if grants_access {
        preferred_endpoint(&state.settings)
    } else {
        None
    };
    if grants_access && serving_endpoint.is_none() {
        anyhow::bail!(
            "Configure an HTTP, HTTPS, or Tor relay endpoint before approving access"
        );
    }
    let credentials = if grants_access {
        Some(ensure_txrelay_credentials(&self.config.data_dir).await?)
    } else {
        None
    };

    let timestamp = now();
    let (peer_pubkey, peer_onion, peer_name) = {
        let req = &mut state.requests[index];
        req.status = status;
        req.updated_at = timestamp.clone();
        if let Some(endpoint) = &serving_endpoint {
            req.approved_endpoint = Some(endpoint.clone());
        }
        (
            req.peer_pubkey.clone(),
            req.peer_onion.clone(),
            req.peer_name.clone(),
        )
    };

    let peer = peers::load_peers(&self.config.data_dir)
        .await
        .unwrap_or_default()
        .into_iter()
        .find(|p| p.pubkey == peer_pubkey);
    // Fall back to the onion address when the request carries no display name.
    let peer_name = peer_name.unwrap_or(peer_onion);

    state.updated_at = Some(timestamp);
    save_relay_state(&self.config.data_dir, &state).await?;

    if let Some(peer) = peer {
        let message_type = match status {
            RelayRequestStatus::Approved => "bitcoin_relay_approved",
            RelayRequestStatus::Rejected => "bitcoin_relay_rejected",
            RelayRequestStatus::Pending => "bitcoin_relay_pending",
        };
        let response = relay_response_payload(
            message_type,
            request_id,
            direction,
            serving_endpoint.as_deref(),
            credentials.as_ref(),
        );
        if let Err(e) = self.send_relay_peer_message(&peer, response).await {
            tracing::warn!(peer = %peer.onion, error = %e, "Failed to send Bitcoin relay response");
        }
    }

    let title = match status {
        RelayRequestStatus::Approved => "Bitcoin relay request approved",
        RelayRequestStatus::Rejected => "Bitcoin relay request rejected",
        RelayRequestStatus::Pending => "Bitcoin relay request updated",
    };
    self.notify(
        title,
        &format!("Relay access request for {peer_name} was updated."),
    )
    .await;

    Ok(json!({ "ok": true, "request_id": request_id }))
}
/// Stamps `payload` with this node's identity fields and sends it to `peer`.
///
/// The fields `from_did`, `from_pubkey`, `from_onion` and `from_name` are
/// written into the payload before it is serialized and handed to
/// `node_message::send_to_peer`, whose result is returned.
async fn send_relay_peer_message(
&self,
peer: &peers::KnownPeer,
mut payload: serde_json::Value,
) -> Result<()> {
let (data, _) = self.state_manager.get_snapshot().await;
let my_pubkey = data.server_info.pubkey.clone();
// A DID that cannot be derived becomes JSON null in the payload.
let my_did = identity::did_key_from_pubkey_hex(&my_pubkey).ok();
// A missing onion address is sent as an empty string.
let my_onion = docker_packages::read_tor_address("archipelago")
.await
.unwrap_or_default();
// NOTE(review): these index assignments assume `payload` is a JSON object
// (or null); the callers visible in this file always pass an object.
payload["from_did"] = my_did.map(serde_json::Value::String).unwrap_or_default();
payload["from_pubkey"] = serde_json::Value::String(my_pubkey.clone());
payload["from_onion"] = serde_json::Value::String(my_onion);
payload["from_name"] = data
.server_info
.name
.clone()
.map(serde_json::Value::String)
.unwrap_or_default();
let to_fips_npub =
crate::federation::fips_npub_for_onion(&self.config.data_dir, &peer.onion).await;
let identity_dir = self.config.data_dir.join("identity");
// If the node identity cannot be loaded or created, the message is sent
// without a signing key.
let signing_key = crate::identity::NodeIdentity::load_or_create(&identity_dir)
.await
.ok();
crate::node_message::send_to_peer(
&peer.onion,
to_fips_npub.as_deref(),
&my_pubkey,
&payload.to_string(),
signing_key.as_ref().map(|i| i.signing_key()),
Some(&peer.pubkey),
data.server_info.name.as_deref(),
)
.await
}
/// Appends an info-level notification for the `bitcoin-knots` app to the
/// shared state, keeping only the 30 most recent notifications.
async fn notify(&self, title: &str, message: &str) {
    const MAX_NOTIFICATIONS: usize = 30;

    let (mut snapshot, _) = self.state_manager.get_snapshot().await;

    let entry = Notification {
        id: format!("bitcoin-relay-{}", uuid::Uuid::new_v4()),
        level: NotificationLevel::Info,
        title: title.to_string(),
        message: message.to_string(),
        timestamp: now(),
        app_id: Some("bitcoin-knots".to_string()),
    };
    snapshot.notifications.push(entry);

    // Drop the oldest entries once the cap is exceeded.
    let overflow = snapshot
        .notifications
        .len()
        .saturating_sub(MAX_NOTIFICATIONS);
    if overflow > 0 {
        snapshot.notifications.drain(..overflow);
    }

    self.state_manager.update_data(snapshot).await;
}
}
/// Handles a relay-related message received from a peer.
///
/// Returns `Ok(Some(tag))` when the message type belongs to the relay
/// protocol (the tag names how it was handled) and `Ok(None)` for any other
/// message type so the caller can continue with its own dispatch.
///
/// * `bitcoin_relay_request` — when peer requests are enabled and the sender
///   has no pending incoming request yet, a new pending request is stored.
///   The peer-supplied `request_id` is adopted only when it is short, made of
///   ASCII alphanumerics / `-` / `_`, and not already used by another stored
///   request; otherwise a fresh UUID is generated, so a peer cannot shadow an
///   existing request id.
/// * `bitcoin_relay_approved` / `bitcoin_relay_rejected` — updates the
///   matching outbound request. Credentials carried by an approval are
///   written to disk only when such an outbound request exists; unsolicited
///   approvals are acknowledged but leave no secret file behind.
///
/// # Errors
/// Fails when the message text is invalid, the relay state cannot be loaded
/// or saved, or received relay access data is rejected by validation.
pub(crate) async fn record_incoming_relay_message(
    data_dir: &Path,
    from_pubkey: &str,
    from_name: Option<&str>,
    payload: &serde_json::Value,
) -> Result<Option<&'static str>> {
    let msg_type = payload.get("type").and_then(|v| v.as_str()).unwrap_or("");
    match msg_type {
        "bitcoin_relay_request" => {
            let from_onion = payload
                .get("from_onion")
                .and_then(|v| v.as_str())
                .unwrap_or_default()
                .to_string();
            let message = payload
                .get("message")
                .and_then(|v| v.as_str())
                .map(sanitize_optional_text)
                .transpose()?;

            let mut state = load_relay_state(data_dir).await?;
            if !state.settings.allow_peer_requests {
                return Ok(Some("bitcoin_relay_request_disabled"));
            }

            let already_pending = state.requests.iter().any(|r| {
                r.direction == RelayRequestDirection::Incoming
                    && r.peer_pubkey == from_pubkey
                    && r.status == RelayRequestStatus::Pending
            });
            if !already_pending {
                // The id comes from an untrusted peer: accept it only when it
                // is well-formed and unique among the stored requests.
                let remote_id = payload
                    .get("request_id")
                    .and_then(|v| v.as_str())
                    .filter(|id| {
                        !id.is_empty()
                            && id.len() <= 64
                            && id
                                .chars()
                                .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
                    })
                    .filter(|id| !state.requests.iter().any(|r| r.id == *id));
                let id = match remote_id {
                    Some(id) => id.to_string(),
                    None => uuid::Uuid::new_v4().to_string(),
                };

                let timestamp = now();
                state.requests.push(BitcoinRelayRequest {
                    id,
                    direction: RelayRequestDirection::Incoming,
                    status: RelayRequestStatus::Pending,
                    peer_pubkey: from_pubkey.to_string(),
                    peer_onion: from_onion,
                    peer_name: from_name.map(String::from),
                    message,
                    approved_endpoint: None,
                    credential_secret_path: None,
                    created_at: timestamp.clone(),
                    updated_at: timestamp.clone(),
                });
                state.updated_at = Some(timestamp);
                save_relay_state(data_dir, &state).await?;
            }
            Ok(Some("bitcoin_relay_request"))
        }
        "bitcoin_relay_approved" | "bitcoin_relay_rejected" => {
            let approved = msg_type == "bitcoin_relay_approved";
            let request_id = payload.get("request_id").and_then(|v| v.as_str());
            let mut state = load_relay_state(data_dir).await?;

            // Locate the outbound request first; without one the message is
            // unsolicited and must not cause any credential to be stored.
            let matching = state.requests.iter().position(|r| {
                r.direction == RelayRequestDirection::Outbound
                    && r.peer_pubkey == from_pubkey
                    && request_id.map_or(true, |id| id == r.id)
            });

            if let Some(index) = matching {
                let approved_access = if approved {
                    save_peer_relay_access(data_dir, from_pubkey, payload).await?
                } else {
                    None
                };

                let timestamp = now();
                let req = &mut state.requests[index];
                req.status = if approved {
                    RelayRequestStatus::Approved
                } else {
                    RelayRequestStatus::Rejected
                };
                req.updated_at = timestamp.clone();
                if let Some((endpoint, secret_path)) = approved_access {
                    req.approved_endpoint = Some(endpoint);
                    req.credential_secret_path = Some(secret_path);
                }
                state.updated_at = Some(timestamp);
                save_relay_state(data_dir, &state).await?;
            }

            Ok(Some(if approved {
                "bitcoin_relay_approved"
            } else {
                "bitcoin_relay_rejected"
            }))
        }
        _ => Ok(None),
    }
}
/// Converts the known peers into their status view.
///
/// A peer is flagged `relay_approved` when at least one stored request for
/// its pubkey has status `Approved`, regardless of the request's direction.
fn trusted_relay_peers(
    known_peers: &[peers::KnownPeer],
    state: &BitcoinRelayState,
) -> Vec<TrustedRelayPeer> {
    let mut trusted = Vec::with_capacity(known_peers.len());
    for peer in known_peers {
        let mut relay_approved = false;
        for req in &state.requests {
            if req.peer_pubkey == peer.pubkey && req.status == RelayRequestStatus::Approved {
                relay_approved = true;
                break;
            }
        }
        trusted.push(TrustedRelayPeer {
            pubkey: peer.pubkey.clone(),
            onion: peer.onion.clone(),
            name: peer.name.clone(),
            relay_approved,
        });
    }
    trusted
}
/// Reports which txrelay credential files currently exist on disk.
///
/// `available` is true only when the password, rpcauth and client env files
/// are all present. The contents of the files are never read here.
async fn txrelay_credential_status(data_dir: &Path) -> serde_json::Value {
    let (password_file, rpcauth_file, client_env_file) = txrelay_secret_paths(data_dir);

    let has_password = fs::metadata(&password_file).await.is_ok();
    let has_rpcauth = fs::metadata(&rpcauth_file).await.is_ok();
    let has_client_env = fs::metadata(&client_env_file).await.is_ok();

    let all_present = has_password && has_rpcauth && has_client_env;
    let client_env_display = client_env_file.display().to_string();

    json!({
        "username": TXRELAY_USER,
        "available": all_present,
        "password_available": has_password,
        "rpcauth_available": has_rpcauth,
        "client_env_available": has_client_env,
        "client_env_path": client_env_display,
        "restart_hint": "If this was just generated, restart Bitcoin Core/Knots so bitcoind loads the txrelay rpcauth whitelist.",
    })
}
async fn ensure_txrelay_credentials(data_dir: &Path) -> Result<TxRelayCredentials> {
let (password_path, rpcauth_path, client_env_path) = txrelay_secret_paths(data_dir);
let password = match read_trimmed(&password_path).await {
Some(value) => value,
None => {
let generated = generate_random_password();
write_secret_file(&password_path, &generated).await?;
generated
}
};
let rpcauth = match read_trimmed(&rpcauth_path).await {
Some(value) => value,
None => {
let generated = generate_rpcauth(TXRELAY_USER, &password);
write_secret_file(&rpcauth_path, &generated).await?;
generated
}
};
let client_env = format!(
"BITCOIN_RPC_TXRELAY_USER={}\nBITCOIN_RPC_TXRELAY_PASSWORD={}\nBITCOIN_RPC_TXRELAY_RPCAUTH={}\n",
TXRELAY_USER, password, rpcauth
);
write_secret_file(&client_env_path, &client_env).await?;
Ok(TxRelayCredentials {
username: TXRELAY_USER.to_string(),
password,
})
}
/// Returns the paths of the txrelay password, rpcauth and client env files,
/// in that order, all located in `<data_dir>/secrets`.
fn txrelay_secret_paths(data_dir: &Path) -> (PathBuf, PathBuf, PathBuf) {
    let secret_file = |file_name: &str| data_dir.join("secrets").join(file_name);
    let password = secret_file(TXRELAY_PASSWORD_FILE);
    let rpcauth = secret_file(TXRELAY_RPCAUTH_FILE);
    let client_env = secret_file(TXRELAY_CLIENT_ENV_FILE);
    (password, rpcauth, client_env)
}
/// Reads a text file and returns its contents with surrounding whitespace
/// removed. Yields `None` when the file cannot be read or is blank.
async fn read_trimmed(path: &Path) -> Option<String> {
    let raw = match fs::read_to_string(path).await {
        Ok(text) => text,
        Err(_) => return None,
    };
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
async fn write_secret_file(path: &Path, contents: &str) -> Result<()> {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent).await?;
}
fs::write(path, contents).await?;
set_private_permissions(path).await;
Ok(())
}
async fn set_private_permissions(path: &Path) {
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
let _ = fs::set_permissions(path, std::fs::Permissions::from_mode(0o600)).await;
}
}
/// Produces a new password: 32 bytes from the operating system's random
/// source, encoded with standard Base64.
fn generate_random_password() -> String {
    let mut entropy = [0u8; 32];
    let mut os_rng = rand::rngs::OsRng;
    os_rng.fill_bytes(&mut entropy);
    BASE64.encode(entropy)
}
/// Builds an rpcauth line of the form `<username>:<salt>$<hash>`.
///
/// The salt is 16 random bytes rendered as hex; the hash is the hex-encoded
/// HMAC-SHA256 of the password, keyed with the salt's hex text.
fn generate_rpcauth(username: &str, password: &str) -> String {
    let mut salt = [0u8; 16];
    rand::rngs::OsRng.fill_bytes(&mut salt);
    let salt_hex = hex::encode(salt);

    let digest = {
        let mut hmac = Hmac::<Sha256>::new_from_slice(salt_hex.as_bytes())
            .expect("HMAC accepts any key length");
        hmac.update(password.as_bytes());
        hmac.finalize().into_bytes()
    };
    let hash_hex = hex::encode(digest);

    format!("{}:{}${}", username, salt_hex, hash_hex)
}
/// Picks the endpoint to hand out to a peer.
///
/// First pass: the first configured endpoint whose transport is allowed, in
/// the order HTTPS, Tor, HTTP. Second pass: if none qualifies, the first
/// configured endpoint in the same order, whatever its `allow_*` flag says.
/// Returns `None` only when no endpoint is configured at all.
///
/// NOTE(review): the second pass can return an endpoint whose transport is
/// switched off (e.g. plain HTTP while `allow_http` is false) — confirm this
/// fallback is intended.
fn preferred_endpoint(settings: &BitcoinRelaySettings) -> Option<String> {
    let ranked = [
        (settings.allow_https, &settings.https_endpoint),
        (settings.allow_tor, &settings.tor_endpoint),
        (settings.allow_http, &settings.http_endpoint),
    ];

    for (allowed, endpoint) in ranked {
        if !allowed {
            continue;
        }
        if let Some(url) = endpoint {
            return Some(url.clone());
        }
    }

    for (_, endpoint) in ranked {
        if let Some(url) = endpoint {
            return Some(url.clone());
        }
    }

    None
}
/// Builds the message sent to a peer after a request's status changed.
///
/// The payload always carries `type` and `request_id`. A `relay_access`
/// object (endpoint, username, password) is attached only for an approval of
/// an incoming request, and only when both an endpoint and credentials are
/// supplied.
fn relay_response_payload(
    message_type: &str,
    request_id: &str,
    request_direction: RelayRequestDirection,
    endpoint: Option<&str>,
    credentials: Option<&TxRelayCredentials>,
) -> serde_json::Value {
    let mut response = json!({
        "type": message_type,
        "request_id": request_id,
    });

    let shares_access = message_type == "bitcoin_relay_approved"
        && request_direction == RelayRequestDirection::Incoming;
    if !shares_access {
        return response;
    }

    match (endpoint, credentials) {
        (Some(url), Some(creds)) => {
            response["relay_access"] = json!({
                "endpoint": url,
                "username": &creds.username,
                "password": &creds.password,
            });
        }
        _ => {}
    }
    response
}
/// Stores relay access details received from a peer in a private env file.
///
/// Reads `relay_access.endpoint`, `.username` and `.password` from `payload`.
/// Returns `Ok(None)` when the `relay_access` object or any of the three
/// values is missing; otherwise writes
/// `<data_dir>/secrets/bitcoin-relay-peer-<fragment>.env` and returns the
/// endpoint together with the file path.
///
/// Every value that ends up in the env file — including the sender's pubkey,
/// which is supplied by the remote side — is checked to be single-line, so a
/// crafted value cannot inject additional assignments into the file.
///
/// # Errors
/// Fails when the endpoint or one of the values is invalid, or the file
/// cannot be written.
async fn save_peer_relay_access(
    data_dir: &Path,
    from_pubkey: &str,
    payload: &serde_json::Value,
) -> Result<Option<(String, String)>> {
    let Some(access) = payload.get("relay_access") else {
        return Ok(None);
    };
    let endpoint = access
        .get("endpoint")
        .and_then(|v| v.as_str())
        .map(validate_endpoint)
        .transpose()?;
    let username = access.get("username").and_then(|v| v.as_str());
    let password = access.get("password").and_then(|v| v.as_str());
    let (Some(endpoint), Some(username), Some(password)) = (endpoint, username, password) else {
        return Ok(None);
    };
    validate_env_value(from_pubkey)?;
    validate_env_value(username)?;
    validate_env_value(password)?;
    let secret_path = data_dir.join("secrets").join(format!(
        "bitcoin-relay-peer-{}.env",
        safe_pubkey_fragment(from_pubkey)
    ));
    let contents = format!(
        "BITCOIN_RELAY_PEER_PUBKEY={}\nBITCOIN_RELAY_ENDPOINT={}\nBITCOIN_RELAY_USERNAME={}\nBITCOIN_RELAY_PASSWORD={}\n",
        from_pubkey, endpoint, username, password
    );
    write_secret_file(&secret_path, &contents).await?;
    Ok(Some((endpoint, secret_path.display().to_string())))
}
/// Checks that a value is safe to place on a single env-file line: it must
/// be 1 to 1024 bytes long and contain neither `\n` nor `\r`.
fn validate_env_value(value: &str) -> Result<()> {
    let length_ok = (1..=1024).contains(&value.len());
    let single_line = !value.chars().any(|c| matches!(c, '\n' | '\r'));
    if length_ok && single_line {
        return Ok(());
    }
    anyhow::bail!("Invalid relay credential value");
}
/// Derives a file-name-safe fragment from a pubkey: the first 24 ASCII hex
/// digits found in the input, in order, skipping every other character.
/// Returns `"unknown"` when the input contains no hex digit at all.
fn safe_pubkey_fragment(pubkey: &str) -> String {
    const MAX_DIGITS: usize = 24;

    let mut fragment = String::with_capacity(MAX_DIGITS);
    for ch in pubkey.chars() {
        // Hex digits are single-byte, so the byte length equals the digit count.
        if fragment.len() == MAX_DIGITS {
            break;
        }
        if ch.is_ascii_hexdigit() {
            fragment.push(ch);
        }
    }

    if fragment.is_empty() {
        fragment.push_str("unknown");
    }
    fragment
}
async fn hydrate_tor_endpoint(data_dir: &Path, state: &mut BitcoinRelayState) {
if state.settings.tor_endpoint.is_some() {
return;
}
if let Some(onion) = docker_packages::read_tor_address("bitcoin-rpc").await {
let onion = onion.trim().trim_end_matches('/').to_string();
if !onion.is_empty() {
state.settings.tor_endpoint = Some(format!("http://{onion}/"));
let _ = save_relay_state(data_dir, state).await;
}
}
}
/// Summarizes the local Bitcoin node's sync state as JSON.
///
/// `synced` is true only when the status query succeeded, at least one header
/// is known, the block height is within one of the header height, and the
/// node does not report initial block download. Missing fields count as
/// height 0, chain `"unknown"` and "still in initial block download".
async fn local_sync_status() -> serde_json::Value {
    let status = bitcoin_status::get_bitcoin_status().await;
    let chain_info = status.blockchain_info.as_ref();

    let block_height = chain_info
        .and_then(|info| info.get("blocks"))
        .and_then(|v| v.as_u64())
        .unwrap_or(0);
    let header_height = chain_info
        .and_then(|info| info.get("headers"))
        .and_then(|v| v.as_u64())
        .unwrap_or(0);
    let in_ibd = chain_info
        .and_then(|info| info.get("initialblockdownload"))
        .and_then(|v| v.as_bool())
        .unwrap_or(true);
    let chain_name = chain_info
        .and_then(|info| info.get("chain"))
        .and_then(|v| v.as_str())
        .unwrap_or("unknown");

    let caught_up = block_height >= header_height.saturating_sub(1);
    let synced = status.ok && header_height > 0 && caught_up && !in_ibd;

    json!({
        "synced": synced,
        "blocks": block_height,
        "headers": header_height,
        "chain": chain_name,
        "status_ok": status.ok,
        "status_stale": status.stale,
        "error": status.error,
    })
}
async fn load_relay_state(data_dir: &Path) -> Result<BitcoinRelayState> {
let path = state_path(data_dir);
if !path.exists() {
return Ok(BitcoinRelayState::default());
}
let content = fs::read_to_string(&path)
.await
.with_context(|| format!("Failed to read {}", path.display()))?;
Ok(serde_json::from_str(&content).unwrap_or_default())
}
async fn save_relay_state(data_dir: &Path, state: &BitcoinRelayState) -> Result<()> {
let dir = data_dir.join(RELAY_DIR);
fs::create_dir_all(&dir).await?;
let content = serde_json::to_string_pretty(state)?;
fs::write(dir.join(RELAY_STATE_FILE), content).await?;
Ok(())
}
/// Location of the relay state file: `<data_dir>/<RELAY_DIR>/<RELAY_STATE_FILE>`.
fn state_path(data_dir: &Path) -> PathBuf {
    let mut location = data_dir.to_path_buf();
    location.push(RELAY_DIR);
    location.push(RELAY_STATE_FILE);
    location
}
fn update_bool(params: &serde_json::Value, key: &str, target: &mut bool) {
if let Some(value) = params.get(key).and_then(|v| v.as_bool()) {
*target = value;
}
}
fn update_endpoint(
params: &serde_json::Value,
key: &str,
target: &mut Option<String>,
) -> Result<()> {
if !params.get(key).is_some() {
return Ok(());
}
let endpoint = params
.get(key)
.and_then(|v| v.as_str())
.map(str::trim)
.filter(|s| !s.is_empty());
*target = endpoint.map(validate_endpoint).transpose()?;
Ok(())
}
fn validate_endpoint(endpoint: &str) -> Result<String> {
if endpoint.len() > 512 || endpoint.contains('\n') || endpoint.contains('\r') {
anyhow::bail!("Invalid endpoint");
}
let lower = endpoint.to_ascii_lowercase();
if !(lower.starts_with("http://") || lower.starts_with("https://")) {
anyhow::bail!("Endpoint must start with http:// or https://");
}
Ok(endpoint.to_string())
}
/// Trims a free-text message and rejects it when the trimmed text is longer
/// than 500 bytes or contains a NUL character.
fn sanitize_optional_text(value: &str) -> Result<String> {
    let trimmed = value.trim();
    let too_long = trimmed.len() > 500;
    let has_nul = trimmed.chars().any(|c| c == '\0');
    if too_long || has_nul {
        anyhow::bail!("Invalid message");
    }
    Ok(trimmed.to_owned())
}
/// Current UTC time formatted as an RFC 3339 string.
fn now() -> String {
    let current = chrono::Utc::now();
    current.to_rfc3339()
}

View File

@ -4,8 +4,9 @@ use super::RpcHandler;
use anyhow::{Context, Result};
use std::time::Duration;
const PODMAN_INSPECT_TIMEOUT: Duration = Duration::from_secs(10);
const PODMAN_PS_TIMEOUT: Duration = Duration::from_secs(10);
const PODMAN_INSPECT_TIMEOUT: Duration = Duration::from_secs(5);
const PODMAN_PS_TIMEOUT: Duration = Duration::from_secs(5);
const ORCHESTRATOR_HEALTH_TIMEOUT: Duration = Duration::from_secs(5);
impl RpcHandler {
pub(super) async fn handle_container_install(
@ -171,36 +172,60 @@ impl RpcHandler {
// between "installed" and "not-installed" in the UI.
let (data, _) = self.state_manager.get_snapshot().await;
if data.server_info.status_info.containers_scanned && !data.package_data.is_empty() {
let containers: Vec<serde_json::Value> = data
.package_data
.iter()
.map(|(id, pkg)| {
let mut containers = Vec::with_capacity(data.package_data.len());
for (id, pkg) in &data.package_data {
// Keep this mapping in sync with the UI's
// ContainerStatus.state union in
// neode-ui/src/api/container-client.ts. The UI maps
// transitional variants to single-button labels
// (Stopping… / Starting… / Restarting…).
let state = match &pkg.state {
crate::data_model::PackageState::Running => "running",
crate::data_model::PackageState::Stopped => "stopped",
crate::data_model::PackageState::Exited => "exited",
crate::data_model::PackageState::Starting => "starting",
crate::data_model::PackageState::Stopping => "stopping",
crate::data_model::PackageState::Restarting => "restarting",
crate::data_model::PackageState::Installing => "installing",
crate::data_model::PackageState::Installed => "installed",
crate::data_model::PackageState::Updating => "updating",
crate::data_model::PackageState::Removing => "removing",
crate::data_model::PackageState::CreatingBackup => "creating-backup",
crate::data_model::PackageState::RestoringBackup => "restoring-backup",
crate::data_model::PackageState::BackingUp => "backing-up",
let mut state = match &pkg.state {
crate::data_model::PackageState::Running => "running".to_string(),
crate::data_model::PackageState::Stopped => "stopped".to_string(),
crate::data_model::PackageState::Exited => "exited".to_string(),
crate::data_model::PackageState::Starting => "starting".to_string(),
crate::data_model::PackageState::Stopping => "stopping".to_string(),
crate::data_model::PackageState::Restarting => "restarting".to_string(),
crate::data_model::PackageState::Installing => "installing".to_string(),
crate::data_model::PackageState::Installed => "installed".to_string(),
crate::data_model::PackageState::Updating => "updating".to_string(),
crate::data_model::PackageState::Removing => "removing".to_string(),
crate::data_model::PackageState::CreatingBackup => {
"creating-backup".to_string()
}
crate::data_model::PackageState::RestoringBackup => {
"restoring-backup".to_string()
}
crate::data_model::PackageState::BackingUp => "backing-up".to_string(),
};
// Scanner backoff preserves cached package_data. Refresh stable
// states so callers do not see stale `running`/`exited` after
// health-monitor recovery or Quadlet --rm container removal.
if state == "running" && requires_launch_port_for_health(id) {
if !self.cached_reachable_health(id).await?.is_some() {
state = live_state_for_app(id)
.await
.unwrap_or("starting".to_string());
}
} else if should_refresh_cached_state(&state) {
if launch_port_reachable(id).await {
state = "running".to_string();
} else {
if let Some(live) = live_state_for_app(id).await {
state = live;
} else if quadlet_service_active(id).await {
state = "starting".to_string();
}
}
}
let lan = pkg
.installed
.as_ref()
.and_then(|i| i.interface_addresses.get("main"))
.and_then(|a| a.lan_address.as_deref());
serde_json::json!({
containers.push(serde_json::json!({
"id": id,
"name": id,
"state": state,
@ -208,9 +233,8 @@ impl RpcHandler {
"created": "",
"ports": [],
"lan_address": lan,
})
})
.collect();
}));
}
return Ok(serde_json::json!(containers));
}
@ -383,15 +407,33 @@ impl RpcHandler {
// If app_id is provided, get health for that app.
if let Some(params) = params {
if let Some(app_id) = params.get("app_id").and_then(|v| v.as_str()) {
if let Some(health) = self.cached_reachable_health(app_id).await? {
return Ok(serde_json::json!({ app_id: health }));
}
if let Some(health) = self.cached_state_health(app_id).await {
return Ok(serde_json::json!({ app_id: health }));
}
if requires_launch_port_for_health(app_id) {
return Ok(serde_json::json!({ app_id: "starting" }));
}
if let Some(health) = self.stack_health(app_id).await? {
return Ok(serde_json::json!({ app_id: health }));
}
let mut last_err: Option<anyhow::Error> = None;
for candidate in status_app_id_candidates(app_id) {
match orchestrator.health(&candidate).await {
Ok(health) => return Ok(serde_json::json!({ app_id: health })),
Err(e) => last_err = Some(e),
match tokio::time::timeout(
ORCHESTRATOR_HEALTH_TIMEOUT,
orchestrator.health(&candidate),
)
.await
{
Ok(Ok(health)) => return Ok(serde_json::json!({ app_id: health })),
Ok(Err(e)) => last_err = Some(e),
Err(_) => {}
}
}
for name in status_container_name_candidates(app_id) {
@ -424,14 +466,19 @@ impl RpcHandler {
.and_then(|s| s.strip_suffix("-dev"))
.or_else(|| container.name.strip_prefix("archy-"))
.unwrap_or(container.name.as_str());
match orchestrator.health(app_id_candidate).await {
Ok(health) => {
match tokio::time::timeout(
ORCHESTRATOR_HEALTH_TIMEOUT,
orchestrator.health(app_id_candidate),
)
.await
{
Ok(Ok(health)) => {
health_map.insert(
app_id_candidate.to_string(),
serde_json::Value::String(health),
);
}
Err(_) => {
Ok(Err(_)) | Err(_) => {
health_map.insert(
app_id_candidate.to_string(),
serde_json::Value::String("unknown".to_string()),
@ -443,6 +490,65 @@ impl RpcHandler {
Ok(serde_json::Value::Object(health_map))
}
async fn cached_state_health(&self, app_id: &str) -> Option<&'static str> {
let (data, _) = self.state_manager.get_snapshot().await;
let Some(pkg) = data.package_data.get(app_id) else {
if data.server_info.status_info.containers_scanned {
return Some("stopped");
}
return None;
};
match pkg.state {
crate::data_model::PackageState::Running => None,
crate::data_model::PackageState::Installing
| crate::data_model::PackageState::Installed
| crate::data_model::PackageState::Starting => Some("starting"),
crate::data_model::PackageState::Stopping
| crate::data_model::PackageState::Stopped
| crate::data_model::PackageState::Exited => Some("stopped"),
crate::data_model::PackageState::Removing => Some("removing"),
crate::data_model::PackageState::Restarting
| crate::data_model::PackageState::Updating
| crate::data_model::PackageState::CreatingBackup
| crate::data_model::PackageState::RestoringBackup
| crate::data_model::PackageState::BackingUp => Some("starting"),
}
}
/// Probe the app's launch endpoint and report `"healthy"` when it answers.
///
/// The endpoint is the cached `main` interface LAN address when one is
/// recorded, otherwise the built-in probe URL for the app. `http(s)://`
/// endpoints are probed over HTTP; anything else is reduced to a port and
/// probed with a TCP connect. Returns `Ok(None)` when the package is being
/// removed, when no endpoint or port can be determined, or when the probe
/// fails.
async fn cached_reachable_health(&self, app_id: &str) -> Result<Option<String>> {
    let (snapshot, _) = self.state_manager.get_snapshot().await;
    let package = snapshot.package_data.get(app_id);

    // An app that is being removed is never reported as healthy.
    if let Some(found) = package {
        if matches!(found.state, crate::data_model::PackageState::Removing) {
            return Ok(None);
        }
    }

    let advertised = package
        .and_then(|found| found.installed.as_ref())
        .and_then(|installed| installed.interface_addresses.get("main"))
        .and_then(|address| address.lan_address.as_deref())
        .map(ToOwned::to_owned);
    let target = match advertised.or_else(|| health_probe_url_for_app(app_id)) {
        Some(target) => target,
        None => return Ok(None),
    };

    let reachable = if target.starts_with("http://") || target.starts_with("https://") {
        http_launch_url_reachable(&target).await
    } else {
        match port_from_url(&target) {
            Some(port) => launch_port_reachable_by_port(port).await,
            None => return Ok(None),
        }
    };
    Ok(reachable.then(|| "healthy".to_string()))
}
async fn stack_health(&self, app_id: &str) -> Result<Option<String>> {
let Some(members) = stack_health_members(app_id) else {
return Ok(None);
@ -469,8 +575,14 @@ impl RpcHandler {
}
if saw_unknown {
if let Some(health) = self.cached_reachable_health(app_id).await? {
return Ok(Some(health));
}
Ok(Some("unknown".to_string()))
} else if saw_starting {
if let Some(health) = self.cached_reachable_health(app_id).await? {
return Ok(Some(health));
}
Ok(Some("starting".to_string()))
} else {
Ok(Some("healthy".to_string()))
@ -482,7 +594,9 @@ async fn member_health(
orchestrator: &dyn crate::container::traits::ContainerOrchestrator,
app_id: &str,
) -> Result<String> {
if let Ok(health) = orchestrator.health(app_id).await {
if let Ok(Ok(health)) =
tokio::time::timeout(ORCHESTRATOR_HEALTH_TIMEOUT, orchestrator.health(app_id)).await
{
return Ok(health);
}
for name in status_container_name_candidates(app_id) {
@ -508,10 +622,8 @@ fn stack_health_members(app_id: &str) -> Option<&'static [&'static str]> {
"indeedhub-minio",
"indeedhub-relay",
"indeedhub-api",
"indeedhub-ffmpeg",
"indeedhub",
]),
"fedimint" => Some(&["fedimint"]),
_ => None,
}
}
@ -583,6 +695,115 @@ fn status_container_name_candidates(app_id: &str) -> Vec<String> {
out
}
/// Whether a cached container state is one of the "not running" values
/// (`exited`, `stopped`, `stopping`) that callers re-check against live data.
fn should_refresh_cached_state(state: &str) -> bool {
    const STALE_PRONE: [&str; 3] = ["exited", "stopped", "stopping"];
    STALE_PRONE.iter().any(|candidate| *candidate == state)
}
/// Return the `state` string reported for the first container-name candidate
/// of `app_id` whose inspection yields one, or `None` if no candidate does.
async fn live_state_for_app(app_id: &str) -> Option<String> {
    for candidate in status_container_name_candidates(app_id) {
        let reported = inspect_container_state_value(&candidate)
            .await
            .and_then(|inspected| {
                inspected
                    .get("state")
                    .and_then(|value| value.as_str())
                    .map(str::to_string)
            });
        if reported.is_some() {
            return reported;
        }
    }
    None
}
/// Check whether any `<candidate>.service` user unit for `app_id` is active.
///
/// Runs `systemctl --user is-active --quiet` for each container-name
/// candidate, giving each probe two seconds. A probe that times out, fails to
/// spawn, or exits non-zero counts as "not active".
async fn quadlet_service_active(app_id: &str) -> bool {
    for name in status_container_name_candidates(app_id) {
        let unit = format!("{name}.service");
        let mut probe = tokio::process::Command::new("systemctl");
        probe
            .arg("--user")
            .arg("is-active")
            .arg("--quiet")
            .arg(&unit)
            // Do not leave a stuck systemctl behind when the timeout fires.
            .kill_on_drop(true);
        let finished = tokio::time::timeout(Duration::from_secs(2), probe.status()).await;
        if let Ok(Ok(exit)) = finished {
            if exit.success() {
                return true;
            }
        }
    }
    false
}
/// Built-in `http://localhost:<port>` probe URL for a known app id (including
/// its aliases), or `None` for apps without a registered probe port.
fn health_probe_url_for_app(app_id: &str) -> Option<String> {
    // One row per accepted app id; aliases repeat the port of their app.
    const PROBE_PORTS: &[(&str, u16)] = &[
        ("bitcoin-ui", 8334),
        ("botfights", 9100),
        ("btcpay-server", 23000),
        ("btcpay", 23000),
        ("btcpayserver", 23000),
        ("dwn", 3100),
        ("electrumx", 50002),
        ("electrs", 50002),
        ("mempool-electrs", 50002),
        ("electrs-ui", 50002),
        ("fedimint", 8175),
        ("fedimintd", 8175),
        ("filebrowser", 8083),
        ("gitea", 3001),
        ("grafana", 3000),
        ("homeassistant", 8123),
        ("home-assistant", 8123),
        ("immich", 2283),
        ("immich_server", 2283),
        ("indeedhub", 7778),
        ("jellyfin", 8096),
        ("lnd", 18083),
        ("lnd-ui", 18083),
        ("mempool", 4080),
        ("mempool-web", 4080),
        ("nginx-proxy-manager", 8081),
        ("ollama", 11434),
        ("photoprism", 2342),
        ("portainer", 9000),
        ("searxng", 8888),
        ("tailscale", 8240),
        ("uptime-kuma", 3002),
        ("vaultwarden", 8082),
    ];

    PROBE_PORTS
        .iter()
        .find_map(|&(id, port)| (id == app_id).then(|| format!("http://localhost:{port}")))
}
/// True for the Fedimint family of app ids, whose health is only trusted when
/// the launch endpoint itself is reachable.
fn requires_launch_port_for_health(app_id: &str) -> bool {
    app_id == "fedimint" || app_id == "fedimintd" || app_id == "fedimint-gateway"
}
async fn launch_port_reachable(app_id: &str) -> bool {
let Some(port) = health_probe_url_for_app(app_id).and_then(|url| port_from_url(&url)) else {
return false;
};
launch_port_reachable_by_port(port).await
}
/// Whether a TCP connection to `127.0.0.1:<port>` succeeds within two seconds.
async fn launch_port_reachable_by_port(port: u16) -> bool {
    let attempt = tokio::net::TcpStream::connect(("127.0.0.1", port));
    match tokio::time::timeout(Duration::from_secs(2), attempt).await {
        Ok(connected) => connected.is_ok(),
        // The connect did not finish in time.
        Err(_elapsed) => false,
    }
}
/// Issue a GET to `url` (two-second timeout, redirects not followed) and
/// report whether the response status is 2xx or 3xx.
///
/// Any client-construction or request error is reported as unreachable.
async fn http_launch_url_reachable(url: &str) -> bool {
    let built = reqwest::Client::builder()
        .timeout(Duration::from_secs(2))
        // A redirect response itself already proves the app is serving.
        .redirect(reqwest::redirect::Policy::none())
        .build();
    let client = match built {
        Ok(client) => client,
        Err(_) => return false,
    };

    client
        .get(url)
        .send()
        .await
        .map(|response| {
            let status = response.status();
            status.is_success() || status.is_redirection()
        })
        .unwrap_or(false)
}
/// Extract the explicit TCP port from a URL or `host:port` style address.
///
/// Only the authority component is inspected: an optional `scheme://` prefix
/// is dropped, the authority ends at the first `/`, `?` or `#`, and any
/// `userinfo@` prefix is ignored. This keeps colons in the path or query, and
/// the colons inside an IPv6 literal, from being mistaken for a port
/// separator.
///
/// Returns `None` when no port is present, when the port is not a plain
/// decimal number that fits in `u16`, or when the host is an unbracketed
/// IPv6 literal (where the port separator is ambiguous).
fn port_from_url(url: &str) -> Option<u16> {
    // Treat the prefix as a scheme only if it looks like one; otherwise a
    // "://" inside a query string of a scheme-less address would be misread.
    let rest = match url.split_once("://") {
        Some((scheme, rest))
            if !scheme.is_empty()
                && scheme
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')) =>
        {
            rest
        }
        _ => url,
    };

    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or(rest);
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host_port)| host_port);

    let (host, port) = host_port.rsplit_once(':')?;
    if host.starts_with('[') {
        // Bracketed IPv6: the port separator must follow the closing bracket.
        if !host.ends_with(']') {
            return None;
        }
    } else if host.contains(':') {
        // Unbracketed IPv6 literal: the last group is not a port.
        return None;
    }

    // `str::parse` would accept a leading '+', so require digits only.
    if port.is_empty() || !port.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    port.parse::<u16>().ok()
}
async fn inspect_container_state_value(name: &str) -> Option<serde_json::Value> {
if let Some(v) = ps_container_state_value(name).await {
return Some(v);

View File

@ -98,6 +98,20 @@ impl RpcHandler {
// Bitcoin & Lightning deep data
"bitcoin.getinfo" => self.handle_bitcoin_getinfo().await,
"bitcoin.relay-status" => self.handle_bitcoin_relay_status().await,
"bitcoin.relay-update-settings" => {
self.handle_bitcoin_relay_update_settings(params).await
}
"bitcoin.relay-request-peer" => self.handle_bitcoin_relay_request_peer(params).await,
"bitcoin.relay-approve-request" => {
self.handle_bitcoin_relay_approve_request(params).await
}
"bitcoin.relay-reject-request" => {
self.handle_bitcoin_relay_reject_request(params).await
}
"bitcoin.relay-create-tor-service" => {
self.handle_bitcoin_relay_create_tor_service().await
}
"bitcoin.init-wallet-from-seed" => {
self.handle_bitcoin_init_wallet_from_seed(params).await
}

View File

@ -23,10 +23,15 @@ impl RpcHandler {
.await
.context("Failed to parse newaddress response")?;
if let Some(error) = body.get("error").and_then(|v| v.as_str()) {
anyhow::bail!("LND could not generate an address: {}", error);
}
let address = body
.get("address")
.and_then(|v| v.as_str())
.unwrap_or("")
.filter(|addr| !addr.trim().is_empty())
.ok_or_else(|| anyhow::anyhow!("LND did not return a Bitcoin address. The wallet may still be locked, uninitialized, or waiting for Bitcoin to sync."))?
.to_string();
Ok(serde_json::json!({ "address": address }))

View File

@ -2,6 +2,7 @@ mod analytics;
mod auth;
mod backup_rpc;
mod bitcoin;
pub(crate) mod bitcoin_relay;
mod container;
mod content;
mod credentials;
@ -302,6 +303,7 @@ impl RpcHandler {
| "system.stats"
| "tor.status"
| "tor.onion-addresses"
| "bitcoin.relay-status"
| "federation.list-nodes"
| "system.get-settings"
| "system.get-node-key"

View File

@ -3,7 +3,7 @@ use crate::port_allocator::PortAllocator;
use anyhow::{Context, Result};
use std::time::Duration;
const PODMAN_LIST_TIMEOUT: Duration = Duration::from_secs(15);
const PODMAN_LIST_TIMEOUT: Duration = Duration::from_secs(60);
fn is_platform_managed_app(app_id: &str) -> bool {
matches!(
@ -31,7 +31,6 @@ fn is_platform_managed_app(app_id: &str) -> bool {
| "fedimint"
| "fedimint-gateway"
| "indeedhub"
| "saleor"
| "immich"
)
}
@ -501,15 +500,6 @@ pub(super) fn all_container_names(package_id: &str) -> Vec<String> {
"netbird-dashboard".into(),
"netbird-server".into(),
],
"saleor" => vec![
"saleor-db".into(),
"saleor-cache".into(),
"saleor-api".into(),
"saleor-worker".into(),
"saleor-jaeger".into(),
"saleor-mailpit".into(),
"saleor".into(),
],
"nostr-vpn" => vec![
"nostr-vpn".into(),
"archy-nostr-vpn".into(),
@ -599,7 +589,6 @@ pub(super) fn get_data_dirs_for_app(package_id: &str) -> Vec<String> {
format!("{}/penpot-postgres", base),
],
"netbird" => vec![format!("{}/netbird", base)],
"saleor" => vec![format!("{}/saleor", base), format!("{}/saleor-db", base)],
_ => vec![format!("{}/{}", base, package_id)],
}
}
@ -977,6 +966,7 @@ pub(super) async fn get_app_config(
vec![
"/var/lib/archipelago/portainer:/data".to_string(),
"/run/user/1000/podman/podman.sock:/var/run/docker.sock".to_string(),
"/var/lib/archipelago/portainer/compose:/data/compose".to_string(),
],
vec![],
None,
@ -1006,7 +996,7 @@ pub(super) async fn get_app_config(
Some(vec![
"sh".to_string(),
"-c".to_string(),
"tailscaled --tun=userspace-networking & sleep 2; tailscale web --listen 0.0.0.0:8240 & wait".to_string(),
"tailscaled --tun=userspace-networking & for i in $(seq 1 30); do [ -S /var/run/tailscale/tailscaled.sock ] && break; sleep 1; done; tailscale web --listen 0.0.0.0:8240 & wait".to_string(),
]),
),
"fedimint" => (
@ -1079,13 +1069,6 @@ pub(super) async fn get_app_config(
None,
None,
),
"saleor" => (
vec!["9010:80".to_string(), "8000:8000".to_string()],
vec!["/var/lib/archipelago/saleor:/app/media".to_string()],
vec![],
None,
None,
),
"nostr-rs-relay" => (
vec!["18081:8080".to_string()],
vec!["/var/lib/archipelago/nostr-rs-relay:/usr/src/app/db".to_string()],

View File

@ -289,15 +289,6 @@ pub(super) fn startup_order(package_id: &str) -> &'static [&'static str] {
&["archy-btcpay-db", "archy-nbxplorer", "btcpay-server"]
}
"netbird" => &["netbird-server", "netbird-dashboard", "netbird"],
"saleor" => &[
"saleor-db",
"saleor-cache",
"saleor-jaeger",
"saleor-mailpit",
"saleor-api",
"saleor-worker",
"saleor",
],
"penpot" | "penpot-frontend" => &[
"penpot-postgres",
"penpot-valkey",

View File

@ -13,11 +13,12 @@ use crate::api::rpc::RpcHandler;
use crate::data_model::InstallPhase;
use crate::update::host_sudo;
use anyhow::{Context, Result};
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
use tokio::time::{timeout, Duration};
use tracing::{debug, info, warn};
const INSTALL_LOG: &str = "/var/log/archipelago/container-installs.log";
const IMAGE_INSPECT_TIMEOUT: Duration = Duration::from_secs(10);
/// Append a timestamped line to the persistent install log.
pub(in crate::api::rpc) async fn install_log(msg: &str) {
@ -34,6 +35,36 @@ pub(in crate::api::rpc) async fn install_log(msg: &str) {
}
}
/// Check whether `image` is present in the local Podman image store.
///
/// Uses `podman image exists`, whose exit status is a stable contract:
/// `0` means the image was found and `1` means it was not. Any other status
/// is an error in Podman itself and is returned as `Err` together with the
/// captured stderr.
///
/// # Errors
///
/// Fails when the command does not finish within `IMAGE_INSPECT_TIMEOUT`,
/// cannot be spawned, exits with an unexpected status, or is killed by a
/// signal.
async fn local_podman_image_exists(image: &str) -> Result<bool> {
    let mut cmd = tokio::process::Command::new("podman");
    cmd.args(["image", "exists", image]);
    // Make sure a timed-out probe does not linger against a congested store.
    cmd.kill_on_drop(true);
    let output = timeout(IMAGE_INSPECT_TIMEOUT, cmd.output())
        .await
        .with_context(|| {
            format!(
                "podman image exists {} timed out after {}s",
                image,
                IMAGE_INSPECT_TIMEOUT.as_secs()
            )
        })?
        .with_context(|| format!("Failed to execute podman image exists {}", image))?;
    match output.status.code() {
        Some(0) => Ok(true),
        Some(1) => Ok(false),
        Some(code) => Err(anyhow::anyhow!(
            "podman image exists {} exited with {}: {}",
            image,
            code,
            String::from_utf8_lossy(&output.stderr).trim()
        )),
        None => Err(anyhow::anyhow!(
            "podman image exists {} terminated by signal",
            image
        )),
    }
}
pub(super) async fn patch_indeedhub_nostr_provider() {
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
@ -244,10 +275,6 @@ impl RpcHandler {
if package_id == "netbird" {
return self.install_netbird_stack().await;
}
if package_id == "saleor" {
return self.install_saleor_stack().await;
}
// Dependency checks. Prefer the scanner's cached package state so a
// congested Podman API does not turn an already-running dependency into
// a false install failure. Fall back to a bounded direct Podman probe
@ -447,6 +474,7 @@ impl RpcHandler {
Ok(container_name) => {
self.set_install_phase(package_id, InstallPhase::WaitingHealthy)
.await;
ensure_host_port_listener(package_id, &container_name, &[]).await?;
crate::api::rpc::package::runtime::reconcile_companions_for(package_id)
.await;
install_log(&format!(
@ -652,10 +680,6 @@ impl RpcHandler {
self.write_lnd_conf(&rpc_user, &rpc_pass).await?;
}
if package_id == "portainer" {
ensure_user_podman_socket().await?;
}
// Pre-install: SearXNG settings.yml (required or container exits immediately)
if package_id == "searxng" {
let searx_dir = "/var/lib/archipelago/searxng";
@ -748,16 +772,10 @@ impl RpcHandler {
.await;
debug!("Running container with args: {:?}", run_args);
// Build command with optional custom command/args
let mut cmd = tokio::process::Command::new("podman");
cmd.args(&run_args);
if let Some(custom_cmd) = custom_command {
cmd.arg(custom_cmd);
} else if let Some(args) = custom_args {
cmd.args(args);
}
let mut run_output = cmd.output().await.context("Failed to run container")?;
let command_tail = install_command_tail(custom_command.as_deref(), custom_args.as_ref());
let mut run_output = podman_run_for_install(package_id, &run_args, &command_tail)
.await
.context("Failed to run container")?;
if !run_output.status.success() {
let stderr = String::from_utf8_lossy(&run_output.stderr).to_string();
@ -766,7 +784,9 @@ impl RpcHandler {
.args(["rm", "-f", container_name])
.output()
.await;
run_output = cmd.output().await.context("Failed to rerun container")?;
run_output = podman_run_for_install(package_id, &run_args, &command_tail)
.await
.context("Failed to rerun container")?;
}
}
@ -922,12 +942,7 @@ impl RpcHandler {
let is_local_image = docker_image.starts_with("localhost/");
let has_local_fallback = if !is_local_image {
let local_tag = format!("localhost/{}:latest", package_id);
let check = tokio::process::Command::new("podman")
.args(["images", "-q", &local_tag])
.output()
.await
.ok();
check.is_some_and(|o| !String::from_utf8_lossy(&o.stdout).trim().is_empty())
local_podman_image_exists(&local_tag).await.unwrap_or(false)
} else {
false
};
@ -942,14 +957,9 @@ impl RpcHandler {
);
} else {
// Local image — verify it exists
let images_output = tokio::process::Command::new("podman")
.args(["images", "-q", docker_image])
.output()
if !local_podman_image_exists(docker_image)
.await
.context("Failed to check local image")?;
if String::from_utf8_lossy(&images_output.stdout)
.trim()
.is_empty()
.context("Failed to check local image")?
{
return Err(anyhow::anyhow!(
"Local image {} not found. Build the image first \
@ -1139,12 +1149,10 @@ impl RpcHandler {
}
// Verify image exists locally after pull.
let verify = tokio::process::Command::new("podman")
.args(["images", "-q", docker_image])
.output()
if !local_podman_image_exists(docker_image)
.await
.context("Failed to verify pulled image")?;
if String::from_utf8_lossy(&verify.stdout).trim().is_empty() {
.context("Failed to verify pulled image")?
{
return Err(anyhow::anyhow!(
"Image {} not found locally after pull",
docker_image
@ -1278,11 +1286,13 @@ impl RpcHandler {
// set `prune=N` in bitcoin.conf themselves after install.
let bitcoin_conf = format!(
"\
# rpcauth: salted hash only no plaintext password in config or CLI\n\
# rpcauth: salted hash only - no plaintext password in config or CLI\n\
{}\n\
server=1\n\
rpcallowip=0.0.0.0/0\n\
listen=1\n\
rpcthreads=16\n\
rpcworkqueue=256\n\
printtoconsole=1\n",
rpcauth_line
);
@ -1871,29 +1881,34 @@ autopilot.active=false\n",
.unwrap_or_default();
super::validation::validate_app_id(app_id)?;
match app_id {
"saleor" => {
if app_id == "filebrowser" {
let password =
tokio::fs::read_to_string("/var/lib/archipelago/secrets/saleor-admin-password")
tokio::fs::read_to_string("/var/lib/archipelago/secrets/filebrowser/password")
.await
.unwrap_or_default()
.trim()
.to_string();
if password.is_empty() {
return Ok(serde_json::json!({ "credentials": [] }));
}
Ok(serde_json::json!({
"title": "Saleor admin login",
"description": "Saleor opens to its own dashboard login. Use this generated admin account to sign in.",
.map(|p| p.trim().to_string())
.unwrap_or_else(|_| "admin".to_string());
return Ok(serde_json::json!({
"title": "File Browser credentials",
"description": "Use these credentials when File Browser asks you to sign in.",
"credentials": [
{ "label": "Email", "value": "admin@example.com", "sensitive": false },
{ "label": "Username", "value": "admin" },
{ "label": "Password", "value": password, "sensitive": true }
]
}))
}));
}
_ => Ok(serde_json::json!({ "credentials": [] })),
if app_id == "photoprism" {
return Ok(serde_json::json!({
"title": "PhotoPrism credentials",
"description": "Use these credentials when PhotoPrism asks you to sign in.",
"credentials": [
{ "label": "Username", "value": "admin" },
{ "label": "Password", "value": "archipelago", "sensitive": true }
]
}));
}
Ok(serde_json::json!({ "credentials": [] }))
}
}
@ -1914,10 +1929,128 @@ async fn cleanup_stale_package_ports(package_id: &str) {
cleanup_stale_pasta_port("8444").await;
}
"nextcloud" => cleanup_stale_pasta_port("8085").await,
"portainer" => cleanup_stale_pasta_port("9000").await,
_ => {}
}
}
/// Build the trailing arguments appended after the image in `podman run`.
///
/// A custom command takes precedence and becomes the single tail element;
/// otherwise the custom argument list is copied; with neither, the tail is
/// empty.
fn install_command_tail(
    custom_cmd: Option<&str>,
    custom_args: Option<&Vec<String>>,
) -> Vec<String> {
    match (custom_cmd, custom_args) {
        (Some(command), _) => vec![command.to_string()],
        (None, Some(arguments)) => arguments.to_vec(),
        (None, None) => Vec::new(),
    }
}
/// Launch the install container and return the captured Podman output.
///
/// Packages selected by `should_scope_podman_run` are first attempted through
/// the create-then-scoped-start path. If that path returns an error, the
/// failure is logged and the container is launched with a plain
/// `podman <run_args> <command_tail>` instead.
async fn podman_run_for_install(
    package_id: &str,
    run_args: &[&str],
    command_tail: &[String],
) -> Result<std::process::Output> {
    if should_scope_podman_run(package_id) {
        let scoped = podman_create_then_scoped_start(package_id, run_args, command_tail).await;
        match scoped {
            Err(err) => {
                tracing::warn!(package_id, error = %err, "scoped podman create/start failed; falling back to direct podman run");
            }
            finished => return finished,
        }
    }

    tokio::process::Command::new("podman")
        .args(run_args)
        .args(command_tail)
        .output()
        .await
        .context("Failed to run podman")
}
/// Create the container with `podman create`, then start it inside a
/// transient user scope via `systemd-run --user --scope`.
///
/// `run_args` is expected to be a `podman run` argument list: a leading `run`
/// is rewritten to `create`, and the detach flag (`-d` / `--detach`) is
/// dropped because `podman create` does not accept it. The container name is
/// taken from the value following `--name`, falling back to `package_id`.
///
/// Returns the `podman create` output when the container ends up started.
/// A failed create returns that failed output unchanged. When the scoped
/// start fails, or `systemd-run` cannot be spawned at all, a direct
/// `podman start` is attempted, and its output is returned if it fails too.
///
/// # Errors
///
/// Fails only when `podman create` or the fallback `podman start` cannot be
/// executed.
async fn podman_create_then_scoped_start(
    package_id: &str,
    run_args: &[&str],
    command_tail: &[String],
) -> Result<std::process::Output> {
    let container_name = run_args
        .windows(2)
        .find_map(|pair| (pair[0] == "--name").then_some(pair[1]))
        .unwrap_or(package_id);

    let create_args: Vec<String> = run_args
        .iter()
        .enumerate()
        .filter_map(|(idx, arg)| match *arg {
            "run" if idx == 0 => Some("create".to_string()),
            "-d" | "--detach" => None,
            other => Some(other.to_string()),
        })
        .chain(command_tail.iter().cloned())
        .collect();

    let create_output = tokio::process::Command::new("podman")
        .args(&create_args)
        .output()
        .await
        .context("Failed to run podman create")?;
    if !create_output.status.success() {
        return Ok(create_output);
    }

    let mut scoped_start = tokio::process::Command::new("systemd-run");
    scoped_start.args([
        "--user",
        "--scope",
        "--quiet",
        "--collect",
        "podman",
        "start",
        container_name,
    ]);
    // The container now exists under its final name. Returning an error here
    // would make the caller retry with `podman run`, which would collide with
    // that name, so every scoped-start failure (including a missing
    // systemd-run binary) falls through to a direct start instead.
    let scoped_failure = match scoped_start.output().await {
        Ok(output) if output.status.success() => return Ok(create_output),
        Ok(output) => String::from_utf8_lossy(&output.stderr).trim().to_string(),
        Err(err) => format!("failed to run systemd-run: {err}"),
    };
    tracing::warn!(
        package_id,
        container = container_name,
        stderr = %scoped_failure,
        "scoped podman start after create failed; trying direct podman start"
    );

    let direct_output = tokio::process::Command::new("podman")
        .args(["start", container_name])
        .output()
        .await
        .context("Failed to run fallback podman start")?;
    if direct_output.status.success() {
        Ok(create_output)
    } else {
        Ok(direct_output)
    }
}
/// Whether the install of `package_id` goes through the
/// create-then-scoped-start path instead of a plain `podman run`.
fn should_scope_podman_run(package_id: &str) -> bool {
    const SCOPED_PACKAGES: &[&str] = &[
        "botfights",
        "filebrowser",
        "gitea",
        "grafana",
        "homeassistant",
        "home-assistant",
        "jellyfin",
        "nginx-proxy-manager",
        "nostr-rs-relay",
        "photoprism",
        "portainer",
        "searxng",
        "uptime-kuma",
        "vaultwarden",
    ];
    SCOPED_PACKAGES.iter().any(|scoped| *scoped == package_id)
}
async fn cleanup_start_conflict(package_id: &str, stderr: &str) -> bool {
if stderr.contains("name is already in use") || stderr.contains("name \"") {
return true;
@ -1968,6 +2101,12 @@ async fn cleanup_start_conflict(package_id: &str, stderr: &str) -> bool {
cleanup_stale_pasta_port("8085").await;
true
}
"portainer"
if stderr.contains("pasta failed") || stderr.contains("address already in use") =>
{
cleanup_stale_pasta_port("9000").await;
true
}
_ => false,
}
}
@ -2026,7 +2165,7 @@ async fn ensure_host_port_listener(
return Ok(());
};
if wait_for_host_port(port, 10).await {
if wait_for_host_port(package_id, port, 10).await {
return Ok(());
}
@ -2052,7 +2191,7 @@ async fn ensure_host_port_listener(
));
}
if wait_for_host_port(port, 60).await {
if wait_for_host_port(package_id, port, 60).await {
install_log(&format!(
"INSTALL REPAIR OK: {} — host port {} is listening after restart",
package_id, port
@ -2084,31 +2223,6 @@ fn published_host_port(container_name: &str) -> Option<u16> {
})
}
async fn ensure_user_podman_socket() -> Result<()> {
let socket_path = "/run/user/1000/podman/podman.sock";
if tokio::fs::try_exists(socket_path).await.unwrap_or(false) {
return Ok(());
}
let status = tokio::process::Command::new("systemctl")
.args(["--user", "restart", "podman.socket"])
.status()
.await
.context("spawn systemctl --user restart podman.socket")?;
if !status.success() {
anyhow::bail!("systemctl --user restart podman.socket exited {status}");
}
for _ in 0..20 {
if tokio::fs::try_exists(socket_path).await.unwrap_or(false) {
return Ok(());
}
tokio::time::sleep(Duration::from_millis(250)).await;
}
anyhow::bail!("podman socket {socket_path} did not appear after restart")
}
fn required_host_port(package_id: &str) -> Option<u16> {
match package_id {
"grafana" => Some(3000),
@ -2118,17 +2232,21 @@ fn required_host_port(package_id: &str) -> Option<u16> {
"gitea" => Some(3001),
"nextcloud" => Some(8085),
"nginx-proxy-manager" => Some(8081),
"portainer" => Some(9000),
_ => None,
}
}
async fn wait_for_host_port(port: u16, timeout_secs: u64) -> bool {
async fn wait_for_host_port(package_id: &str, port: u16, timeout_secs: u64) -> bool {
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
loop {
if tokio::net::TcpStream::connect(("127.0.0.1", port))
let ready = match package_id {
"uptime-kuma" => http_host_port_ready(port, "/").await,
_ => tokio::net::TcpStream::connect(("127.0.0.1", port))
.await
.is_ok()
{
.is_ok(),
};
if ready {
return true;
}
@ -2140,6 +2258,36 @@ async fn wait_for_host_port(port: u16, timeout_secs: u64) -> bool {
}
}
/// Probe `127.0.0.1:<port>` with a minimal `GET <path>` request and report
/// whether the response starts with an HTTP/1.x 2xx or 3xx status line.
///
/// The connect and the first read are each limited to three seconds; any
/// connect, write, read or timeout failure, or an empty read, yields `false`.
async fn http_host_port_ready(port: u16, path: &str) -> bool {
    let connecting = tokio::net::TcpStream::connect(("127.0.0.1", port));
    let mut stream = match tokio::time::timeout(Duration::from_secs(3), connecting).await {
        Ok(Ok(stream)) => stream,
        _ => return false,
    };

    let request = format!("GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n");
    if stream.write_all(request.as_bytes()).await.is_err() {
        return false;
    }

    // Only the start of the status line is needed, so one small read suffices.
    let mut buf = [0u8; 128];
    let received = match tokio::time::timeout(Duration::from_secs(3), stream.read(&mut buf)).await
    {
        Ok(Ok(count)) if count > 0 => count,
        _ => return false,
    };

    let head = String::from_utf8_lossy(&buf[..received]);
    ["HTTP/1.1 2", "HTTP/1.1 3", "HTTP/1.0 2", "HTTP/1.0 3"]
        .iter()
        .any(|prefix| head.starts_with(*prefix))
}
/// Resolve the host gateway IP for the --add-host flag by reading the default
/// gateway from the routing table.
/// An explicit IP avoids issues with "host-gateway" in rootless Podman.
@ -2235,6 +2383,18 @@ set -eu
conf=/var/lib/archipelago/bitcoin/bitcoin.conf
[ -f "$conf" ] || exit 0
changed=0
tmp=$(mktemp)
awk -F= '
/^(server|txindex|rpcbind|rpcallowip|rpcport|listen|bind|dbcache|rpcthreads|rpcworkqueue)=/ {
if (seen[$1]++) next
}
{ print }
' "$conf" > "$tmp"
if ! cmp -s "$conf" "$tmp"; then
cat "$tmp" > "$conf"
changed=1
fi
rm -f "$tmp"
ensure_line() {
line="$1"
key="${line%%=*}"
@ -2246,6 +2406,8 @@ ensure_line() {
ensure_line server=1
ensure_line rpcallowip=0.0.0.0/0
ensure_line listen=1
ensure_line rpcthreads=16
ensure_line rpcworkqueue=256
[ "$changed" -eq 0 ] && exit 0
exit 2
"#;
@ -2272,6 +2434,7 @@ fn should_try_orchestrator_install(package_id: &str, orchestrator_available: boo
fn orchestrator_install_app_id(package_id: &str) -> &str {
match package_id {
"electrs" | "mempool-electrs" => "electrumx",
"home-assistant" => "homeassistant",
_ => package_id,
}
}
@ -2299,6 +2462,16 @@ fn uses_orchestrator_install_flow(package_id: &str) -> bool {
| "archy-btcpay-db"
| "archy-nbxplorer"
| "btcpay-server"
| "homeassistant"
| "home-assistant"
| "nextcloud"
| "vaultwarden"
| "jellyfin"
| "photoprism"
| "uptime-kuma"
| "gitea"
| "portainer"
| "meshtastic"
)
}
@ -2336,6 +2509,16 @@ mod tests {
"archy-btcpay-db",
"archy-nbxplorer",
"btcpay-server",
"homeassistant",
"home-assistant",
"nextcloud",
"vaultwarden",
"jellyfin",
"photoprism",
"uptime-kuma",
"gitea",
"portainer",
"meshtastic",
] {
assert!(uses_orchestrator_install_flow(app));
assert!(should_try_orchestrator_install(app, true));
@ -2364,6 +2547,10 @@ mod tests {
assert_eq!(orchestrator_install_app_id("bitcoin-core"), "bitcoin-core");
assert_eq!(orchestrator_install_app_id("electrs"), "electrumx");
assert_eq!(orchestrator_install_app_id("mempool-electrs"), "electrumx");
assert_eq!(
orchestrator_install_app_id("home-assistant"),
"homeassistant"
);
assert_eq!(orchestrator_install_app_id("lnd"), "lnd");
}

View File

@ -2,15 +2,18 @@ use super::config::{
get_app_capabilities, get_containers_for_app, get_data_dirs_for_app, get_health_check_args,
get_memory_limit, is_valid_docker_image,
};
use super::dependencies::ordered_containers_for_start;
use super::dependencies::{ordered_containers_for_start, startup_order};
use super::install::install_log;
use super::validation::validate_app_id;
use crate::api::rpc::RpcHandler;
use crate::data_model::PackageState;
use anyhow::{Context, Result};
use archipelago_container::AppManifest;
use std::path::Path;
use std::process::Output;
use std::sync::Arc;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tracing::warn;
const PODMAN_CONTROL_TIMEOUT: Duration = Duration::from_secs(30);
@ -53,7 +56,11 @@ impl RpcHandler {
.ok_or_else(|| anyhow::anyhow!("Missing package id"))?;
validate_app_id(package_id)?;
let to_start = ordered_containers_for_start(package_id).await?;
let to_start = if self.orchestrator.is_some() && uses_single_orchestrator_app(package_id) {
vec![orchestrator_app_id(package_id).to_string()]
} else {
ordered_containers_for_start(package_id).await?
};
if to_start.is_empty() {
tracing::warn!("package.start {}: no containers found", package_id);
return Err(anyhow::anyhow!("No containers found for {}", package_id));
@ -124,7 +131,16 @@ impl RpcHandler {
.ok_or_else(|| anyhow::anyhow!("Missing package id"))?;
validate_app_id(package_id)?;
let containers = get_containers_for_app(package_id).await?;
let single_orchestrator_app =
self.orchestrator.is_some() && uses_single_orchestrator_app(package_id);
let mut containers = if single_orchestrator_app {
vec![orchestrator_app_id(package_id).to_string()]
} else {
get_containers_for_app(package_id).await?
};
if !single_orchestrator_app {
containers.reverse();
}
if containers.is_empty() {
tracing::warn!("package.stop {}: no containers found", package_id);
return Err(anyhow::anyhow!("No containers found for {}", package_id));
@ -190,7 +206,13 @@ impl RpcHandler {
.ok_or_else(|| anyhow::anyhow!("Missing package id"))?;
validate_app_id(package_id)?;
let containers = get_containers_for_app(package_id).await?;
let single_orchestrator_app =
self.orchestrator.is_some() && uses_single_orchestrator_app(package_id);
let containers = if single_orchestrator_app {
vec![orchestrator_app_id(package_id).to_string()]
} else {
get_containers_for_app(package_id).await?
};
if containers.is_empty() {
tracing::warn!("package.restart {}: no containers found", package_id);
return Err(anyhow::anyhow!("No containers found for {}", package_id));
@ -206,7 +228,11 @@ impl RpcHandler {
let package_id_owned = package_id.to_string();
let companion_app_id = package_id_owned.clone();
let to_restart = ordered_containers_for_start(package_id).await?;
let to_restart = if single_orchestrator_app {
vec![orchestrator_app_id(package_id).to_string()]
} else {
ordered_containers_for_start(package_id).await?
};
let state_manager = Arc::clone(&self.state_manager);
let orchestrator = self.orchestrator.clone();
let pre_state =
@ -323,7 +349,9 @@ impl RpcHandler {
match rm_out {
Ok(o) if o.status.success() => removed += 1,
Ok(o) => {
// If normal rm fails (e.g., still running), force as fallback
// If normal rm fails (e.g., still running/stopping/removing),
// force with targeted cleanup fallbacks. This is deliberately
// container-scoped; never prune the store during uninstall.
let stderr = String::from_utf8_lossy(&o.stderr);
tracing::warn!(
"Uninstall {}: rm {} failed ({}), trying force",
@ -331,28 +359,36 @@ impl RpcHandler {
name,
stderr.trim()
);
let force_rm = podman_control(&["rm", "-f", name]).await;
match force_rm {
Ok(o2) if o2.status.success() => removed += 1,
_ => {
let msg = format!("Failed to remove {}: {}", name, stderr.trim());
tracing::error!("Uninstall {}: {}", package_id, msg);
errors.push(msg);
}
}
}
match force_remove_runtime_container(name).await {
Ok(()) => removed += 1,
Err(e) => {
let msg = format!("Failed to remove {}: {}", name, e);
let msg =
format!("Failed to remove {}: {}; {}", name, stderr.trim(), e);
tracing::error!("Uninstall {}: {}", package_id, msg);
errors.push(msg);
}
}
}
Err(e) => match force_remove_runtime_container(name).await {
Ok(()) => removed += 1,
Err(force_err) => {
let msg = format!("Failed to remove {}: {}; {}", name, e, force_err);
tracing::error!("Uninstall {}: {}", package_id, msg);
errors.push(msg);
}
},
}
}
self.set_uninstall_stage(package_id, "Cleaning up volumes")
.await;
// Clean up dangling volumes associated with removed containers
let _ = podman_control(&["volume", "prune", "-f"]).await;
// Avoid global Podman volume prune on production nodes: store-wide
// Podman cleanup commands can block app health under load. App data is
// removed explicitly below when preserve_data=false.
tracing::info!(
package_id = %package_id,
"Skipping global podman volume prune during uninstall"
);
// Clean up app-specific networks (only if no other containers use them)
let app_networks: Vec<&str> = match package_id {
@ -600,9 +636,25 @@ async fn do_package_start(to_start: &[String]) -> Result<()> {
if i > 0 {
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
if let Err(e) = ensure_startable_container_state(name).await {
tracing::error!(container = %name, error = %e, "container is not startable");
errors.push(format!("{}: {}", name, e));
continue;
}
match inspect_runtime_container_state(name).await {
Ok(Some(state)) if state == "running" => {
tracing::debug!(container = %name, "container already running during package start");
continue;
}
Ok(_) => {}
Err(e) => {
tracing::warn!(container = %name, error = %e, "failed to re-inspect before package start")
}
}
repair_before_package_start(name).await;
wait_before_package_start(name).await;
tracing::info!("Starting container: {}", name);
let out = podman_control(&["start", name])
let out = podman_start_container(name)
.await
.context(format!("Failed to exec podman start {}", name))?;
if !out.status.success() {
@ -669,6 +721,7 @@ async fn do_orchestrator_package_start(
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
repair_before_package_start(name).await;
wait_before_package_start(name).await;
match orchestrator.start(name).await {
Ok(()) => wait_after_orchestrator_start(name).await,
Err(e) if is_unknown_app_id_error(&e) => {
@ -681,10 +734,13 @@ async fn do_orchestrator_package_start(
}
}
}
if errors.is_empty() {
Ok(())
} else {
if !errors.is_empty() {
Err(anyhow::anyhow!("Start failed: {}", errors.join("; ")))
} else {
for name in to_start {
ensure_runtime_host_port_listener(name).await?;
}
Ok(())
}
}
@ -703,6 +759,137 @@ async fn podman_control(args: &[&str]) -> Result<Output> {
podman_with_timeout(args, podman_control_timeout(args)).await
}
/// Removes a container with escalating `podman` commands, treating an
/// already-absent container as success.
///
/// Order of attempts: `podman rm -f`, `podman rm -f --time 0`, then a
/// best-effort `podman container cleanup` followed by one final `podman rm -f`.
///
/// # Errors
/// Fails if any `podman rm` invocation cannot be executed, or if the final
/// attempt exits unsuccessfully for a reason other than the container being
/// missing; the trimmed stderr of that attempt is embedded in the error.
async fn force_remove_runtime_container(container_name: &str) -> Result<()> {
    let plain = ["rm", "-f", container_name];
    let no_grace = ["rm", "-f", "--time", "0", container_name];
    let attempts: [&[&str]; 2] = [&plain, &no_grace];

    for attempt in attempts {
        let result = podman_control(attempt).await?;
        if result.status.success() {
            return Ok(());
        }
        if is_missing_container_error(&String::from_utf8_lossy(&result.stderr)) {
            return Ok(());
        }
    }

    // Best-effort cleanup before the last try; its result is deliberately ignored.
    let _ = podman_control(&["container", "cleanup", container_name]).await;

    let last = podman_control(&["rm", "-f", container_name]).await?;
    if last.status.success() {
        return Ok(());
    }
    let stderr = String::from_utf8_lossy(&last.stderr);
    if is_missing_container_error(&stderr) {
        return Ok(());
    }
    Err(anyhow::anyhow!("force remove failed: {}", stderr.trim()))
}
/// Stops a container without a grace period, falling back to `podman kill`.
///
/// If both commands exit unsuccessfully (and not because the container is
/// missing), the container state is polled up to 15 times, sleeping two
/// seconds after each non-terminal observation, until it is absent or reports
/// `exited`, `stopped` or `configured`.
///
/// # Errors
/// Fails if a `podman` command or a state inspection cannot be run, or if the
/// container never reaches a stopped state within the polling window.
async fn force_stop_runtime_container(container_name: &str) -> Result<()> {
    let immediate_stop = ["stop", "-t", "0", container_name];
    let kill = ["kill", container_name];
    let attempts: [&[&str]; 2] = [&immediate_stop, &kill];

    for attempt in attempts {
        let result = podman_control(attempt).await?;
        if result.status.success() {
            return Ok(());
        }
        if is_missing_container_error(&String::from_utf8_lossy(&result.stderr)) {
            return Ok(());
        }
    }

    let mut polls_left = 15;
    while polls_left > 0 {
        let Some(state) = inspect_runtime_container_state(container_name).await? else {
            return Ok(());
        };
        if matches!(state.as_str(), "exited" | "stopped" | "configured") {
            return Ok(());
        }
        tokio::time::sleep(Duration::from_secs(2)).await;
        polls_left -= 1;
    }

    Err(anyhow::anyhow!(
        "container did not reach stopped state after force stop"
    ))
}
/// Checks that a container is absent or in a state from which a start can be
/// attempted.
///
/// A container reported as `removing` is handed to
/// `wait_for_container_absent_or_startable` with a 60 second budget.
///
/// # Errors
/// Fails if the state cannot be inspected, if waiting out a removal fails, or
/// if the container reports a state outside the accepted set.
async fn ensure_startable_container_state(container_name: &str) -> Result<()> {
    let state = match inspect_runtime_container_state(container_name).await? {
        Some(state) => state,
        // No container at all: nothing blocks a start.
        None => return Ok(()),
    };

    if state == "removing" {
        return wait_for_container_absent_or_startable(container_name, Duration::from_secs(60))
            .await;
    }

    let startable = matches!(
        state.as_str(),
        "configured" | "created" | "exited" | "stopped" | "running" | "paused"
    );
    if startable {
        Ok(())
    } else {
        Err(anyhow::anyhow!(
            "container is in unsupported state before start: {}",
            state
        ))
    }
}
/// Polls a container until it is gone or back in a startable state.
///
/// While the container reports `removing` and `timeout` has not elapsed, the
/// state is re-checked every two seconds. Once the deadline passes with the
/// container still `removing`, it is force-removed.
///
/// # Errors
/// Fails if inspection or the forced removal fails, or if the container
/// reports a state that is neither startable nor `removing`.
async fn wait_for_container_absent_or_startable(
    container_name: &str,
    timeout: Duration,
) -> Result<()> {
    let give_up_at = std::time::Instant::now() + timeout;
    loop {
        let Some(state) = inspect_runtime_container_state(container_name).await? else {
            return Ok(());
        };
        match state.as_str() {
            "configured" | "created" | "exited" | "stopped" | "running" | "paused" => {
                return Ok(());
            }
            "removing" => {
                if std::time::Instant::now() < give_up_at {
                    tokio::time::sleep(Duration::from_secs(2)).await;
                } else {
                    // Removal is stuck past the deadline: take over.
                    force_remove_runtime_container(container_name).await?;
                    return Ok(());
                }
            }
            _ => {
                return Err(anyhow::anyhow!(
                    "container is in unsupported state before start: {}",
                    state
                ));
            }
        }
    }
}
/// Reads a container's `.State.Status` via `podman inspect` (10 second timeout).
///
/// Returns `Ok(Some(status))` with the trimmed status text, or `Ok(None)` when
/// podman's stderr indicates the container does not exist.
///
/// # Errors
/// Fails if podman cannot be run or if inspect fails for any other reason.
async fn inspect_runtime_container_state(container_name: &str) -> Result<Option<String>> {
    let args = ["inspect", container_name, "--format", "{{.State.Status}}"];
    let result = podman_with_timeout(&args, Duration::from_secs(10)).await?;

    if !result.status.success() {
        let stderr = String::from_utf8_lossy(&result.stderr);
        return if is_missing_container_error(&stderr) {
            Ok(None)
        } else {
            Err(anyhow::anyhow!("inspect failed: {}", stderr.trim()))
        };
    }

    let status = String::from_utf8_lossy(&result.stdout).trim().to_string();
    Ok(Some(status))
}
/// Returns `true` when `stderr` contains one of the phrases this module treats
/// as "the container is not there". Matching is a case-sensitive substring test.
fn is_missing_container_error(stderr: &str) -> bool {
    const MISSING_MARKERS: [&str; 4] = [
        "no such container",
        "no container with name",
        "does not exist",
        "not found",
    ];
    MISSING_MARKERS
        .iter()
        .any(|marker| stderr.contains(*marker))
}
fn podman_control_timeout(args: &[&str]) -> Duration {
args.windows(2)
.find_map(|pair| {
@ -714,6 +901,13 @@ fn podman_control_timeout(args: &[&str]) -> Duration {
.unwrap_or(PODMAN_CONTROL_TIMEOUT)
}
/// Timeout applied to `podman start` for a given container: 120 seconds for
/// `immich_server` and `netbird-server`, `PODMAN_CONTROL_TIMEOUT` otherwise.
fn podman_start_timeout(container_name: &str) -> Duration {
    let extended = matches!(container_name, "immich_server" | "netbird-server");
    if extended {
        Duration::from_secs(120)
    } else {
        PODMAN_CONTROL_TIMEOUT
    }
}
async fn podman_with_timeout(args: &[&str], timeout: Duration) -> Result<Output> {
let mut cmd = tokio::process::Command::new("podman");
cmd.args(args);
@ -732,12 +926,48 @@ async fn command_with_timeout(
.with_context(|| format!("Failed to exec {}", description))
}
/// Starts a container, preferring a transient user scope for containers that
/// publish host ports.
///
/// When `runtime_host_ports` reports at least one port, the start is first run
/// as `systemd-run --user --scope --quiet --collect podman start <name>`. A
/// successful scoped run is returned as-is. If the scoped command could not be
/// executed a warning is logged; if it ran but exited unsuccessfully nothing is
/// logged. In both failure cases, and for containers without host ports, a
/// plain `podman start` is issued with the same per-container timeout.
async fn podman_start_container(container_name: &str) -> Result<Output> {
    let start_timeout = podman_start_timeout(container_name);

    if !runtime_host_ports(container_name).is_empty() {
        let mut scoped_cmd = tokio::process::Command::new("systemd-run");
        scoped_cmd.args([
            "--user",
            "--scope",
            "--quiet",
            "--collect",
            "podman",
            "start",
            container_name,
        ]);
        let description = format!("systemd-run --user --scope podman start {container_name}");

        match command_with_timeout(scoped_cmd, start_timeout, &description).await {
            Ok(out) if out.status.success() => return Ok(out),
            // Ran but failed: fall through to the direct start without logging.
            Ok(_) => {}
            Err(err) => {
                tracing::warn!(
                    container = %container_name,
                    error = %err,
                    "scoped podman start failed; falling back to direct podman start"
                );
            }
        }
    }

    podman_with_timeout(
        &["start", container_name],
        podman_start_timeout(container_name),
    )
    .await
}
async fn do_orchestrator_package_stop(
orchestrator: &dyn crate::container::traits::ContainerOrchestrator,
containers: &[String],
) -> Result<()> {
let mut errors = Vec::new();
for name in containers.iter().rev() {
for name in containers {
match orchestrator.stop(name).await {
Ok(()) => {}
Err(e) if is_unknown_app_id_error(&e) => {
@ -758,6 +988,44 @@ async fn do_orchestrator_package_stop(
}
}
/// Maps a package id to the app id passed to the orchestrator.
///
/// `electrs` and `mempool-electrs` map to `electrumx`; `home-assistant` maps to
/// `homeassistant`; every other id is returned unchanged.
fn orchestrator_app_id(package_id: &str) -> &str {
    if matches!(package_id, "electrs" | "mempool-electrs") {
        return "electrumx";
    }
    if package_id == "home-assistant" {
        return "homeassistant";
    }
    package_id
}
/// Returns `true` when `package_id` has an empty `startup_order` and appears in
/// the fixed list of package ids below.
fn uses_single_orchestrator_app(package_id: &str) -> bool {
    const SINGLE_APP_PACKAGES: &[&str] = &[
        "bitcoin-ui",
        "electrs-ui",
        "lnd-ui",
        "bitcoin-core",
        "bitcoin-knots",
        "lnd",
        "fedimint",
        "fedimint-gateway",
        "filebrowser",
        "electrumx",
        "electrs",
        "mempool-electrs",
        "homeassistant",
        "home-assistant",
        "nextcloud",
        "vaultwarden",
        "jellyfin",
        "photoprism",
        "uptime-kuma",
        "gitea",
        "portainer",
        "meshtastic",
        "botfights",
    ];

    // Packages with an explicit startup order never qualify.
    if !startup_order(package_id).is_empty() {
        return false;
    }
    SINGLE_APP_PACKAGES.contains(&package_id)
}
async fn do_orchestrator_package_restart(
orchestrator: &dyn crate::container::traits::ContainerOrchestrator,
to_restart: &[String],
@ -770,22 +1038,72 @@ async fn do_orchestrator_package_restart(
async fn do_package_stop(containers: &[String]) -> Result<()> {
let mut errors = Vec::new();
for name in containers {
match inspect_runtime_container_state(name).await {
Ok(None) => {
tracing::debug!(container = %name, "container already absent during stop");
continue;
}
Ok(Some(state)) if matches!(state.as_str(), "exited" | "stopped" | "configured") => {
tracing::debug!(container = %name, state = %state, "container already stopped");
continue;
}
Ok(Some(_)) => {}
Err(e) => {
tracing::warn!(container = %name, error = %e, "failed to inspect before stop")
}
}
tracing::info!(
"Stopping container: {} (timeout: {}s)",
name,
stop_timeout_secs(name)
);
let out = podman_control(&["stop", "-t", stop_timeout_secs(name), name])
.await
.context(format!("Failed to exec podman stop {}", name))?;
let out = match podman_control(&["stop", "-t", stop_timeout_secs(name), name]).await {
Ok(out) => out,
Err(e) => {
tracing::warn!(
container = %name,
error = %e,
"podman stop errored, trying force stop"
);
match force_stop_runtime_container(name).await {
Ok(()) => {
tracing::info!(container = %name, "force stop after stop error succeeded");
continue;
}
Err(force_err) => {
tracing::error!(
"Failed to stop {}: {}; force stop failed: {}",
name,
e,
force_err
);
errors.push(format!("{}: {}; force stop failed: {}", name, e, force_err));
continue;
}
}
}
};
if !out.status.success() {
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
if is_missing_companion_ok(name, &stderr) {
tracing::debug!(container = %name, "companion already absent during stop");
continue;
}
tracing::error!("Failed to stop {}: {}", name, stderr);
errors.push(format!("{}: {}", name, stderr));
tracing::warn!("Failed to stop {}: {}, trying force stop", name, stderr);
match force_stop_runtime_container(name).await {
Ok(()) => {
tracing::info!(container = %name, "force stop after stop failure succeeded")
}
Err(e) => {
tracing::error!(
"Failed to stop {}: {}; force stop failed: {}",
name,
stderr,
e
);
errors.push(format!("{}: {}; force stop failed: {}", name, stderr, e));
}
}
}
}
if !errors.is_empty() {
@ -801,6 +1119,7 @@ async fn do_package_restart(containers: &[String]) -> Result<()> {
for name in containers {
tracing::info!("Restarting container: {}", name);
repair_before_package_start(name).await;
wait_before_package_start(name).await;
let out = podman_control(&["restart", "-t", stop_timeout_secs(name), name])
.await
.context(format!("Failed to exec podman restart {}", name))?;
@ -818,7 +1137,8 @@ async fn do_package_restart(containers: &[String]) -> Result<()> {
);
// Fallback: stop then start
let _ = podman_control(&["stop", "-t", stop_timeout_secs(name), name]).await;
let start_out = podman_control(&["start", name])
wait_before_package_start(name).await;
let start_out = podman_start_container(name)
.await
.context(format!("Failed to exec podman start {}", name))?;
if !start_out.status.success() {
@ -855,22 +1175,158 @@ fn is_unknown_app_id_error(err: &anyhow::Error) -> bool {
async fn repair_before_package_start(container_name: &str) {
match container_name {
"btcpay-server" | "archy-nbxplorer" => repair_btcpay_dirs().await,
"indeedhub-postgres" | "indeedhub-redis" | "indeedhub-minio" | "indeedhub-relay"
| "indeedhub-api" | "indeedhub-ffmpeg" | "indeedhub" => repair_indeedhub_network().await,
"indeedhub" => repair_indeedhub_network().await,
"immich_server" => repair_immich_dirs().await,
"netbird" => repair_netbird_network().await,
"grafana" => {
repair_grafana_dirs().await;
cleanup_stale_pasta_port("3000").await;
}
"vaultwarden" => cleanup_stale_pasta_port("8082").await,
"homeassistant" | "home-assistant" => cleanup_stale_pasta_port("8123").await,
"nextcloud" => {
repair_nextcloud_dirs().await;
cleanup_stale_pasta_port("8085").await;
}
"nginx-proxy-manager" => repair_nginx_proxy_manager_container().await,
"gitea" => cleanup_gitea_stale_ports().await,
_ => {}
}
cleanup_runtime_host_ports(container_name).await;
}
/// Runs the container-specific dependency wait, if one exists, before a start.
///
/// Only `indeedhub`, `immich_server` and `netbird` have a wait step; every
/// other container name returns immediately.
async fn wait_before_package_start(container_name: &str) {
    if container_name == "indeedhub" {
        wait_for_indeedhub_dependency_dns().await;
    } else if container_name == "immich_server" {
        wait_for_immich_dependencies().await;
    } else if container_name == "netbird" {
        wait_for_netbird_dependency_dns().await;
    }
}
/// Waits for the IndeedHub frontend's dependencies in two bounded phases.
///
/// Phase 1 (up to 30 polls, two seconds apart): once the dependency containers
/// are running, the network aliases are repaired and the phase ends.
/// Phase 2 (up to 30 polls, two seconds apart): returns as soon as
/// `getent hosts minio` succeeds inside `indeedhub-minio`.
///
/// Never fails: if either phase exhausts its polls the function simply moves on.
async fn wait_for_indeedhub_dependency_dns() {
    let poll_interval = std::time::Duration::from_secs(2);

    let mut attempt = 0;
    while attempt < 30 {
        if indeedhub_frontend_dependencies_running().await {
            super::stacks::repair_indeedhub_network_aliases().await;
            break;
        }
        tokio::time::sleep(poll_interval).await;
        attempt += 1;
    }

    let probe = ["exec", "indeedhub-minio", "getent", "hosts", "minio"];
    for _ in 0..30 {
        let resolved = match podman_with_timeout(&probe, Duration::from_secs(5)).await {
            Ok(out) => out.status.success(),
            Err(_) => false,
        };
        if resolved {
            return;
        }
        tokio::time::sleep(poll_interval).await;
    }
}
async fn indeedhub_frontend_dependencies_running() -> bool {
for container in ["indeedhub-minio", "indeedhub-redis", "indeedhub-api"] {
if !container_is_running(container).await {
return false;
}
}
true
}
/// Returns `true` when `podman inspect` (5 second timeout) succeeds and reports
/// the status `running`. Any execution error or other status yields `false`.
async fn container_is_running(container: &str) -> bool {
    let args = ["inspect", container, "--format", "{{.State.Status}}"];
    match podman_with_timeout(&args, Duration::from_secs(5)).await {
        Ok(out) if out.status.success() => {
            String::from_utf8_lossy(&out.stdout).trim() == "running"
        }
        _ => false,
    }
}
/// Waits (up to 30 polls, two seconds apart) for `netbird-server` and
/// `netbird-dashboard` to be running. When both are up, the network aliases are
/// repaired, followed by one more two-second pause before returning.
///
/// Returns silently if the containers never come up within the polling window.
async fn wait_for_netbird_dependency_dns() {
    let pause = std::time::Duration::from_secs(2);
    for _attempt in 0..30 {
        let stack_up = container_is_running("netbird-server").await
            && container_is_running("netbird-dashboard").await;
        if stack_up {
            super::stacks::repair_netbird_network_aliases().await;
        }
        // Both outcomes pause for the same interval; only the ready case returns.
        tokio::time::sleep(pause).await;
        if stack_up {
            return;
        }
    }
}
/// Polls up to 60 times, two seconds apart, until both the Immich Postgres and
/// Redis readiness probes pass. Returns silently when the polls run out.
async fn wait_for_immich_dependencies() {
    let mut polls_left = 60;
    while polls_left > 0 {
        let ready = immich_postgres_ready().await && immich_redis_ready().await;
        if ready {
            return;
        }
        tokio::time::sleep(std::time::Duration::from_secs(2)).await;
        polls_left -= 1;
    }
}
/// Readiness probe for the `immich_postgres` container.
///
/// Ready when the container's health status is `healthy`, or failing that,
/// when `pg_isready -U postgres -d immich` run inside the container exits
/// successfully (5 second timeout). Execution errors count as not ready.
async fn immich_postgres_ready() -> bool {
    if container_health_is_healthy("immich_postgres").await {
        return true;
    }
    let probe = [
        "exec",
        "immich_postgres",
        "pg_isready",
        "-U",
        "postgres",
        "-d",
        "immich",
    ];
    match podman_with_timeout(&probe, Duration::from_secs(5)).await {
        Ok(out) => out.status.success(),
        Err(_) => false,
    }
}
/// Readiness probe for the `immich_redis` container.
///
/// Ready when the container's health status is `healthy`, or failing that,
/// when `valkey-cli ping` run inside the container exits successfully and its
/// stdout contains `PONG` (5 second timeout). Execution errors count as not ready.
async fn immich_redis_ready() -> bool {
    if container_health_is_healthy("immich_redis").await {
        return true;
    }
    let ping = ["exec", "immich_redis", "valkey-cli", "ping"];
    match podman_with_timeout(&ping, Duration::from_secs(5)).await {
        Ok(out) => {
            out.status.success() && String::from_utf8_lossy(&out.stdout).contains("PONG")
        }
        Err(_) => false,
    }
}
/// Returns `true` when `podman inspect` (5 second timeout) succeeds and the
/// container's health status is exactly `healthy`.
///
/// The format template prints `none` for containers without a health section,
/// so those report `false`, as do execution errors.
async fn container_health_is_healthy(container: &str) -> bool {
    let args = [
        "inspect",
        container,
        "--format",
        "{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}",
    ];
    match podman_with_timeout(&args, Duration::from_secs(5)).await {
        Ok(out) if out.status.success() => {
            String::from_utf8_lossy(&out.stdout).trim() == "healthy"
        }
        _ => false,
    }
}
/// Pre-start repair step for NetBird: delegates to
/// `super::stacks::repair_netbird_network_aliases`.
async fn repair_netbird_network() {
super::stacks::repair_netbird_network_aliases().await;
}
async fn repair_nginx_proxy_manager_container() {
@ -1009,11 +1465,11 @@ async fn recreate_nginx_proxy_manager_container() -> Result<()> {
}
async fn ensure_runtime_host_port_listener(container_name: &str) -> Result<()> {
let Some(port) = runtime_required_host_port(container_name) else {
let Some(port) = runtime_host_ports(container_name).into_iter().next() else {
return Ok(());
};
if wait_for_runtime_host_port(port, 10).await {
if wait_for_runtime_host_port(container_name, port, 10).await {
return Ok(());
}
@ -1035,7 +1491,7 @@ async fn ensure_runtime_host_port_listener(container_name: &str) -> Result<()> {
));
}
if wait_for_runtime_host_port(port, 60).await {
if wait_for_runtime_host_port(container_name, port, 60).await {
install_log(&format!(
"START REPAIR OK: {} — host port {} is listening after restart",
container_name, port
@ -1051,27 +1507,99 @@ async fn ensure_runtime_host_port_listener(container_name: &str) -> Result<()> {
))
}
fn runtime_required_host_port(container_name: &str) -> Option<u16> {
match container_name {
"grafana" => Some(3000),
"homeassistant" | "home-assistant" => Some(8123),
"searxng" => Some(8888),
"uptime-kuma" => Some(3002),
"vaultwarden" => Some(8082),
"gitea" => Some(3001),
"nextcloud" => Some(8085),
"nginx-proxy-manager" => Some(8081),
_ => None,
/// Host ports a container is expected to publish.
///
/// Ports declared in the container's manifest take precedence and are extended
/// with any legacy extras. When no manifest ports are found, a built-in table
/// is used; containers missing from the table get an empty list.
fn runtime_host_ports(container_name: &str) -> Vec<u16> {
    let declared = manifest_host_ports(container_name);
    if !declared.is_empty() {
        return with_legacy_extra_ports(container_name, declared);
    }

    let fallback: &[u16] = match container_name {
        "grafana" => &[3000],
        "homeassistant" | "home-assistant" => &[8123],
        "jellyfin" => &[8096],
        "searxng" => &[8888],
        "uptime-kuma" => &[3002],
        "vaultwarden" => &[8082],
        "gitea" => &[3001, 2222, 3000],
        "nextcloud" => &[8085],
        "nginx-proxy-manager" => &[8081, 8084, 8444],
        _ => &[],
    };
    fallback.to_vec()
}
/// Appends container-specific legacy ports to `ports` when they are missing.
///
/// `gitea` gains 3000; `nginx-proxy-manager` gains 8084 and 8444 (in that
/// order). Ports already present are not duplicated, and the existing order of
/// `ports` is preserved. Other containers get `ports` back unchanged.
fn with_legacy_extra_ports(container_name: &str, mut ports: Vec<u16>) -> Vec<u16> {
    let extras: &[u16] = match container_name {
        "gitea" => &[3000],
        "nginx-proxy-manager" => &[8084, 8444],
        _ => &[],
    };
    for &extra in extras {
        if !ports.contains(&extra) {
            ports.push(extra);
        }
    }
    ports
}
/// Looks up the host ports declared in the manifest of `container_name`.
///
/// Every `<apps_dir>/<entry>/manifest.yml` under the directories returned by
/// `manifest_apps_dirs` is read and parsed; unreadable directories, missing
/// files and unparsable manifests are skipped. The first manifest whose
/// container name matches wins. Returns an empty vector when none matches.
fn manifest_host_ports(container_name: &str) -> Vec<u16> {
    for apps_dir in manifest_apps_dirs() {
        let entries = match std::fs::read_dir(apps_dir) {
            Ok(entries) => entries,
            Err(_) => continue,
        };
        for app_dir in entries.flatten() {
            let manifest_path = app_dir.path().join("manifest.yml");
            let yaml = match std::fs::read_to_string(&manifest_path) {
                Ok(yaml) => yaml,
                Err(_) => continue,
            };
            let manifest = match AppManifest::parse(&yaml) {
                Ok(manifest) => manifest,
                Err(_) => continue,
            };
            if manifest_container_name(&manifest) != container_name {
                continue;
            }
            return manifest.app.ports.iter().map(|p| p.host).collect();
        }
    }
    Vec::new()
}
/// Candidate directories to search for app manifests, in priority order.
///
/// If `CARGO_MANIFEST_DIR` is set, `<that dir>/../../apps` comes first. It is
/// followed by the relative `apps` directory and two fixed locations under
/// `/opt/archipelago`. The directories are not checked for existence.
fn manifest_apps_dirs() -> Vec<std::path::PathBuf> {
    let from_cargo = std::env::var("CARGO_MANIFEST_DIR")
        .ok()
        .map(|root| Path::new(&root).join("../../apps"));

    let fixed = [
        "apps",
        "/opt/archipelago/apps",
        "/opt/archipelago/web-ui/archipelago-runtime/apps",
    ];

    from_cargo
        .into_iter()
        .chain(fixed.iter().map(|dir| Path::new(dir).to_path_buf()))
        .collect()
}
/// Resolves the container name for a manifest.
///
/// A non-empty string under the `container_name` extension key wins. Otherwise
/// the app id is used, prefixed with `archy-` for `bitcoin-ui`, `electrs-ui`
/// and `lnd-ui`.
fn manifest_container_name(manifest: &AppManifest) -> String {
    let explicit = manifest
        .app
        .extensions
        .get("container_name")
        .and_then(|value| value.as_str())
        .filter(|name| !name.is_empty());
    if let Some(name) = explicit {
        return name.to_string();
    }

    let id = manifest.app.id.as_str();
    if matches!(id, "bitcoin-ui" | "electrs-ui" | "lnd-ui") {
        format!("archy-{}", manifest.app.id)
    } else {
        id.to_string()
    }
}
async fn wait_for_runtime_host_port(port: u16, timeout_secs: u64) -> bool {
async fn wait_for_runtime_host_port(container_name: &str, port: u16, timeout_secs: u64) -> bool {
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs);
loop {
if tokio::net::TcpStream::connect(("127.0.0.1", port))
let ready = match container_name {
"uptime-kuma" => http_host_port_ready(port, "/").await,
_ => tokio::net::TcpStream::connect(("127.0.0.1", port))
.await
.is_ok()
{
.is_ok(),
};
if ready {
return true;
}
@ -1083,6 +1611,37 @@ async fn wait_for_runtime_host_port(port: u16, timeout_secs: u64) -> bool {
}
}
/// Probes `127.0.0.1:<port>` with a minimal HTTP/1.1 `GET <path>` request.
///
/// Returns `true` only when a connection is made within three seconds, the
/// request is written, and the first read (also limited to three seconds, at
/// most 128 bytes) begins with an HTTP/1.0 or HTTP/1.1 status line whose code
/// starts with `2` or `3`. Every failure along the way yields `false`.
async fn http_host_port_ready(port: u16, path: &str) -> bool {
    let io_timeout = std::time::Duration::from_secs(3);

    let connect = tokio::net::TcpStream::connect(("127.0.0.1", port));
    let mut stream = match tokio::time::timeout(io_timeout, connect).await {
        Ok(Ok(stream)) => stream,
        _ => return false,
    };

    let request = format!("GET {path} HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n");
    if stream.write_all(request.as_bytes()).await.is_err() {
        return false;
    }

    let mut buf = [0u8; 128];
    let received = match tokio::time::timeout(io_timeout, stream.read(&mut buf)).await {
        // A zero-length read means the peer closed without answering.
        Ok(Ok(count)) if count > 0 => count,
        _ => return false,
    };

    let head = String::from_utf8_lossy(&buf[..received]);
    ["HTTP/1.1 2", "HTTP/1.1 3", "HTTP/1.0 2", "HTTP/1.0 3"]
        .iter()
        .any(|prefix| head.starts_with(*prefix))
}
async fn repair_btcpay_dirs() {
let _ = tokio::process::Command::new("sudo")
.args([
@ -1157,6 +1716,27 @@ async fn repair_nextcloud_dirs() {
}
}
async fn repair_immich_dirs() {
let _ = tokio::process::Command::new("sudo")
.args(["mkdir", "-p", "/var/lib/archipelago/immich"])
.output()
.await;
let podman_chown = podman_control(&[
"unshare",
"chown",
"-R",
"0:0",
"/var/lib/archipelago/immich",
])
.await;
if !podman_chown.as_ref().is_ok_and(|o| o.status.success()) {
let _ = tokio::process::Command::new("sudo")
.args(["chown", "-R", "1000:1000", "/var/lib/archipelago/immich"])
.output()
.await;
}
}
async fn repair_btcpay_database_password() {
let Ok(db_pass) =
tokio::fs::read_to_string("/var/lib/archipelago/secrets/btcpay-db-password").await
@ -1205,25 +1785,28 @@ async fn cleanup_start_conflict(container_name: &str, stderr: &str) {
return;
}
if container_name == "gitea" {
cleanup_gitea_stale_ports().await;
let ports = runtime_host_ports(container_name);
if !ports.is_empty() {
cleanup_ports(&ports).await;
return;
}
}
match container_name {
"grafana" => cleanup_stale_pasta_port("3000").await,
"homeassistant" | "home-assistant" => cleanup_stale_pasta_port("8123").await,
"vaultwarden" => cleanup_stale_pasta_port("8082").await,
"nextcloud" => cleanup_stale_pasta_port("8085").await,
"nginx-proxy-manager" => cleanup_nginx_proxy_manager_ports().await,
_ => {}
/// Runs stale-port cleanup for every host port `runtime_host_ports` reports for
/// the container; does nothing when there are none.
async fn cleanup_runtime_host_ports(container_name: &str) {
    let host_ports = runtime_host_ports(container_name);
    if host_ports.is_empty() {
        return;
    }
    cleanup_ports(&host_ports).await;
}
async fn cleanup_nginx_proxy_manager_ports() {
cleanup_stale_pasta_port("8081").await;
cleanup_stale_pasta_port("8084").await;
cleanup_stale_pasta_port("8444").await;
cleanup_ports(&[8081, 8084, 8444]).await;
}
/// Calls `cleanup_stale_pasta_port` for each port, one at a time and in order.
async fn cleanup_ports(ports: &[u16]) {
    for rendered in ports.iter().map(ToString::to_string) {
        cleanup_stale_pasta_port(&rendered).await;
    }
}
async fn cleanup_stale_pasta_port(port: &str) {
@ -1249,31 +1832,6 @@ async fn cleanup_stale_pasta_port(port: &str) {
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
async fn cleanup_gitea_stale_ports() {
for port in ["3001", "2222", "3000"] {
let kill_listener = format!(
"ss -ltnp 'sport = :{}' 2>/dev/null | sed -n 's/.*pid=\\([0-9]*\\).*/\\1/p' | xargs -r kill 2>/dev/null || true",
port
);
let _ = tokio::process::Command::new("sh")
.args(["-c", &kill_listener])
.output()
.await;
let pattern = format!("pasta.*{}", port);
let _ = tokio::process::Command::new("pkill")
.args(["-f", &pattern])
.output()
.await;
let pattern = format!("rootlessport.*{}", port);
let _ = tokio::process::Command::new("pkill")
.args(["-f", &pattern])
.output()
.await;
}
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
pub(super) fn is_missing_companion_ok(name: &str, stderr: &str) -> bool {
matches!(
name,
@ -1352,3 +1910,20 @@ pub(super) fn orchestrator_uninstall_app_ids(package_id: &str) -> Vec<String> {
_ => vec![package_id.to_string()],
}
}
// Unit tests for host-port resolution.
#[cfg(test)]
mod tests {
use super::*;
// Expected ports for three apps. NOTE(review): no built-in `photoprism` entry
// is visible in `runtime_host_ports`, so 2342 presumably comes from a manifest
// found via `manifest_apps_dirs` — confirm the manifests are available wherever
// this test runs.
#[test]
fn runtime_host_ports_are_manifest_derived_for_public_apps() {
assert_eq!(runtime_host_ports("photoprism"), vec![2342]);
assert_eq!(runtime_host_ports("jellyfin"), vec![8096]);
assert_eq!(runtime_host_ports("uptime-kuma"), vec![3002]);
}
// Gitea must keep reporting 3000 in addition to its other ports, in this exact order.
#[test]
fn runtime_host_ports_preserve_legacy_extra_ports() {
assert_eq!(runtime_host_ports("gitea"), vec![3001, 2222, 3000]);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,8 @@ use anyhow::{Context, Result};
use tokio::io::{AsyncBufReadExt, BufReader};
use tracing::{error, info, warn};
const PODMAN_UPDATE_PULL_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(600);
impl RpcHandler {
/// Update a package to the version pinned in image-versions.sh.
/// This is a manual operation — the user clicks "Update" in the UI.
@ -327,6 +329,7 @@ impl RpcHandler {
if archipelago_container::image_uses_insecure_registry(image) {
cmd.arg("--tls-verify=false");
}
cmd.kill_on_drop(true);
let mut child = cmd
.arg(image)
.stdout(std::process::Stdio::piped())
@ -334,23 +337,38 @@ impl RpcHandler {
.spawn()
.context("Failed to start image pull")?;
if let Some(stderr) = child.stderr.take() {
let progress_task = if let Some(stderr) = child.stderr.take() {
let reader = BufReader::new(stderr);
let mut lines = reader.lines();
let pkg_id = package_id.to_string();
let state_mgr = self.state_manager.clone();
Some(tokio::spawn(async move {
while let Ok(Some(line)) = lines.next_line().await {
if let Some((downloaded, total)) = parse_pull_progress(&line) {
Self::update_install_progress(&state_mgr, &pkg_id, downloaded, total).await;
}
}
}
}))
} else {
None
};
let status = child
.wait()
.await
.context("Failed to wait for image pull")?;
let status = match tokio::time::timeout(PODMAN_UPDATE_PULL_TIMEOUT, child.wait()).await {
Ok(result) => result.context("Failed to wait for image pull")?,
Err(_) => {
let _ = child.kill().await;
return Err(anyhow::anyhow!(
"podman pull {} timed out after {}s",
image,
PODMAN_UPDATE_PULL_TIMEOUT.as_secs()
));
}
};
if let Some(task) = progress_task {
let _ = task.await;
}
if !status.success() {
return Err(anyhow::anyhow!("podman pull {} failed", image));
}
@ -430,7 +448,6 @@ fn should_try_orchestrator_update(package_id: &str, orchestrator_available: bool
fn orchestrator_update_app_id(package_id: &str) -> &str {
match package_id {
"bitcoin-knots" => "bitcoin-core",
"electrs" | "mempool-electrs" => "electrumx",
_ => package_id,
}
@ -459,8 +476,8 @@ fn candidate_app_ids_for_container(container_name: &str) -> Vec<String> {
match container_name {
"bitcoin-knots" | "bitcoin-core" => {
push("bitcoin-core");
push("bitcoin-knots");
push("bitcoin-core");
}
"archy-bitcoin-ui" => push("bitcoin-ui"),
"archy-lnd-ui" => push("lnd-ui"),
@ -525,7 +542,7 @@ mod tests {
fn container_name_candidates_cover_common_aliases() {
assert_eq!(
candidate_app_ids_for_container("bitcoin-knots"),
vec!["bitcoin-core", "bitcoin-knots"]
vec!["bitcoin-knots", "bitcoin-core"]
);
assert_eq!(
candidate_app_ids_for_container("archy-bitcoin-ui"),
@ -543,7 +560,8 @@ mod tests {
#[test]
fn update_aliases_map_to_manifest_app_ids() {
assert_eq!(orchestrator_update_app_id("bitcoin-knots"), "bitcoin-core");
assert_eq!(orchestrator_update_app_id("bitcoin-knots"), "bitcoin-knots");
assert_eq!(orchestrator_update_app_id("bitcoin-core"), "bitcoin-core");
assert_eq!(orchestrator_update_app_id("electrs"), "electrumx");
assert_eq!(orchestrator_update_app_id("mempool-electrs"), "electrumx");
assert_eq!(orchestrator_update_app_id("fedimint"), "fedimint");

View File

@ -1,7 +1,7 @@
use super::*;
use crate::api::rpc::RpcHandler;
use anyhow::{Context, Result};
use tracing::{debug, info};
use tracing::{debug, info, warn};
impl RpcHandler {
/// server.set-name — Rename the server (persisted to data_dir/server-name)
@ -32,6 +32,21 @@ impl RpcHandler {
data.server_info.name = Some(name.clone());
self.state_manager.update_data(data).await;
let hostname = hostname_from_server_name(&name);
let hostname_result = set_system_hostname(&hostname).await;
let (hostname_updated, hostname_error) = match hostname_result {
Ok(()) => (true, None),
Err(e) => {
warn!(
name = %name,
hostname = %hostname,
"Server name persisted but OS hostname update failed: {}",
e
);
(false, Some(e.to_string()))
}
};
info!("Server name updated to: {}", name);
// Push the new name to federation peers in background
@ -43,7 +58,12 @@ impl RpcHandler {
}
});
Ok(serde_json::json!({ "name": name }))
Ok(serde_json::json!({
"name": name,
"hostname": hostname,
"hostname_updated": hostname_updated,
"hostname_error": hostname_error,
}))
}
/// system.stats — CPU usage, RAM used/total, disk used/total, uptime, load average
@ -155,21 +175,7 @@ impl RpcHandler {
let mut freed_bytes: u64 = 0;
let mut actions: Vec<String> = Vec::new();
// 1. Prune dangling container images
match prune_container_images().await {
Ok(bytes) => {
if bytes > 0 {
freed_bytes += bytes;
actions.push(format!(
"Pruned dangling images: {} freed",
format_bytes(bytes)
));
}
}
Err(e) => actions.push(format!("Image prune failed: {}", e)),
}
// 2. Clean old log files (> 30 days)
// 1. Clean old log files (> 30 days)
match clean_old_logs(30).await {
Ok(bytes) => {
if bytes > 0 {
@ -180,7 +186,20 @@ impl RpcHandler {
Err(e) => actions.push(format!("Log cleanup failed: {}", e)),
}
// 3. Remove stale temp files
match vacuum_journal_logs("200M").await {
Ok(bytes) => {
if bytes > 0 {
freed_bytes += bytes;
actions.push(format!(
"Vacuumed journal logs: {} freed",
format_bytes(bytes)
));
}
}
Err(e) => actions.push(format!("Journal cleanup failed: {}", e)),
}
// 2. Remove stale temp files
match clean_temp_files().await {
Ok(bytes) => {
if bytes > 0 {
@ -191,17 +210,53 @@ impl RpcHandler {
Err(e) => actions.push(format!("Temp cleanup failed: {}", e)),
}
// 4. Prune container build cache
match prune_build_cache().await {
// 3. Keep only the most recent backend deploy backups. These are useful
// for rollback, but a long-lived alpha node can accumulate gigabytes of
// old binaries under /usr/local/bin.
match clean_backend_backups(3).await {
Ok(bytes) => {
if bytes > 0 {
freed_bytes += bytes;
actions.push(format!("Pruned build cache: {} freed", format_bytes(bytes)));
actions.push(format!(
"Removed old backend backups: {} freed",
format_bytes(bytes)
));
}
}
Err(e) => actions.push(format!("Build cache prune failed: {}", e)),
Err(e) => actions.push(format!("Backend backup cleanup failed: {}", e)),
}
match clean_legacy_backend_backups(3).await {
Ok(bytes) => {
if bytes > 0 {
freed_bytes += bytes;
actions.push(format!(
"Removed old legacy backend backups: {} freed",
format_bytes(bytes)
));
}
}
Err(e) => actions.push(format!("Legacy backend backup cleanup failed: {}", e)),
}
match clean_web_ui_backups(3).await {
Ok(bytes) => {
if bytes > 0 {
freed_bytes += bytes;
actions.push(format!(
"Removed old web UI backups: {} freed",
format_bytes(bytes)
));
}
}
Err(e) => actions.push(format!("Web UI backup cleanup failed: {}", e)),
}
actions.push(
"Skipped Podman image/volume prune: Podman store commands can block app health on busy nodes"
.to_string(),
);
tracing::info!(
"Disk cleanup complete: {} freed ({} actions)",
format_bytes(freed_bytes),
@ -216,6 +271,54 @@ impl RpcHandler {
}
}
/// Derives a hostname label from a free-form server name.
///
/// The name is trimmed and lowercased. ASCII letters and digits are kept;
/// every run of other characters collapses into a single `-`. The result is
/// capped at 63 bytes (the DNS label limit) and never starts or ends with `-`.
/// If nothing usable remains, `"archipelago"` is returned.
///
/// Leading separator characters are skipped rather than emitted and trimmed
/// afterwards, so they no longer consume one of the 63 available bytes.
pub(super) fn hostname_from_server_name(name: &str) -> String {
    const MAX_LABEL_LEN: usize = 63;

    let mut hostname = String::with_capacity(name.len().min(MAX_LABEL_LEN));
    // Start as if a dash had just been written: this suppresses leading dashes.
    let mut previous_dash = true;

    for c in name.trim().chars().flat_map(char::to_lowercase) {
        if c.is_ascii_lowercase() || c.is_ascii_digit() {
            hostname.push(c);
            previous_dash = false;
        } else if !previous_dash {
            hostname.push('-');
            previous_dash = true;
        }

        // Only ASCII is ever pushed, so byte length equals character count.
        if hostname.len() >= MAX_LABEL_LEN {
            break;
        }
    }

    // Leading dashes cannot occur; at most one trailing dash may remain.
    let hostname = hostname.trim_end_matches('-');
    if hostname.is_empty() {
        "archipelago".to_string()
    } else {
        hostname.to_string()
    }
}
/// Sets the OS hostname by running
/// `/usr/bin/sudo -n /usr/bin/hostnamectl set-hostname <hostname>`.
///
/// # Errors
/// Fails if the command cannot be spawned, or if it exits unsuccessfully. In
/// the latter case the error message is the trimmed stderr, or
/// `hostnamectl failed` when stderr is empty.
async fn set_system_hostname(hostname: &str) -> Result<()> {
    let mut cmd = tokio::process::Command::new("/usr/bin/sudo");
    cmd.args(["-n", "/usr/bin/hostnamectl", "set-hostname", hostname]);
    let result = cmd.output().await.context("Failed to run hostnamectl")?;

    if result.status.success() {
        return Ok(());
    }

    let stderr = String::from_utf8_lossy(&result.stderr);
    let reason = match stderr.trim() {
        "" => "hostnamectl failed",
        detail => detail,
    };
    Err(anyhow::anyhow!("{}", reason))
}
impl RpcHandler {
/// system.factory-reset — Wipe all user data, remove containers, and restart.
/// Only preserves the data_dir itself (recreated empty on restart).

View File

@ -1,6 +1,9 @@
mod handlers;
use crate::update::host_sudo;
use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use tracing::{debug, info};
/// Push the server name to all federation peers by syncing state.
@ -301,53 +304,12 @@ pub(super) async fn detect_usb_hardware_wallets() -> Result<Vec<serde_json::Valu
Ok(devices)
}
/// Prune dangling container images via `podman image prune -f`.
/// Returns estimated bytes freed.
pub(super) async fn prune_container_images() -> Result<u64> {
let output = tokio::process::Command::new("podman")
.args(["image", "prune", "-f"])
.output()
.await
.context("Failed to run podman image prune")?;
if !output.status.success() {
anyhow::bail!(
"podman image prune failed: {}",
String::from_utf8_lossy(&output.stderr)
);
}
// Podman outputs image IDs, estimate ~100MB per pruned image
let stdout = String::from_utf8_lossy(&output.stdout);
let pruned_count = stdout.lines().filter(|l| !l.trim().is_empty()).count();
Ok(pruned_count as u64 * 100_000_000) // rough estimate
}
/// Prune container build cache via `podman system prune -f`.
pub(super) async fn prune_build_cache() -> Result<u64> {
// Just prune volumes and build cache (not containers or images — those are handled above)
let output = tokio::process::Command::new("podman")
.args(["volume", "prune", "-f"])
.output()
.await
.context("Failed to run podman volume prune")?;
if !output.status.success() {
anyhow::bail!(
"podman volume prune failed: {}",
String::from_utf8_lossy(&output.stderr)
);
}
let stdout = String::from_utf8_lossy(&output.stdout);
let pruned_count = stdout.lines().filter(|l| !l.trim().is_empty()).count();
Ok(pruned_count as u64 * 10_000_000) // rough estimate per volume
}
/// Clean log files older than `max_age_days` from common log directories.
pub(super) async fn clean_old_logs(max_age_days: u64) -> Result<u64> {
let output = tokio::process::Command::new("sudo")
let output = tokio::process::Command::new("timeout")
.args([
"60s",
"sudo",
"find",
"/var/log",
"-type",
@ -366,8 +328,10 @@ pub(super) async fn clean_old_logs(max_age_days: u64) -> Result<u64> {
let stdout = String::from_utf8_lossy(&output.stdout);
let deleted_count = stdout.lines().filter(|l| !l.trim().is_empty()).count();
// Also clean rotated/compressed logs
let _ = tokio::process::Command::new("sudo")
let _ = tokio::process::Command::new("timeout")
.args([
"60s",
"sudo",
"find",
"/var/log",
"-type",
@ -384,14 +348,81 @@ pub(super) async fn clean_old_logs(max_age_days: u64) -> Result<u64> {
Ok(deleted_count as u64 * 500_000) // rough estimate per log file
}
/// Shrink the systemd journal to at most `max_size` and report the bytes reclaimed.
///
/// Journal disk usage is sampled before and after the vacuum. If the first
/// sample fails it is treated as 0; if the second fails it falls back to the
/// first, so the reported saving is 0 rather than an error.
///
/// # Errors
/// Fails when the `timeout 60s sudo journalctl --vacuum-size …` command cannot
/// be spawned or exits with a non-zero status.
pub(super) async fn vacuum_journal_logs(max_size: &str) -> Result<u64> {
    let usage_before = journal_disk_usage().await.unwrap_or(0);

    let mut vacuum = tokio::process::Command::new("timeout");
    vacuum.args(["60s", "sudo", "journalctl", "--vacuum-size", max_size]);
    let result = vacuum
        .output()
        .await
        .context("Failed to run journal vacuum")?;

    if !result.status.success() {
        let stderr = String::from_utf8_lossy(&result.stderr);
        anyhow::bail!("journal vacuum failed: {}", stderr);
    }

    let usage_after = match journal_disk_usage().await {
        Ok(bytes) => bytes,
        Err(_) => usage_before,
    };
    Ok(usage_before.saturating_sub(usage_after))
}
/// Ask `sudo -n journalctl --disk-usage` how much space the journal occupies, in bytes.
///
/// # Errors
/// Fails when the command cannot be spawned, exits with a non-zero status, or
/// prints output from which no size can be parsed.
async fn journal_disk_usage() -> Result<u64> {
    let result = tokio::process::Command::new("sudo")
        .args(["-n", "journalctl", "--disk-usage"])
        .output()
        .await
        .context("Failed to read journal disk usage")?;

    if result.status.success() {
        let report = String::from_utf8_lossy(&result.stdout);
        return parse_journal_disk_usage(&report)
            .ok_or_else(|| anyhow::anyhow!("could not parse journal disk usage"));
    }

    let stderr = String::from_utf8_lossy(&result.stderr);
    Err(anyhow::anyhow!(
        "journalctl --disk-usage failed: {}",
        stderr
    ))
}
/// Extract the first `<number><unit>` (or `<number> <unit>`) size from
/// `journalctl --disk-usage` style output and convert it to bytes.
///
/// Units are interpreted as powers of 1024 regardless of spelling
/// (`M`, `MB` and `MiB` all mean 1024²). Recognised magnitudes run from
/// bytes up to exbibytes. Trailing ASCII punctuation on the unit
/// (e.g. `"MB."` at the end of a sentence) is ignored.
///
/// Returns `None` when no number followed by a recognised unit is found.
fn parse_journal_disk_usage(output: &str) -> Option<u64> {
    let mut tokens = output.split_whitespace().peekable();
    while let Some(token) = tokens.next() {
        let (number, inline_unit) = split_number_unit(token);
        let Ok(value) = number.parse::<f64>() else {
            continue;
        };
        // Only *peek* at the following token: if it is not a unit it must stay
        // in the stream, because it may itself be the number we are looking for.
        let unit = inline_unit.or_else(|| tokens.peek().copied());
        if let Some(multiplier) = unit.and_then(unit_multiplier) {
            // Float-to-int `as` saturates, so oversized values clamp to u64::MAX.
            return Some((value * multiplier) as u64);
        }
    }
    None
}

/// Map a size suffix to its byte multiplier, or `None` if it is not a known unit.
fn unit_multiplier(unit: &str) -> Option<f64> {
    let power: u32 = match unit.trim_end_matches(|ch: char| ch.is_ascii_punctuation()) {
        "B" | "bytes" => 0,
        "K" | "KB" | "KiB" => 1,
        "M" | "MB" | "MiB" => 2,
        "G" | "GB" | "GiB" => 3,
        "T" | "TB" | "TiB" => 4,
        "P" | "PB" | "PiB" => 5,
        "E" | "EB" | "EiB" => 6,
        _ => return None,
    };
    // Powers of two up to 2^60 are exactly representable in f64.
    Some((1u64 << (10 * power)) as f64)
}

/// Split `value` at the first character that is neither an ASCII digit nor `.`.
///
/// Returns the leading numeric text (possibly empty) and the remainder, which
/// is `None` when nothing follows the numeric part.
fn split_number_unit(value: &str) -> (&str, Option<&str>) {
    let split_at = value
        .find(|ch: char| !ch.is_ascii_digit() && ch != '.')
        .unwrap_or(value.len());
    let (number, unit) = value.split_at(split_at);
    (number, (!unit.is_empty()).then_some(unit))
}
/// Remove stale temp files from /tmp and /var/tmp.
pub(super) async fn clean_temp_files() -> Result<u64> {
let mut freed = 0u64;
for dir in &["/tmp", "/var/tmp"] {
let output = tokio::process::Command::new("sudo")
let output = tokio::process::Command::new("timeout")
.args([
"find", dir, "-type", "f", "-mtime", "+7", "-delete", "-print",
"45s", "sudo", "find", dir, "-type", "f", "-mtime", "+7", "-delete", "-print",
])
.output()
.await;
@ -406,6 +437,177 @@ pub(super) async fn clean_temp_files() -> Result<u64> {
Ok(freed)
}
/// Keep the newest timestamped backend backups and remove older ones.
///
/// Operates on `/usr/local/bin` and delegates to `clean_backend_backups_in`.
/// `keep` is the number of newest backups to retain; the return value is the
/// summed recorded size, in bytes, of the backups that were removed.
pub(super) async fn clean_backend_backups(keep: usize) -> Result<u64> {
clean_backend_backups_in(Path::new("/usr/local/bin"), keep).await
}
/// Keep the newest legacy backend backups and remove older alpha-era deploy artifacts.
///
/// Scans `/usr/local/bin` for entries whose name starts with `archipelago.bak`
/// or `archipelago.before-`. Directories are not eligible (`allow_dirs` is
/// `false`). `keep` is the number of newest matches to retain; the return value
/// is the summed recorded size, in bytes, of the entries that were removed.
pub(super) async fn clean_legacy_backend_backups(keep: usize) -> Result<u64> {
clean_named_backups_in(
Path::new("/usr/local/bin"),
keep,
|name| name.starts_with("archipelago.bak") || name.starts_with("archipelago.before-"),
false,
)
.await
}
/// Keep the newest web UI rollback backups and remove older copies.
///
/// Scans `/opt/archipelago` for entries whose name starts with `web-ui.bak` or
/// is exactly `web-ui.old`. Directories are eligible here (`allow_dirs` is
/// `true`), so matching directory trees are removed recursively. `keep` is the
/// number of newest matches to retain; the return value is the summed recorded
/// size, in bytes, of the entries that were removed.
pub(super) async fn clean_web_ui_backups(keep: usize) -> Result<u64> {
clean_named_backups_in(
Path::new("/opt/archipelago"),
keep,
|name| name.starts_with("web-ui.bak") || name == "web-ui.old",
true,
)
.await
}
/// Directory-parameterised form of `clean_backend_backups`.
///
/// Collects the `archipelago.backup-*` files in `dir`, keeps the `keep` newest
/// and removes the rest (files only). Returns the summed recorded size, in
/// bytes, of the removed files.
async fn clean_backend_backups_in(dir: &Path, keep: usize) -> Result<u64> {
let mut backups = backend_backup_candidates(dir).await?;
remove_old_backups(&mut backups, keep, false).await
}
/// Remove all but the `keep` newest entries of `dir` whose file name satisfies `matches_name`.
///
/// `allow_dirs` is forwarded to both the candidate scan and the removal step,
/// so directories are only collected and recursively removed when it is `true`.
/// Returns the summed recorded size, in bytes, of the removed entries.
async fn clean_named_backups_in(
dir: &Path,
keep: usize,
matches_name: impl Fn(&str) -> bool,
allow_dirs: bool,
) -> Result<u64> {
let mut backups = named_backup_candidates(dir, matches_name, allow_dirs).await?;
remove_old_backups(&mut backups, keep, allow_dirs).await
}
/// Delete every backup except the `keep` newest and return the bytes reclaimed.
///
/// `backups` is sorted in place, newest modification time first, with ties
/// broken by descending name. Directories are removed recursively only when
/// `allow_dirs` is set; everything else goes through a plain file removal.
/// If the direct removal fails for any reason, the path is removed through
/// sudo instead.
///
/// # Errors
/// Propagates the first failure of the sudo fallback; entries removed before
/// that point stay removed.
async fn remove_old_backups(
    backups: &mut Vec<BackupArtifact>,
    keep: usize,
    allow_dirs: bool,
) -> Result<u64> {
    backups.sort_by(|lhs, rhs| match rhs.modified.cmp(&lhs.modified) {
        std::cmp::Ordering::Equal => rhs.name.cmp(&lhs.name),
        by_time => by_time,
    });

    let mut reclaimed = 0u64;
    for stale in backups.iter().skip(keep) {
        let recursive = stale.is_dir && allow_dirs;
        let direct = if recursive {
            tokio::fs::remove_dir_all(&stale.path).await
        } else {
            tokio::fs::remove_file(&stale.path).await
        };
        if direct.is_err() {
            // Unprivileged removal failed; retry with elevated rights.
            remove_path_with_sudo(&stale.path, recursive).await?;
        }
        reclaimed += stale.size;
    }
    Ok(reclaimed)
}
/// Remove `path` by running `rm` through `host_sudo`.
///
/// Uses `rm -rf` when `recursive` is set and `rm -f` otherwise.
///
/// # Errors
/// Fails when the sudo command cannot be run or exits with a non-zero status.
async fn remove_path_with_sudo(path: &Path, recursive: bool) -> Result<()> {
    let target = path.to_string_lossy();
    let flag = if recursive { "-rf" } else { "-f" };

    let status = host_sudo(&["rm", flag, target.as_ref()])
        .await
        .with_context(|| format!("removing {target} via sudo"))?;

    if status.success() {
        return Ok(());
    }
    anyhow::bail!("sudo rm {} {target} exited with {status}", flag);
}
/// One backup file or directory found by `named_backup_candidates`.
#[derive(Debug)]
struct BackupArtifact {
// Full path of the directory entry.
path: PathBuf,
// File name (lossy UTF-8); used as the tie-breaker when sorting by age.
name: String,
// Last-modified time; the scan falls back to the Unix epoch when it is unavailable.
modified: SystemTime,
// Size in bytes as computed by `path_size`, or the metadata length if that fails.
size: u64,
// Whether the entry's metadata reported a directory.
is_dir: bool,
}
/// List the regular files in `dir` named `archipelago.backup-<suffix>`.
///
/// The suffix must be non-empty and must not contain `/`. Directories are
/// never returned.
async fn backend_backup_candidates(dir: &Path) -> Result<Vec<BackupArtifact>> {
    let is_timestamped_backup = |name: &str| match name.strip_prefix("archipelago.backup-") {
        Some(suffix) => !(suffix.is_empty() || suffix.contains('/')),
        None => false,
    };
    named_backup_candidates(dir, is_timestamped_backup, false).await
}
/// Collect the entries of `dir` whose file name satisfies `matches_name`.
///
/// Regular files are always eligible; directories only when `allow_dirs` is
/// set. Any other entry type is skipped. A missing `dir` yields an empty list
/// rather than an error.
///
/// # Errors
/// Fails when `dir` cannot be read (other than not existing), or when
/// iterating entries or reading an entry's metadata fails.
async fn named_backup_candidates(
    dir: &Path,
    matches_name: impl Fn(&str) -> bool,
    allow_dirs: bool,
) -> Result<Vec<BackupArtifact>> {
    let mut reader = match tokio::fs::read_dir(dir).await {
        Ok(reader) => reader,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
        Err(err) => return Err(err).with_context(|| format!("reading {}", dir.display())),
    };

    let mut found = Vec::new();
    loop {
        let Some(entry) = reader.next_entry().await? else {
            break;
        };

        let raw_name = entry.file_name();
        let name = raw_name.to_string_lossy();
        if !matches_name(&name) {
            continue;
        }

        let meta = entry.metadata().await?;
        let is_dir = meta.is_dir();
        let eligible = meta.is_file() || (allow_dirs && is_dir);
        if !eligible {
            continue;
        }

        let path = entry.path();
        // Fall back to the raw metadata length when the size cannot be measured.
        let size = match path_size(&path, &meta).await {
            Ok(bytes) => bytes,
            Err(_) => meta.len(),
        };
        let modified = meta.modified().unwrap_or(SystemTime::UNIX_EPOCH);

        found.push(BackupArtifact {
            path,
            name: name.into_owned(),
            modified,
            size,
            is_dir,
        });
    }
    Ok(found)
}
async fn path_size(path: &Path, meta: &std::fs::Metadata) -> Result<u64> {
if meta.is_file() {
return Ok(meta.len());
}
if !meta.is_dir() {
return Ok(0);
}
let output = tokio::process::Command::new("du")
.args(["-sb", &path.to_string_lossy()])
.output()
.await
.with_context(|| format!("du -sb {}", path.display()))?;
if !output.status.success() {
anyhow::bail!("du -sb {} failed", path.display());
}
let stdout = String::from_utf8_lossy(&output.stdout);
stdout
.split_whitespace()
.next()
.ok_or_else(|| anyhow::anyhow!("du output missing size for {}", path.display()))?
.parse::<u64>()
.with_context(|| format!("parse du size for {}", path.display()))
}
pub(super) fn format_bytes(bytes: u64) -> String {
const KB: u64 = 1024;
const MB: u64 = KB * 1024;
@ -422,6 +624,103 @@ pub(super) fn format_bytes(bytes: u64) -> String {
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn backend_backup_cleanup_keeps_newest_files() {
let dir = tempfile::tempdir().unwrap();
for name in [
"archipelago.backup-20260501",
"archipelago.backup-20260502",
"archipelago.backup-20260503",
"archipelago.backup-20260504",
"archipelago.backup-20260505",
"archipelago.bak",
"archipelago",
] {
tokio::fs::write(dir.path().join(name), b"12345")
.await
.unwrap();
}
let freed = clean_backend_backups_in(dir.path(), 3).await.unwrap();
assert_eq!(freed, 10);
assert!(!dir.path().join("archipelago.backup-20260501").exists());
assert!(!dir.path().join("archipelago.backup-20260502").exists());
assert!(dir.path().join("archipelago.backup-20260503").exists());
assert!(dir.path().join("archipelago.backup-20260504").exists());
assert!(dir.path().join("archipelago.backup-20260505").exists());
assert!(dir.path().join("archipelago.bak").exists());
assert!(dir.path().join("archipelago").exists());
}
#[tokio::test]
async fn legacy_backend_backup_cleanup_keeps_newest_matching_files() {
let dir = tempfile::tempdir().unwrap();
for name in [
"archipelago.bak-1",
"archipelago.bak-2",
"archipelago.before-3",
"archipelago.backup-keep-separate",
"archipelago",
] {
tokio::fs::write(dir.path().join(name), b"12345")
.await
.unwrap();
}
let freed = clean_named_backups_in(
dir.path(),
1,
|name| name.starts_with("archipelago.bak") || name.starts_with("archipelago.before-"),
false,
)
.await
.unwrap();
assert_eq!(freed, 10);
assert_eq!(
[
"archipelago.bak-1",
"archipelago.bak-2",
"archipelago.before-3"
]
.into_iter()
.filter(|name| dir.path().join(name).exists())
.count(),
1
);
assert!(dir.path().join("archipelago.backup-keep-separate").exists());
assert!(dir.path().join("archipelago").exists());
}
#[test]
fn hostname_from_server_name_derives_linux_safe_hostname() {
assert_eq!(
handlers::hostname_from_server_name("My Archipelago Node"),
"my-archipelago-node"
);
assert_eq!(
handlers::hostname_from_server_name("Kitchen_Node!! 01"),
"kitchen-node-01"
);
assert_eq!(handlers::hostname_from_server_name("!!!"), "archipelago");
}
#[test]
fn parses_journal_disk_usage() {
assert_eq!(
parse_journal_disk_usage(
"Archived and active journals take up 463.9M in the file system."
),
Some(486_434_406)
);
}
}
/// Read temperatures from /sys/class/thermal/thermal_zone*/temp.
pub(super) async fn read_temperatures() -> Result<Vec<serde_json::Value>> {
let mut temps = Vec::new();

View File

@ -86,6 +86,11 @@ pub struct AuthManager {
data_dir: PathBuf,
}
/// Outcome of a successful web password change, including how the optional
/// SSH password update went.
///
/// The web password has already been written when this value is returned; a
/// failed SSH update is reported here instead of failing the whole operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChangePasswordOutcome {
    /// `true` only when an SSH password update was requested and succeeded.
    pub ssh_updated: bool,
    /// Error text from the SSH password update when it was requested but failed.
    pub ssh_error: Option<String>,
}
impl AuthManager {
pub fn new(data_dir: PathBuf) -> Self {
Self { data_dir }
@ -288,7 +293,7 @@ impl AuthManager {
current_password: &str,
new_password: &str,
also_change_ssh: bool,
) -> Result<()> {
) -> Result<ChangePasswordOutcome> {
if !self.verify_password(current_password).await? {
anyhow::bail!("Current password is incorrect");
}
@ -314,11 +319,21 @@ impl AuthManager {
let content = serde_json::to_string_pretty(&user)?;
fs::write(&user_file, content).await?;
let mut outcome = ChangePasswordOutcome {
ssh_updated: false,
ssh_error: None,
};
if also_change_ssh {
change_ssh_password(new_password).await?;
match change_ssh_password(new_password).await {
Ok(()) => outcome.ssh_updated = true,
Err(e) => {
tracing::warn!("Web password changed but SSH password update failed: {}", e);
outcome.ssh_error = Some(e.to_string());
}
}
}
Ok(())
Ok(outcome)
}
}
@ -485,6 +500,23 @@ mod tests {
assert!(validate_password_strength("MyP@ssw0rd!123").is_ok());
}
#[tokio::test]
async fn test_change_password_updates_web_password_without_ssh() {
let dir = tempfile::tempdir().unwrap();
let auth = AuthManager::new(dir.path().to_path_buf());
auth.setup_user("password123").await.unwrap();
let outcome = auth
.change_password("password123", "MyP@ssw0rd!123", false)
.await
.unwrap();
assert!(!outcome.ssh_updated);
assert!(outcome.ssh_error.is_none());
assert!(auth.verify_password("MyP@ssw0rd!123").await.unwrap());
assert!(!auth.verify_password("password123").await.unwrap());
}
#[test]
fn test_validate_password_strength_too_short() {
assert!(validate_password_strength("Ab1!").is_err());

View File

@ -13,7 +13,8 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tokio::sync::RwLock;
use tracing::{debug, warn};
const CACHE_REFRESH_SECS: u64 = 5;
const CACHE_REFRESH_SECS: u64 = 10;
const CACHE_ERROR_BACKOFF_SECS: u64 = 15;
#[derive(Debug, Clone, Serialize)]
pub struct BitcoinNodeStatus {
@ -65,6 +66,36 @@ fn transient_error(err_msg: &str) -> bool {
|| lower.contains("broken pipe")
|| lower.contains("eof")
|| lower.contains("500 internal server error")
|| lower.contains("503 service unavailable")
|| lower.contains("work queue depth exceeded")
|| lower.contains("decode bitcoin rpc json")
|| lower.contains("error decoding response body")
|| lower.contains("expected value at line 1 column 1")
}
/// Turn a raw Bitcoin RPC error into a user-facing status message.
///
/// Only the first line of `err_msg` is used, trimmed of surrounding whitespace
/// and trailing dots. The node state is chosen from case-insensitive keyword
/// matches on that line, checked in this order: block verification, refused
/// connection, timeout, and finally a generic "starting or busy syncing".
/// `has_cached_state` selects the wording for whether stale data is shown.
fn friendly_transient_error(has_cached_state: bool, err_msg: &str) -> String {
    let first_line = match err_msg.lines().next() {
        Some(line) => line,
        None => err_msg,
    };
    let detail = first_line.trim().trim_end_matches('.');

    let haystack = detail.to_lowercase();
    let mentions_any = |needles: &[&str]| needles.iter().any(|needle| haystack.contains(*needle));

    let state = match () {
        _ if mentions_any(&["verifying blocks"]) => "verifying blocks after restart",
        _ if mentions_any(&["connection refused", "tcp connect error"]) => {
            "waiting for the Bitcoin RPC listener"
        }
        _ if mentions_any(&["timed out", "timeout"]) => {
            "busy and not answering RPC before the timeout"
        }
        _ => "starting or busy syncing",
    };

    if !has_cached_state {
        return format!("Bitcoin node is {state}; retrying automatically. Detail: {detail}");
    }
    format!("Bitcoin node is {state}; showing last known state and retrying. Detail: {detail}")
}
pub fn spawn_status_cache() {
@ -72,6 +103,7 @@ pub fn spawn_status_cache() {
loop {
let fresh = fetch_bitcoin_status().await;
let mut cached = cache().write().await;
let mut sleep_secs = CACHE_REFRESH_SECS;
match fresh {
Ok(mut status) => {
status.ok = true;
@ -80,33 +112,31 @@ pub fn spawn_status_cache() {
*cached = status;
}
Err(e) => {
let err_msg = e.to_string();
let err_msg = format!("{e:#}");
if transient_error(&err_msg) {
debug!("Bitcoin status: transient RPC failure: {}", err_msg);
} else {
warn!("Bitcoin status: RPC failure: {}", err_msg);
}
sleep_secs = CACHE_ERROR_BACKOFF_SECS;
if cached.blockchain_info.is_some() {
cached.ok = false;
cached.stale = true;
cached.error = Some(format!(
"Bitcoin node is reconnecting; showing last known state: {}",
err_msg
));
cached.error = Some(friendly_transient_error(true, &err_msg));
} else {
*cached = BitcoinNodeStatus {
ok: false,
stale: false,
updated_at_ms: now_ms(),
error: Some(format!("Connecting to Bitcoin node: {}", err_msg)),
error: Some(friendly_transient_error(false, &err_msg)),
..BitcoinNodeStatus::default()
};
}
}
}
drop(cached);
tokio::time::sleep(Duration::from_secs(CACHE_REFRESH_SECS)).await;
tokio::time::sleep(Duration::from_secs(sleep_secs)).await;
}
});
}
@ -117,7 +147,7 @@ pub async fn get_bitcoin_status() -> BitcoinNodeStatus {
async fn fetch_bitcoin_status() -> Result<BitcoinNodeStatus> {
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(8))
.timeout(Duration::from_secs(20))
.build()
.context("build Bitcoin status HTTP client")?;
@ -183,3 +213,40 @@ async fn bitcoin_rpc_call(
.cloned()
.context("missing Bitcoin RPC result")
}
#[cfg(test)]
mod tests {
use super::friendly_transient_error;
#[test]
fn explains_verifying_blocks_without_generic_timeout_copy() {
let msg = friendly_transient_error(
false,
r#"getblockchaininfo: Bitcoin RPC returned 500 Internal Server Error: {"error":{"code":-28,"message":"Verifying blocks..."}}"#,
);
assert!(msg.contains("verifying blocks after restart"));
assert!(msg.contains("retrying automatically"));
}
#[test]
fn explains_missing_rpc_listener() {
let msg = friendly_transient_error(
true,
"getblockchaininfo: tcp connect error: Connection refused (os error 111)",
);
assert!(msg.contains("waiting for the Bitcoin RPC listener"));
assert!(msg.contains("showing last known state"));
}
#[test]
fn explains_rpc_timeout() {
let msg = friendly_transient_error(
false,
"getblockchaininfo: Bitcoin RPC request failed: operation timed out",
);
assert!(msg.contains("busy and not answering RPC before the timeout"));
}
}

View File

@ -23,5 +23,15 @@ server {
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
add_header Cache-Control "no-store";
}
# Proxy requests under /rpc/v1 to the local backend on 127.0.0.1:5678.
location /rpc/v1 {
proxy_pass http://127.0.0.1:5678/rpc/v1;
proxy_http_version 1.1;
proxy_set_header Host $host;
# Pass the client's Cookie and X-CSRF-Token request headers to the upstream.
proxy_set_header Cookie $http_cookie;
proxy_set_header X-CSRF-Token $http_x_csrf_token;
# Expose the original client address to the upstream.
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# Instruct clients not to store responses from this location.
add_header Cache-Control "no-store";
}
location / { try_files $uri $uri/ /index.html; }
}

View File

@ -34,6 +34,7 @@ pub struct BootReconciler {
/// `systemctl --user` and `podman`, which both block real time
/// and would race the paused-clock test fixtures.
companion_stage: bool,
wait_for_recovery: bool,
}
impl BootReconciler {
@ -47,6 +48,7 @@ impl BootReconciler {
interval,
shutdown,
companion_stage: true,
wait_for_recovery: true,
}
}
@ -56,6 +58,7 @@ impl BootReconciler {
#[cfg(test)]
pub fn without_companion_stage(mut self) -> Self {
self.companion_stage = false;
self.wait_for_recovery = false;
self
}
@ -78,6 +81,21 @@ impl BootReconciler {
/// by the orchestrator, and companion failures are logged but never
/// propagated.
pub async fn run_forever(self) {
let wait_start = Instant::now();
while self.wait_for_recovery && !crate::crash_recovery::is_recovery_complete() {
if wait_start.elapsed() > Duration::from_secs(1800) {
tracing::warn!("boot reconciler: boot recovery did not complete within 30 minutes, starting anyway");
break;
}
tokio::select! {
_ = time::sleep(Duration::from_secs(5)) => {}
_ = self.shutdown.notified() => {
tracing::info!("boot reconciler: shutdown requested before recovery completed");
return;
}
}
}
// Initial pass: no delay.
self.tick().await;
@ -244,58 +262,65 @@ mod tests {
ProdContainerOrchestrator::with_runtime(rt, PathBuf::from("/nonexistent-for-tests"));
let tmp = tempfile::tempdir().unwrap().keep();
orch.set_data_dir(tmp);
orch.set_disk_gb_for_test(2_000);
let orch = Arc::new(orch);
orch.insert_manifest_for_test(
pull_manifest("bitcoin-knots", "docker.io/bitcoin/knots:28"),
PathBuf::from("/tmp/bk"),
pull_manifest("test-app", "docker.io/example/test-app:1"),
PathBuf::from("/tmp/test-app"),
)
.await;
orch
}
#[tokio::test(start_paused = true)]
/// Poll `rt` until it has recorded at least `expected` status calls.
///
/// Checks up to 100 times, yielding and sleeping 1 ms between checks, and
/// returns the count observed when the target was reached, or the final count
/// if it never was.
async fn wait_for_status_calls(rt: &CountingRuntime, expected: u32) -> u32 {
    let mut attempts = 0;
    while attempts < 100 {
        let seen = rt.status_call_count();
        if seen >= expected {
            return seen;
        }
        // Give the spawned reconciler a chance to make progress.
        tokio::task::yield_now().await;
        tokio::time::sleep(Duration::from_millis(1)).await;
        attempts += 1;
    }
    rt.status_call_count()
}
#[tokio::test]
async fn initial_pass_fires_immediately() {
let rt = Arc::new(CountingRuntime::new_with(&["bitcoin-knots"]));
let rt = Arc::new(CountingRuntime::new_with(&["test-app"]));
let orch = orch_with_one_running_manifest(rt.clone()).await;
let shutdown = Arc::new(Notify::new());
let reconciler =
BootReconciler::new(orch.clone(), Duration::from_secs(30), shutdown.clone())
BootReconciler::new(orch.clone(), Duration::from_millis(50), shutdown.clone())
.without_companion_stage();
let handle = tokio::spawn(reconciler.run_forever());
// Yield so the spawned task gets CPU to run its initial reconcile.
tokio::task::yield_now().await;
tokio::task::yield_now().await;
// We expect exactly one reconcile pass to have run by now (the initial),
// NOT a second one (the 30s sleep hasn't elapsed in paused time).
assert_eq!(rt.status_call_count(), 1, "initial pass should fire once");
assert_eq!(
wait_for_status_calls(&rt, 1).await,
1,
"initial pass should fire once"
);
shutdown.notify_one();
// Under paused clock the select! is blocked on sleep_until; the notify
// will unblock it. Advance wall-clock a hair so the notify gets polled.
tokio::task::yield_now().await;
let _ = tokio::time::timeout(Duration::from_secs(1), handle).await;
}
#[tokio::test(start_paused = true)]
#[tokio::test]
async fn second_pass_fires_after_interval() {
let rt = Arc::new(CountingRuntime::new_with(&["bitcoin-knots"]));
let rt = Arc::new(CountingRuntime::new_with(&["test-app"]));
let orch = orch_with_one_running_manifest(rt.clone()).await;
let shutdown = Arc::new(Notify::new());
let reconciler =
BootReconciler::new(orch.clone(), Duration::from_secs(30), shutdown.clone())
BootReconciler::new(orch.clone(), Duration::from_millis(10), shutdown.clone())
.without_companion_stage();
let handle = tokio::spawn(reconciler.run_forever());
tokio::task::yield_now().await;
tokio::task::yield_now().await;
assert_eq!(rt.status_call_count(), 1);
assert_eq!(wait_for_status_calls(&rt, 1).await, 1);
// Fast-forward past one interval; the sleep_until should fire.
tokio::time::advance(Duration::from_secs(31)).await;
tokio::task::yield_now().await;
tokio::task::yield_now().await;
tokio::time::sleep(Duration::from_millis(20)).await;
wait_for_status_calls(&rt, 2).await;
assert_eq!(
rt.status_call_count(),
@ -308,27 +333,23 @@ mod tests {
let _ = tokio::time::timeout(Duration::from_secs(1), handle).await;
}
#[tokio::test(start_paused = true)]
#[tokio::test]
async fn shutdown_terminates_loop() {
let rt = Arc::new(CountingRuntime::new_with(&["bitcoin-knots"]));
let rt = Arc::new(CountingRuntime::new_with(&["test-app"]));
let orch = orch_with_one_running_manifest(rt.clone()).await;
let shutdown = Arc::new(Notify::new());
let reconciler =
BootReconciler::new(orch.clone(), Duration::from_secs(30), shutdown.clone())
BootReconciler::new(orch.clone(), Duration::from_millis(50), shutdown.clone())
.without_companion_stage();
let handle = tokio::spawn(reconciler.run_forever());
tokio::task::yield_now().await;
tokio::task::yield_now().await;
wait_for_status_calls(&rt, 1).await;
shutdown.notify_one();
// The select! should wake on Notified and return. Use a real timeout
// with advancing the paused clock to make sure the task exits.
tokio::time::advance(Duration::from_millis(10)).await;
let result = tokio::time::timeout(Duration::from_secs(5), handle).await;
assert!(result.is_ok(), "reconciler did not exit after shutdown");
}
#[tokio::test(start_paused = true)]
#[tokio::test]
async fn failure_in_one_pass_does_not_stop_loop() {
// Manifest references a container the runtime does not have AND
// cannot create (no install path — install_fresh will also fail to
@ -344,26 +365,23 @@ mod tests {
);
let tmp = tempfile::tempdir().unwrap().keep();
orch.set_data_dir(tmp);
orch.set_disk_gb_for_test(2_000);
let orch = Arc::new(orch);
orch.insert_manifest_for_test(
pull_manifest("bitcoin-knots", "docker.io/bitcoin/knots:28"),
PathBuf::from("/tmp/bk"),
pull_manifest("test-app", "docker.io/example/test-app:1"),
PathBuf::from("/tmp/test-app"),
)
.await;
let shutdown = Arc::new(Notify::new());
let reconciler =
BootReconciler::new(orch.clone(), Duration::from_secs(30), shutdown.clone())
BootReconciler::new(orch.clone(), Duration::from_millis(10), shutdown.clone())
.without_companion_stage();
let handle = tokio::spawn(reconciler.run_forever());
tokio::task::yield_now().await;
tokio::task::yield_now().await;
let first = rt.status_call_count();
let first = wait_for_status_calls(&rt, 1).await;
assert!(first >= 1, "initial pass should have touched the runtime");
// Advance one interval — second pass should fire regardless of what
// the first pass did.
tokio::time::advance(Duration::from_secs(31)).await;
tokio::time::sleep(Duration::from_millis(20)).await;
tokio::task::yield_now().await;
tokio::task::yield_now().await;
let second = rt.status_call_count();
@ -373,7 +391,6 @@ mod tests {
);
shutdown.notify_one();
tokio::time::advance(Duration::from_millis(10)).await;
let _ = tokio::time::timeout(Duration::from_secs(5), handle).await;
}
}

View File

@ -9,6 +9,7 @@
//! | bitcoin-core | archy-bitcoin-ui | RPC viewer |
//! | lnd | archy-lnd-ui | wallet/channel UI |
//! | electrumx | archy-electrs-ui | indexer status UI |
//! | fedimint | archy-fedimint-ui | wait/proxy Guardian UI |
//!
//! Lifecycle: `install` writes a Quadlet `.container` unit to
//! `~/.config/containers/systemd/`, daemon-reloads, then starts the
@ -22,6 +23,7 @@
use anyhow::{Context, Result};
use std::path::PathBuf;
use std::time::Duration;
use tokio::fs;
use tokio::process::Command;
use tracing::{info, warn};
@ -30,6 +32,9 @@ use crate::container::quadlet::{self, BindMount, NetworkMode, QuadletUnit};
use archipelago_container::image_uses_insecure_registry;
const COMPANION_REGISTRY: &str = "146.59.87.168:3000/lfg2025";
const COMPANION_IMAGE_CHECK_TIMEOUT: Duration = Duration::from_secs(15);
const COMPANION_BUILD_TIMEOUT: Duration = Duration::from_secs(900);
const COMPANION_PULL_TIMEOUT: Duration = Duration::from_secs(300);
/// Static description of one companion. The full list per backend
/// app_id lives in `companions_for`.
@ -65,6 +70,7 @@ pub fn companions_for(package_id: &str) -> &'static [CompanionSpec] {
"bitcoin" | "bitcoin-core" | "bitcoin-knots" => BITCOIN_UI,
"lnd" => LND_UI,
"electrumx" | "electrs" | "mempool-electrs" => ELECTRS_UI,
"fedimint" | "fedimintd" => FEDIMINT_UI,
_ => &[],
}
}
@ -114,6 +120,20 @@ const ELECTRS_UI: &[CompanionSpec] = &[CompanionSpec {
host_network: true,
}];
/// Companion list for the fedimint backend: a single `archy-fedimint-ui` unit.
const FEDIMINT_UI: &[CompanionSpec] = &[CompanionSpec {
name: "archy-fedimint-ui",
image_base: "fedimint-ui",
// NOTE(review): presumably probed in order for a local build context — confirm in `ensure_image_present`.
build_dir_candidates: &[
"/opt/archipelago/docker/fedimint-ui",
"/home/archipelago/archy/docker/fedimint-ui",
"/home/archipelago/Projects/archy/docker/fedimint-ui",
],
// No pre-start hook, bind mounts or published ports are configured for this companion.
pre_start: None,
bind_mounts: &[],
ports: &[],
// Runs on the host network rather than a bridge with mapped ports.
host_network: true,
}];
fn render_bitcoin_ui() -> futures_util::future::BoxFuture<'static, Result<()>> {
Box::pin(async {
let paths = crate::container::bitcoin_ui::RenderPaths::default();
@ -201,11 +221,12 @@ async fn ensure_image_present(spec: &CompanionSpec) -> Result<String> {
return Ok(local_image);
}
info!(companion = spec.name, "building locally from {dir}");
let out = Command::new("podman")
.args(["build", "-t", &local_image, dir])
.output()
.await
.context("spawn podman build")?;
let out = command_output_with_timeout(
Command::new("podman").args(["build", "-t", &local_image, dir]),
COMPANION_BUILD_TIMEOUT,
"podman build companion image",
)
.await?;
if out.status.success() {
return Ok(local_image);
}
@ -226,7 +247,12 @@ async fn ensure_image_present(spec: &CompanionSpec) -> Result<String> {
cmd.arg("--tls-verify=false");
}
cmd.arg(&registry_image);
let out = cmd.output().await.context("spawn podman pull")?;
let out = command_output_with_timeout(
&mut cmd,
COMPANION_PULL_TIMEOUT,
"podman pull companion image",
)
.await?;
if !out.status.success() {
anyhow::bail!(
"no local Dockerfile and registry pull failed for {}: {}",
@ -238,11 +264,31 @@ async fn ensure_image_present(spec: &CompanionSpec) -> Result<String> {
}
async fn image_exists(image: &str) -> bool {
Command::new("podman")
.args(["image", "exists", image])
.status()
let mut cmd = Command::new("podman");
cmd.args(["image", "inspect", image]);
match tokio::time::timeout(COMPANION_IMAGE_CHECK_TIMEOUT, cmd.status()).await {
Ok(Ok(status)) => status.success(),
Ok(Err(err)) => {
warn!(image = %image, error = %err, "companion image existence check failed");
false
}
Err(_) => {
warn!(image = %image, "companion image existence check timed out");
false
}
}
}
async fn command_output_with_timeout(
cmd: &mut Command,
timeout: Duration,
description: &str,
) -> Result<std::process::Output> {
cmd.kill_on_drop(true);
tokio::time::timeout(timeout, cmd.output())
.await
.is_ok_and(|status| status.success())
.with_context(|| format!("{description} timed out after {}s", timeout.as_secs()))?
.with_context(|| format!("spawn {description}"))
}
fn build_unit(spec: &CompanionSpec, image: &str) -> QuadletUnit {
@ -368,6 +414,8 @@ mod tests {
assert_eq!(companions_for("electrumx").len(), 1);
assert_eq!(companions_for("electrs").len(), 1);
assert_eq!(companions_for("mempool-electrs").len(), 1);
assert_eq!(companions_for("fedimint").len(), 1);
assert_eq!(companions_for("fedimintd").len(), 1);
assert_eq!(companions_for("nextcloud").len(), 0);
assert_eq!(companions_for("not-a-real-app").len(), 0);
}
@ -398,4 +446,13 @@ mod tests {
assert!(matches!(u.network, NetworkMode::Bridge(ref n) if n == "bridge"));
assert_eq!(u.ports, vec![(18083, 80, "tcp".into())]);
}
#[test]
fn fedimint_ui_uses_host_network_for_public_guardian_port() {
let spec = &FEDIMINT_UI[0];
let u = build_unit(spec, "localhost/fedimint-ui:latest");
assert_eq!(u.name, "archy-fedimint-ui");
assert!(matches!(u.network, NetworkMode::Host));
assert!(u.ports.is_empty());
}
}

View File

@ -26,13 +26,7 @@ impl DockerPackageScanner {
/// Scan Docker containers and convert to package data
pub async fn scan_containers(&self) -> Result<HashMap<String, PackageDataEntry>> {
let containers = match self.runtime.list_containers().await {
Ok(c) => c,
Err(e) => {
debug!("Failed to list containers: {}", e);
return Ok(HashMap::new());
}
};
let containers = self.runtime.list_containers().await?;
debug!("Found {} containers", containers.len());
@ -63,14 +57,6 @@ impl DockerPackageScanner {
"indeedhub-build_ffmpeg-worker_1",
"netbird-server",
"netbird-dashboard",
"saleor-api",
"saleor-worker",
"saleor-db",
"saleor-cache",
"saleor-jaeger",
"saleor-mailpit",
"saleor-storefront",
"saleor-storefront-app",
"buildx_buildkit_default",
];
@ -298,7 +284,6 @@ fn get_app_tier(app_id: &str) -> &'static str {
"uptime-kuma" => "recommended",
"grafana" => "recommended",
"searxng" => "recommended",
"saleor" => "recommended",
"tailscale" | "netbird" => "recommended",
"portainer" => "recommended",
// Optional: everything else
@ -519,13 +504,6 @@ fn get_app_metadata(app_id: &str) -> AppMetadata {
repo: "https://github.com/netbirdio/netbird".to_string(),
tier: "",
},
"saleor" => AppMetadata {
title: "Saleor".to_string(),
description: "Composable commerce platform with storefront, dashboard, and GraphQL API. The customer storefront opens on port 9011; admin dashboard is on 9010 with admin@example.com credentials stored on the node.".to_string(),
icon: "/assets/img/app-icons/saleor.svg".to_string(),
repo: "https://github.com/saleor/saleor".to_string(),
tier: "",
},
"gitea" => AppMetadata {
title: "Gitea".to_string(),
description: "Self-hosted Git service with repository and package hosting".to_string(),
@ -732,18 +710,23 @@ async fn reachable_lan_address(app_id: &str, candidate: Option<String>) -> Optio
let Some(port) = url.rsplit(':').next().and_then(|p| p.parse::<u16>().ok()) else {
return None;
};
match tokio::time::timeout(
std::time::Duration::from_secs(2),
tokio::net::TcpStream::connect(("127.0.0.1", port)),
)
.await
{
Ok(Ok(_)) => Some(url),
_ => {
if launch_port_reachable(port).await {
Some(url)
} else {
debug!(app_id = %app_id, port, "suppressing unreachable launch URL");
None
}
}
/// Report whether a TCP connection to `127.0.0.1:port` succeeds within two seconds.
///
/// Both a connection error and a timeout count as "not reachable".
async fn launch_port_reachable(port: u16) -> bool {
    let window = std::time::Duration::from_secs(2);
    let probe = tokio::net::TcpStream::connect(("127.0.0.1", port));
    match tokio::time::timeout(window, probe).await {
        Ok(connect_result) => connect_result.is_ok(),
        Err(_elapsed) => false,
    }
}
fn requires_reachable_launch(app_id: &str) -> bool {
@ -766,7 +749,6 @@ fn requires_reachable_launch(app_id: &str) -> bool {
| "tailscale"
| "immich"
| "searxng"
| "saleor"
)
}

View File

@ -8,6 +8,8 @@ use anyhow::{Context, Result};
use std::path::PathBuf;
use tokio::fs;
use crate::update::host_sudo;
pub const DEFAULT_SRV_ROOT: &str = "/var/lib/archipelago/filebrowser";
pub const DEFAULT_DATA_DIR: &str = "/var/lib/archipelago/filebrowser-data";
pub const DEFAULT_CONFIG_PATH: &str = "/var/lib/archipelago/filebrowser-data/.filebrowser.json";
@ -39,17 +41,11 @@ pub enum EnsureOutcome {
}
pub async fn ensure_config(paths: &EnsurePaths) -> Result<EnsureOutcome> {
fs::create_dir_all(&paths.srv_root)
.await
.with_context(|| format!("creating {}", paths.srv_root.display()))?;
fs::create_dir_all(&paths.data_dir)
.await
.with_context(|| format!("creating {}", paths.data_dir.display()))?;
create_dir_all_or_sudo(&paths.srv_root).await?;
create_dir_all_or_sudo(&paths.data_dir).await?;
for d in ["Documents", "Photos", "Music", "Downloads", "Builds"] {
fs::create_dir_all(paths.srv_root.join(d))
.await
.with_context(|| format!("creating {}/{}", paths.srv_root.display(), d))?;
create_dir_all_or_sudo(&paths.srv_root.join(d)).await?;
}
if paths.config_path.exists() {
@ -60,14 +56,34 @@ pub async fn ensure_config(paths: &EnsurePaths) -> Result<EnsureOutcome> {
.config_path
.parent()
.ok_or_else(|| anyhow::anyhow!("config_path has no parent directory"))?;
fs::create_dir_all(parent)
.await
.with_context(|| format!("creating {}", parent.display()))?;
create_dir_all_or_sudo(parent).await?;
let tmp = paths.config_path.with_extension("tmp");
fs::write(&tmp, DEFAULT_CONFIG_JSON)
write_config_atomically(paths).await?;
Ok(EnsureOutcome::Written)
}
async fn create_dir_all_or_sudo(path: &std::path::Path) -> Result<()> {
match fs::create_dir_all(path).await {
Ok(()) => Ok(()),
Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
let path = path.to_string_lossy();
let status = host_sudo(&["mkdir", "-p", &path])
.await
.with_context(|| format!("writing tmp {}", tmp.display()))?;
.with_context(|| format!("creating {path} via sudo"))?;
if !status.success() {
anyhow::bail!("mkdir -p {path} via sudo exited with {status}");
}
Ok(())
}
Err(e) => Err(e).with_context(|| format!("creating {}", path.display())),
}
}
async fn write_config_atomically(paths: &EnsurePaths) -> Result<()> {
let tmp = paths.config_path.with_extension("tmp");
match fs::write(&tmp, DEFAULT_CONFIG_JSON).await {
Ok(()) => {
fs::rename(&tmp, &paths.config_path)
.await
.with_context(|| {
@ -77,8 +93,28 @@ pub async fn ensure_config(paths: &EnsurePaths) -> Result<EnsureOutcome> {
paths.config_path.display()
)
})?;
Ok(())
}
Err(e) if e.kind() == std::io::ErrorKind::PermissionDenied => {
let script = format!(
"set -eu\ncat > '{}' <<'FILEBROWSERCONF'\n{}FILEBROWSERCONF\n",
shell_quote(&paths.config_path.to_string_lossy()),
DEFAULT_CONFIG_JSON
);
let status = host_sudo(&["sh", "-lc", &script])
.await
.context("writing .filebrowser.json via sudo")?;
if !status.success() {
anyhow::bail!("writing .filebrowser.json via sudo exited with {status}");
}
Ok(())
}
Err(e) => Err(e).with_context(|| format!("writing tmp {}", tmp.display())),
}
}
Ok(EnsureOutcome::Written)
/// Escape `s` so it can be embedded between a pair of single quotes in a
/// shell script: every `'` becomes `'\''`. The surrounding quotes are NOT
/// added here; the caller supplies them.
fn shell_quote(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '\'' {
            escaped.push_str("'\\''");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
#[cfg(test)]

View File

@ -219,6 +219,10 @@ pub fn pinned_image_for_app(app_id: &str) -> Option<String> {
/// explicit versions we should advertise to users as available updates.
pub fn available_update_for_app(app_id: &str, running_image: &str) -> Option<String> {
    // No pinned image for this app means there is nothing to compare against.
    pinned_image_for_app(app_id)
        .and_then(|pinned| available_update_for_images(&pinned, running_image))
}
fn available_update_for_images(pinned: &str, running_image: &str) -> Option<String> {
let pinned_version = extract_version_from_image(&pinned);
if is_floating_tag(&pinned_version) {
return None;
@ -378,6 +382,28 @@ mod tests {
assert!(!is_floating_tag("v0.18.4-beta"));
}
#[test]
fn available_update_ignores_registry_only_changes() {
assert_eq!(
available_update_for_images(
"146.59.87.168:3000/lfg2025/nextcloud:29",
"git.tx1138.com/lfg2025/nextcloud:29",
),
None
);
}
#[test]
fn available_update_returns_pinned_version_for_same_repo_newer_tag() {
assert_eq!(
available_update_for_images(
"146.59.87.168:3000/lfg2025/nextcloud:29",
"146.59.87.168:3000/lfg2025/nextcloud:28",
),
Some("29".to_string())
);
}
#[test]
fn test_parse_image_versions() {
let content = r#"

View File

@ -76,7 +76,7 @@ pub async fn ensure_wallet_initialized() -> Result<()> {
let admin_macaroon = "/var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/admin.macaroon";
let wallet_db = "/var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/wallet.db";
if file_exists_as_root(wallet_db).await {
if file_exists_as_root(admin_macaroon).await && lnd_getinfo_ready(admin_macaroon).await {
if file_exists_as_root(admin_macaroon).await {
return Ok(());
}
unlock_existing_wallet().await?;
@ -305,6 +305,7 @@ async fn decode_lnd_unlocker_response<T: for<'de> Deserialize<'de>>(
anyhow::bail!("LND REST {path} returned {status}: {text}")
}
#[allow(dead_code)]
async fn lnd_getinfo_ready(admin_macaroon: &str) -> bool {
let Ok(macaroon) = read_file_as_root(admin_macaroon).await else {
return false;

File diff suppressed because it is too large Load Diff

View File

@ -34,9 +34,13 @@ use anyhow::{anyhow, Context, Result};
use archipelago_container::AppManifest;
use std::fmt::Write as _;
use std::path::{Path, PathBuf};
use std::time::Duration;
use tokio::fs;
use tokio::process::Command;
const QUADLET_START_TIMEOUT: Duration = Duration::from_secs(90);
const QUADLET_STOP_TIMEOUT: Duration = Duration::from_secs(45);
/// Default rootless quadlet directory. Resolved per-user at runtime via
/// `unit_dir()`. Tests pass an explicit dir.
pub const DEFAULT_REL_UNIT_DIR: &str = ".config/containers/systemd";
@ -61,6 +65,12 @@ pub enum NetworkMode {
/// attached to it. The network must already exist (orchestrator's
/// `ensure_container_network` handles that on every reconcile tick).
Bridge(String),
/// Rootless slirp4netns networking. Podman rejects network aliases with
/// this mode, so render only Network=slirp4netns.
Slirp4netns,
/// Rootless pasta networking. This is more reliable than slirp4netns for
/// host port forwarding on long-running web apps.
Pasta,
}
/// systemd Restart= policy for the generated `.service` unit. Companions
@ -181,6 +191,12 @@ impl QuadletUnit {
NetworkMode::Host => {
let _ = writeln!(s, "Network=host");
}
NetworkMode::Slirp4netns => {
let _ = writeln!(s, "Network=slirp4netns");
}
NetworkMode::Pasta => {
let _ = writeln!(s, "Network=pasta");
}
NetworkMode::Bridge(net) => {
let _ = writeln!(s, "Network={net}");
for alias in &self.network_aliases {
@ -261,6 +277,13 @@ impl QuadletUnit {
}
let _ = writeln!(s);
let _ = writeln!(s, "[Service]");
// Dependency-gated apps may legitimately keep their container entrypoint
// in a wait loop before the actual daemon binds ports. Fedimint waits
// for Bitcoin IBD to finish before execing fedimintd; systemd's default
// start timeout otherwise kills the generated podman run job and leaves
// the unit stuck in deactivating. Health/status remains app-level state,
// not a systemd start gate.
let _ = writeln!(s, "TimeoutStartSec=0");
// Restart policy + 10s backoff. RestartSec keeps a crash-loop
// from saturating the journal. Companions: Always. Backends:
// OnFailure (clean stops stay stopped).
@ -334,6 +357,8 @@ impl QuadletUnit {
// either form.
other if !other.is_empty() && other != "isolated" => NetworkMode::Bridge(other.into()),
_ => match app.container.network.as_deref() {
Some("slirp4netns") => NetworkMode::Slirp4netns,
Some("pasta") => NetworkMode::Pasta,
Some(n) if !n.is_empty() && n != "host" => NetworkMode::Bridge(n.into()),
_ => NetworkMode::Default,
},
@ -382,7 +407,7 @@ impl QuadletUnit {
entrypoint: app.container.entrypoint.clone(),
command: app.container.custom_args.clone(),
read_only_root: app.security.readonly_root,
no_new_privileges: true,
no_new_privileges: app.security.no_new_privileges,
cpu_quota: app.resources.cpu_limit,
restart_policy: RestartPolicy::OnFailure,
}
@ -436,13 +461,14 @@ fn translate_health_check(hc: &archipelago_container::HealthCheck) -> Option<Hea
let path = hc.path.as_deref().unwrap_or("/");
format!("{url}{path}")
};
let helper_timeout = health_timeout_seconds(&hc.timeout);
// Images vary wildly: SearXNG ships wget but no curl, while some
// Node images ship neither. Use whichever probe helper exists and
// skip Podman health if the image has none; host-side lifecycle
// probes still verify reachability.
format!(
"if command -v wget >/dev/null 2>&1; then wget -q -T 5 -O /dev/null {0}; elif command -v curl >/dev/null 2>&1; then curl -fsS -m 5 {0}; else exit 0; fi",
final_url
"if command -v wget >/dev/null 2>&1; then wget -q -T {1} -O /dev/null {0}; elif command -v curl >/dev/null 2>&1; then curl -fsS -m {1} {0}; else exit 0; fi",
final_url, helper_timeout
)
}
"cmd" => hc.endpoint.as_deref()?.to_string(),
@ -456,6 +482,29 @@ fn translate_health_check(hc: &archipelago_container::HealthCheck) -> Option<Hea
})
}
/// Convert a manifest health-check timeout string into whole seconds.
///
/// Accepted forms are a bare non-negative integer (seconds) or an integer
/// followed by a single `s`, `m` or `h` suffix (either case). Anything else
/// falls back to 5 seconds: empty input, unknown suffixes, unparsable
/// numbers, a zero result, or a multiplication overflow.
fn health_timeout_seconds(raw: &str) -> u64 {
    const FALLBACK_SECS: u64 = 5;

    let spec = raw.trim();
    let Some(unit) = spec.chars().last() else {
        return FALLBACK_SECS;
    };

    let (digits, scale): (&str, u64) = if unit.is_ascii_digit() {
        // No suffix at all: the whole string is a second count.
        (spec, 1)
    } else {
        let scale = match unit.to_ascii_lowercase() {
            's' => 1,
            'm' => 60,
            'h' => 3600,
            _ => return FALLBACK_SECS,
        };
        // The suffix is a single ASCII byte here, so this slice is on a
        // character boundary.
        (&spec[..spec.len() - unit.len_utf8()], scale)
    };

    match digits.trim().parse::<u64>() {
        Ok(count) => match count.checked_mul(scale) {
            Some(total) if total > 0 => total,
            _ => FALLBACK_SECS,
        },
        Err(_) => FALLBACK_SECS,
    }
}
/// Parse the manifest's memory_limit string into MiB. Recognises the
/// forms our manifests actually use: "<n>", "<n>m"/"<n>M", "<n>g"/"<n>G".
/// Returns None for anything else; the caller treats None as unlimited.
@ -532,12 +581,21 @@ pub async fn enable_now(service: &str) -> Result<()> {
// .service file lives under /run, not /etc — `enable` would refuse
// ("transient or generated"). The unit's `[Install] WantedBy` is
// honoured at daemon-reload, so we just start it.
let status = Command::new("systemctl")
.args(["--user", "start", service])
.status()
let status = systemctl_user_status(&["start", service], QUADLET_START_TIMEOUT)
.await
.with_context(|| format!("spawn systemctl --user start {service}"))?;
.with_context(|| format!("systemctl --user start {service}"))?;
if !status.success() {
if wait_not_deactivating(service, Duration::from_secs(30)).await {
let retry = systemctl_user_status(&["start", service], QUADLET_START_TIMEOUT)
.await
.with_context(|| format!("retry systemctl --user start {service}"))?;
if retry.success() {
return Ok(());
}
return Err(anyhow!(
"systemctl --user start {service} exited {status}; retry exited {retry}"
));
}
return Err(anyhow!("systemctl --user start {service} exited {status}"));
}
Ok(())
@ -545,31 +603,111 @@ pub async fn enable_now(service: &str) -> Result<()> {
/// Restart a generated Quadlet service after rewriting a known-bad unit.
pub async fn restart_service(service: &str) -> Result<()> {
let status = Command::new("systemctl")
.args(["--user", "restart", service])
.status()
.await
.with_context(|| format!("spawn systemctl --user restart {service}"))?;
if !status.success() {
// `systemctl restart` hides the stop phase. On rootless Podman nodes a
// generated unit can sit in deactivating while `podman rm -f` hangs, which
// makes RPC/UI state look frozen. Split restart into bounded stop + start
// so stop timeouts can be recovered with an app-scoped kill/reset.
if let Err(err) = stop_service(service).await {
tracing::warn!(
service = %service,
error = %err,
"quadlet stop failed during restart; waiting for unit to settle before start"
);
}
if !wait_not_deactivating(service, Duration::from_secs(120)).await {
return Err(anyhow!(
"systemctl --user restart {service} exited {status}"
"systemctl --user restart {service} could not leave deactivating state"
));
}
Ok(())
enable_now(service).await
}
/// Stop a generated Quadlet service without removing its unit file.
pub async fn stop_service(service: &str) -> Result<()> {
let status = Command::new("systemctl")
.args(["--user", "stop", service])
.status()
.await
.with_context(|| format!("spawn systemctl --user stop {service}"))?;
if !status.success() {
return Err(anyhow!("systemctl --user stop {service} exited {status}"));
match systemctl_user_status(&["stop", service], QUADLET_STOP_TIMEOUT).await {
Ok(status) if status.success() => Ok(()),
Ok(status) => Err(anyhow!("systemctl --user stop {service} exited {status}")),
Err(err) => {
tracing::warn!(
service = %service,
error = %err,
"quadlet stop timed out/failed; killing app-scoped unit"
);
kill_and_reset_service(service).await?;
if !wait_not_deactivating(service, Duration::from_secs(60)).await {
return Err(anyhow!(
"systemctl --user stop {service} remained deactivating after app-scoped kill"
));
}
Ok(())
}
}
}
async fn systemctl_user_status(
args: &[&str],
timeout: Duration,
) -> Result<std::process::ExitStatus> {
let mut cmd = Command::new("systemctl");
cmd.arg("--user").args(args);
cmd.kill_on_drop(true);
tokio::time::timeout(timeout, cmd.status())
.await
.with_context(|| {
format!(
"systemctl --user {} timed out after {}s",
args.join(" "),
timeout.as_secs()
)
})?
.with_context(|| format!("spawn systemctl --user {}", args.join(" ")))
}
/// Best-effort hard stop for one user unit: send SIGKILL to every process of
/// the unit, pause two seconds, then clear its failed state.
///
/// Both systemctl calls are bounded to 15 seconds and their outcomes are
/// deliberately ignored, so this currently always returns `Ok(())`.
async fn kill_and_reset_service(service: &str) -> Result<()> {
    let step_timeout = Duration::from_secs(15);

    let kill_args = ["kill", "--kill-whom=all", "-s", "SIGKILL", service];
    let _ = systemctl_user_status(&kill_args, step_timeout).await;

    tokio::time::sleep(Duration::from_secs(2)).await;

    let reset_args = ["reset-failed", service];
    let _ = systemctl_user_status(&reset_args, step_timeout).await;

    Ok(())
}
/// Poll `systemctl --user is-active <service>` every two seconds until the
/// unit reports a state other than `deactivating` or `activating`.
///
/// Returns `true` once the unit has left those transitional states, and also
/// when the probe itself fails or times out (a failed probe is treated as
/// "settled"). Returns `false` if the unit is still in transition once
/// `timeout` has elapsed. At least one probe is always made.
async fn wait_not_deactivating(service: &str, timeout: Duration) -> bool {
    let give_up_at = tokio::time::Instant::now() + timeout;
    loop {
        let probe = systemctl_user_output(&["is-active", service], Duration::from_secs(5)).await;
        let output = match probe {
            Ok(output) => output,
            Err(_) => return true,
        };

        let reported = String::from_utf8_lossy(&output.stdout);
        let in_transition = matches!(reported.trim(), "deactivating" | "activating");
        if !in_transition {
            return true;
        }

        if tokio::time::Instant::now() >= give_up_at {
            return false;
        }
        tokio::time::sleep(Duration::from_secs(2)).await;
    }
}
async fn systemctl_user_output(args: &[&str], timeout: Duration) -> Result<std::process::Output> {
let mut cmd = Command::new("systemctl");
cmd.arg("--user").args(args);
cmd.kill_on_drop(true);
tokio::time::timeout(timeout, cmd.output())
.await
.with_context(|| {
format!(
"systemctl --user {} timed out after {}s",
args.join(" "),
timeout.as_secs()
)
})?
.with_context(|| format!("spawn systemctl --user {}", args.join(" ")))
}
pub fn contains_stale_health_gate(unit_body: &str) -> bool {
unit_body.contains("Notify=healthy")
@ -579,6 +717,12 @@ pub fn contains_stale_health_gate(unit_body: &str) -> bool {
/// Report whether any health-probe directive differs between two rendered
/// unit bodies. The command, interval, timeout and retry count are compared,
/// each via `directive_values`.
pub fn health_cmd_changed(old_body: &str, new_body: &str) -> bool {
    const HEALTH_DIRECTIVES: [&str; 4] = [
        "HealthCmd=",
        "HealthInterval=",
        "HealthTimeout=",
        "HealthRetries=",
    ];
    HEALTH_DIRECTIVES
        .iter()
        .any(|&directive| directive_values(old_body, directive) != directive_values(new_body, directive))
}
pub fn publish_ports_changed(old_body: &str, new_body: &str) -> bool {
@ -588,9 +732,11 @@ pub fn publish_ports_changed(old_body: &str, new_body: &str) -> bool {
}
pub fn network_aliases_changed(old_body: &str, new_body: &str) -> bool {
let old_network = directive_values(old_body, "Network=");
let new_network = directive_values(new_body, "Network=");
let old_aliases = directive_values(old_body, "NetworkAlias=");
let new_aliases = directive_values(new_body, "NetworkAlias=");
old_aliases != new_aliases
old_network != new_network || old_aliases != new_aliases
}
pub fn exec_changed(old_body: &str, new_body: &str) -> bool {
@ -620,9 +766,11 @@ pub async fn disable_remove(unit_name: &str, dir: &Path) -> Result<()> {
.await;
let path = dir.join(format!("{unit_name}.container"));
if fs::try_exists(&path).await.unwrap_or(false) {
fs::remove_file(&path)
.await
.with_context(|| format!("remove {}", path.display()))?;
match fs::remove_file(&path).await {
Ok(()) => {}
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
Err(err) => return Err(err).with_context(|| format!("remove {}", path.display())),
}
}
daemon_reload_user().await.ok();
// Defensive: kill the actual container too, in case quadlet left it.
@ -957,6 +1105,48 @@ app:
assert!(!s.contains("Network=host"));
}
#[test]
fn from_manifest_slirp4netns_omits_network_alias() {
let yaml = r#"
app:
id: vaultwarden
name: Vaultwarden
version: 1.0.0
container:
image: registry/vaultwarden:1
network: slirp4netns
security:
network_policy: isolated
"#;
let m = AppManifest::parse(yaml).expect("manifest must parse");
let s = QuadletUnit::from_manifest(&m, "vaultwarden").render();
assert!(s.contains("Network=slirp4netns"));
assert!(!s.contains("NetworkAlias="));
assert!(!s.contains("--network-alias"));
}
#[test]
fn from_manifest_pasta_omits_network_alias() {
let yaml = r#"
app:
id: nextcloud
name: Nextcloud
version: 1.0.0
container:
image: registry/nextcloud:1
network: pasta
security:
network_policy: isolated
"#;
let m = AppManifest::parse(yaml).expect("manifest must parse");
let s = QuadletUnit::from_manifest(&m, "nextcloud").render();
assert!(s.contains("Network=pasta"));
assert!(!s.contains("NetworkAlias="));
assert!(!s.contains("--network-alias"));
}
#[test]
fn from_manifest_preserves_grafana_data_uid_and_volume_shape() {
let yaml = r#"
@ -1056,18 +1246,20 @@ app:
assert!(s.contains("HealthRetries=3"));
assert!(!s.contains("Notify=healthy"));
assert!(!s.contains("TimeoutStartSec=600"));
assert!(s.contains("TimeoutStartSec=0"));
}
#[test]
fn render_skips_health_directives_when_absent() {
// No health spec → no Notify=healthy, no HealthCmd, no TimeoutStartSec
// override. Companions rely on this so their rendered bytes stay
// unchanged.
// No health spec → no Notify=healthy and no HealthCmd. TimeoutStartSec=0
// is a service-level baseline so dependency-waiting apps are not killed
// by systemd before their app daemon binds.
let s = sample_unit().render();
assert!(!s.contains("HealthCmd="));
assert!(!s.contains("Notify=healthy"));
assert!(!s.contains("HealthRetries="));
assert!(!s.contains("TimeoutStartSec="));
assert!(s.contains("TimeoutStartSec=0"));
assert!(!s.contains("TimeoutStartSec=600"));
}
#[test]
@ -1094,7 +1286,7 @@ app:
let h = translate_health_check(&http).expect("http must translate");
assert_eq!(
h.cmd,
"if command -v wget >/dev/null 2>&1; then wget -q -T 5 -O /dev/null http://localhost:8080/health; elif command -v curl >/dev/null 2>&1; then curl -fsS -m 5 http://localhost:8080/health; else exit 0; fi"
"if command -v wget >/dev/null 2>&1; then wget -q -T 3 -O /dev/null http://localhost:8080/health; elif command -v curl >/dev/null 2>&1; then curl -fsS -m 3 http://localhost:8080/health; else exit 0; fi"
);
let cmdck = HealthCheck {
@ -1163,6 +1355,25 @@ app:
assert!(h.cmd.contains("https://example.local/health"));
}
#[test]
fn translate_health_check_http_uses_manifest_timeout_for_helpers() {
use archipelago_container::HealthCheck;
let http = HealthCheck {
check_type: "http".into(),
endpoint: Some("localhost:3000".into()),
path: Some("/api/health".into()),
interval: "30s".into(),
timeout: "30s".into(),
retries: 5,
};
let h = translate_health_check(&http).expect("http must translate");
assert!(h.cmd.contains("wget -q -T 30 "), "got: {}", h.cmd);
assert!(h.cmd.contains("curl -fsS -m 30 "), "got: {}", h.cmd);
assert_eq!(h.timeout, "30s");
assert_eq!(h.retries, 5);
}
#[test]
fn from_manifest_picks_up_health_check() {
let yaml = r#"
@ -1201,6 +1412,14 @@ app:
assert!(!network_aliases_changed(new, new));
}
#[test]
fn network_aliases_changed_detects_network_mode_drift() {
let old = "[Container]\nNetwork=slirp4netns\n";
let new = "[Container]\n";
assert!(network_aliases_changed(old, new));
assert!(!network_aliases_changed(new, new));
}
#[test]
fn shell_join_escapes_dollars_for_container_runtime_expansion() {
let rendered = shell_join(&["sh".into(), "-lc".into(), "echo ${BITCOIN_RPC_PASS}".into()]);
@ -1223,6 +1442,14 @@ app:
assert!(!health_cmd_changed(new, new));
}
#[test]
fn health_cmd_changed_detects_probe_timing_drift() {
let old = "[Container]\nHealthCmd=curl -fsS http://localhost:8080/\nHealthTimeout=5s\nHealthRetries=3\n";
let new = "[Container]\nHealthCmd=curl -fsS http://localhost:8080/\nHealthTimeout=30s\nHealthRetries=5\n";
assert!(health_cmd_changed(old, new));
assert!(!health_cmd_changed(new, new));
}
#[test]
fn from_manifest_renders_to_a_systemd_unit() {
// End-to-end: parse a real-shape manifest, build the unit, render

View File

@ -334,6 +334,103 @@ fn is_process_running(pid: u32) -> bool {
/// The crash recovery (PID-based) handles dirty shutdowns; this handles clean ones.
/// Skips containers that the user intentionally stopped via the UI.
pub async fn start_stopped_containers(data_dir: &Path) -> RecoveryReport {
    // The generic boot pass runs with stack members excluded; they are
    // handled by the separate stack recovery entry point.
    let include_stack_members = false;
    start_stopped_containers_for(data_dir, include_stack_members).await
}
/// Recover the stopped members of known multi-container app stacks.
///
/// Intended to run once the backend already reports ready: stack members can
/// need minutes to come up after a reboot, so this work must not gate
/// systemd readiness.
pub async fn start_stopped_stack_containers(data_dir: &Path) -> RecoveryReport {
    let report = start_stopped_app_stacks(data_dir).await;
    report
}
/// Walk every known stack and start each member container that exists, is
/// not running, and was not stopped by the user.
///
/// Stacks with no existing container are skipped. Network aliases are
/// repaired once per stack and again before each start attempt. The returned
/// report counts only containers for which a start was actually attempted.
async fn start_stopped_app_stacks(data_dir: &Path) -> RecoveryReport {
    let user_stopped = load_user_stopped(data_dir).await;
    let mut attempted = 0;
    let mut recovered = 0;
    let mut failed = Vec::new();

    for stack in stack_recovery_specs() {
        if !stack_has_any_container(stack).await {
            continue;
        }

        info!(
            "Recovering stopped {} stack containers after boot",
            stack.name
        );
        repair_stack_network_aliases(stack).await;

        for &container in stack.containers {
            if user_stopped.contains(container) {
                info!("Skipping user-stopped container: {}", container);
                continue;
            }

            // Only containers that exist and are not already running need work.
            let Some(state) = container_state(container).await else {
                continue;
            };
            if state == "running" {
                continue;
            }

            repair_stack_network_aliases(stack).await;
            wait_before_stack_container_recovery(stack, container).await;

            attempted += 1;
            if start_existing_container(container).await {
                recovered += 1;
            } else {
                failed.push(container.to_string());
            }
        }
    }

    RecoveryReport {
        total: attempted,
        recovered,
        failed,
    }
}
/// Delay recovery of the `indeedhub` container (in the `indeedhub` stack)
/// until its dependencies look usable. Every other container returns
/// immediately.
///
/// Phase 1: up to 60 polls, 2s apart, for the dependency containers to be
/// running; aliases are repaired once they are. Phase 2 runs whether or not
/// phase 1 succeeded: up to 60 polls for `getent hosts minio` to succeed
/// inside `indeedhub-api`. If phase 2 never succeeds the function simply
/// returns and the caller proceeds anyway.
async fn wait_before_stack_container_recovery(stack: &StackRecoverySpec, container: &str) {
    let is_indeedhub_frontend = stack.name == "indeedhub" && container == "indeedhub";
    if !is_indeedhub_frontend {
        return;
    }

    let poll_pause = Duration::from_secs(2);

    let mut polls_left = 60;
    while polls_left > 0 {
        if indeedhub_recovery_dependencies_running().await {
            repair_stack_network_aliases(stack).await;
            break;
        }
        tokio::time::sleep(poll_pause).await;
        polls_left -= 1;
    }

    let mut polls_left = 60;
    while polls_left > 0 {
        let lookup = podman_output(
            &["exec", "indeedhub-api", "getent", "hosts", "minio"],
            Duration::from_secs(5),
        )
        .await;
        let resolved = match lookup {
            Ok(output) => output.status.success(),
            Err(_) => false,
        };
        if resolved {
            return;
        }
        tokio::time::sleep(poll_pause).await;
        polls_left -= 1;
    }
}
/// True only when the redis, minio and api containers of the indeedhub
/// stack all report the `running` state. Checks stop at the first one that
/// does not.
async fn indeedhub_recovery_dependencies_running() -> bool {
    const DEPENDENCIES: [&str; 3] = ["indeedhub-redis", "indeedhub-minio", "indeedhub-api"];
    for dependency in DEPENDENCIES {
        match container_state(dependency).await.as_deref() {
            Some("running") => {}
            _ => return false,
        }
    }
    true
}
async fn start_stopped_containers_for(
data_dir: &Path,
include_stack_members: bool,
) -> RecoveryReport {
let mut cmd = tokio::process::Command::new("podman");
cmd.args([
"ps",
@ -400,7 +497,7 @@ pub async fn start_stopped_containers(data_dir: &Path) -> RecoveryReport {
let names: Vec<String> = names
.into_iter()
.filter(|n| should_auto_start_stopped_container(n))
.filter(|n| should_auto_start_stopped_container(n, include_stack_members))
.collect();
if names.is_empty() {
@ -429,11 +526,276 @@ pub async fn start_stopped_containers(data_dir: &Path) -> RecoveryReport {
recover_containers(&records).await
}
fn should_auto_start_stopped_container(name: &str) -> bool {
fn should_auto_start_stopped_container(name: &str, include_stack_members: bool) -> bool {
// Keep generic boot recovery narrow. The Rust manifest reconciler owns
// managed app stacks; starting every exited Podman container here races
// it and resurrects legacy/orphan helper containers.
matches!(name, "filebrowser" | "nostr-rs-relay")
if matches!(name, "filebrowser" | "nostr-rs-relay") {
return true;
}
include_stack_members
&& matches!(
name,
"immich_postgres"
| "immich_redis"
| "immich_server"
| "indeedhub-postgres"
| "indeedhub-redis"
| "indeedhub-minio"
| "indeedhub-relay"
| "indeedhub-api"
| "indeedhub-ffmpeg"
| "indeedhub"
| "netbird-server"
| "netbird-dashboard"
| "netbird"
| "saleor-db"
| "saleor-cache"
| "saleor-jaeger"
| "saleor-mailpit"
| "saleor-api"
| "saleor-worker"
| "saleor"
| "saleor-storefront"
| "saleor-storefront-app"
)
}
/// Static description of one multi-container app stack handled by boot
/// recovery.
struct StackRecoverySpec {
// Stack label; used in log output and to special-case the "indeedhub" stack.
name: &'static str,
// Podman network name passed to `podman network create` / `connect` /
// `disconnect` by `repair_stack_network_aliases`.
network: &'static str,
// (container name, network alias) pairs that `repair_stack_network_aliases`
// checks and re-applies on `network`.
aliases: &'static [(&'static str, &'static str)],
// Member container names, visited in slice order by `start_stopped_app_stacks`.
containers: &'static [&'static str],
}
/// Hard-coded table of the app stacks that boot recovery knows about:
/// immich, indeedhub, netbird and saleor. Each entry lists the stack's
/// network, the (container, alias) pairs expected on that network, and the
/// member containers in the order recovery visits them.
fn stack_recovery_specs() -> &'static [StackRecoverySpec] {
&[
StackRecoverySpec {
name: "immich",
network: "immich-net",
aliases: &[
("immich_postgres", "immich_postgres"),
("immich_redis", "immich_redis"),
("immich_server", "immich_server"),
],
containers: &["immich_postgres", "immich_redis", "immich_server"],
},
StackRecoverySpec {
name: "indeedhub",
network: "indeedhub-net",
// NOTE(review): "indeedhub-ffmpeg" is listed under `containers` below but
// has no alias entry here — confirm that is intentional.
aliases: &[
("indeedhub-postgres", "postgres"),
("indeedhub-redis", "redis"),
("indeedhub-minio", "minio"),
("indeedhub-relay", "relay"),
("indeedhub-api", "api"),
("indeedhub", "indeedhub"),
],
containers: &[
"indeedhub-postgres",
"indeedhub-redis",
"indeedhub-minio",
"indeedhub-relay",
"indeedhub-api",
"indeedhub-ffmpeg",
"indeedhub",
],
},
StackRecoverySpec {
name: "netbird",
network: "netbird-net",
aliases: &[
("netbird-server", "netbird-server"),
("netbird-dashboard", "netbird-dashboard"),
("netbird", "netbird"),
],
containers: &["netbird-server", "netbird-dashboard", "netbird"],
},
StackRecoverySpec {
name: "saleor",
network: "saleor-net",
aliases: &[
("saleor-db", "db"),
("saleor-cache", "cache"),
("saleor-jaeger", "jaeger"),
("saleor-mailpit", "mailpit"),
("saleor-api", "api"),
("saleor-worker", "worker"),
("saleor", "saleor"),
("saleor-storefront", "storefront"),
("saleor-storefront-app", "storefront-app"),
],
containers: &[
"saleor-db",
"saleor-cache",
"saleor-jaeger",
"saleor-mailpit",
"saleor-api",
"saleor-worker",
"saleor",
"saleor-storefront",
"saleor-storefront-app",
],
},
]
}
/// True when at least one member container of `stack` can be inspected,
/// whatever its state. Stops at the first hit.
async fn stack_has_any_container(stack: &StackRecoverySpec) -> bool {
    for &member in stack.containers {
        if let Some(_state) = container_state(member).await {
            return true;
        }
    }
    false
}
/// Best-effort repair of a stack's network aliases.
///
/// First tries to create the stack network; the result is ignored, so an
/// already-existing network is not an error. Then each (container, alias)
/// pair whose container exists but does not show the alias on the stack
/// network is force-disconnected and reconnected with `--alias`. Every
/// podman call is bounded to 15 seconds and its outcome is ignored.
async fn repair_stack_network_aliases(stack: &StackRecoverySpec) {
    let step_timeout = Duration::from_secs(15);

    let _ = podman_status(&["network", "create", stack.network], step_timeout).await;

    for &(container, alias) in stack.aliases {
        let exists = container_state(container).await.is_some();
        if !exists || network_alias_present(stack.network, container, alias).await {
            continue;
        }

        let disconnect_args = ["network", "disconnect", "-f", stack.network, container];
        let _ = podman_status(&disconnect_args, step_timeout).await;

        let connect_args = [
            "network",
            "connect",
            "--alias",
            alias,
            stack.network,
            container,
        ];
        let _ = podman_status(&connect_args, step_timeout).await;
    }
}
/// Check via `podman inspect` whether `container` lists `alias` among its
/// aliases on `network_name`.
///
/// Any failure counts as "not present": the inspect failing or timing out,
/// unparsable JSON, the network missing from the container's settings, or a
/// missing or non-array `Aliases` field.
async fn network_alias_present(network_name: &str, container: &str, alias: &str) -> bool {
    let inspect_args = [
        "inspect",
        container,
        "--format",
        "{{json .NetworkSettings.Networks}}",
    ];
    let Ok(inspected) = podman_output(&inspect_args, Duration::from_secs(10)).await else {
        return false;
    };
    if !inspected.status.success() {
        return false;
    }

    let networks: serde_json::Value = match serde_json::from_slice(&inspected.stdout) {
        Ok(parsed) => parsed,
        Err(_) => return false,
    };
    let Some(attached) = networks.get(network_name) else {
        return false;
    };
    let Some(aliases) = attached.get("Aliases").and_then(|list| list.as_array()) else {
        return false;
    };

    aliases
        .iter()
        .filter_map(|entry| entry.as_str())
        .any(|known| known == alias)
}
/// Return the trimmed `.State.Status` string that `podman inspect` reports
/// for `container`, or `None` when the inspect fails, times out, or exits
/// non-zero (for example because the container does not exist).
async fn container_state(container: &str) -> Option<String> {
    let inspect_args = ["inspect", container, "--format", "{{.State.Status}}"];
    match podman_output(&inspect_args, Duration::from_secs(10)).await {
        Ok(output) if output.status.success() => {
            Some(String::from_utf8_lossy(&output.stdout).trim().to_string())
        }
        _ => None,
    }
}
/// Try to bring an already-created stack container up with `podman start`.
///
/// Returns `true` when the container is considered recovered and `false`
/// otherwise; every failure path is logged with `warn!`.
async fn start_existing_container(container: &str) -> bool {
info!("Recovering stack container: {}", container);
// Two named server containers get a 120s start budget; everything else 90s.
let timeout = match container {
"immich_server" | "netbird-server" => Duration::from_secs(120),
_ => Duration::from_secs(90),
};
// A container reported as "initialized" is cleaned up before the start attempt.
if container_state(container).await.as_deref() == Some("initialized") {
cleanup_container_runtime_state(container).await;
}
match podman_output(&["start", container], timeout).await {
Ok(output) if output.status.success() => {
// `podman start` succeeded; wait briefly and treat an immediate
// "exited" state as a failed recovery.
tokio::time::sleep(Duration::from_secs(3)).await;
if container_state(container).await.as_deref() == Some("exited") {
warn!("Stack container {} exited shortly after start", container);
false
} else {
info!("Successfully recovered stack container: {}", container);
true
}
}
Ok(output) => {
// podman ran but exited non-zero. For the two stderr signatures below,
// clean up runtime state and retry the start exactly once.
let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
if stderr.contains("exec.fifo") || stderr.contains("failed to start container") {
cleanup_container_runtime_state(container).await;
if let Ok(retry) = podman_output(&["start", container], timeout).await {
// NOTE(review): unlike the first-attempt success path, a successful
// retry is not followed by the "exited shortly after start" check.
if retry.status.success() {
info!(
"Successfully recovered stack container after cleanup: {}",
container
);
return true;
}
warn!(
"Failed to recover stack container {} after cleanup: {}",
container,
String::from_utf8_lossy(&retry.stderr).trim()
);
return false;
}
}
// Reached when stderr matched neither signature, or when the retry itself
// returned an error; the stderr from the first attempt is reported.
warn!(
"Failed to recover stack container {}: {}",
container, stderr
);
false
}
Err(e) => {
// The podman invocation itself returned an error.
warn!("Failed to recover stack container {}: {}", container, e);
false
}
}
}
/// Run `podman container cleanup <container>` with a 30 second bound,
/// ignoring the outcome.
async fn cleanup_container_runtime_state(container: &str) {
    let cleanup_args = ["container", "cleanup", container];
    let _ = podman_output(&cleanup_args, Duration::from_secs(30)).await;
}
/// Run `podman <args>` and return only its exit status. `None` means the
/// invocation itself returned an error, as opposed to podman exiting
/// non-zero.
async fn podman_status(args: &[&str], timeout: Duration) -> Option<std::process::ExitStatus> {
    match podman_output(args, timeout).await {
        Ok(output) => Some(output.status),
        Err(_) => None,
    }
}
/// Run `podman <args>` through `command_with_timeout`, labelling the call
/// with the full command line.
async fn podman_output(args: &[&str], timeout: Duration) -> Result<Output> {
    let label = format!("podman {}", args.join(" "));
    let mut podman = tokio::process::Command::new("podman");
    podman.args(args);
    command_with_timeout(podman, timeout, &label).await
}
/// Simple tier ordering for boot recovery (mirrors health_monitor tiers).
@ -620,10 +982,17 @@ mod tests {
#[test]
fn generic_boot_recovery_skips_manifest_owned_and_legacy_stacks() {
assert!(should_auto_start_stopped_container("filebrowser"));
assert!(should_auto_start_stopped_container("nostr-rs-relay"));
assert!(!should_auto_start_stopped_container("bitcoin-knots"));
assert!(!should_auto_start_stopped_container("lnd"));
assert!(!should_auto_start_stopped_container("indeedhub-postgres"));
assert!(should_auto_start_stopped_container("filebrowser", false));
assert!(should_auto_start_stopped_container("nostr-rs-relay", false));
assert!(!should_auto_start_stopped_container("bitcoin-knots", false));
assert!(!should_auto_start_stopped_container("lnd", false));
assert!(!should_auto_start_stopped_container(
"indeedhub-postgres",
false
));
assert!(should_auto_start_stopped_container(
"indeedhub-postgres",
true
));
}
}

View File

@ -17,7 +17,8 @@ const ELECTRUMX_DATA_DIR: &str = "/var/lib/archipelago/electrumx";
const ESTIMATED_FULL_INDEX_BYTES: f64 = 130_000_000_000.0;
/// Refresh interval for status cache
const CACHE_REFRESH_SECS: u64 = 15;
const CACHE_REFRESH_SECS: u64 = 30;
const CACHE_ERROR_BACKOFF_SECS: u64 = 60;
/// Build Bitcoin RPC Basic auth header using shared credentials.
async fn bitcoin_rpc_auth() -> String {
@ -70,6 +71,11 @@ pub fn spawn_status_cache() {
tokio::spawn(async {
loop {
let mut fresh = fetch_electrs_sync_status().await;
let sleep_secs = if fresh.status == "waiting" && fresh.bitcoin_height == 0 {
CACHE_ERROR_BACKOFF_SECS
} else {
CACHE_REFRESH_SECS
};
let mut cached = cache().write().await;
if fresh.indexed_height == 0
&& cached.indexed_height > 0
@ -92,7 +98,7 @@ pub fn spawn_status_cache() {
}
*cached = fresh;
drop(cached);
tokio::time::sleep(Duration::from_secs(CACHE_REFRESH_SECS)).await;
tokio::time::sleep(Duration::from_secs(sleep_secs)).await;
}
});
}
@ -146,6 +152,8 @@ fn is_transient_error(err_msg: &str) -> bool {
|| lower.contains("broken pipe")
|| lower.contains("eof")
|| lower.contains("connection")
|| lower.contains("503 service unavailable")
|| lower.contains("work queue depth exceeded")
}
/// Fetch ElectrumX indexed height via Electrum protocol (TCP JSON-RPC).

View File

@ -217,6 +217,7 @@ struct ContainerHealth {
app_id: String,
state: String,
podman_health: Option<String>,
host_port_ready: Option<bool>,
healthy: bool,
}
@ -427,20 +428,22 @@ async fn check_containers() -> Vec<ContainerHealth> {
// nbxplorer, mempool-api) and UI containers need auto-restart too.
// Only skip ephemeral containers (build infrastructure, init one-shots).
containers
.iter()
.filter_map(|c| {
let mut out = Vec::new();
for c in &containers {
let name = c.get("Names").and_then(|v| {
if let Some(arr) = v.as_array() {
arr.first().and_then(|n| n.as_str()).map(|s| s.to_string())
} else {
v.as_str().map(|s| s.to_string())
}
})?;
});
let Some(name) = name else {
continue;
};
// Skip podman-compose infrastructure and one-shot init containers
if name.starts_with("indeedhub-build_") || name.contains("-init") {
return None;
continue;
}
let app_id = name.strip_prefix("archy-").unwrap_or(&name).to_string();
@ -452,17 +455,65 @@ async fn check_containers() -> Vec<ContainerHealth> {
.to_lowercase();
let podman_health = parse_podman_health(c, &state);
let healthy = state == "running" && podman_health.as_deref() != Some("unhealthy");
let host_ports = host_tcp_ports_from_container(c);
let host_port_ready = if host_ports.is_empty() {
None
} else {
Some(host_ports_ready(&host_ports).await)
};
let healthy = state == "running"
&& podman_health.as_deref() != Some("unhealthy")
&& host_port_ready != Some(false);
Some(ContainerHealth {
out.push(ContainerHealth {
name,
app_id,
state,
podman_health,
host_port_ready,
healthy,
});
}
out
}
/// Collects the host-side TCP ports listed in a container's `Ports` array.
///
/// `c` is a JSON object describing one container. Entries whose `protocol`
/// is missing are treated as TCP; entries with any other protocol are
/// ignored. `host_port` values that are absent, non-integer, or do not fit in
/// a `u16` are dropped. The result is sorted ascending with duplicates
/// removed. A missing or non-array `Ports` field yields an empty vector.
fn host_tcp_ports_from_container(c: &serde_json::Value) -> Vec<u16> {
let Some(ports) = c.get("Ports").and_then(|v| v.as_array()) else {
return Vec::new();
};
let mut out: Vec<u16> = ports
.iter()
// Keep TCP mappings only; the comparison ignores ASCII case.
.filter(|p| {
p.get("protocol")
.and_then(|v| v.as_str())
.unwrap_or("tcp")
.eq_ignore_ascii_case("tcp")
})
// Narrow the JSON integer to u16, discarding out-of-range values.
.filter_map(|p| {
p.get("host_port")
.and_then(|v| v.as_u64())
.and_then(|port| u16::try_from(port).ok())
})
// NOTE(review): the bare `.collect()` below directly precedes `.collect();`
// and looks like a removed line left over from the diff rendering —
// confirm against the real file.
.collect()
.collect();
// Sorting first makes `dedup` remove every duplicate, not just adjacent runs.
out.sort_unstable();
out.dedup();
out
}
/// Returns `true` only when every port in `ports` accepts a TCP connection on
/// 127.0.0.1.
///
/// Ports are probed one at a time in slice order, each attempt bounded by a
/// 2-second timeout. The first port that times out or fails to connect ends
/// the probe with `false`. An empty slice yields `true`.
async fn host_ports_ready(ports: &[u16]) -> bool {
    for &port in ports {
        let attempt = tokio::time::timeout(
            std::time::Duration::from_secs(2),
            tokio::net::TcpStream::connect(("127.0.0.1", port)),
        )
        .await;
        // Outer `Err` means the timeout elapsed; inner `Err` means the
        // connection attempt itself failed.
        if !matches!(attempt, Ok(Ok(_))) {
            return false;
        }
    }
    true
}
fn live_container_ids(containers: &[serde_json::Value]) -> HashSet<String> {
@ -640,33 +691,41 @@ fn parse_health_from_status(status: &str) -> Option<String> {
(start < end).then(|| status[start + 1..end].to_string())
}
/// Try to restart a container.
async fn restart_container(name: &str) -> bool {
info!("Auto-restarting unhealthy container: {}", name);
/// Try to recover a container. Running containers need a real restart so
/// rootless network helpers such as pasta are recreated; `podman start` is a
/// no-op for a running container with a missing host listener.
async fn restart_container(name: &str, state: &str) -> bool {
let action = if state == "running" {
"restart"
} else {
"start"
};
info!("Auto-{}ing unhealthy container: {}", action, name);
let result = tokio::time::timeout(
std::time::Duration::from_secs(120),
tokio::process::Command::new("podman")
.args(["start", name])
tokio::process::Command::new("systemd-run")
.args(["--user", "--scope", "--quiet", "--collect", "podman"])
.args([action, name])
.output(),
)
.await;
match result {
Ok(Ok(output)) if output.status.success() => {
info!("Successfully restarted container: {}", name);
info!("Successfully recovered container: {}", name);
true
}
Ok(Ok(output)) => {
let stderr = String::from_utf8_lossy(&output.stderr);
warn!("Failed to restart container {}: {}", name, stderr.trim());
warn!("Failed to {} container {}: {}", action, name, stderr.trim());
false
}
Ok(Err(e)) => {
warn!("Failed to execute podman start for {}: {}", name, e);
warn!("Failed to execute podman {} for {}: {}", action, name, e);
false
}
Err(_) => {
warn!("Timeout starting container {} (120s)", name);
warn!("Timeout {}ing container {} (120s)", action, name);
false
}
}
@ -684,9 +743,10 @@ pub fn spawn_health_monitor(state: Arc<StateManager>, data_dir: PathBuf) {
if crate::crash_recovery::is_recovery_complete() {
break;
}
// Safety timeout: start anyway after 5 minutes even if recovery hangs
if wait_start.elapsed().as_secs() > 300 {
warn!("Health monitor: boot recovery did not complete within 5 minutes, starting anyway");
// Safety timeout: start anyway after 30 minutes even if recovery hangs.
// Stack recovery can take many minutes on low-resource nodes after reboot.
if wait_start.elapsed().as_secs() > 1800 {
warn!("Health monitor: boot recovery did not complete within 30 minutes, starting anyway");
break;
}
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
@ -827,6 +887,7 @@ pub fn spawn_health_monitor(state: Arc<StateManager>, data_dir: PathBuf) {
}
// Handle exited, stopped, created, and Podman-unhealthy running containers.
if container.podman_health.as_deref() == Some("unhealthy")
|| container.host_port_ready == Some(false)
|| container.state == "exited"
|| container.state == "stopped"
|| container.state == "created"
@ -932,7 +993,7 @@ pub fn spawn_health_monitor(state: Arc<StateManager>, data_dir: PathBuf) {
.unwrap_or(&90)
);
let restarted = restart_container(&container.name).await;
let restarted = restart_container(&container.name, &container.state).await;
if !restarted || attempt >= MAX_RESTART_ATTEMPTS {
let notification = Notification {
@ -1088,6 +1149,7 @@ mod tests {
app_id: "bitcoin-knots".to_string(),
state: "running".to_string(),
podman_health: Some("healthy".to_string()),
host_port_ready: None,
healthy: true,
};
assert!(health.healthy);
@ -1103,6 +1165,7 @@ mod tests {
app_id: "mempool-web".to_string(),
state: "exited".to_string(),
podman_health: None,
host_port_ready: None,
healthy: false,
};
assert!(!health.healthy);
@ -1193,6 +1256,7 @@ mod tests {
app_id: "indeedhub-postgres".into(),
state: "running".into(),
podman_health: None,
host_port_ready: None,
healthy: true,
},
ContainerHealth {
@ -1200,6 +1264,7 @@ mod tests {
app_id: "indeedhub-redis".into(),
state: "running".into(),
podman_health: None,
host_port_ready: None,
healthy: true,
},
ContainerHealth {
@ -1207,6 +1272,7 @@ mod tests {
app_id: "indeedhub-api".into(),
state: "exited".into(),
podman_health: None,
host_port_ready: None,
healthy: false,
},
];
@ -1217,6 +1283,7 @@ mod tests {
app_id: "indeedhub-redis".into(),
state: "running".into(),
podman_health: None,
host_port_ready: None,
healthy: true,
}];
assert!(!deps_are_running("indeedhub-api", &partial));
@ -1229,6 +1296,7 @@ mod tests {
app_id: "bitcoin-core".into(),
state: "running".into(),
podman_health: None,
host_port_ready: None,
healthy: true,
}];
assert!(deps_are_running("lnd", &core));
@ -1238,6 +1306,7 @@ mod tests {
app_id: "bitcoin-knots".into(),
state: "running".into(),
podman_health: None,
host_port_ready: None,
healthy: true,
}];
assert!(deps_are_running("fedimint", &knots));
@ -1247,6 +1316,7 @@ mod tests {
app_id: "bitcoin-core".into(),
state: "stopped".into(),
podman_health: None,
host_port_ready: None,
healthy: false,
}];
assert!(!deps_are_running("electrumx", &stopped));
@ -1259,6 +1329,7 @@ mod tests {
app_id: "bitcoin-core".into(),
state: "running".into(),
podman_health: None,
host_port_ready: None,
healthy: true,
}];
@ -1274,6 +1345,7 @@ mod tests {
app_id: "bitcoin-core".into(),
state: "stopped".into(),
podman_health: None,
host_port_ready: None,
healthy: false,
}];

View File

@ -20,6 +20,7 @@
use anyhow::{Context, Result};
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Duration;
use tokio::signal;
use tokio::sync::Notify;
use tracing::info;
@ -168,8 +169,6 @@ async fn main() -> Result<()> {
boot_report.recovered, boot_report.total, boot_report.failed
);
}
crash_recovery::mark_recovery_complete();
// Construct the container orchestrator once. In prod mode we load the
// on-disk app manifests, do an initial adoption pass, and spawn the
// BootReconciler loop (Step 5/6 of the rust-orchestrator migration).
@ -195,17 +194,20 @@ async fn main() -> Result<()> {
}
// Adoption pass: link existing podman containers back to their
// manifests so the reconciler doesn't recreate them.
match prod.adopt_existing().await {
Ok(report) => {
match tokio::time::timeout(Duration::from_secs(35), prod.adopt_existing()).await {
Ok(Ok(report)) => {
info!(
"🔗 Adopted {} existing container(s): {:?}",
report.adopted.len(),
report.adopted
);
}
Err(e) => {
Ok(Err(e)) => {
tracing::warn!(error = %e, "prod orchestrator: adopt_existing failed (non-fatal)");
}
Err(_) => {
tracing::warn!("prod orchestrator: adopt_existing timed out after 35s (non-fatal)")
}
}
// Spawn the boot reconciler loop. Runs an initial reconcile
// immediately, then re-checks every RECONCILER_DEFAULT_INTERVAL
@ -272,6 +274,23 @@ async fn main() -> Result<()> {
// Spawn periodic container snapshot (for crash recovery)
crash_recovery::spawn_snapshot_task(config.data_dir.clone());
// Recover stopped multi-container stack members after the backend is up.
// This can take minutes on busy nodes after a reboot, so keep it out of
// the synchronous systemd startup path.
{
let data_dir = config.data_dir.clone();
tokio::spawn(async move {
let report = crash_recovery::start_stopped_stack_containers(&data_dir).await;
if report.total > 0 {
info!(
"🔄 Stack boot recovery: {}/{} containers started (failed: {:?})",
report.recovered, report.total, report.failed
);
}
crash_recovery::mark_recovery_complete();
});
}
// Spawn disk space monitor (warns at 85%, auto-cleans at 90%)
disk_monitor::spawn_disk_monitor(config.data_dir.clone());

View File

@ -1,15 +1,20 @@
use crate::monitoring::types::{AlertRuleKind, FiredAlert};
use crate::webhooks::{self, WebhookEvent, WebhookPayload};
use chrono::Utc;
use std::collections::HashSet;
use std::path::Path;
use std::sync::Arc;
use tracing::info;
const NOTIFICATION_MAX_AGE_SECS: i64 = 30 * 60;
/// Push fired alerts as notifications to the state manager (broadcast via WebSocket).
pub(crate) async fn push_alert_notifications(
state_mgr: &Arc<crate::state::StateManager>,
alerts: &[FiredAlert],
) {
let (mut data, _rev) = state_mgr.get_snapshot().await;
prune_stale_alert_notifications(&mut data.notifications, alerts);
for alert in alerts {
let level = match alert.kind {
AlertRuleKind::DiskUsage | AlertRuleKind::RamUsage => {
@ -27,7 +32,7 @@ pub(crate) async fn push_alert_notifications(
level,
title: format!("{:?} Alert", alert.kind),
message: alert.message.clone(),
timestamp: chrono::Utc::now().to_rfc3339(),
timestamp: Utc::now().to_rfc3339(),
app_id: None,
};
data.notifications.push(notification);
@ -40,6 +45,30 @@ pub(crate) async fn push_alert_notifications(
info!("Fired {} alert(s)", alerts.len());
}
/// Drops notifications that are superseded or too old, in place.
///
/// A notification is removed when:
/// * its id matches one of the currently fired `alerts`, or
/// * it has no `app_id`, its id does not start with `health-`, and its
///   RFC 3339 timestamp is either unparseable or older than
///   `NOTIFICATION_MAX_AGE_SECS`.
///
/// Notifications carrying an `app_id` or a `health-` id prefix are kept
/// regardless of age (unless their id matches a fired alert).
fn prune_stale_alert_notifications(
    notifications: &mut Vec<crate::data_model::Notification>,
    alerts: &[FiredAlert],
) {
    // Sample the clock once so every entry is judged against the same instant.
    let reference_time = Utc::now();
    let refiring: HashSet<&str> = alerts.iter().map(|fired| fired.id.as_str()).collect();

    notifications.retain(|entry| {
        let id = entry.id.as_str();
        if refiring.contains(id) {
            return false;
        }
        let exempt_from_aging = entry.app_id.is_some() || id.starts_with("health-");
        exempt_from_aging
            || chrono::DateTime::parse_from_rfc3339(&entry.timestamp)
                .map(|stamp| {
                    let age_secs = reference_time
                        .signed_duration_since(stamp.with_timezone(&Utc))
                        .num_seconds();
                    age_secs <= NOTIFICATION_MAX_AGE_SECS
                })
                // An unparseable timestamp cannot be aged, so the entry is dropped.
                .unwrap_or(false)
    });
}
/// Deliver webhook notifications for alerts that map to webhook events.
pub(crate) async fn deliver_alert_webhooks(data_dir: &Path, alerts: &[FiredAlert]) {
for alert in alerts {
@ -53,7 +82,7 @@ pub(crate) async fn deliver_alert_webhooks(data_dir: &Path, alerts: &[FiredAlert
event,
title: format!("{:?} Alert", alert.kind),
message: alert.message.clone(),
timestamp: chrono::Utc::now().to_rfc3339(),
timestamp: Utc::now().to_rfc3339(),
node_id: String::new(),
details: Some(serde_json::json!({
"value": alert.value,
@ -64,3 +93,46 @@ pub(crate) async fn deliver_alert_webhooks(data_dir: &Path, alerts: &[FiredAlert
}
}
}
// Unit tests for the notification pruning helper in this module.
#[cfg(test)]
mod tests {
use super::*;
use crate::data_model::{Notification, NotificationLevel};
// Builds a warning-level notification fixture with a fixed title and message;
// only the id, timestamp and optional app id vary between cases.
fn notification(id: &str, timestamp: String, app_id: Option<&str>) -> Notification {
Notification {
id: id.to_string(),
level: NotificationLevel::Warning,
title: "DiskUsage Alert".to_string(),
message: "Disk warning".to_string(),
timestamp,
app_id: app_id.map(str::to_string),
}
}
// Pruning must drop (a) the entry whose id matches a fired alert and (b) the
// generic entry older than the 30-minute limit, while keeping the fresh
// generic entry and the app-scoped health entry, in their original order.
#[test]
fn prune_stale_alert_notifications_removes_duplicate_and_old_generic_alerts() {
let active_alert = FiredAlert {
id: "alert-active".to_string(),
kind: AlertRuleKind::DiskUsage,
message: "Disk warning".to_string(),
value: 90.0,
threshold: 85.0,
timestamp: Utc::now().timestamp(),
acknowledged: false,
};
// 45 minutes exceeds the 30-minute maximum age; 5 minutes is within it.
let old_timestamp = (Utc::now() - chrono::Duration::minutes(45)).to_rfc3339();
let fresh_timestamp = (Utc::now() - chrono::Duration::minutes(5)).to_rfc3339();
let mut notifications = vec![
notification("alert-active", fresh_timestamp.clone(), None),
notification("alert-old", old_timestamp, None),
notification("alert-fresh", fresh_timestamp.clone(), None),
notification("health-indeedhub-1", fresh_timestamp, Some("indeedhub")),
];
prune_stale_alert_notifications(&mut notifications, &[active_alert]);
let ids: Vec<&str> = notifications.iter().map(|n| n.id.as_str()).collect();
assert_eq!(ids, vec!["alert-fresh", "health-indeedhub-1"]);
}
}

View File

@ -71,8 +71,8 @@ async fn build_telemetry_report(
data_dir: &std::path::Path,
) -> anyhow::Result<serde_json::Value> {
// Anonymous node ID — truncated SHA-256 hash of pubkey
let (node_id, version, container_count, running_count, peer_count) = if let Some(ref sm) = state
{
let (node_id, version, container_count, running_count, peer_count, containers) =
if let Some(ref sm) = state {
let (data, _) = sm.get_snapshot().await;
let id = {
use sha2::{Digest, Sha256};
@ -80,6 +80,17 @@ async fn build_telemetry_report(
h.update(data.server_info.pubkey.as_bytes());
hex::encode(h.finalize())[..16].to_string()
};
let containers: Vec<serde_json::Value> = data
.package_data
.iter()
.map(|(id, pkg)| {
serde_json::json!({
"id": id,
"state": format!("{:?}", pkg.state),
"version": pkg.manifest.version,
})
})
.collect();
let running = data
.package_data
.values()
@ -91,9 +102,17 @@ async fn build_telemetry_report(
data.package_data.len(),
running,
data.peer_health.len(),
containers,
)
} else {
("unknown".to_string(), "unknown".to_string(), 0, 0, 0)
(
"unknown".to_string(),
"unknown".to_string(),
0,
0,
0,
Vec::new(),
)
};
// System info
@ -153,6 +172,7 @@ async fn build_telemetry_report(
"cpu_pct": (cpu_pct * 10.0).round() / 10.0,
"mem_pct": (mem_pct * 10.0).round() / 10.0,
"disk_pct": (disk_pct * 10.0).round() / 10.0,
"containers": containers,
"container_count": container_count,
"running_count": running_count,
"federation_peers": peer_count,
@ -166,16 +186,28 @@ async fn post_telemetry_report(url: &str, report: &serde_json::Value) -> anyhow:
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(10))
.build()?;
let payload = serde_json::json!({
"method": "telemetry.ingest",
"params": report,
});
let response = client
.post(url)
.header("Content-Type", "application/json")
.header("User-Agent", "Archipelago-Telemetry/1.0")
.json(report)
.json(&payload)
.send()
.await?;
if !response.status().is_success() {
anyhow::bail!("Collector returned {}", response.status());
}
let status = response.status();
let body: serde_json::Value = response.json().await.unwrap_or_default();
if let Some(error) = body.get("error") {
anyhow::bail!("Collector RPC error: {}", error);
}
if body.get("result").is_none() {
anyhow::bail!("Collector returned {} without RPC result", status);
}
Ok(())
}

View File

@ -17,6 +17,7 @@ use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpListener;
use tracing::{debug, error, info, warn};
@ -331,6 +332,7 @@ impl Server {
// lifecycle op, and to break out if the spawned task dies
// without ever writing a final state.
let mut transitional_since: HashMap<String, Instant> = HashMap::new();
let mut scan_backoff_until: Option<Instant> = None;
if let Err(e) = scan_and_update_packages(
&scanner,
&state,
@ -342,6 +344,10 @@ impl Server {
.await
{
error!("Failed to scan containers: {}", e);
if is_podman_scan_timeout(&e) {
scan_backoff_until = Some(Instant::now() + Duration::from_secs(30));
warn!("Podman container scan timed out; backing off scans for 30s");
}
}
// Bump the scan-completion counter so any caller waiting on a
// kicked scan (install/update success path) can proceed.
@ -364,8 +370,16 @@ impl Server {
debug!("Scan kicked by install/update success — running immediately");
}
}
if let Some(until) = scan_backoff_until {
if Instant::now() < until {
debug!("Skipping container scan — Podman scan backoff active");
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
continue;
}
}
if scanning.load(std::sync::atomic::Ordering::Relaxed) {
debug!("Skipping container scan — previous scan still in progress");
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
continue;
}
scanning.store(true, std::sync::atomic::Ordering::Relaxed);
@ -380,6 +394,12 @@ impl Server {
.await
{
error!("Failed to update containers: {}", e);
if is_podman_scan_timeout(&e) {
scan_backoff_until = Some(Instant::now() + Duration::from_secs(30));
warn!("Podman container scan timed out; backing off scans for 30s");
}
} else {
scan_backoff_until = None;
}
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
scanning.store(false, std::sync::atomic::Ordering::Relaxed);
@ -847,10 +867,10 @@ const TRANSITIONAL_STUCK_TIMEOUT: Duration = Duration::from_secs(120);
const INSTALLING_STUCK_TIMEOUT: Duration = Duration::from_secs(20 * 60);
fn transitional_stuck_timeout(state: &crate::data_model::PackageState) -> Duration {
if *state == crate::data_model::PackageState::Installing {
INSTALLING_STUCK_TIMEOUT
} else {
TRANSITIONAL_STUCK_TIMEOUT
use crate::data_model::PackageState::*;
match state {
Installing | Starting | Restarting => INSTALLING_STUCK_TIMEOUT,
_ => TRANSITIONAL_STUCK_TIMEOUT,
}
}
@ -874,6 +894,18 @@ fn is_transitional(state: &crate::data_model::PackageState) -> bool {
)
}
/// Maps a transitional state to the final state it should take when the
/// container is absent, or `None` when the state must be left alone.
///
/// Only `Stopping` has a replacement (`Stopped`); every other state returns
/// `None`.
fn absent_transitional_replacement(
    state: &crate::data_model::PackageState,
) -> Option<crate::data_model::PackageState> {
    // A stop operation is complete once the container record disappears.
    // Do not leave the app card wedged in "Stopping..." just because the
    // background task died or the backend restarted before it wrote back.
    if matches!(state, crate::data_model::PackageState::Stopping) {
        return Some(crate::data_model::PackageState::Stopped);
    }
    None
}
/// Merge a fresh scan entry `fresh` into `existing` while preserving
/// `existing.state` (which is transitional — the RPC spawn task owns it).
/// Non-state observability fields are taken from `fresh` so the UI still
@ -881,8 +913,17 @@ fn is_transitional(state: &crate::data_model::PackageState) -> bool {
fn merge_preserving_transitional(
existing: &crate::data_model::PackageDataEntry,
fresh: &crate::data_model::PackageDataEntry,
user_stop_requested: bool,
) -> crate::data_model::PackageDataEntry {
let state = match (&existing.state, &fresh.state) {
// A user-initiated stop must keep showing Stopping while podman still
// reports Running. Repair/restart transitions do not have a user-stop
// marker, so a fresh Running scan means the app recovered.
(crate::data_model::PackageState::Stopping, crate::data_model::PackageState::Running)
if !user_stop_requested =>
{
fresh.state.clone()
}
// Removing with a live running container is stale: uninstall either
// failed or Archipelago restarted before the spawned task could revert
// state. Let the scanner recover the UI immediately instead of
@ -909,6 +950,11 @@ fn merge_preserving_transitional(
}
}
/// Reports whether `error` describes a timed-out `podman ps` invocation.
///
/// The error is rendered with `{:#}` and the result must contain both
/// "podman ps" and "timed out".
fn is_podman_scan_timeout(error: &anyhow::Error) -> bool {
    let rendered = format!("{:#}", error);
    ["podman ps", "timed out"]
        .iter()
        .all(|needle| rendered.contains(*needle))
}
async fn scan_and_update_packages(
scanner: &DockerPackageScanner,
state: &StateManager,
@ -925,6 +971,7 @@ async fn scan_and_update_packages(
pkg.exit_code = None;
}
}
normalize_reachable_package_health(&mut packages).await;
let (current_data, _) = state.get_snapshot().await;
let tor_addr = docker_packages::read_tor_address("archipelago").await;
@ -992,7 +1039,11 @@ async fn scan_and_update_packages(
// observability fields (health, exit_code, lan_address
// via installed) from the fresh scan so the UI still
// sees live readings.
let merged_entry = merge_preserving_transitional(existing_entry, pkg);
let merged_entry = merge_preserving_transitional(
existing_entry,
pkg,
user_stopped.contains(id),
);
if existing.cloned() != Some(merged_entry.clone()) {
merged.insert(id.clone(), merged_entry);
changed = true;
@ -1029,6 +1080,19 @@ async fn scan_and_update_packages(
// owner (spawn_task) is responsible for clearing state, not us.
if let Some(entry) = merged.get(&id) {
if is_transitional(&entry.state) {
if let Some(replacement) = absent_transitional_replacement(&entry.state) {
let mut updated = entry.clone();
updated.state = replacement;
updated.health = None;
updated.exit_code = None;
updated.install_progress = None;
updated.uninstall_stage = None;
merged.insert(id.clone(), updated);
transitional_since.remove(&id);
absence_tracker.remove(&id);
changed = true;
continue;
}
let entered = *transitional_since.entry(id.clone()).or_insert(now);
let timeout = transitional_stuck_timeout(&entry.state);
if now.duration_since(entered) > timeout {
@ -1088,6 +1152,99 @@ async fn scan_and_update_packages(
Ok(())
}
/// Upgrades the reported health of running packages whose frontend actually
/// answers HTTP on localhost.
///
/// Only entries in the `Running` state whose health is `"starting"`,
/// `"unhealthy"` or `"1"` are probed. The port comes from the `main`
/// interface's LAN address when it parses, otherwise from
/// `fallback_package_port`; entries with no usable port are skipped. When the
/// probe succeeds the health becomes `"healthy"` and a `main` LAN address is
/// filled in if missing.
async fn normalize_reachable_package_health(
    packages: &mut HashMap<String, crate::data_model::PackageDataEntry>,
) {
    for (app_id, entry) in packages.iter_mut() {
        let is_running = entry.state == crate::data_model::PackageState::Running;
        let health_is_suspect = matches!(
            entry.health.as_deref(),
            Some("starting" | "unhealthy" | "1")
        );
        if !(is_running && health_is_suspect) {
            continue;
        }

        // Prefer the port advertised by the scan; fall back to the built-in table.
        let advertised_port = entry
            .installed
            .as_ref()
            .and_then(|installed| installed.interface_addresses.get("main"))
            .and_then(|address| address.lan_address.as_deref())
            .and_then(port_from_url);
        let port = match advertised_port.or_else(|| fallback_package_port(app_id)) {
            Some(port) => port,
            None => continue,
        };

        if !frontend_port_http_ready(port).await {
            continue;
        }
        debug!(app_id = %app_id, port, "normalizing reachable package health to healthy");
        entry.health = Some("healthy".to_string());
        ensure_main_lan_address(entry, port);
    }
}
/// Probes `127.0.0.1:port` with a minimal HTTP request and reports whether
/// the response starts with a 2xx or 3xx status line.
///
/// Connecting and reading the first bytes are each bounded by a 2-second
/// timeout. Any connect, write or read failure, a timeout, or an empty read
/// yields `false`. Only `HTTP/1.0` and `HTTP/1.1` status lines are recognised.
async fn frontend_port_http_ready(port: u16) -> bool {
    let connect = tokio::net::TcpStream::connect(("127.0.0.1", port));
    let mut socket = match tokio::time::timeout(Duration::from_secs(2), connect).await {
        Ok(Ok(socket)) => socket,
        _ => return false,
    };

    let probe = b"GET / HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n";
    if socket.write_all(probe).await.is_err() {
        return false;
    }

    // Only the start of the status line matters, so a small buffer suffices.
    let mut first_bytes = [0u8; 64];
    let received =
        match tokio::time::timeout(Duration::from_secs(2), socket.read(&mut first_bytes)).await {
            Ok(Ok(count)) if count != 0 => count,
            _ => return false,
        };

    let status_line = String::from_utf8_lossy(&first_bytes[..received]);
    ["HTTP/1.1 2", "HTTP/1.1 3", "HTTP/1.0 2", "HTTP/1.0 3"]
        .iter()
        .any(|prefix| status_line.starts_with(*prefix))
}
/// Makes sure the package's `main` interface has a LAN address.
///
/// Does nothing when the package has no `installed` record. Otherwise the
/// `main` interface entry is created if absent (empty Tor address, no LAN
/// address), and a missing LAN address is set to `http://localhost:{port}`.
/// An existing LAN address is never overwritten.
fn ensure_main_lan_address(pkg: &mut crate::data_model::PackageDataEntry, port: u16) {
    if let Some(installed) = pkg.installed.as_mut() {
        let slot = installed
            .interface_addresses
            .entry("main".to_string())
            .or_insert_with(|| crate::data_model::InterfaceAddress {
                tor_address: String::new(),
                lan_address: None,
            });
        if slot.lan_address.is_none() {
            slot.lan_address = Some(format!("http://localhost:{port}"));
        }
    }
}
/// Returns the built-in frontend port for an app id, or `None` when the app
/// has no entry in the table.
fn fallback_package_port(app_id: &str) -> Option<u16> {
    const FALLBACK_PORTS: [(&str, u16); 6] = [
        ("fedimint", 8175),
        ("fedimintd", 8175),
        ("filebrowser", 8083),
        ("indeedhub", 7778),
        ("nginx-proxy-manager", 8081),
        ("nostr-rs-relay", 18081),
    ];
    FALLBACK_PORTS
        .iter()
        .find(|(known_id, _)| *known_id == app_id)
        .map(|&(_, port)| port)
}
/// Extracts the explicit TCP port from a URL-like string.
///
/// The scheme (`...://`) is optional. The host:port segment ends at the first
/// `/`, `?` or `#`, so a query string or fragment that directly follows the
/// port (e.g. `http://host:8080?x=1`) no longer breaks parsing. The port is
/// the text after the last `:` in that segment.
///
/// Returns `None` when there is no `:` in the segment or when the text after
/// it is not a valid `u16`.
fn port_from_url(url: &str) -> Option<u16> {
    let after_scheme = url.split_once("://").map_or(url, |(_, rest)| rest);
    // Cut at whichever delimiter comes first: path, query or fragment.
    let authority = after_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or(after_scheme);
    let (_, port) = authority.rsplit_once(':')?;
    port.parse().ok()
}
/// Register Archipelago DWN protocols on startup.
async fn register_dwn_protocols(data_dir: &std::path::Path) -> Result<()> {
use crate::network::dwn_store::{DwnStore, ProtocolDefinition};
@ -1211,10 +1368,19 @@ mod merge_tests {
// not clobber the transitional state owned by the RPC spawn task.
let existing = make_entry(PackageState::Stopping, Some("healthy"));
let fresh = make_entry(PackageState::Running, Some("starting"));
let merged = merge_preserving_transitional(&existing, &fresh);
let merged = merge_preserving_transitional(&existing, &fresh, true);
assert_eq!(merged.state, PackageState::Stopping);
}
// Without a user-stop marker (`false`), a `Stopping` entry whose fresh scan
// reports `Running` takes the fresh state and the fresh health reading.
#[test]
fn non_user_stopping_recovers_when_container_is_running() {
let existing = make_entry(PackageState::Stopping, Some("unknown"));
let fresh = make_entry(PackageState::Running, Some("healthy"));
let merged = merge_preserving_transitional(&existing, &fresh, false);
assert_eq!(merged.state, PackageState::Running);
assert_eq!(merged.health.as_deref(), Some("healthy"));
}
#[test]
fn merges_fresh_observability_fields() {
// Non-state observability fields (health, exit_code, installed)
@ -1224,7 +1390,7 @@ mod merge_tests {
existing.exit_code = None;
let mut fresh = make_entry(PackageState::Running, Some("unhealthy"));
fresh.exit_code = Some(0);
let merged = merge_preserving_transitional(&existing, &fresh);
let merged = merge_preserving_transitional(&existing, &fresh, true);
assert_eq!(merged.state, PackageState::Stopping);
assert_eq!(merged.health.as_deref(), Some("unhealthy"));
assert_eq!(merged.exit_code, Some(0));
@ -1234,7 +1400,7 @@ mod merge_tests {
fn stale_removing_recovers_when_container_is_running() {
let existing = make_entry(PackageState::Removing, Some("unknown"));
let fresh = make_entry(PackageState::Running, Some("healthy"));
let merged = merge_preserving_transitional(&existing, &fresh);
let merged = merge_preserving_transitional(&existing, &fresh, false);
assert_eq!(merged.state, PackageState::Running);
assert_eq!(merged.health.as_deref(), Some("healthy"));
}
@ -1272,4 +1438,20 @@ mod merge_tests {
TRANSITIONAL_STUCK_TIMEOUT
);
}
// `Stopping` is the one transitional state that resolves to `Stopped` when
// the container is absent.
#[test]
fn absent_stopping_transitions_to_stopped() {
assert_eq!(
absent_transitional_replacement(&PackageState::Stopping),
Some(PackageState::Stopped)
);
}
// `Installing` has no absence replacement, so the helper returns `None` and
// the state is left for its owner to resolve.
#[test]
fn absent_installing_still_waits_for_owner() {
assert_eq!(
absent_transitional_replacement(&PackageState::Installing),
None
);
}
}

View File

@ -8,9 +8,9 @@ pub mod runtime;
pub use bitcoin_simulator::{BitcoinSimulationMode, BitcoinSimulator};
pub use health_monitor::HealthMonitor;
pub use manifest::{
AppManifest, BuildConfig, ContainerConfig, Dependency, DerivedEnv, HealthCheck, HostFacts,
ManifestError, ResolvedSource, ResourceLimits, SecretEnv, SecretsProvider, SecurityPolicy,
Volume,
AppManifest, BuildConfig, ContainerConfig, Dependency, DerivedEnv, GeneratedFile, HealthCheck,
HostFacts, ManifestError, ResolvedSource, ResourceLimits, SecretEnv, SecretsProvider,
SecurityPolicy, Volume,
};
pub use podman_client::{
image_uses_insecure_registry, ContainerState, ContainerStatus, PodmanClient,

View File

@ -1,5 +1,5 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use thiserror::Error;
#[derive(Debug, Error)]
@ -42,6 +42,9 @@ pub struct AppDefinition {
#[serde(default)]
pub volumes: Vec<Volume>,
#[serde(default)]
pub files: Vec<GeneratedFile>,
#[serde(default)]
pub environment: Vec<String>,
@ -216,6 +219,8 @@ pub struct SecurityPolicy {
pub capabilities: Vec<String>,
#[serde(default = "default_true")]
pub readonly_root: bool,
#[serde(default = "default_true")]
pub no_new_privileges: bool,
#[serde(default = "default_network_policy")]
pub network_policy: String,
#[serde(default)]
@ -263,6 +268,14 @@ pub struct Volume {
pub tmpfs_options: Option<String>,
}
/// A file declared in an app manifest under `files`.
// NOTE(review): presumably the orchestrator writes `content` to `path` on the
// host before the container starts — confirm against the code that consumes
// `AppDefinition::files`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct GeneratedFile {
/// Location of the file. Manifest validation requires a non-empty, absolute
/// path that lies under the `source` of a non-tmpfs volume.
pub path: String,
/// File contents. Manifest validation rejects an empty string.
pub content: String,
/// Defaults to `false` when omitted from the manifest.
// NOTE(review): looks like this controls replacing an existing file — confirm.
#[serde(default)]
pub overwrite: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthCheck {
#[serde(rename = "type")]
@ -302,8 +315,16 @@ impl AppManifest {
}
pub fn validate(&self) -> Result<(), ManifestError> {
if self.app.id.is_empty() {
return Err(ManifestError::Invalid("app.id cannot be empty".to_string()));
if !is_valid_app_id(&self.app.id) {
return Err(ManifestError::Invalid(
"app.id must be lowercase ASCII letters, digits, or single hyphens".to_string(),
));
}
if self.app.name.trim().is_empty() {
return Err(ManifestError::Invalid(
"app.name cannot be empty".to_string(),
));
}
// Exactly one of container.image or container.build must be set. We can't
@ -355,6 +376,11 @@ impl AppManifest {
"container.network cannot be empty (omit the field to use default)".to_string(),
));
}
if is_dangerous_network_mode(n) {
return Err(ManifestError::Invalid(format!(
"container.network '{n}' is not allowed in app manifests"
)));
}
}
// custom_args: no empty strings (would inject literal "" into
@ -447,6 +473,11 @@ impl AppManifest {
}
}
validate_security(&self.app.security)?;
validate_ports(&self.app.ports)?;
validate_environment(&self.app.environment)?;
validate_devices(&self.app.devices)?;
// Volume tmpfs_options: only meaningful for type: tmpfs.
for (i, v) in self.app.volumes.iter().enumerate() {
if v.volume_type == "tmpfs" {
@ -466,6 +497,11 @@ impl AppManifest {
v.volume_type
)));
} else {
if v.volume_type != "bind" && v.volume_type != "volume" {
return Err(ManifestError::Invalid(format!(
"volumes[{i}].type must be bind, volume, or tmpfs"
)));
}
if v.source.is_empty() {
return Err(ManifestError::Invalid(format!(
"volumes[{i}] ({}) must set source",
@ -478,6 +514,45 @@ impl AppManifest {
v.volume_type
)));
}
if v.volume_type == "bind" {
validate_bind_source(i, &v.source)?;
} else if !is_valid_named_volume(&v.source) {
return Err(ManifestError::Invalid(format!(
"volumes[{i}].source must be a safe named volume"
)));
}
validate_container_path(i, &v.target)?;
validate_volume_options(i, &v.options)?;
}
}
for (i, f) in self.app.files.iter().enumerate() {
if f.path.is_empty() {
return Err(ManifestError::Invalid(format!(
"files[{i}].path cannot be empty"
)));
}
if !std::path::Path::new(&f.path).is_absolute() {
return Err(ManifestError::Invalid(format!(
"files[{i}].path must be absolute"
)));
}
if f.content.is_empty() {
return Err(ManifestError::Invalid(format!(
"files[{i}].content cannot be empty"
)));
}
let file_path = std::path::Path::new(&f.path);
let under_bind_mount = self
.app
.volumes
.iter()
.filter(|v| v.volume_type != "tmpfs" && !v.source.is_empty())
.any(|v| file_path.starts_with(std::path::Path::new(&v.source)));
if !under_bind_mount {
return Err(ManifestError::Invalid(format!(
"files[{i}].path must live under a bind-mounted volume source"
)));
}
}
@ -485,6 +560,195 @@ impl AppManifest {
}
}
/// Checks that an app id is non-empty, made only of lowercase ASCII letters,
/// ASCII digits and hyphens, and that hyphens never lead, trail, or appear
/// twice in a row.
fn is_valid_app_id(id: &str) -> bool {
    let hyphens_misplaced = id.starts_with('-') || id.ends_with('-') || id.contains("--");
    // Any byte of a non-ASCII character fails every branch below, so checking
    // bytes rejects exactly the same inputs as checking chars.
    let only_allowed_bytes = id
        .bytes()
        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-'));
    !id.is_empty() && !hyphens_misplaced && only_allowed_bytes
}
/// Reports whether a container network mode begins with `container:` or
/// `ns:`, the two prefixes that manifests are not allowed to use.
fn is_dangerous_network_mode(mode: &str) -> bool {
    ["container:", "ns:"]
        .iter()
        .any(|prefix| mode.starts_with(*prefix))
}
/// Validates the security section of a manifest.
///
/// # Errors
/// Returns `ManifestError::Invalid` when
/// * `network_policy` is non-empty and not one of `isolated`, `bridge`, `host`;
/// * a capability is not in the supported list; or
/// * a capability is listed more than once.
///
/// Capabilities are checked in order, and for each one the "unsupported"
/// check runs before the "duplicate" check.
fn validate_security(policy: &SecurityPolicy) -> Result<(), ManifestError> {
    const NETWORK_POLICIES: [&str; 3] = ["isolated", "bridge", "host"];
    const SUPPORTED_CAPS: [&str; 9] = [
        "CHOWN",
        "DAC_OVERRIDE",
        "FOWNER",
        "NET_ADMIN",
        "NET_BIND_SERVICE",
        "NET_RAW",
        "SETGID",
        "SETUID",
        "SYS_ADMIN",
    ];

    let requested_policy = policy.network_policy.as_str();
    // An empty policy is accepted as-is.
    if !(requested_policy.is_empty() || NETWORK_POLICIES.contains(&requested_policy)) {
        return Err(ManifestError::Invalid(format!(
            "security.network_policy must be one of {}",
            NETWORK_POLICIES.join(", ")
        )));
    }

    let mut already_listed: HashSet<&str> = HashSet::new();
    for cap in &policy.capabilities {
        let cap_name = cap.as_str();
        if !SUPPORTED_CAPS.contains(&cap_name) {
            return Err(ManifestError::Invalid(format!(
                "security.capabilities contains unsupported capability '{cap}'"
            )));
        }
        let first_occurrence = already_listed.insert(cap_name);
        if !first_occurrence {
            return Err(ManifestError::Invalid(format!(
                "security.capabilities contains duplicate capability '{cap}'"
            )));
        }
    }
    Ok(())
}
/// Validates the port mappings of a manifest.
///
/// # Errors
/// Returns `ManifestError::Invalid` when a mapping has a zero host or
/// container port, when its protocol is neither `tcp` nor `udp` (an empty
/// protocol counts as `tcp`), or when two mappings share the same host port
/// and protocol.
fn validate_ports(ports: &[PortMapping]) -> Result<(), ManifestError> {
    let mut claimed_bindings = HashSet::new();
    for (i, mapping) in ports.iter().enumerate() {
        if mapping.host == 0 || mapping.container == 0 {
            return Err(ManifestError::Invalid(format!(
                "ports[{i}].host and ports[{i}].container must be non-zero"
            )));
        }
        // An omitted protocol defaults to TCP.
        let protocol = match mapping.protocol.as_str() {
            "" => "tcp",
            explicit => explicit,
        };
        if !matches!(protocol, "tcp" | "udp") {
            return Err(ManifestError::Invalid(format!(
                "ports[{i}].protocol must be tcp or udp"
            )));
        }
        let newly_claimed = claimed_bindings.insert((mapping.host, protocol));
        if !newly_claimed {
            return Err(ManifestError::Invalid(format!(
                "ports contains duplicate host binding {}/{}",
                mapping.host, protocol
            )));
        }
    }
    Ok(())
}
/// Validates the `KEY=VALUE` environment entries of a manifest.
///
/// # Errors
/// Returns `ManifestError::Invalid` when an entry has no `=`, when the key
/// (the text before the first `=`) fails `is_valid_env_key`, or when a key
/// appears more than once.
fn validate_environment(env: &[String]) -> Result<(), ManifestError> {
    let mut known_keys: HashSet<&str> = HashSet::new();
    for (i, entry) in env.iter().enumerate() {
        // Only the first `=` separates key from value; the value may contain more.
        let key = match entry.split_once('=') {
            Some((name, _value)) => name,
            None => {
                return Err(ManifestError::Invalid(format!(
                    "environment[{i}] must be KEY=VALUE"
                )));
            }
        };
        if !is_valid_env_key(key) {
            return Err(ManifestError::Invalid(format!(
                "environment[{i}] has invalid key '{key}'"
            )));
        }
        let first_occurrence = known_keys.insert(key);
        if !first_occurrence {
            return Err(ManifestError::Invalid(format!(
                "environment contains duplicate key '{key}'"
            )));
        }
    }
    Ok(())
}
/// Returns `true` when `key` is a non-empty environment variable name that
/// starts with an ASCII letter or `_` and continues with ASCII letters,
/// digits or `_` only.
fn is_valid_env_key(key: &str) -> bool {
    let Some(first) = key.chars().next() else {
        // The empty string is never a valid key.
        return false;
    };
    let starts_well = first == '_' || first.is_ascii_alphabetic();
    starts_well
        && key
            .chars()
            .skip(1)
            .all(|rest| rest == '_' || rest.is_ascii_alphanumeric())
}
/// Validates the manifest's `devices` entries.
///
/// Every entry must start with `/dev/`, must not contain `..`, must name
/// something *below* `/dev` (the bare directory — `/dev/`, `/dev/.`,
/// `/dev//` — is rejected), and may appear only once.
///
/// The bare-directory check exists because a directory path handed to the
/// container runtime may be expanded to every device node beneath it, which
/// for `/dev/` would mean the host's entire device tree.
///
/// NOTE(review): entries are assumed to possibly use the
/// `host[:container[:permissions]]` form, so only the text before the first
/// `:` is inspected for the bare-directory check — confirm against the code
/// that consumes `devices`. Entries without `:` are checked whole.
///
/// # Errors
///
/// Returns [`ManifestError::Invalid`] for the first offending entry.
fn validate_devices(devices: &[String]) -> Result<(), ManifestError> {
    let mut seen = HashSet::new();
    for (i, device) in devices.iter().enumerate() {
        let host_path = device
            .split_once(':')
            .map_or(device.as_str(), |(host, _rest)| host);
        // `components()` drops redundant separators and interior `.`, so an
        // accepted path yields RootDir, "dev", and at least one more name.
        let names_device_node = std::path::Path::new(host_path)
            .components()
            .skip(2)
            .any(|part| matches!(part, std::path::Component::Normal(_)));

        if !device.starts_with("/dev/") || device.contains("..") || !names_device_node {
            return Err(ManifestError::Invalid(format!(
                "devices[{i}] must be an absolute /dev path"
            )));
        }
        if !seen.insert(device.as_str()) {
            return Err(ManifestError::Invalid(format!(
                "devices contains duplicate entry '{device}'"
            )));
        }
    }
    Ok(())
}
/// Validates the `source` of the volume at `index`.
///
/// * A non-absolute source is accepted only if [`is_valid_named_volume`]
///   approves it.
/// * An absolute source must not contain `..` and must either start with
///   `/var/lib/archipelago/` or be approved by
///   [`is_reviewed_host_bind_exception`].
///
/// # Errors
///
/// Returns [`ManifestError::Invalid`] naming `volumes[index].source` and the
/// rule that was violated.
fn validate_bind_source(index: usize, source: &str) -> Result<(), ManifestError> {
    let is_host_path = std::path::Path::new(source).is_absolute();
    if !is_host_path {
        // Relative text is never treated as a host path: it is either a named
        // volume or an error.
        return if is_valid_named_volume(source) {
            Ok(())
        } else {
            Err(ManifestError::Invalid(format!(
                "volumes[{index}].source must be absolute for host bind mounts or a safe named volume"
            )))
        };
    }

    // Traversal is rejected before the prefix check so that a path such as
    // `/var/lib/archipelago/../..` cannot pass on its prefix alone.
    if source.contains("..") {
        return Err(ManifestError::Invalid(format!(
            "volumes[{index}].source must not contain '..'"
        )));
    }

    let inside_data_root = source.starts_with("/var/lib/archipelago/");
    match inside_data_root || is_reviewed_host_bind_exception(source) {
        true => Ok(()),
        false => Err(ManifestError::Invalid(format!(
            "volumes[{index}].source must be under /var/lib/archipelago or a reviewed host-bind exception"
        ))),
    }
}
/// Returns `true` for the exact host paths that may be bind-mounted even
/// though they lie outside `/var/lib/archipelago/`.
///
/// The match is exact: sub-paths and trailing slashes are not accepted.
fn is_reviewed_host_bind_exception(source: &str) -> bool {
    matches!(
        source,
        "/run/user/1000/podman/podman.sock" | "/var/run/dbus"
    )
}
/// Returns `true` when `source` is acceptable as a named-volume identifier.
///
/// A valid name:
/// * is non-empty,
/// * starts with an ASCII letter or digit,
/// * continues with ASCII letters, digits, `-`, `_` or `.` only (so it can
///   never contain `/`), and
/// * does not contain `..`.
///
/// Requiring an alphanumeric first character rejects `.` (which is a relative
/// path, not a name), option-looking values such as `-rf`, and other names
/// the container runtime itself refuses (Podman names must match
/// `[a-zA-Z0-9][a-zA-Z0-9_.-]*`).
fn is_valid_named_volume(source: &str) -> bool {
    let mut chars = source.chars();
    let Some(first) = chars.next() else {
        return false;
    };
    first.is_ascii_alphanumeric()
        && !source.contains("..")
        && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.'))
}
/// Validates the `target` (in-container mount point) of the volume at `index`.
///
/// The target must be an absolute path and must not contain `..` anywhere.
///
/// # Errors
///
/// Returns [`ManifestError::Invalid`] naming `volumes[index].target` otherwise.
fn validate_container_path(index: usize, target: &str) -> Result<(), ManifestError> {
    let is_absolute = std::path::Path::new(target).is_absolute();
    let has_traversal = target.contains("..");
    if is_absolute && !has_traversal {
        return Ok(());
    }
    Err(ManifestError::Invalid(format!(
        "volumes[{index}].target must be an absolute container path without '..'"
    )))
}
/// Validates the mount `options` of the volume at `index`.
///
/// Each option must be one of `rw`, `ro`, `z`, `Z`, `shared`, `rshared`,
/// `slave` or `rslave`, and may be given only once. Combinations are not
/// cross-checked (for example `rw` together with `ro` is not rejected here).
///
/// # Errors
///
/// Returns [`ManifestError::Invalid`] for the first unsupported or repeated
/// option.
fn validate_volume_options(index: usize, options: &[String]) -> Result<(), ManifestError> {
    const SUPPORTED_OPTIONS: [&str; 8] =
        ["rw", "ro", "z", "Z", "shared", "rshared", "slave", "rslave"];

    let mut already_given = HashSet::new();
    options.iter().try_for_each(|option| {
        // Support is checked before repetition, so an unsupported option
        // given twice is reported as unsupported.
        if !SUPPORTED_OPTIONS.contains(&option.as_str()) {
            return Err(ManifestError::Invalid(format!(
                "volumes[{index}].options contains unsupported option '{option}'"
            )));
        }
        if already_given.insert(option.as_str()) {
            Ok(())
        } else {
            Err(ManifestError::Invalid(format!(
                "volumes[{index}].options contains duplicate option '{option}'"
            )))
        }
    })
}
/// Host facts available to `derived_env` templates at apply time.
///
/// Mirrors the values `scripts/container-specs.sh:detect_environment()`
@ -864,6 +1128,38 @@ app:
);
}
// Checks where a manifest's generated `files` are allowed to live.
//
// First parses a manifest whose single `files` entry sits beneath the
// `/var/lib/archipelago/test-app` bind source and expects it to be kept. Then
// rewrites that path to `/etc/test-app/config.yaml` (outside any bind source)
// and expects parsing to fail with an error mentioning
// "bind-mounted volume source".
#[test]
fn generated_files_must_live_under_bind_mounts() {
let yaml = r#"
app:
id: test-app
name: Test App
version: 1.0.0
container:
image: test/image:latest
volumes:
- type: bind
source: /var/lib/archipelago/test-app
target: /data
files:
- path: /var/lib/archipelago/test-app/config.yaml
content: |
key: value
"#;
// Accepted case: the file path is under the bind-mounted source.
let manifest = AppManifest::parse(yaml).unwrap();
assert_eq!(manifest.app.files.len(), 1);
// Rejected case: same manifest, file path moved outside the bind source.
let bad = yaml.replace(
"/var/lib/archipelago/test-app/config.yaml",
"/etc/test-app/config.yaml",
);
let err = AppManifest::parse(&bad).unwrap_err();
assert!(
format!("{err}").contains("bind-mounted volume source"),
"unexpected error: {err}"
);
}
#[test]
fn empty_custom_arg_is_rejected() {
let yaml = r#"
@ -1089,6 +1385,157 @@ app:
}
}
// Table-driven negative test for manifest validation.
//
// Each case is `(case name, manifest YAML, expected error substring)`. Every
// YAML document must fail `AppManifest::parse`, and the rendered error must
// contain the expected substring; the case name is only used in the failure
// message.
#[test]
fn unsafe_manifest_values_are_rejected() {
let cases = [
// App id with an uppercase letter and an underscore.
(
"bad app id",
r#"
app:
id: Bad_App
name: Bad
version: 1.0.0
container:
image: test/image:latest
"#,
"app.id",
),
// Capability that is not on the supported list.
(
"unsupported capability",
r#"
app:
id: bad-cap
name: Bad
version: 1.0.0
container:
image: test/image:latest
security:
capabilities: [SYS_MODULE]
"#,
"unsupported capability",
),
// Absolute bind source outside the allowed locations.
(
"docker socket bind",
r#"
app:
id: bad-bind
name: Bad
version: 1.0.0
container:
image: test/image:latest
volumes:
- type: bind
source: /var/run/docker.sock
target: /var/run/docker.sock
"#,
"reviewed host-bind exception",
),
// Relative source containing '/', so it is neither absolute nor a named volume.
(
"path-like relative bind source",
r#"
app:
id: bad-bind
name: Bad
version: 1.0.0
container:
image: test/image:latest
volumes:
- type: bind
source: data/cache
target: /data
"#,
"absolute for host bind mounts",
),
// Environment key starting with a digit.
(
"bad environment key",
r#"
app:
id: bad-env
name: Bad
version: 1.0.0
container:
image: test/image:latest
environment:
- 1BAD=value
"#,
"invalid key",
),
// Two mappings claiming host port 8080/tcp.
(
"duplicate host port",
r#"
app:
id: bad-port
name: Bad
version: 1.0.0
container:
image: test/image:latest
ports:
- { host: 8080, container: 80, protocol: tcp }
- { host: 8080, container: 81, protocol: tcp }
"#,
"duplicate host binding",
),
// Device path that does not start with /dev/.
(
"bad device",
r#"
app:
id: bad-device
name: Bad
version: 1.0.0
container:
image: test/image:latest
devices:
- /tmp/fake-device
"#,
"absolute /dev path",
),
// Network mode using the `container:` prefix.
(
"container network namespace",
r#"
app:
id: bad-network
name: Bad
version: 1.0.0
container:
image: test/image:latest
network: container:host
"#,
"not allowed",
),
];
for (name, yaml, expected) in cases {
// `unwrap_err` panics if a case unexpectedly parses successfully.
let err = AppManifest::parse(yaml).unwrap_err();
let msg = format!("{err}");
assert!(
msg.contains(expected),
"case {name} expected '{expected}', got: {msg}"
);
}
}
// Positive counterpart to the bind-source rejection cases: a manifest that
// bind-mounts `/run/user/1000/podman/podman.sock` and `/var/run/dbus` — both
// outside `/var/lib/archipelago/` — must still parse. The test passes as long
// as `AppManifest::parse` returns `Ok`; the parsed value is not inspected.
#[test]
fn reviewed_host_bind_exceptions_parse() {
let yaml = r#"
app:
id: reviewed-binds
name: Reviewed Binds
version: 1.0.0
container:
image: test/image:latest
volumes:
- type: bind
source: /run/user/1000/podman/podman.sock
target: /var/run/docker.sock
options: [rw]
- type: bind
source: /var/run/dbus
target: /var/run/dbus
options: [ro]
"#;
AppManifest::parse(yaml).unwrap();
}
#[test]
fn parse_every_real_manifest() {
let app_manifests = list_repo_manifests();
@ -1099,7 +1546,6 @@ app:
let mut failures: Vec<String> = Vec::new();
let mut modern_count = 0usize;
let mut legacy_count = 0usize;
for path in app_manifests {
let content = fs::read_to_string(&path).expect("read manifest");
let parsed_yaml: serde_yaml::Value = match serde_yaml::from_str(&content) {
@ -1121,15 +1567,14 @@ app:
failures.push(format!("{}: {err}", path.display()));
}
} else {
legacy_count += 1;
failures.push(format!(
"{}: expected modern app-schema manifest",
path.display()
));
}
}
assert!(modern_count > 0, "no modern app-schema manifests found");
assert!(
legacy_count > 0,
"expected at least one legacy manifest shape"
);
assert!(
failures.is_empty(),

View File

@ -56,9 +56,9 @@ pub enum ContainerState {
impl From<&str> for ContainerState {
fn from(s: &str) -> Self {
match s.to_lowercase().as_str() {
"created" => ContainerState::Created,
"created" | "initialized" => ContainerState::Created,
"running" => ContainerState::Running,
"stopping" => ContainerState::Stopping,
"stopping" | "removing" => ContainerState::Stopping,
"stopped" => ContainerState::Stopped,
"exited" => ContainerState::Exited,
"paused" => ContainerState::Paused,
@ -129,7 +129,6 @@ impl PodmanClient {
"filebrowser" => "http://localhost:8083",
"nginx-proxy-manager" => "http://localhost:8081",
"portainer" => "http://localhost:9000",
"saleor" => "http://localhost:9011",
"uptime-kuma" => "http://localhost:3002",
"fedimint" | "fedimintd" => "http://localhost:8175",
"fedimint-gateway" => "http://localhost:8176",
@ -390,7 +389,7 @@ impl PodmanClient {
"cap_add": cap_add,
"cap_drop": cap_drop,
"read_only_filesystem": manifest.app.security.readonly_root,
"no_new_privileges": true,
"no_new_privileges": manifest.app.security.no_new_privileges,
"restart_policy": "unless-stopped",
"restart_tries": 5,
"netns": {
@ -635,6 +634,7 @@ fn podman_network_settings(
Some("bridge") => ("bridge", None),
Some("none") => ("none", None),
Some("slirp4netns") => ("slirp4netns", None),
Some("pasta") => ("pasta", None),
Some("private") => ("private", None),
Some(custom) => ("bridge", Some(custom.to_string())),
None if network_policy == "host" => ("host", None),

View File

@ -7,6 +7,7 @@ use std::time::Duration;
use tokio::process::Command as TokioCommand;
const PODMAN_CLI_DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);
const PODMAN_CLI_IMAGE_CHECK_TIMEOUT: Duration = Duration::from_secs(10);
const PODMAN_CLI_BUILD_TIMEOUT: Duration = Duration::from_secs(900);
#[async_trait]
@ -150,7 +151,25 @@ impl ContainerRuntime for PodmanRuntime {
if is_missing_container_error(&stderr) {
return Ok(());
}
Err(api_err.context(format!("podman rm fallback failed: {}", stderr.trim())))
let zero_timeout = self.podman_cli(&["rm", "-f", "--time", "0", name]).await?;
if zero_timeout.status.success() {
return Ok(());
}
let _ = self.podman_cli(&["container", "cleanup", name]).await;
let cleanup_rm = self.podman_cli(&["rm", "-f", name]).await?;
if cleanup_rm.status.success() {
return Ok(());
}
let cleanup_stderr = String::from_utf8_lossy(&cleanup_rm.stderr);
if is_missing_container_error(&cleanup_stderr) {
return Ok(());
}
Err(api_err.context(format!(
"podman rm fallback failed: {}; cleanup rm failed: {}",
stderr.trim(),
cleanup_stderr.trim()
)))
}
}
}
@ -196,20 +215,26 @@ impl ContainerRuntime for PodmanRuntime {
}
async fn image_exists(&self, image_ref: &str) -> Result<bool> {
// `podman image exists` returns 0 if present, 1 if absent. Any other
// exit code is an environment failure we should surface.
let output = self.podman_cli(&["image", "exists", image_ref]).await?;
// Avoid `podman image exists`: on production nodes with a stressed
// rootless store it can hang even when targeted at one image. A bounded
// inspect is the local-storage probe the trait contract describes.
let output = self
.podman_cli_timeout(
&["image", "inspect", image_ref],
PODMAN_CLI_IMAGE_CHECK_TIMEOUT,
)
.await?;
match output.status.code() {
Some(0) => Ok(true),
Some(1) => Ok(false),
Some(code) => {
let stderr = String::from_utf8_lossy(&output.stderr);
Err(anyhow::anyhow!(
"podman image exists {image_ref} exited with {code}: {stderr}"
"podman image inspect {image_ref} exited with {code}: {stderr}"
))
}
None => Err(anyhow::anyhow!(
"podman image exists {image_ref} terminated by signal"
"podman image inspect {image_ref} terminated by signal"
)),
}
}

View File

@ -18,6 +18,7 @@
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', sans-serif;
min-height: 100vh;
background: #000;
color: white;
overflow-x: hidden;
}
@ -555,6 +556,87 @@
</button>
</div>
</div>
<!-- Transaction Relay Sharing card.
     Element ids in this card are looked up by the relay script functions
     (loadRelayAccess, saveRelaySettings, renderRelayPeers, ...); keep them
     stable. Free-text controls carry aria-label because a placeholder alone
     does not give a control an accessible name. -->
<div class="glass-card p-6 mb-8">
    <div class="flex flex-col lg:flex-row lg:items-start lg:justify-between gap-4 mb-6">
        <div>
            <h2 class="text-xl font-semibold text-white mb-2">Transaction Relay Sharing</h2>
            <p class="text-white/70 text-sm">Trusted peer access for broadcasting transactions through this node</p>
        </div>
        <div class="px-3 py-2 bg-white/5 rounded-lg text-sm">
            <span class="text-white/60">Local node</span>
            <span class="ml-2 font-medium text-yellow-300" id="relaySyncStatus">Checking...</span>
        </div>
    </div>
    <div class="grid grid-cols-1 lg:grid-cols-3 gap-4 mb-5">
        <div class="p-4 bg-white/5 rounded-lg">
            <div class="text-xs uppercase tracking-wide text-white/50 mb-2">HTTPS Endpoint</div>
            <div class="text-sm text-white/80 font-mono break-all min-h-[1.5rem]" id="relayHttpsEndpoint">Not configured</div>
        </div>
        <div class="p-4 bg-white/5 rounded-lg">
            <div class="text-xs uppercase tracking-wide text-white/50 mb-2">HTTP Endpoint</div>
            <div class="text-sm text-white/80 font-mono break-all min-h-[1.5rem]" id="relayHttpEndpoint">Not configured</div>
        </div>
        <div class="p-4 bg-white/5 rounded-lg">
            <div class="text-xs uppercase tracking-wide text-white/50 mb-2">Tor Endpoint</div>
            <div class="text-sm text-white/80 font-mono break-all min-h-[1.5rem]" id="relayTorEndpoint">Not configured</div>
        </div>
    </div>
    <div class="grid grid-cols-1 xl:grid-cols-2 gap-6">
        <div class="space-y-4">
            <div class="grid grid-cols-1 md:grid-cols-3 gap-3">
                <label class="flex items-center justify-between gap-3 p-3 bg-white/5 rounded-lg">
                    <span class="text-white/80 text-sm">Allow peer use</span>
                    <input id="relayEnabledToggle" type="checkbox" class="h-5 w-5 accent-orange-500" onchange="saveRelaySettings()">
                </label>
                <label class="flex items-center justify-between gap-3 p-3 bg-white/5 rounded-lg">
                    <span class="text-white/80 text-sm">Allow requests</span>
                    <input id="relayRequestsToggle" type="checkbox" class="h-5 w-5 accent-orange-500" onchange="saveRelaySettings()">
                </label>
                <label class="flex items-center justify-between gap-3 p-3 bg-white/5 rounded-lg">
                    <span class="text-white/80 text-sm">Serve over Tor</span>
                    <input id="relayTorToggle" type="checkbox" class="h-5 w-5 accent-orange-500" onchange="saveRelaySettings()">
                </label>
            </div>
            <div class="grid grid-cols-1 md:grid-cols-2 gap-3">
                <input id="relayHttpsInput" aria-label="HTTPS relay endpoint" class="w-full px-3 py-2 rounded-lg bg-black/40 border border-white/10 text-sm text-white placeholder-white/35" placeholder="https://rpc.example.com/">
                <input id="relayHttpInput" aria-label="HTTP relay endpoint" class="w-full px-3 py-2 rounded-lg bg-black/40 border border-white/10 text-sm text-white placeholder-white/35" placeholder="http://192.168.1.2/">
            </div>
            <div class="grid grid-cols-1 md:grid-cols-[1fr_auto] gap-3">
                <input id="relayTorInput" aria-label="Tor relay endpoint" class="w-full px-3 py-2 rounded-lg bg-black/40 border border-white/10 text-sm text-white placeholder-white/35" placeholder="http://exampleonion.onion/">
                <button type="button" class="glass-button px-4 py-2 rounded-lg text-sm font-medium" onclick="createRelayTorService()">Create Tor</button>
            </div>
            <button type="button" class="gradient-button px-4 py-2 rounded-lg text-sm font-medium" onclick="saveRelaySettings()">Save Sharing Settings</button>
        </div>
        <div class="space-y-4">
            <div class="grid grid-cols-1 md:grid-cols-[1fr_auto] gap-3">
                <select id="relayPeerSelect" aria-label="Trusted node" class="w-full px-3 py-2 rounded-lg bg-black/40 border border-white/10 text-sm text-white" onchange="saveRelaySettings()">
                    <option value="">No trusted nodes available</option>
                </select>
                <button type="button" id="relayRequestButton" class="glass-button px-4 py-2 rounded-lg text-sm font-medium" onclick="requestPeerRelay()">Request Access</button>
            </div>
            <textarea id="relayRequestMessage" aria-label="Optional note for the peer" class="w-full px-3 py-2 rounded-lg bg-black/40 border border-white/10 text-sm text-white placeholder-white/35 min-h-[5rem]" placeholder="Optional note for the peer"></textarea>
            <div class="p-3 bg-white/5 rounded-lg">
                <div class="flex items-center justify-between gap-3">
                    <span class="text-white/70 text-sm">Restricted RPC user</span>
                    <span class="text-white/90 text-sm font-mono" id="relayCredentialUser">txrelay</span>
                </div>
                <div class="text-xs mt-2 text-white/50" id="relayCredentialStatus">Credential status unavailable</div>
            </div>
            <div>
                <div class="text-sm font-semibold text-white mb-2">Relay Requests</div>
                <div class="space-y-2" id="relayRequestsList">
                    <div class="text-sm text-white/50 p-3 bg-white/5 rounded-lg">No relay requests</div>
                </div>
            </div>
            <!-- role="status" makes save/request results audible to screen readers. -->
            <div class="text-sm text-white/60" id="relayStatusMessage" role="status" aria-live="polite"></div>
        </div>
    </div>
</div>
</div>
<!-- Settings Modal -->
@ -608,6 +690,7 @@
// RPC Configuration - Use local Nginx proxy within container
const RPC_ENDPOINT = 'bitcoin-rpc/';
const STATUS_ENDPOINT = 'bitcoin-status';
const ARCHY_RPC_ENDPOINT = 'rpc/v1';
console.log('[Bitcoin UI] RPC Endpoint:', RPC_ENDPOINT);
// Make RPC call to Bitcoin node via local proxy
@ -654,6 +737,220 @@
return response.json();
}
// Returns the raw (still URL-encoded) value of the cookie called `name`, or
// '' when no such cookie exists or its value is empty. Everything after the
// first '=' of the matching entry is returned, so values that themselves
// contain '=' are preserved.
function cookieValue(name) {
    const wantedPrefix = `${name}=`;
    for (const entry of document.cookie.split('; ')) {
        if (entry.startsWith(wantedPrefix)) {
            // First match wins, mirroring Array.prototype.find.
            return entry.slice(entry.indexOf('=') + 1) || '';
        }
    }
    return '';
}
// Sends `{ method, params }` as a JSON POST to ARCHY_RPC_ENDPOINT and resolves
// with the `result` field of the response body.
//
// The value of the `csrf` cookie, when present, is URL-decoded and sent in the
// `X-CSRF-Token` header. Cookies are included with the request and the
// response is never served from cache.
//
// Throws an Error when the HTTP status is not OK or the body carries an
// `error` member; the message is `error.message` when available, otherwise
// "Archipelago RPC <status>". A body that is not valid JSON is treated as {}.
async function callArchyRPC(method, params = {}) {
    const csrfCookie = cookieValue('csrf');
    const headers = csrfCookie
        ? { 'Content-Type': 'application/json', 'X-CSRF-Token': decodeURIComponent(csrfCookie) }
        : { 'Content-Type': 'application/json' };

    const response = await fetch(ARCHY_RPC_ENDPOINT, {
        method: 'POST',
        headers,
        credentials: 'include',
        cache: 'no-store',
        body: JSON.stringify({ method, params })
    });

    let body;
    try {
        body = await response.json();
    } catch {
        body = {};
    }

    if (response.ok && !body.error) {
        return body.result;
    }
    throw new Error(body.error?.message || `Archipelago RPC ${response.status}`);
}
// Converts `value` to a string (null/undefined become '') and replaces the
// five HTML-significant characters & < > " ' with their entity forms so the
// result can be placed in element content or a quoted attribute value.
function escapeHtml(value) {
    const entityFor = new Map([
        ['&', '&amp;'],
        ['<', '&lt;'],
        ['>', '&gt;'],
        ['"', '&quot;'],
        ["'", '&#39;']
    ]);
    let escaped = '';
    for (const char of String(value ?? '')) {
        escaped += entityFor.get(char) ?? char;
    }
    return escaped;
}
// Writes `value` into the text content of the element with the given id,
// using `fallback` when `value` is falsy. Does nothing if the element is not
// in the document.
function setText(id, value, fallback = 'Not configured') {
    const target = document.getElementById(id);
    if (!target) {
        return;
    }
    target.textContent = value ? value : fallback;
}
// Renders the relay request list into #relayRequestsList.
//
// `requests` is an array of request objects; the fields read here are `id`,
// `peer_name`, `peer_onion`, `peer_pubkey`, `message`, `approved_endpoint`,
// `direction` and `status`. An empty (or non-array) value renders the
// "No relay requests" placeholder. Incoming requests that are still pending
// get Approve / Reject buttons.
//
// Security note: the request id is carried in a `data-relay-request-id`
// attribute and read back through `this.dataset` in the click handler. It must
// NOT be spliced into the handler source as a quoted string: the HTML parser
// decodes entities such as &#39; before the handler is compiled, so an id
// containing a quote would terminate the string literal and run as script.
function renderRelayRequests(requests = []) {
    const list = document.getElementById('relayRequestsList');
    if (!list) return;
    if (!Array.isArray(requests) || !requests.length) {
        list.innerHTML = '<div class="text-sm text-white/50 p-3 bg-white/5 rounded-lg">No relay requests</div>';
        return;
    }
    list.innerHTML = requests.map(req => {
        const name = escapeHtml(req.peer_name || req.peer_onion || req.peer_pubkey);
        const message = req.message ? `<div class="text-xs text-white/50 mt-1">${escapeHtml(req.message)}</div>` : '';
        const endpoint = req.approved_endpoint ? `<div class="text-xs text-white/50 mt-1 font-mono break-all">${escapeHtml(req.approved_endpoint)}</div>` : '';
        const statusClass = req.status === 'approved'
            ? 'text-green-300'
            : req.status === 'rejected'
                ? 'text-red-300'
                : 'text-yellow-300';
        // Attribute-context escaping is sufficient here because the value is
        // only ever read back as data, never parsed as code.
        const requestId = escapeHtml(req.id);
        const actions = req.direction === 'incoming' && req.status === 'pending'
            ? `<div class="flex gap-2 mt-3">
<button class="glass-button px-3 py-2 rounded-lg text-xs font-medium" data-relay-request-id="${requestId}" onclick="approveRelayRequest(this.dataset.relayRequestId)">Approve</button>
<button class="glass-button px-3 py-2 rounded-lg text-xs font-medium" data-relay-request-id="${requestId}" onclick="rejectRelayRequest(this.dataset.relayRequestId)">Reject</button>
</div>`
            : '';
        return `<div class="p-3 bg-white/5 rounded-lg">
<div class="flex items-center justify-between gap-3">
<div class="text-sm text-white/80">${name}</div>
<div class="text-xs uppercase ${statusClass}">${escapeHtml(req.direction)} · ${escapeHtml(req.status)}</div>
</div>
${message}
${endpoint}
${actions}
</div>`;
    }).join('');
}
// Fills the #relayPeerSelect dropdown with the trusted peers and enables or
// disables it together with #relayRequestButton.
//
// - When the local node is not synced, or there are no peers, the dropdown
//   shows a single explanatory option and both controls are disabled.
// - Otherwise a "Choose a trusted node" option is followed by one option per
//   peer (value = pubkey; label = name, onion, or the first 16 characters of
//   the pubkey), with " · approved" appended for peers whose `relay_approved`
//   is truthy and the entry matching `selectedPeer` pre-selected.
function renderRelayPeers(peers = [], selectedPeer = '', localSynced = true) {
    const select = document.getElementById('relayPeerSelect');
    const button = document.getElementById('relayRequestButton');
    if (!select) return;

    // Shows one placeholder option and locks both controls.
    const lockWith = placeholderText => {
        select.innerHTML = `<option value="">${placeholderText}</option>`;
        select.disabled = true;
        if (button) button.disabled = true;
    };

    if (!localSynced) {
        lockWith('Local Bitcoin node must finish syncing first');
        return;
    }
    if (!peers.length) {
        lockWith('No trusted nodes available');
        return;
    }

    select.disabled = false;
    if (button) button.disabled = false;

    let optionsHtml = '<option value="">Choose a trusted node</option>';
    for (const peer of peers) {
        const label = escapeHtml(peer.name || peer.onion || peer.pubkey.slice(0, 16));
        const approvedSuffix = peer.relay_approved ? ' · approved' : '';
        const selectedAttr = peer.pubkey === selectedPeer ? ' selected' : '';
        optionsHtml += `<option value="${escapeHtml(peer.pubkey)}"${selectedAttr}>${label}${approvedSuffix}</option>`;
    }
    select.innerHTML = optionsHtml;
}
// Fetches `bitcoin.relay-status` and refreshes every part of the relay
// sharing card: endpoint read-outs, local sync badge, the three toggles, the
// endpoint inputs, the peer dropdown, the request list and the credential
// summary. On failure the error is logged and shown in #relayStatusMessage;
// nothing is thrown to the caller.
async function loadRelayAccess() {
    const statusEl = document.getElementById('relayStatusMessage');
    try {
        const relay = await callArchyRPC('bitcoin.relay-status');
        const settings = relay.settings || {};
        const local = relay.local_node || {};
        const isSynced = !!local.synced;

        // Read-only endpoint display.
        const endpointReadouts = [
            ['relayHttpsEndpoint', settings.https_endpoint],
            ['relayHttpEndpoint', settings.http_endpoint],
            ['relayTorEndpoint', settings.tor_endpoint]
        ];
        for (const [id, endpoint] of endpointReadouts) {
            setText(id, endpoint);
        }

        // Sync badge: green when synchronized, yellow otherwise.
        const syncEl = document.getElementById('relaySyncStatus');
        if (syncEl) {
            syncEl.textContent = isSynced ? 'Synchronized' : 'Not synchronized';
            syncEl.className = isSynced ? 'ml-2 font-medium text-green-300' : 'ml-2 font-medium text-yellow-300';
        }

        // Toggles mirror the stored settings.
        const toggleStates = [
            ['relayEnabledToggle', settings.enabled_for_peers],
            ['relayRequestsToggle', settings.allow_peer_requests],
            ['relayTorToggle', settings.allow_tor]
        ];
        for (const [id, flag] of toggleStates) {
            const toggle = document.getElementById(id);
            if (toggle) toggle.checked = !!flag;
        }

        // Editable endpoint fields are left alone while focused so a refresh
        // does not overwrite what the user is typing.
        const endpointInputs = [
            ['relayHttpsInput', settings.https_endpoint],
            ['relayHttpInput', settings.http_endpoint],
            ['relayTorInput', settings.tor_endpoint]
        ];
        for (const [id, endpoint] of endpointInputs) {
            const input = document.getElementById(id);
            if (input && document.activeElement !== input) {
                input.value = endpoint || '';
            }
        }

        renderRelayPeers(relay.trusted_nodes || [], settings.selected_peer_pubkey || '', isSynced);
        renderRelayRequests(relay.requests || []);

        const credentials = relay.credentials;
        setText('relayCredentialUser', credentials?.username || 'txrelay', 'txrelay');
        const credentialSummary = credentials?.available
            ? `Credential file ready: ${credentials.client_env_path}. ${credentials.restart_hint || ''}`
            : 'Restricted relay credential will be generated when peer sharing is enabled';
        setText('relayCredentialStatus', credentialSummary, 'Credential status unavailable');

        if (statusEl) statusEl.textContent = '';
    } catch (error) {
        console.warn('[Bitcoin UI] relay status failed', error);
        if (statusEl) statusEl.textContent = `Relay controls unavailable: ${error.message}`;
    }
}
// Collects the current state of the relay sharing controls, sends it via
// `bitcoin.relay-update-settings`, and reloads the card on success.
//
// `allow_https` / `allow_http` are derived from whether the corresponding
// endpoint field is non-empty after trimming. The outcome ("Relay settings
// saved." or "Save failed: …") is written to #relayStatusMessage.
async function saveRelaySettings() {
    const statusEl = document.getElementById('relayStatusMessage');
    const isChecked = id => !!document.getElementById(id)?.checked;
    // undefined when the element is missing, otherwise the trimmed value.
    const trimmedValue = id => document.getElementById(id)?.value.trim();

    const payload = {
        enabled_for_peers: isChecked('relayEnabledToggle'),
        allow_peer_requests: isChecked('relayRequestsToggle'),
        allow_tor: isChecked('relayTorToggle'),
        allow_https: !!trimmedValue('relayHttpsInput'),
        allow_http: !!trimmedValue('relayHttpInput'),
        selected_peer_pubkey: document.getElementById('relayPeerSelect')?.value || '',
        https_endpoint: trimmedValue('relayHttpsInput') || '',
        http_endpoint: trimmedValue('relayHttpInput') || '',
        tor_endpoint: trimmedValue('relayTorInput') || ''
    };

    let outcome;
    try {
        await callArchyRPC('bitcoin.relay-update-settings', payload);
        if (statusEl) statusEl.textContent = 'Relay settings saved.';
        await loadRelayAccess();
        return;
    } catch (error) {
        outcome = `Save failed: ${error.message}`;
    }
    if (statusEl) statusEl.textContent = outcome;
}
// Asks the peer chosen in #relayPeerSelect for relay access via
// `bitcoin.relay-request-peer`, attaching the optional note from
// #relayRequestMessage, then reloads the card. Without a selected peer it only
// shows a hint. Results and errors are reported in #relayStatusMessage.
async function requestPeerRelay() {
    const statusEl = document.getElementById('relayStatusMessage');
    const announce = text => {
        if (statusEl) statusEl.textContent = text;
    };

    const peerPubkey = document.getElementById('relayPeerSelect')?.value;
    if (!peerPubkey) {
        announce('Choose a trusted node first.');
        return;
    }

    try {
        const note = document.getElementById('relayRequestMessage')?.value || '';
        await callArchyRPC('bitcoin.relay-request-peer', {
            peer_pubkey: peerPubkey,
            message: note
        });
        announce('Relay access request sent.');
        await loadRelayAccess();
    } catch (error) {
        announce(`Request failed: ${error.message}`);
    }
}
// Approves the relay request with the given id by delegating to
// updateRelayRequest with the `bitcoin.relay-approve-request` RPC method.
async function approveRelayRequest(id) {
await updateRelayRequest('bitcoin.relay-approve-request', id);
}
// Rejects the relay request with the given id by delegating to
// updateRelayRequest with the `bitcoin.relay-reject-request` RPC method.
async function rejectRelayRequest(id) {
await updateRelayRequest('bitcoin.relay-reject-request', id);
}
// Calls the given relay RPC `method` with `{ id }`, then reloads the card.
// Success ("Relay request updated.") or failure ("Update failed: …") is shown
// in #relayStatusMessage; errors are not rethrown.
async function updateRelayRequest(method, id) {
    const statusEl = document.getElementById('relayStatusMessage');
    const announce = text => {
        if (statusEl) statusEl.textContent = text;
    };
    try {
        await callArchyRPC(method, { id });
        announce('Relay request updated.');
        await loadRelayAccess();
    } catch (error) {
        announce(`Update failed: ${error.message}`);
    }
}
// Requests creation of the relay Tor service via
// `bitcoin.relay-create-tor-service`, then reloads the card. Success
// ("Tor service requested.") or failure ("Tor setup failed: …") is shown in
// #relayStatusMessage; errors are not rethrown.
async function createRelayTorService() {
    const statusEl = document.getElementById('relayStatusMessage');
    const announce = text => {
        if (statusEl) statusEl.textContent = text;
    };
    try {
        await callArchyRPC('bitcoin.relay-create-tor-service');
        announce('Tor service requested.');
        await loadRelayAccess();
    } catch (error) {
        announce(`Tor setup failed: ${error.message}`);
    }
}
// Implementation branding — detected from getnetworkinfo.subversion.
// Bitcoin Knots identifies as "/Satoshi:<ver>/Knots:<date>/", Bitcoin Core as "/Satoshi:<ver>/".
let brandingApplied = false;
@ -720,11 +1017,11 @@
syncStatusText.textContent = status.error || 'Bitcoin node is reconnecting... showing last known values';
syncStatusText.className = 'text-yellow-300 text-sm font-medium';
} else if (consecutiveRpcFailures < 6) {
syncStatusText.textContent = status.error || 'Connecting to Bitcoin node...';
syncStatusText.textContent = status.error || 'Bitcoin node is starting or busy syncing...';
syncStatusText.className = 'text-yellow-300 text-sm font-medium';
} else {
syncStatusText.textContent = status.error || 'Bitcoin node is not responding yet';
syncStatusText.className = 'text-red-400 text-sm font-medium';
syncStatusText.textContent = status.error || 'Bitcoin node is still syncing; retrying automatically...';
syncStatusText.className = 'text-yellow-300 text-sm font-medium';
}
}
if (syncIcon) {
@ -910,8 +1207,8 @@
if (syncStatusText) {
const hasRecentData = lastSuccessfulUpdateAt > 0 && Date.now() - lastSuccessfulUpdateAt < 120000;
syncStatusText.textContent = hasRecentData
? 'Bitcoin status bridge is reconnecting... keeping last known values'
: 'Connecting to Bitcoin status bridge...';
? 'Bitcoin status bridge is retrying... keeping last known values'
: 'Bitcoin status bridge is starting...';
syncStatusText.className = 'text-yellow-300 text-sm font-medium';
}
}
@ -920,10 +1217,12 @@
// Initial update
console.log('[Bitcoin UI] Starting initial blockchain info update...');
updateBlockchainInfo();
loadRelayAccess();
// Update every 5 seconds
console.log('[Bitcoin UI] Setting up 5-second update interval');
setInterval(updateBlockchainInfo, 5000);
setInterval(loadRelayAccess, 15000);
function copyRPCInfo() {
const info = `RPC Host: ${window.location.hostname}:8332\nRPC User: archipelago\nRPC Password: archipelago123\nRPC Endpoint: ${RPC_ENDPOINT}`;

View File

@ -0,0 +1,16 @@
# Static nginx image that serves a single page plus its image assets.
# Base image is pulled from the project's own registry, pinned to nginx 1.27.4 (alpine).
FROM git.tx1138.com/lfg2025/nginx:1.27.4-alpine
# Page and site configuration; nginx.conf replaces the default server block.
COPY index.html /usr/share/nginx/html/index.html
COPY nginx.conf /etc/nginx/conf.d/default.conf
# Image assets, copied to the same relative paths under the web root.
COPY assets/img/bg-network.jpg /usr/share/nginx/html/assets/img/bg-network.jpg
COPY assets/img/app-icons/fedimint.png /usr/share/nginx/html/assets/img/app-icons/fedimint.png
COPY assets/img/app-icons/fedimint.jpg /usr/share/nginx/html/assets/img/app-icons/fedimint.jpg
# Switch nginx's `user` directive from nginx to root and pre-create its temp/cache directories.
# NOTE(review): presumably needed so nginx can start under rootless Podman, where container
# root maps to the unprivileged host user — confirm before changing.
RUN sed -i 's/^user nginx;/user root;/' /etc/nginx/nginx.conf && \
mkdir -p /var/cache/nginx/client_temp /var/cache/nginx/proxy_temp \
/var/cache/nginx/fastcgi_temp /var/cache/nginx/uwsgi_temp \
/var/cache/nginx/scgi_temp
# Documents the listening port; NOTE(review): should match the `listen` port in nginx.conf — confirm.
EXPOSE 8175
# Clear the base image's entrypoint so CMD runs nginx directly, in the foreground.
ENTRYPOINT []
CMD ["nginx", "-g", "daemon off;"]

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 976 KiB

View File

@ -0,0 +1,452 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta http-equiv="refresh" content="30">
<meta http-equiv="Cache-Control" content="no-cache, no-store, must-revalidate">
<meta http-equiv="Pragma" content="no-cache">
<meta http-equiv="Expires" content="0">
<title>Fedimint Guardian - Archipelago</title>
<style>
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body {
min-height: 100vh;
color: white;
overflow-x: hidden;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", "Ubuntu", sans-serif;
background: #050505;
}
.bg-perspective-container {
position: fixed;
inset: 0;
z-index: -10;
perspective: 1000px;
perspective-origin: 50% 50%;
overflow: hidden;
}
.bg-layer {
position: absolute;
inset: 0;
background-image: url("/assets/img/bg-network.jpg");
background-size: cover;
background-position: center;
background-repeat: no-repeat;
transition: all 0.45s cubic-bezier(0.68, -0.55, 0.265, 1.55);
transform-style: preserve-3d;
opacity: 1;
transform: translateZ(0) scale(1);
}
.overlay {
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.8);
z-index: -5;
pointer-events: none;
}
.container {
width: min(1400px, 100%);
margin: 0 auto;
padding: 2rem;
padding-bottom: 4rem;
}
.glass-card,
.info-card {
position: relative;
background: rgba(0, 0, 0, 0.6);
backdrop-filter: blur(24px);
-webkit-backdrop-filter: blur(24px);
box-shadow:
0 8px 24px rgba(0, 0, 0, 0.45),
inset 0 1px 0 rgba(255, 255, 255, 0.22);
border: none;
}
.glass-card {
border-radius: 16px;
padding: 24px;
overflow: hidden;
}
.info-card {
border-radius: 16px;
padding: 12px;
}
.glass-card::before,
.info-card::before {
content: "";
position: absolute;
inset: 0;
border-radius: inherit;
padding: 2px;
background: linear-gradient(135deg, rgba(0, 0, 0, 0.8), transparent);
-webkit-mask:
linear-gradient(#fff 0 0) content-box,
linear-gradient(#fff 0 0);
-webkit-mask-composite: xor;
mask-composite: exclude;
pointer-events: none;
z-index: 1;
}
.glass-card > *,
.info-card > * {
position: relative;
z-index: 2;
}
.header {
display: flex;
align-items: center;
gap: 24px;
}
.logo-gradient-border {
position: relative;
border-radius: 16px;
padding: 3px;
background: linear-gradient(135deg, rgba(255, 255, 255, 0.6) 0%, rgba(0, 0, 0, 0.8) 100%);
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.5);
display: inline-block;
flex: 0 0 auto;
}
.logo-gradient-border::after {
content: "";
position: absolute;
inset: 3px;
border-radius: 13px;
background: #fff;
z-index: 0;
}
.logo-gradient-border img {
border-radius: 13px;
display: block;
position: relative;
z-index: 1;
width: 64px;
height: 64px;
object-fit: cover;
}
.title {
flex: 1;
min-width: 0;
}
.title h1 {
font-size: clamp(28px, 4vw, 36px);
line-height: 1.12;
font-weight: 700;
color: white;
margin-bottom: 8px;
}
.title p,
.body-copy {
color: rgba(255, 255, 255, 0.7);
line-height: 1.55;
}
.status-strip {
display: flex;
align-items: center;
gap: 12px;
width: 100%;
max-width: 260px;
}
.status-dot-wrap {
position: relative;
width: 12px;
height: 12px;
flex: 0 0 auto;
}
.status-dot {
width: 12px;
height: 12px;
border-radius: 999px;
background: rgb(251, 146, 60);
}
.status-dot-ping {
position: absolute;
inset: 0;
width: 12px;
height: 12px;
border-radius: 999px;
background: rgb(251, 146, 60);
opacity: 0.75;
animation: ping 1s cubic-bezier(0, 0, 0.2, 1) infinite;
}
.status-strip .label {
color: rgba(255, 255, 255, 0.6);
font-size: 12px;
line-height: 1.25;
}
.status-strip .value {
color: white;
font-size: 14px;
line-height: 1.25;
font-weight: 500;
}
.main-grid {
display: grid;
grid-template-columns: minmax(0, 1.2fr) minmax(280px, 0.8fr);
gap: 24px;
margin-top: 24px;
}
.wait-card h2 {
font-size: clamp(24px, 4vw, 32px);
line-height: 1.18;
font-weight: 700;
margin-bottom: 12px;
}
.body-copy {
max-width: 70ch;
font-size: 15px;
}
.body-copy + .body-copy {
margin-top: 10px;
}
.readiness {
display: flex;
align-items: center;
gap: 12px;
margin-top: 24px;
padding: 14px;
border-radius: 12px;
background: rgba(255, 255, 255, 0.05);
}
.readiness-icon {
width: 40px;
height: 40px;
border-radius: 12px;
background: rgba(251, 146, 60, 0.2);
color: rgb(251, 146, 60);
display: grid;
place-items: center;
flex: 0 0 auto;
}
.readiness-icon svg {
width: 22px;
height: 22px;
animation: spin 3s linear infinite;
}
.readiness .label {
color: rgba(255, 255, 255, 0.6);
font-size: 12px;
margin-bottom: 2px;
}
.readiness .value {
color: rgba(255, 255, 255, 0.92);
font-size: 15px;
font-weight: 500;
}
.detail-list {
display: grid;
gap: 12px;
}
.detail-row {
display: flex;
align-items: center;
justify-content: space-between;
gap: 16px;
padding: 12px;
border-radius: 12px;
background: rgba(255, 255, 255, 0.05);
}
.detail-row span:first-child {
color: rgba(255, 255, 255, 0.72);
font-size: 14px;
}
.detail-row span:last-child {
color: rgba(255, 255, 255, 0.92);
font-size: 14px;
font-weight: 500;
text-align: right;
}
.progress-track {
position: relative;
height: 12px;
margin-top: 22px;
overflow: hidden;
border-radius: 999px;
background: rgba(255, 255, 255, 0.1);
}
.progress-track::before {
content: "";
position: absolute;
inset: 0;
width: 45%;
border-radius: inherit;
background: linear-gradient(90deg, rgb(251, 146, 60), rgb(250, 204, 21));
animation: indeterminate 1.8s ease-in-out infinite;
box-shadow:
0 0 10px rgba(251, 146, 60, 0.5),
0 0 20px rgba(251, 146, 60, 0.28);
}
@keyframes ping {
75%, 100% {
transform: scale(2);
opacity: 0;
}
}
@keyframes spin {
from { transform: rotate(0deg); }
to { transform: rotate(360deg); }
}
@keyframes indeterminate {
0% { transform: translateX(-110%); }
50% { transform: translateX(80%); }
100% { transform: translateX(230%); }
}
@media (max-width: 900px) {
.container {
padding: 18px;
}
.header {
align-items: flex-start;
flex-direction: column;
}
.status-strip {
max-width: none;
}
.main-grid {
grid-template-columns: 1fr;
}
}
@media (max-width: 520px) {
.glass-card {
padding: 18px;
}
.logo-gradient-border img {
width: 56px;
height: 56px;
}
.detail-row {
align-items: flex-start;
flex-direction: column;
gap: 4px;
}
.detail-row span:last-child {
text-align: left;
}
}
</style>
</head>
<body>
<div class="bg-perspective-container">
<div class="bg-layer"></div>
</div>
<div class="overlay"></div>
<main class="container">
<section class="glass-card">
<div class="header">
<div class="logo-gradient-border">
<img src="/assets/img/app-icons/fedimint.jpg" alt="Fedimint Guardian">
</div>
<div class="title">
<h1>Fedimint Guardian</h1>
<p>Guardian is installed and will open here automatically when Bitcoin Knots finishes initial block download.</p>
</div>
<div class="info-card status-strip">
<div class="status-dot-wrap">
<div class="status-dot"></div>
<div class="status-dot-ping"></div>
</div>
<div>
<p class="label">Status</p>
<p class="value">Waiting for Bitcoin sync</p>
</div>
</div>
</div>
</section>
<section class="main-grid">
<div class="glass-card wait-card">
<h2>Waiting for Bitcoin sync</h2>
<p class="body-copy">Fedimint needs a synced Bitcoin RPC before federation setup can start safely.</p>
<p class="body-copy">This page refreshes every 30 seconds and switches to the Guardian UI as soon as the backend is ready.</p>
<div class="readiness">
<div class="readiness-icon" aria-hidden="true">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15"></path>
</svg>
</div>
<div>
<p class="label">Readiness check</p>
<p class="value">Checking bitcoind readiness...</p>
</div>
</div>
<div class="progress-track" aria-hidden="true"></div>
</div>
<aside class="glass-card">
<div class="detail-list">
<div class="detail-row">
<span>Bitcoin backend</span>
<span>Bitcoin Knots</span>
</div>
<div class="detail-row">
<span>Guardian launch</span>
<span>Automatic</span>
</div>
<div class="detail-row">
<span>Refresh interval</span>
<span>30 seconds</span>
</div>
<div class="detail-row">
<span>Required state</span>
<span>IBD complete</span>
</div>
</div>
</aside>
</section>
</main>
</body>
</html>

View File

@ -0,0 +1,30 @@
# Front-end listener on port 8175: proxies to a local backend on 127.0.0.1:8177
# and falls back to a static page whenever that backend answers with a 5xx.
# NOTE(review): presumably this fronts the Fedimint Guardian UI and serves the
# "Waiting for Bitcoin sync" page while the backend is not ready — confirm.
server {
listen 8175;
server_name _;
# Hand error responses from the proxied backend to error_page below instead of
# passing them straight through to the client.
proxy_intercept_errors on;
# "=" with no explicit code: the client sees whatever status @wait_page itself
# produces rather than the original upstream 5xx.
error_page 500 502 503 504 = @wait_page;
# Static assets are served directly from disk, never proxied.
location /assets/ {
root /usr/share/nginx/html;
add_header Cache-Control "public, max-age=3600" always;
try_files $uri =404;
}
# Everything else goes to the backend on 127.0.0.1:8177.
location / {
proxy_pass http://127.0.0.1:8177;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# Upgrade/Connection are forwarded so WebSocket handshakes can pass through.
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
# Fallback target for intercepted upstream errors: serve /index.html from the
# static root, uncached so a later request can reach the backend once it is up.
# If index.html is missing, answer 503.
location @wait_page {
root /usr/share/nginx/html;
add_header Cache-Control "no-store" always;
try_files /index.html =503;
}
}

View File

@ -8,6 +8,7 @@ Type=notify
User=archipelago
Environment="ARCHIPELAGO_BIND=127.0.0.1:5678"
Environment="ARCHIPELAGO_USE_QUADLET_BACKENDS=true"
EnvironmentFile=-/var/lib/archipelago/telemetry.env
# DEV_MODE disabled in production — enabled via override.conf on dev servers
Environment="XDG_RUNTIME_DIR=/run/user/1000"
# + prefix runs these as root (needed for chown/mkdir outside ReadWritePaths)

View File

@ -148,6 +148,34 @@ server {
error_page 504 = @backend_timeout;
}
# Exact-match JSON-RPC endpoint. A browser arriving here with GET or HEAD is a
# navigation mistake, so it is bounced to the dashboard; every other method
# (the RPC POSTs) is proxied to the backend.
location = /rpc/v1 {
    if ($request_method ~ ^(GET|HEAD)$) {
        return 303 /;
    }

    # Rate limiting: over-limit requests are rejected with 429
    limit_req zone=rpc burst=40 nodelay;
    limit_req_status 429;

    # RPC request bodies are capped at 1MB
    client_max_body_size 1m;

    proxy_pass http://127.0.0.1:5678;
    proxy_http_version 1.1;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;

    # Generous timeouts for long-running operations (e.g., Docker image pulls)
    proxy_connect_timeout 600s;
    proxy_send_timeout 600s;
    proxy_read_timeout 600s;

    error_page 502 503 = @backend_unavailable;
    error_page 504 = @backend_timeout;
}
# Proxy API requests to backend
location /rpc/ {
limit_req zone=rpc burst=40 nodelay;
@ -896,23 +924,6 @@ server {
}
}
# Compatibility proxy for cached PWA bundles that still launch Nginx Proxy
# Manager on :81. Rootless Podman cannot bind host ports below 1024, so the
# container admin UI runs on :8081 and host nginx owns the old :81 entrypoint.
server {
listen 81;
server_name _;
location / {
proxy_pass http://127.0.0.1:8081/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
# HTTPS - required for PWA install (Add to Home Screen) from dev servers
server {
listen 443 ssl default_server;

View File

@ -6,6 +6,19 @@ set -e
echo "🐳 Configuring Podman for rootless operation..."
# Best-effort install of catatonit when it is not already on PATH.
# The first package manager found (apt-get, then dnf, then apk) is used, and
# every step is followed by `|| true` so a failed install does not stop the script.
if ! command -v catatonit >/dev/null 2>&1; then
if command -v apt-get >/dev/null 2>&1; then
apt-get update || true
apt-get install -y catatonit || true
elif command -v dnf >/dev/null 2>&1; then
dnf install -y catatonit || true
elif command -v apk >/dev/null 2>&1; then
apk add catatonit || true
fi
fi
# Re-check after the attempt; a missing binary only produces a warning.
command -v catatonit >/dev/null 2>&1 || echo "WARNING: catatonit not installed; Podman init-enabled containers may fail"
# Ensure archipelago user exists
if ! id "archipelago" &>/dev/null; then
echo "Creating archipelago user..."

View File

@ -177,11 +177,16 @@ load_spec_bitcoin-knots() {
SPEC_DATA_UID="100101:100101"
local btc_dbcache=4096
[ "${LOW_MEM:-false}" = "true" ] && btc_dbcache=2048
local btc_rpc_headroom="-rpcthreads=16 -rpcworkqueue=256"
local btc_txrelay_flags="-rpcwhitelistdefault=0"
if [ -f "$SECRETS_DIR/bitcoin-rpc-txrelay-rpcauth" ]; then
btc_txrelay_flags="$btc_txrelay_flags -rpcauth=$(cat "$SECRETS_DIR/bitcoin-rpc-txrelay-rpcauth") -rpcwhitelist=txrelay:sendrawtransaction,testmempoolaccept,getmempoolinfo,getrawmempool,getmempoolentry,getnetworkinfo,getblockchaininfo,getblockcount,getblockhash,getblockheader,getrawtransaction,decoderawtransaction,decodescript,estimatesmartfee"
fi
# Dynamic: prune on small disk
if [ "${DISK_GB:-0}" -lt 1000 ]; then
SPEC_CUSTOM_ARGS="-server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=${btc_dbcache} -par=0 -maxconnections=125"
SPEC_CUSTOM_ARGS="-server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=${btc_dbcache} -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
else
SPEC_CUSTOM_ARGS="-server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125"
SPEC_CUSTOM_ARGS="-server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
fi
}
@ -518,11 +523,12 @@ load_spec_portainer() {
SPEC_NAME="portainer"
SPEC_IMAGE="${PORTAINER_IMAGE}"
SPEC_PORTS="9000:9000"
SPEC_VOLUMES="/var/lib/archipelago/portainer:/data /run/user/1000/podman/podman.sock:/var/run/docker.sock"
SPEC_VOLUMES="/var/lib/archipelago/portainer:/data /run/user/1000/podman/podman.sock:/var/run/docker.sock /var/lib/archipelago/portainer/compose:/data/compose"
SPEC_MEMORY="$(mem_limit portainer)"
SPEC_HEALTH_CMD="curl -sf http://localhost:9000/ || exit 1"
SPEC_TIER="3"
SPEC_DATA_DIR="/var/lib/archipelago/portainer"
SPEC_DATA_UID="1000:1000"
SPEC_OPTIONAL="true"
}

View File

@ -5,3 +5,7 @@
# Edit deploy-config.sh and set ARCHIPELAGO_PASSWORD
#
export ARCHIPELAGO_PASSWORD='your_password_here'
# Optional: central beta telemetry collector RPC endpoint.
# The reporter sends telemetry.ingest JSON-RPC requests here when users opt in.
# export TELEMETRY_COLLECTOR_URL='https://YOUR-COLLECTOR-HOST/rpc/v1'

View File

@ -17,6 +17,7 @@ set -eo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
TARGET_DIR="/home/archipelago/archy"
PODMAN_IMAGE_CHECK_TIMEOUT="${PODMAN_IMAGE_CHECK_TIMEOUT:-10}"
# Load deploy config defaults (IP addresses etc.)
[ -f "$SCRIPT_DIR/deploy-config-defaults.sh" ] && . "$SCRIPT_DIR/deploy-config-defaults.sh"
@ -186,7 +187,7 @@ deploy_node() {
# Transfer custom UI images (individual tarballs — never combined)
echo " Transferring custom UI images..."
for ui_img in bitcoin-ui lnd-ui electrs-ui; do
HAS_IMG=$(ssh $SSH_OPTS "$BUILD_SOURCE" "podman images --format '{{.Repository}}:{{.Tag}}' 2>/dev/null | grep -q '${ui_img}:' && echo yes || echo no" 2>/dev/null)
HAS_IMG=$(ssh $SSH_OPTS "$BUILD_SOURCE" "timeout --kill-after=2s ${PODMAN_IMAGE_CHECK_TIMEOUT}s podman image exists 'localhost/${ui_img}:local' 2>/dev/null && echo yes || echo no" 2>/dev/null)
if [ "$HAS_IMG" = "yes" ]; then
echo " $ui_img..."
if ssh $SSH_OPTS "$BUILD_SOURCE" "podman save 'localhost/${ui_img}:local' 2>/dev/null" > "/tmp/${ui_img}.tar" 2>/dev/null && [ -s "/tmp/${ui_img}.tar" ]; then
@ -926,12 +927,19 @@ LNDCONF
if \$DOCKER ps -a --format '{{.Names}}' 2>/dev/null | grep -qx portainer; then
\$DOCKER start portainer 2>/dev/null || true
else
sudo mkdir -p /var/lib/archipelago/portainer
sudo mkdir -p /var/lib/archipelago/portainer/compose
sudo chown -R archipelago:archipelago /var/lib/archipelago/portainer 2>/dev/null || true
if [ ! -e /data ]; then
sudo ln -s /var/lib/archipelago/portainer /data 2>/dev/null || true
elif [ -d /data ] && [ ! -L /data ] && [ ! -e /data/compose ]; then
sudo ln -s /var/lib/archipelago/portainer/compose /data/compose 2>/dev/null || true
fi
\$DOCKER run -d --name portainer --restart unless-stopped \
--health-cmd 'curl -sf http://localhost:9000/' --health-interval=30s --health-timeout=5s --health-retries=3 \
--cap-drop ALL --cap-add CHOWN --cap-add SETUID --cap-add SETGID --cap-add DAC_OVERRIDE \
--security-opt no-new-privileges:true \
-p 9000:9000 -v /var/lib/archipelago/portainer:/data \
-v /var/lib/archipelago/portainer/compose:/data/compose \
-v /run/user/1000/podman/podman.sock:/var/run/docker.sock \
$PORTAINER_IMAGE
fi

View File

@ -421,6 +421,20 @@ deploy_secondary() {
rm -f /tmp/archipelago.service
' 2>/dev/null || true
fi
# Push the optional telemetry collector URL to the secondary node as
# /var/lib/archipelago/telemetry.env (owner archipelago, mode 600).
# The remote install runs ONLY after a successful upload. Previously the scp
# failure was swallowed and the install still ran, so whatever file already sat
# at /tmp/telemetry.env on the target (stale, or planted by another local user,
# since /tmp is world-writable) would have been copied into place with sudo.
# The whole step stays best-effort: a failure warns but never aborts the deploy.
if [ -n "${TELEMETRY_COLLECTOR_URL:-}" ]; then
    echo " Syncing telemetry collector config to .$SEC_LABEL..."
    TMP_TELEMETRY_ENV="$(mktemp)"
    printf 'TELEMETRY_COLLECTOR_URL=%s\n' "$TELEMETRY_COLLECTOR_URL" > "$TMP_TELEMETRY_ENV"
    if scp $SSH_OPTS "$TMP_TELEMETRY_ENV" "$SEC_TARGET:/tmp/telemetry.env" 2>/dev/null; then
        ssh $SSH_OPTS "$SEC_TARGET" '
            sudo mkdir -p /var/lib/archipelago
            sudo cp /tmp/telemetry.env /var/lib/archipelago/telemetry.env
            sudo chown archipelago:archipelago /var/lib/archipelago/telemetry.env
            sudo chmod 600 /var/lib/archipelago/telemetry.env
            rm -f /tmp/telemetry.env
        ' 2>/dev/null || true
    else
        echo " WARNING: telemetry config upload to .$SEC_LABEL failed; skipping install" >&2
    fi
    # The local temp file is removed whether or not the upload worked.
    rm -f "$TMP_TELEMETRY_ENV"
fi
# Deploy udev rule for mesh radio
UDEV_RULE="$PROJECT_DIR/image-recipe/configs/99-mesh-radio.rules"
@ -682,6 +696,20 @@ if [ "$LIVE" = true ]; then
rm -f /tmp/archipelago.service
' 2>/dev/null || true
fi
# Push the optional telemetry collector URL to the live target as
# /var/lib/archipelago/telemetry.env (owner archipelago, mode 600).
# The remote install runs ONLY after a successful upload. Previously the scp
# failure was swallowed and the install still ran, so whatever file already sat
# at /tmp/telemetry.env on the target (stale, or planted by another local user,
# since /tmp is world-writable) would have been copied into place with sudo.
# The whole step stays best-effort: a failure warns but never aborts the deploy.
if [ -n "${TELEMETRY_COLLECTOR_URL:-}" ]; then
    progress "Syncing telemetry collector config"
    TMP_TELEMETRY_ENV="$(mktemp)"
    printf 'TELEMETRY_COLLECTOR_URL=%s\n' "$TELEMETRY_COLLECTOR_URL" > "$TMP_TELEMETRY_ENV"
    if scp $SSH_OPTS "$TMP_TELEMETRY_ENV" "$TARGET_HOST:/tmp/telemetry.env" 2>/dev/null; then
        ssh $SSH_OPTS "$TARGET_HOST" '
            sudo mkdir -p /var/lib/archipelago
            sudo cp /tmp/telemetry.env /var/lib/archipelago/telemetry.env
            sudo chown archipelago:archipelago /var/lib/archipelago/telemetry.env
            sudo chmod 600 /var/lib/archipelago/telemetry.env
            rm -f /tmp/telemetry.env
        ' 2>/dev/null || true
    else
        echo "WARNING: telemetry config upload to $TARGET_HOST failed; skipping install" >&2
    fi
    # The local temp file is removed whether or not the upload worked.
    rm -f "$TMP_TELEMETRY_ENV"
fi
# Deploy udev rule for mesh radio stable naming (/dev/mesh-radio)
UDEV_RULE="$PROJECT_DIR/image-recipe/configs/99-mesh-radio.rules"

View File

@ -431,6 +431,17 @@ fi
# Rootless podman prerequisites (run as root, configures for archipelago user)
log "Setting up rootless podman prerequisites..."
# Best-effort install of catatonit when it is not already on PATH.
# The first package manager found (apt-get, then dnf, then apk) is used. Output
# is appended to $LOG, and each step ends in `|| true` so a failed install does
# not abort the rest of the setup.
if ! command -v catatonit >/dev/null 2>&1; then
log "Installing catatonit for Podman init support..."
if command -v apt-get >/dev/null 2>&1; then
apt-get update >>"$LOG" 2>&1 || true
apt-get install -y catatonit >>"$LOG" 2>&1 || true
elif command -v dnf >/dev/null 2>&1; then
dnf install -y catatonit >>"$LOG" 2>&1 || true
elif command -v apk >/dev/null 2>&1; then
apk add catatonit >>"$LOG" 2>&1 || true
fi
fi
# Allow binding to ports >= 80 (rootless default is 1024)
if ! grep -q "unprivileged_port_start=80" /etc/sysctl.d/99-rootless-podman.conf 2>/dev/null; then
echo "net.ipv4.ip_unprivileged_port_start=80" > /etc/sysctl.d/99-rootless-podman.conf
@ -612,7 +623,7 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -qE 'bitcoin-knots|arch
-v /var/lib/archipelago/bitcoin:/home/bitcoin/.bitcoin \
"${BITCOIN_KNOTS_IMAGE}" \
$BTC_EXTRA_ARGS \
-printtoconsole=1 -dbcache=$BTC_DBCACHE -par=0 -maxconnections=125 2>>"$LOG"; then
-printtoconsole=1 -dbcache=$BTC_DBCACHE -par=0 -maxconnections=125 -rpcthreads=16 -rpcworkqueue=256 2>>"$LOG"; then
log "Bitcoin Knots started"
else
log "Bitcoin Knots failed (may already exist)"
@ -1202,7 +1213,13 @@ fi
track_container "nginx-proxy-manager"
if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q portainer; then
log "Creating Portainer..."
mkdir -p /var/lib/archipelago/portainer
mkdir -p /var/lib/archipelago/portainer/compose
chown -R archipelago:archipelago /var/lib/archipelago/portainer 2>/dev/null || true
if [ ! -e /data ]; then
ln -s /var/lib/archipelago/portainer /data 2>/dev/null || true
elif [ -d /data ] && [ ! -L /data ] && [ ! -e /data/compose ]; then
ln -s /var/lib/archipelago/portainer/compose /data/compose 2>/dev/null || true
fi
$DOCKER run -d --name portainer --restart unless-stopped \
--health-cmd="curl -sf http://localhost:9000/ || exit 1" --health-interval=120s --health-timeout=5s --health-retries=3 \
--memory=$(mem_limit portainer) \
@ -1210,7 +1227,8 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q portainer; then
--security-opt no-new-privileges:true \
-p 9000:9000 \
-v /var/lib/archipelago/portainer:/data \
-v /var/run/podman/podman.sock:/var/run/docker.sock \
-v /var/lib/archipelago/portainer/compose:/data/compose \
-v /run/user/$(id -u archipelago)/podman/podman.sock:/var/run/docker.sock \
"$PORTAINER_IMAGE" 2>>"$LOG" || true
fi
track_container "portainer"
@ -1232,7 +1250,7 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q tailscale; then
-v /var/lib/archipelago/tailscale:/var/lib/tailscale \
-e TS_STATE_DIR=/var/lib/tailscale \
"$TAILSCALE_IMAGE" \
sh -c 'tailscaled --tun=userspace-networking & sleep 2; tailscale web --listen 0.0.0.0:8240 & wait' 2>>"$LOG" || true
sh -c 'tailscaled --tun=userspace-networking & for i in $(seq 1 30); do [ -S /var/run/tailscale/tailscaled.sock ] && break; sleep 1; done; tailscale web --listen 0.0.0.0:8240 & wait' 2>>"$LOG" || true
fi
track_container "tailscale"

View File

@ -10,6 +10,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# This script: stops broken containers, removes them, recreates with correct images.
echo "=== IndeedHub Container Fix Script ==="
PODMAN_IMAGE_CHECK_TIMEOUT="${PODMAN_IMAGE_CHECK_TIMEOUT:-10}"
# Detect node IP (Tailscale or LAN)
NODE_IP=$(hostname -I | awk '{for(i=1;i<=NF;i++) if($i ~ /^100\./) print $i}')
@ -29,7 +30,7 @@ fi
# Verify correct images are available
echo "Verifying images..."
for img in "${INDEEDHUB_REDIS_IMAGE}" "${MINIO_IMAGE}" "${INDEEDHUB_POSTGRES_IMAGE}" "${NOSTR_RS_RELAY_IMAGE}" "${SEARXNG_IMAGE}" "localhost/indeedhub:local" "localhost/indeedhub-build_api:local" "localhost/indeedhub-build_ffmpeg-worker:local"; do
if ! podman image exists "$img" 2>/dev/null; then
if ! timeout --kill-after=2s "${PODMAN_IMAGE_CHECK_TIMEOUT}s" podman image exists "$img" 2>/dev/null; then
echo "ERROR: Missing image $img"
exit 1
fi

View File

@ -98,6 +98,11 @@ alloc_port() {
# Run as archipelago user — podman sees rootless containers directly.
# Use sudo only for chown/mkdir operations.
PODMAN="podman"
PODMAN_IMAGE_CHECK_TIMEOUT="${PODMAN_IMAGE_CHECK_TIMEOUT:-10}"
# Run "$PODMAN" with the given arguments under a time limit of
# PODMAN_IMAGE_CHECK_TIMEOUT seconds; `timeout` sends KILL 2s after the initial
# signal if the command is still running.
podman_bounded() {
    local limit="${PODMAN_IMAGE_CHECK_TIMEOUT}s"
    timeout --kill-after=2s "$limit" "$PODMAN" "$@"
}
# ── Pre-flight ───────────────────────────────────────────────────────
header "╔══════════════════════════════════════════════════╗"
@ -152,7 +157,7 @@ container_image_id() {
}
spec_image_id() {
$PODMAN image inspect "$SPEC_IMAGE" --format '{{.Id}}' 2>/dev/null
podman_bounded image inspect "$SPEC_IMAGE" --format '{{.Id}}' 2>/dev/null
}
container_network() {
@ -218,6 +223,39 @@ prepare_bind_source() {
esac
}
# Make sure the catatonit binary is available.
# Returns 0 when it is already on PATH, or in check-only mode (where the
# missing binary is only reported). Otherwise tries one install through the
# first package manager found (apt-get, dnf, apk) and returns 1, after
# reporting via `fail`, if catatonit is still missing.
ensure_catatonit() {
    local pm

    if command -v catatonit >/dev/null 2>&1; then
        return 0
    fi

    if $CHECK_ONLY; then
        info "catatonit missing (would install)"
        return 0
    fi

    # Only the first available package manager is tried; failures are ignored
    # here and caught by the final presence check.
    for pm in apt-get dnf apk; do
        command -v "$pm" >/dev/null 2>&1 || continue
        case "$pm" in
            apt-get)
                sudo apt-get update >/dev/null 2>&1 || true
                sudo apt-get install -y catatonit >/dev/null 2>&1 || true
                ;;
            dnf)
                sudo dnf install -y catatonit >/dev/null 2>&1 || true
                ;;
            apk)
                sudo apk add catatonit >/dev/null 2>&1 || true
                ;;
        esac
        break
    done

    if ! command -v catatonit >/dev/null 2>&1; then
        fail "catatonit missing; Portainer compose builds may fail"
        return 1
    fi
}
# Prepare the host-side paths Portainer is given: the compose directory under
# /var/lib/archipelago/portainer and a /data entry on the host pointing at it.
# In check-only mode nothing is changed; missing pieces are only reported.
ensure_portainer_host_paths() {
    local portainer_root="/var/lib/archipelago/portainer"
    local compose_dir="$portainer_root/compose"

    ensure_catatonit

    if $CHECK_ONLY; then
        if [ ! -d "$compose_dir" ]; then
            info "Portainer compose dir missing (would create)"
        fi
        if [ ! -e /data ]; then
            info "/data host path missing (would link to /var/lib/archipelago/portainer)"
        fi
        return 0
    fi

    # All filesystem steps are best-effort.
    sudo mkdir -p "$compose_dir" 2>/dev/null || true
    sudo chown -R 1000:1000 "$portainer_root" 2>/dev/null || true

    if [ -e /data ]; then
        # /data already exists: only add the compose link when /data is a real
        # directory (not a symlink) that has no compose entry yet.
        if [ -d /data ] && [ ! -L /data ] && [ ! -e /data/compose ]; then
            sudo ln -s "$compose_dir" /data/compose 2>/dev/null || true
        fi
    else
        # No /data on the host: link it to the Portainer data root.
        sudo ln -s "$portainer_root" /data 2>/dev/null || true
    fi
}
container_has_mount() {
local name="$1" source="$2" target="$3"
$PODMAN inspect "$name" --format '{{range .Mounts}}{{println .Source "|" .Destination}}{{end}}' 2>/dev/null \
@ -250,13 +288,7 @@ container_env_val() {
URL_ENV_SUFFIXES="_URL _HOST _ENDPOINT"
image_exists() {
# Note: `grep -q` closes stdin after first match → SIGPIPE (exit 141) on podman.
# With `set -o pipefail` active in the parent script, that propagates as failure
# and spuriously skips local-image containers. Use a full scan + explicit match
# check to keep the exit code stable regardless of pipefail.
local images
images=$($PODMAN images --format '{{.Repository}}:{{.Tag}}' 2>/dev/null)
echo "$images" | grep -qF "$1"
podman_bounded image exists "$1" >/dev/null 2>&1
}
resolve_spec_image() {
@ -280,7 +312,7 @@ resolve_spec_image() {
fi
done
repo=$($PODMAN images --format '{{.Repository}}:{{.Tag}}' 2>/dev/null \
repo=$(podman_bounded images --format '{{.Repository}}:{{.Tag}}' 2>/dev/null \
| grep -E "/${image_name}:${image_tag}$" \
| head -1 || true)
if [ -n "$repo" ]; then
@ -377,6 +409,8 @@ reconcile() {
return
fi
[ "$name" = "portainer" ] && ensure_portainer_host_paths
# Filter by tier
[ -n "$FILTER_TIER" ] && [ "$SPEC_TIER" != "$FILTER_TIER" ] && return
@ -701,7 +735,7 @@ BTCEOF
# bitcoin_rw.conf, so clean both files.
for conf in "$BITCOIN_CONF" "/var/lib/archipelago/bitcoin/bitcoin_rw.conf"; do
if [ -f "$conf" ]; then
sudo sed -i '/^server=/d; /^txindex=/d; /^rpcbind=/d; /^rpcallowip=/d; /^rpcport=/d; /^listen=/d; /^bind=/d; /^dbcache=/d' "$conf" 2>/dev/null
sudo sed -i '/^server=/d; /^txindex=/d; /^rpcbind=/d; /^rpcallowip=/d; /^rpcport=/d; /^listen=/d; /^bind=/d; /^dbcache=/d; /^rpcthreads=/d; /^rpcworkqueue=/d' "$conf" 2>/dev/null
fi
done
sudo chown -R 100101:100101 /var/lib/archipelago/bitcoin 2>/dev/null

View File

@ -158,6 +158,7 @@ image_for() {
dwn) echo "146.59.87.168:3000/lfg2025/dwn-server:main" ;;
botfights) echo "146.59.87.168:3000/lfg2025/botfights:1.1.0" ;;
gitea) echo "docker.io/gitea/gitea:1.23" ;;
meshtastic) echo "docker.io/meshtastic/meshtasticd:daily-alpine" ;;
*) return 1 ;;
esac
}
@ -219,6 +220,8 @@ rpc_call() {
payload=$(jq -nc --arg m "$method" --argjson p "$params" --argjson id "$id" '{jsonrpc:"2.0",method:$m,params:$p,id:$id}')
fi
curl -sk -X POST "${BASE_URL}/rpc/v1" \
--connect-timeout 8 \
-m "${ARCHY_RPC_TIMEOUT:-60}" \
-H 'Content-Type: application/json' \
-H "Cookie: session=${SESSION}; csrf_token=${CSRF}" \
-H "X-CSRF-Token: ${CSRF}" \
@ -244,9 +247,16 @@ container_state() {
}
container_health() {
local app="$1"
local app="$1" health
health=$(
ARCHY_RPC_TIMEOUT="${ARCHY_HEALTH_RPC_TIMEOUT:-20}" \
rpc_result container-health "$(jq -nc --arg app "$app" '{app_id:$app}')" \
| jq -r --arg app "$app" '.[$app] // "unknown" | ascii_downcase'
) || health=unknown
if [[ "$app" == "indeedhub" && "$health" != "healthy" ]] && probe_launch "$app" >/dev/null 2>&1; then
health=healthy
fi
printf '%s\n' "$health"
}
assert_container_healthy() {
@ -277,6 +287,10 @@ observe_stable() {
while (( $(date +%s) < deadline )); do
state=$(container_state "$app" 2>/dev/null || echo unknown)
if [[ "$state" != "running" ]]; then
if [[ "$app" == "indeedhub" ]] && probe_launch "$app" >/dev/null 2>&1; then
sleep 5
continue
fi
echo "stability failed: $app left running state (last=$state)" >&2
return 1
fi
@ -292,7 +306,9 @@ wait_state() {
while (( $(date +%s) < deadline )); do
state=$(container_state "$app" 2>/dev/null || echo unknown)
if [[ "$target" == "absent" && "$state" == "absent" ]]; then return 0; fi
if [[ "$target" == "stopped" && "$state" == "absent" ]]; then return 0; fi
if [[ "$target" != "absent" && "$state" == "$target" ]]; then return 0; fi
if [[ "$app" == "indeedhub" && "$target" == "running" ]] && probe_launch "$app" >/dev/null 2>&1; then return 0; fi
sleep 5
done
echo "$app did not reach $target within ${timeout}s (last=$state)" >&2
@ -346,6 +362,8 @@ probe_launch() {
case "$app" in
lnd) probe_lnd_wallet_connect "$body" || { rm -f "$body"; return 1; } ;;
electrumx|electrs|mempool-electrs) probe_electrum_wallet_connect "$body" || { rm -f "$body"; return 1; } ;;
indeedhub) probe_indeedhub_nostr_signer "$body" || { rm -f "$body"; return 1; } ;;
tailscale) probe_tailscale_login_ui "$body" || { rm -f "$body"; return 1; } ;;
esac
rm -f "$body"
}
@ -362,6 +380,7 @@ wait_launch() {
assert_launch_metadata() {
local app="$1" timeout="${2:-$ARCHY_TIMEOUT}" deadline lan
launch_url_for "$app" >/dev/null 2>&1 || return 0
deadline=$(( $(date +%s) + timeout ))
while (( $(date +%s) < deadline )); do
lan=$(rpc_result container-list | jq -r --arg app "$app" '
@ -436,6 +455,47 @@ probe_electrum_wallet_connect() {
}
}
# Probe IndeedHub's Nostr signer integration.
#   $1  path to a file holding the already-fetched launch page body
# Steps, each returning 1 with a message on stderr when it fails:
#   1. the launch page must reference /nostr-provider.js (via require_body);
#   2. http://$ARCHY_HOST:7778/nostr-provider.js must be fetchable and mention "nostr";
#   3. the node.nostr-pubkey RPC must return a 64-hex-digit pubkey;
#   4. node.nostr-sign must return an event carrying that pubkey, a 64-hex id,
#      a 128-hex sig and the probe content unchanged.
probe_indeedhub_nostr_signer() {
    local body="$1" provider pubkey signed now
    require_body "$body" '/nostr-provider.js' 'IndeedHub Nostr provider injection' || return 1

    # -f: an HTTP error status yields an empty result (reported as "unavailable")
    # instead of the error page body being mistaken for the provider script.
    provider=$(curl -fskL --connect-timeout 8 -m 20 "http://${ARCHY_HOST}:7778/nostr-provider.js" || true)
    if [[ -z "$provider" ]]; then
        echo "indeedhub nostr-provider.js unavailable" >&2
        return 1
    fi

    # Feed grep through a here-string rather than `printf | grep -q`: grep -q
    # exits on the first match, and with a large script the writer can then die
    # of SIGPIPE. If the script runs with `set -o pipefail` (not visible here —
    # confirm), that would turn a successful match into a spurious failure.
    grep -Eq 'window\.nostr|nostr' <<<"$provider" || {
        echo "indeedhub nostr-provider.js does not look like a Nostr signer bridge" >&2
        return 1
    }

    pubkey=$(rpc_result node.nostr-pubkey | jq -r '.nostr_pubkey // empty')
    if ! [[ "$pubkey" =~ ^[0-9a-fA-F]{64}$ ]]; then
        echo "indeedhub Nostr signer pubkey unavailable: $pubkey" >&2
        return 1
    fi

    # Ask the node to sign a throwaway kind-1 event stamped with the current time.
    now=$(date +%s)
    signed=$(rpc_result node.nostr-sign "$(jq -nc --argjson created_at "$now" '{event:{kind:1,created_at:$created_at,tags:[],content:"archy lifecycle indeedhub signer probe"}}')")
    printf '%s' "$signed" | jq -e --arg pubkey "$pubkey" '
        .pubkey == $pubkey and
        (.id | type == "string" and test("^[0-9a-f]{64}$")) and
        (.sig | type == "string" and test("^[0-9a-f]{128}$")) and
        .content == "archy lifecycle indeedhub signer probe"
    ' >/dev/null || {
        echo "indeedhub Nostr signer did not return a valid signed event: $signed" >&2
        return 1
    }
}
# Check that the fetched Tailscale launch page looks like a login/auth screen.
#   $1  path to a file holding the page body
# Returns 0 when any of the login-related keywords appears (case-insensitive);
# otherwise prints a diagnostic to stderr and returns 1.
probe_tailscale_login_ui() {
    local page="$1"
    grep -Eiq 'tailscale|login|log in|sign in|authenticate|authorize|auth key|connect' "$page" && return 0
    echo "tailscale launch did not present login/auth UI content" >&2
    return 1
}
install_app() {
local app="$1" app_json image params
app_json=$(catalog_app_json "$app" || true)