2026-03-12 00:19:30 +00:00
|
|
|
// Crash Recovery Module
|
|
|
|
|
// Detects unexpected shutdowns and restarts containers that were running before the crash.
|
|
|
|
|
//
|
|
|
|
|
// How it works:
|
|
|
|
|
// 1. On startup, write a PID file as a "running" marker
|
|
|
|
|
// 2. Periodically snapshot which containers are running to a state file
|
|
|
|
|
// 3. On clean shutdown, remove the PID file
|
|
|
|
|
// 4. On next startup, if the PID file exists → previous instance crashed
|
|
|
|
|
// 5. On crash recovery: read the saved container list, restart them, log actions
|
|
|
|
|
|
|
|
|
|
use anyhow::{Context, Result};
|
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
|
use std::path::{Path, PathBuf};
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
2026-03-21 01:02:16 +00:00
|
|
|
use std::time::Instant;
|
2026-03-12 00:19:30 +00:00
|
|
|
use tokio::fs;
|
|
|
|
|
use tracing::{info, warn};
|
|
|
|
|
|
|
|
|
|
const PID_FILE: &str = "archipelago.pid";
|
|
|
|
|
const CONTAINER_STATE_FILE: &str = "running-containers.json";
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
const USER_STOPPED_FILE: &str = "user-stopped.json";
|
|
|
|
|
|
|
|
|
|
/// Shared flag: true once boot recovery is complete. Health monitor should wait for this.
|
|
|
|
|
pub static RECOVERY_COMPLETE: AtomicBool = AtomicBool::new(false);
|
|
|
|
|
|
2026-03-21 01:02:16 +00:00
|
|
|
/// Process start time for uptime calculation.
|
|
|
|
|
static START_TIME: std::sync::OnceLock<Instant> = std::sync::OnceLock::new();
|
|
|
|
|
|
|
|
|
|
/// Initialize the start time. Call once at startup.
|
|
|
|
|
pub fn init_start_time() {
|
|
|
|
|
START_TIME.get_or_init(Instant::now);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get uptime in seconds since process start.
|
|
|
|
|
pub fn uptime_seconds() -> u64 {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
START_TIME.get().map(|t| t.elapsed().as_secs()).unwrap_or(0)
|
2026-03-21 01:02:16 +00:00
|
|
|
}
|
|
|
|
|
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
/// Mark boot recovery as complete. Call after crash recovery + start_stopped_containers finish.
|
|
|
|
|
pub fn mark_recovery_complete() {
|
|
|
|
|
RECOVERY_COMPLETE.store(true, Ordering::SeqCst);
|
|
|
|
|
info!("Boot recovery complete — health monitor may proceed");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Check if boot recovery is done.
|
|
|
|
|
pub fn is_recovery_complete() -> bool {
|
|
|
|
|
RECOVERY_COMPLETE.load(Ordering::SeqCst)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ── User-stopped tracking ───────────────────────────────────────────────
|
|
|
|
|
// When a user explicitly stops a container via the UI, we record it here
|
|
|
|
|
// so crash recovery and health monitor don't auto-restart it.
|
|
|
|
|
|
|
|
|
|
/// Load the set of user-stopped containers from disk.
|
|
|
|
|
pub async fn load_user_stopped(data_dir: &Path) -> std::collections::HashSet<String> {
|
|
|
|
|
let path = data_dir.join(USER_STOPPED_FILE);
|
|
|
|
|
match fs::read_to_string(&path).await {
|
|
|
|
|
Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
|
|
|
|
|
Err(_) => std::collections::HashSet::new(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Save the set of user-stopped containers to disk.
|
|
|
|
|
pub async fn save_user_stopped(data_dir: &Path, stopped: &std::collections::HashSet<String>) {
|
|
|
|
|
let path = data_dir.join(USER_STOPPED_FILE);
|
|
|
|
|
if let Ok(json) = serde_json::to_string_pretty(stopped) {
|
|
|
|
|
let _ = fs::write(&path, json).await;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Mark a container as user-stopped (won't be auto-restarted).
|
|
|
|
|
pub async fn mark_user_stopped(data_dir: &Path, name: &str) {
|
|
|
|
|
let mut stopped = load_user_stopped(data_dir).await;
|
|
|
|
|
stopped.insert(name.to_string());
|
|
|
|
|
save_user_stopped(data_dir, &stopped).await;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Clear user-stopped flag (container was manually started by user).
|
|
|
|
|
pub async fn clear_user_stopped(data_dir: &Path, name: &str) {
|
|
|
|
|
let mut stopped = load_user_stopped(data_dir).await;
|
|
|
|
|
if stopped.remove(name) {
|
|
|
|
|
save_user_stopped(data_dir, &stopped).await;
|
|
|
|
|
}
|
|
|
|
|
}
|
2026-03-12 00:19:30 +00:00
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
|
pub struct RunningContainerRecord {
|
|
|
|
|
pub name: String,
|
|
|
|
|
pub image: String,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
|
|
|
struct ContainerSnapshot {
|
|
|
|
|
pub timestamp: u64,
|
|
|
|
|
pub containers: Vec<RunningContainerRecord>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Check if the previous instance crashed (PID file exists without a clean shutdown).
|
|
|
|
|
/// Returns the list of containers that were running before the crash, if any.
|
|
|
|
|
pub async fn check_for_crash(data_dir: &Path) -> Result<Option<Vec<RunningContainerRecord>>> {
|
|
|
|
|
let pid_path = data_dir.join(PID_FILE);
|
|
|
|
|
|
|
|
|
|
if !pid_path.exists() {
|
|
|
|
|
return Ok(None);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// PID file exists — previous instance didn't shut down cleanly
|
|
|
|
|
let old_pid = fs::read_to_string(&pid_path)
|
|
|
|
|
.await
|
|
|
|
|
.unwrap_or_default()
|
|
|
|
|
.trim()
|
|
|
|
|
.to_string();
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
warn!(
|
|
|
|
|
"Crash detected: previous instance (PID {}) did not shut down cleanly",
|
|
|
|
|
old_pid
|
|
|
|
|
);
|
2026-03-12 00:19:30 +00:00
|
|
|
|
|
|
|
|
// Check if that PID is actually still running (zombie/stuck process)
|
|
|
|
|
if !old_pid.is_empty() {
|
|
|
|
|
if let Ok(pid) = old_pid.parse::<u32>() {
|
|
|
|
|
if is_process_running(pid) {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
warn!(
|
|
|
|
|
"Previous process (PID {}) is still running — not a crash, skipping recovery",
|
|
|
|
|
pid
|
|
|
|
|
);
|
2026-03-12 00:19:30 +00:00
|
|
|
// Remove stale PID file and skip recovery
|
|
|
|
|
let _ = fs::remove_file(&pid_path).await;
|
|
|
|
|
return Ok(None);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Load the saved container snapshot
|
|
|
|
|
let state_path = data_dir.join(CONTAINER_STATE_FILE);
|
|
|
|
|
let containers = if state_path.exists() {
|
|
|
|
|
match fs::read_to_string(&state_path).await {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
Ok(content) => match serde_json::from_str::<ContainerSnapshot>(&content) {
|
|
|
|
|
Ok(snapshot) => {
|
|
|
|
|
info!(
|
|
|
|
|
"Found {} containers from pre-crash snapshot (saved at {})",
|
|
|
|
|
snapshot.containers.len(),
|
|
|
|
|
snapshot.timestamp
|
|
|
|
|
);
|
|
|
|
|
snapshot.containers
|
2026-03-12 00:19:30 +00:00
|
|
|
}
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
Err(e) => {
|
|
|
|
|
warn!("Failed to parse container snapshot: {}", e);
|
|
|
|
|
Vec::new()
|
|
|
|
|
}
|
|
|
|
|
},
|
2026-03-12 00:19:30 +00:00
|
|
|
Err(e) => {
|
|
|
|
|
warn!("Failed to read container snapshot: {}", e);
|
|
|
|
|
Vec::new()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
info!("No container snapshot found — cannot determine which containers were running");
|
|
|
|
|
Vec::new()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Clean up the stale PID file
|
|
|
|
|
let _ = fs::remove_file(&pid_path).await;
|
|
|
|
|
|
|
|
|
|
if containers.is_empty() {
|
|
|
|
|
Ok(None)
|
|
|
|
|
} else {
|
|
|
|
|
Ok(Some(containers))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Write the PID file to mark the current instance as running.
|
|
|
|
|
pub async fn write_pid_marker(data_dir: &Path) -> Result<()> {
|
|
|
|
|
let pid = std::process::id();
|
|
|
|
|
let pid_path = data_dir.join(PID_FILE);
|
|
|
|
|
fs::write(&pid_path, pid.to_string())
|
|
|
|
|
.await
|
|
|
|
|
.context("Failed to write PID marker")?;
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Remove the PID file on clean shutdown.
|
|
|
|
|
pub async fn remove_pid_marker(data_dir: &Path) {
|
|
|
|
|
let pid_path = data_dir.join(PID_FILE);
|
|
|
|
|
if let Err(e) = fs::remove_file(&pid_path).await {
|
|
|
|
|
warn!("Failed to remove PID marker: {}", e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Save a snapshot of currently running containers to disk.
|
|
|
|
|
/// Called periodically so we know what to restart after a crash.
|
|
|
|
|
pub async fn save_container_snapshot(data_dir: &Path) -> Result<()> {
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
let output = tokio::time::timeout(
|
|
|
|
|
std::time::Duration::from_secs(30),
|
feat: rootless podman, session hardening, boot stability, sidebar fix
Rootless podman migration (TASK-11):
- Remove sudo from all podman calls in PodmanClient + 8 backend files
- Remove sudo from all podman/docker calls in deploy script
- Restore full systemd security hardening: NoNewPrivileges,
RestrictAddressFamilies, MemoryDenyWriteExecute, RestrictRealtime,
RestrictNamespaces, RestrictSUIDSGID, SystemCallFilter, ProtectSystem=strict
- Enable loginctl linger for rootless container persistence
- Remove Ollama from auto-deploy (marketplace-only)
Session & auth hardening:
- Increase MAX_CONCURRENT_SESSIONS 20→50 (prevents eviction storms)
- Debounced 401 redirect in rpc-client.ts (prevents redirect storms)
Boot stability:
- optimize-debian.sh: adds chrony, swap, removes policy-rc.d
- deploy script: pre-restart chrony + swap setup
- ISO build: chrony package, swap file creation
- BootScreen: no longer clears localStorage (prevents splash replay)
- RootRedirect: sole owner of localStorage clearing on server ready
UI fixes:
- Sidebar opacity default changed from 0→visible (fixes missing sidebar
after page-persistence login without entrance animation)
- Console.log/error wrapped in import.meta.env.DEV guards
- Remove unused route import from RootRedirect
Beta tracking:
- CLAUDE.md: beta freeze protocol added
- MASTER_PLAN.md: TASK-11, TASK-17, phase structure
- BETA-PROGRESS.md: initial tracking doc
- Tagged v1.2.0-alpha.1 as pre-rootless baseline
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 13:53:27 +00:00
|
|
|
tokio::process::Command::new("podman")
|
|
|
|
|
.args(["ps", "--format", "json"])
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
.output(),
|
|
|
|
|
)
|
|
|
|
|
.await
|
|
|
|
|
.context("podman ps timed out (30s)")?
|
|
|
|
|
.context("Failed to run podman ps")?;
|
2026-03-12 00:19:30 +00:00
|
|
|
|
|
|
|
|
if !output.status.success() {
|
|
|
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
|
|
|
anyhow::bail!("podman ps failed: {}", stderr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
let containers: Vec<serde_json::Value> = serde_json::from_str(&stdout).unwrap_or_default();
|
2026-03-12 00:19:30 +00:00
|
|
|
|
|
|
|
|
let records: Vec<RunningContainerRecord> = containers
|
|
|
|
|
.iter()
|
|
|
|
|
.filter_map(|c| {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
let name = c.get("Names").and_then(|v| {
|
|
|
|
|
// Podman returns Names as an array
|
|
|
|
|
if let Some(arr) = v.as_array() {
|
|
|
|
|
arr.first().and_then(|n| n.as_str()).map(|s| s.to_string())
|
|
|
|
|
} else {
|
|
|
|
|
v.as_str().map(|s| s.to_string())
|
|
|
|
|
}
|
|
|
|
|
})?;
|
|
|
|
|
let image = c
|
|
|
|
|
.get("Image")
|
2026-03-12 00:19:30 +00:00
|
|
|
.and_then(|v| v.as_str())
|
|
|
|
|
.unwrap_or("unknown")
|
|
|
|
|
.to_string();
|
|
|
|
|
Some(RunningContainerRecord { name, image })
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
let snapshot = ContainerSnapshot {
|
|
|
|
|
timestamp: std::time::SystemTime::now()
|
|
|
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
|
|
|
.unwrap_or_default()
|
|
|
|
|
.as_secs(),
|
|
|
|
|
containers: records,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
let state_path = data_dir.join(CONTAINER_STATE_FILE);
|
|
|
|
|
let json = serde_json::to_string_pretty(&snapshot)
|
|
|
|
|
.context("Failed to serialize container snapshot")?;
|
|
|
|
|
fs::write(&state_path, json)
|
|
|
|
|
.await
|
|
|
|
|
.context("Failed to write container snapshot")?;
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Recover containers that were running before a crash.
|
|
|
|
|
/// Attempts to start each container, logging success/failure.
|
|
|
|
|
pub async fn recover_containers(containers: &[RunningContainerRecord]) -> RecoveryReport {
|
|
|
|
|
let mut report = RecoveryReport {
|
|
|
|
|
total: containers.len(),
|
|
|
|
|
recovered: 0,
|
|
|
|
|
failed: Vec::new(),
|
|
|
|
|
};
|
|
|
|
|
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
for (i, record) in containers.iter().enumerate() {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
info!(
|
|
|
|
|
"Recovering container: {} (image: {})",
|
|
|
|
|
record.name, record.image
|
|
|
|
|
);
|
2026-03-12 00:19:30 +00:00
|
|
|
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
// Rate-limit container starts to avoid overwhelming podman on low-resource systems
|
|
|
|
|
if i > 0 {
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
|
|
|
|
|
}
|
|
|
|
|
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
// Try up to 2 attempts with increasing timeout (120s first, 180s retry)
|
|
|
|
|
let mut started = false;
|
|
|
|
|
for attempt in 0..2u32 {
|
|
|
|
|
let timeout_secs = if attempt == 0 { 120 } else { 180 };
|
|
|
|
|
if attempt > 0 {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
info!(
|
|
|
|
|
"Retrying container {} (attempt {})",
|
|
|
|
|
record.name,
|
|
|
|
|
attempt + 1
|
|
|
|
|
);
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(10)).await;
|
2026-03-12 00:19:30 +00:00
|
|
|
}
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
let result = tokio::time::timeout(
|
|
|
|
|
std::time::Duration::from_secs(timeout_secs),
|
|
|
|
|
tokio::process::Command::new("podman")
|
|
|
|
|
.args(["start", &record.name])
|
|
|
|
|
.output(),
|
|
|
|
|
)
|
|
|
|
|
.await;
|
|
|
|
|
|
|
|
|
|
match result {
|
|
|
|
|
Ok(Ok(output)) if output.status.success() => {
|
|
|
|
|
info!("Successfully restarted container: {}", record.name);
|
|
|
|
|
report.recovered += 1;
|
|
|
|
|
started = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
Ok(Ok(output)) => {
|
|
|
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
warn!(
|
|
|
|
|
"Failed to restart container {} (attempt {}): {}",
|
|
|
|
|
record.name,
|
|
|
|
|
attempt + 1,
|
|
|
|
|
stderr.trim()
|
|
|
|
|
);
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
}
|
|
|
|
|
Ok(Err(e)) => {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
warn!(
|
|
|
|
|
"Failed to execute podman start for {} (attempt {}): {}",
|
|
|
|
|
record.name,
|
|
|
|
|
attempt + 1,
|
|
|
|
|
e
|
|
|
|
|
);
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
}
|
|
|
|
|
Err(_) => {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
warn!(
|
|
|
|
|
"Timeout starting container {} ({}s, attempt {})",
|
|
|
|
|
record.name,
|
|
|
|
|
timeout_secs,
|
|
|
|
|
attempt + 1
|
|
|
|
|
);
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
}
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
}
|
2026-03-12 00:19:30 +00:00
|
|
|
}
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
if !started {
|
|
|
|
|
report.failed.push(record.name.clone());
|
|
|
|
|
}
|
2026-03-12 00:19:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
report
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
|
pub struct RecoveryReport {
|
|
|
|
|
pub total: usize,
|
|
|
|
|
pub recovered: usize,
|
|
|
|
|
pub failed: Vec<String>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Check if a process with the given PID is still running.
|
|
|
|
|
fn is_process_running(pid: u32) -> bool {
|
|
|
|
|
// Check /proc/{pid} on Linux
|
|
|
|
|
std::path::Path::new(&format!("/proc/{}", pid)).exists()
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-13 03:55:14 +00:00
|
|
|
/// Start all stopped containers that were previously installed.
|
|
|
|
|
/// Runs on every startup to ensure containers come back after clean reboots.
|
|
|
|
|
/// The crash recovery (PID-based) handles dirty shutdowns; this handles clean ones.
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
/// Skips containers that the user intentionally stopped via the UI.
|
|
|
|
|
pub async fn start_stopped_containers(data_dir: &Path) -> RecoveryReport {
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
let output = match tokio::time::timeout(
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
std::time::Duration::from_secs(60),
|
feat: rootless podman, session hardening, boot stability, sidebar fix
Rootless podman migration (TASK-11):
- Remove sudo from all podman calls in PodmanClient + 8 backend files
- Remove sudo from all podman/docker calls in deploy script
- Restore full systemd security hardening: NoNewPrivileges,
RestrictAddressFamilies, MemoryDenyWriteExecute, RestrictRealtime,
RestrictNamespaces, RestrictSUIDSGID, SystemCallFilter, ProtectSystem=strict
- Enable loginctl linger for rootless container persistence
- Remove Ollama from auto-deploy (marketplace-only)
Session & auth hardening:
- Increase MAX_CONCURRENT_SESSIONS 20→50 (prevents eviction storms)
- Debounced 401 redirect in rpc-client.ts (prevents redirect storms)
Boot stability:
- optimize-debian.sh: adds chrony, swap, removes policy-rc.d
- deploy script: pre-restart chrony + swap setup
- ISO build: chrony package, swap file creation
- BootScreen: no longer clears localStorage (prevents splash replay)
- RootRedirect: sole owner of localStorage clearing on server ready
UI fixes:
- Sidebar opacity default changed from 0→visible (fixes missing sidebar
after page-persistence login without entrance animation)
- Console.log/error wrapped in import.meta.env.DEV guards
- Remove unused route import from RootRedirect
Beta tracking:
- CLAUDE.md: beta freeze protocol added
- MASTER_PLAN.md: TASK-11, TASK-17, phase structure
- BETA-PROGRESS.md: initial tracking doc
- Tagged v1.2.0-alpha.1 as pre-rootless baseline
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 13:53:27 +00:00
|
|
|
tokio::process::Command::new("podman")
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
.args([
|
|
|
|
|
"ps",
|
|
|
|
|
"-a",
|
|
|
|
|
"--filter",
|
|
|
|
|
"status=exited",
|
|
|
|
|
"--filter",
|
|
|
|
|
"status=created",
|
|
|
|
|
"--format",
|
|
|
|
|
"{{.Names}}",
|
|
|
|
|
])
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
.output(),
|
|
|
|
|
)
|
|
|
|
|
.await
|
|
|
|
|
{
|
|
|
|
|
Ok(result) => result,
|
|
|
|
|
Err(_) => {
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
warn!("Timeout listing stopped containers (60s)");
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
return RecoveryReport {
|
|
|
|
|
total: 0,
|
|
|
|
|
recovered: 0,
|
|
|
|
|
failed: Vec::new(),
|
|
|
|
|
};
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
}
|
|
|
|
|
};
|
2026-03-13 03:55:14 +00:00
|
|
|
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
let all_names: Vec<String> = match output {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout)
|
|
|
|
|
.lines()
|
|
|
|
|
.filter(|l| !l.is_empty())
|
|
|
|
|
.map(|s| s.to_string())
|
|
|
|
|
.collect(),
|
2026-03-13 03:55:14 +00:00
|
|
|
_ => Vec::new(),
|
|
|
|
|
};
|
|
|
|
|
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
if all_names.is_empty() {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
return RecoveryReport {
|
|
|
|
|
total: 0,
|
|
|
|
|
recovered: 0,
|
|
|
|
|
failed: Vec::new(),
|
|
|
|
|
};
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Filter out user-stopped containers
|
|
|
|
|
let user_stopped = load_user_stopped(data_dir).await;
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
let names: Vec<String> = all_names
|
|
|
|
|
.into_iter()
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
.filter(|n| {
|
|
|
|
|
if user_stopped.contains(n) {
|
|
|
|
|
info!("Skipping user-stopped container: {}", n);
|
|
|
|
|
false
|
|
|
|
|
} else {
|
|
|
|
|
true
|
|
|
|
|
}
|
|
|
|
|
})
|
|
|
|
|
.collect();
|
|
|
|
|
|
2026-03-13 03:55:14 +00:00
|
|
|
if names.is_empty() {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
return RecoveryReport {
|
|
|
|
|
total: 0,
|
|
|
|
|
recovered: 0,
|
|
|
|
|
failed: Vec::new(),
|
|
|
|
|
};
|
2026-03-13 03:55:14 +00:00
|
|
|
}
|
|
|
|
|
|
2026-05-05 11:29:18 -04:00
|
|
|
let names: Vec<String> = names
|
|
|
|
|
.into_iter()
|
|
|
|
|
.filter(|n| should_auto_start_stopped_container(n))
|
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
|
|
if names.is_empty() {
|
|
|
|
|
return RecoveryReport {
|
|
|
|
|
total: 0,
|
|
|
|
|
recovered: 0,
|
|
|
|
|
failed: Vec::new(),
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
// Sort by startup tier: databases first, then core, then dependent services, then apps
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
let mut records: Vec<RunningContainerRecord> = names
|
|
|
|
|
.iter()
|
|
|
|
|
.map(|n| RunningContainerRecord {
|
|
|
|
|
name: n.clone(),
|
|
|
|
|
image: String::new(),
|
|
|
|
|
})
|
2026-03-13 03:55:14 +00:00
|
|
|
.collect();
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
records.sort_by_key(|r| container_boot_tier(&r.name));
|
|
|
|
|
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
info!(
|
|
|
|
|
"Starting {} stopped containers after boot (skipped {} user-stopped)...",
|
|
|
|
|
records.len(),
|
|
|
|
|
user_stopped.len()
|
|
|
|
|
);
|
2026-03-13 03:55:14 +00:00
|
|
|
recover_containers(&records).await
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-05 11:29:18 -04:00
|
|
|
fn should_auto_start_stopped_container(name: &str) -> bool {
|
|
|
|
|
// Keep generic boot recovery narrow. The Rust manifest reconciler owns
|
|
|
|
|
// managed app stacks; starting every exited Podman container here races
|
|
|
|
|
// it and resurrects legacy/orphan helper containers.
|
|
|
|
|
matches!(name, "filebrowser" | "nostr-rs-relay")
|
|
|
|
|
}
|
|
|
|
|
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
/// Simple tier ordering for boot recovery (mirrors health_monitor tiers).
|
|
|
|
|
fn container_boot_tier(name: &str) -> u8 {
|
|
|
|
|
let id = name.strip_prefix("archy-").unwrap_or(name);
|
|
|
|
|
match id {
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
// Tier 0: Databases and data stores
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
"btcpay-db" | "mempool-db" | "mysql-mempool" | "penpot-postgres" | "immich_postgres"
|
|
|
|
|
| "immich_redis" | "penpot-valkey" | "endurain-db" | "nextcloud-db"
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
| "indeedhub-postgres" | "indeedhub-redis" | "indeedhub-minio" => 0,
|
|
|
|
|
// Tier 1: Core infrastructure
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
"bitcoin-knots" | "bitcoin-core" | "bitcoin" => 1,
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
// Tier 2: Dependent services
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
"lnd" | "electrumx" | "mempool-electrs" | "electrs" | "nbxplorer" | "mempool-api"
|
|
|
|
|
| "indeedhub-api" => 2,
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
// Tier 4: Frontend/UI
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
"mempool-web" | "bitcoin-ui" | "lnd-ui" | "electrs-ui" | "penpot-frontend"
|
|
|
|
|
| "penpot-exporter" | "indeedhub" => 4,
|
fix: overhaul container lifecycle — recovery, health, uninstall, UI state
Container recovery:
- Health monitor: MAX_RESTART_ATTEMPTS 3→10, interval 60s→120s
- Dependency-aware restarts: won't restart services before their deps
- Reset dependent counters when a dependency recovers
- Handle "created" state containers (were invisible to health monitor)
- Added IndeedHub, mempool-api, mysql to tier system
- Crash recovery: podman start timeout 30s→120s with retry
- Podman client: socket timeout 5s→30s, added restart policy
UI state representation:
- Exit code 0 shows "stopped" (gray), not "crashed" (red)
- Exit code 137 shows "killed (OOM)"
- Non-zero exit shows "crashed" (red)
- Added exit_code field to PackageDataEntry
Install/uninstall fixes:
- Install returns error when container doesn't start (was silent success)
- Post-install hooks awaited instead of fire-and-forget tokio::spawn
- Uninstall: graceful rm before force, volume prune, network cleanup
- Uninstall returns error on partial failure (was 200 OK)
Config consistency:
- DB passwords read from /var/lib/archipelago/secrets/ (was hardcoded)
- Bitcoin: added ZMQ ports 28332/28333 for LND block notifications
- IndeedHub port 7777→8190 (was conflicting with strfry)
- Marketplace versions: LND 0.17.4→0.18.4, Mempool 2.5.0→3.0.0
Performance:
- Metrics collector interval 60s→300s (was duplicating health monitor)
- Podman client: proper error propagation instead of unwrap_or_default
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 07:03:57 +01:00
|
|
|
// Tier 3: Everything else
|
security+feat: v1.3.0 — pentest remediation, container reliability, UI overhaul
Security (33 pentest findings addressed):
- CRITICAL: backend binds 127.0.0.1, path traversal in tor.rs/dwn fixed
- HIGH: federation requires signatures, XSS login redirect, RBAC viewer restricted
- HIGH: tar slip prevention, S3 SSRF validation, backup ID validation
- MEDIUM: remember-me random secret, TOTP session rotation, password re-auth
- LOW: CSP unsafe-inline removed, CORS dev-only, onion/webhook validation
Container reliability:
- Memory limits on all 37 containers (OOM prevention)
- Exited vs stopped state distinction with health-aware status badges
- Crash recovery coordination (no more restart cascade)
- User-stopped tracking survives reboots
- Tiered boot recovery (databases → core → services → apps)
UI:
- Wallet TransactionsModal, health-aware app status badges
- Restart button on containers, exited/crashed red state
- Mesh view overhaul, glass button updates, BaseModal/ToggleSwitch
- Apps sticky header removed, dev faucet, mutable mock wallet
Infrastructure:
- LND REST port 8080 exposed over Tor (LND Connect fix)
- Nginx cookie_session fix, deploy script Tor config updated
- Dev environment: podman auto-start, boot mode simulation
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:44:31 +00:00
|
|
|
_ => 3,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-28 11:31:48 +00:00
|
|
|
/// Run the reconciliation script after boot to fix any config drift.
|
|
|
|
|
/// Ensures all containers match their canonical specs from container-specs.sh.
|
2026-04-02 20:28:53 +01:00
|
|
|
#[allow(dead_code)]
|
2026-03-28 11:31:48 +00:00
|
|
|
pub async fn run_boot_reconciliation() {
|
|
|
|
|
let script = "/home/archipelago/archy/scripts/reconcile-containers.sh";
|
|
|
|
|
if !std::path::Path::new(script).exists() {
|
|
|
|
|
info!("Reconciliation script not found (dev mode?) — skipping boot reconciliation");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
info!("Running boot reconciliation...");
|
|
|
|
|
let result = tokio::time::timeout(
|
|
|
|
|
std::time::Duration::from_secs(300),
|
|
|
|
|
tokio::process::Command::new(script).output(),
|
|
|
|
|
)
|
|
|
|
|
.await;
|
|
|
|
|
match result {
|
|
|
|
|
Ok(Ok(output)) if output.status.success() => {
|
|
|
|
|
info!("Boot reconciliation complete");
|
|
|
|
|
}
|
|
|
|
|
Ok(Ok(output)) => {
|
|
|
|
|
let stderr = String::from_utf8_lossy(&output.stderr);
|
|
|
|
|
warn!(
|
|
|
|
|
"Boot reconciliation had failures: {}",
|
|
|
|
|
stderr.chars().take(500).collect::<String>()
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
Ok(Err(e)) => warn!("Boot reconciliation failed to run: {}", e),
|
|
|
|
|
Err(_) => warn!("Boot reconciliation timed out (300s)"),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-03-12 00:19:30 +00:00
|
|
|
/// Spawn a background task that periodically saves the container snapshot.
|
|
|
|
|
pub fn spawn_snapshot_task(data_dir: PathBuf) {
|
|
|
|
|
tokio::spawn(async move {
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
// Wait 2 minutes before first snapshot (let crash recovery finish and containers stabilize)
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(120)).await;
|
2026-03-12 00:19:30 +00:00
|
|
|
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
let mut interval = tokio::time::interval(std::time::Duration::from_secs(120));
|
2026-04-07 20:25:09 +01:00
|
|
|
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
2026-03-12 00:19:30 +00:00
|
|
|
loop {
|
|
|
|
|
interval.tick().await;
|
|
|
|
|
if let Err(e) = save_container_snapshot(&data_dir).await {
|
|
|
|
|
tracing::debug!("Container snapshot (non-fatal): {}", e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
|
|
|
|
use super::*;
|
|
|
|
|
use tempfile::TempDir;
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn test_no_crash_without_pid_file() {
|
|
|
|
|
let tmp = TempDir::new().unwrap();
|
|
|
|
|
let result = check_for_crash(tmp.path()).await.unwrap();
|
|
|
|
|
assert!(result.is_none());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn test_crash_detected_with_pid_file() {
|
|
|
|
|
let tmp = TempDir::new().unwrap();
|
|
|
|
|
// Write a PID file with a non-existent PID
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
fs::write(tmp.path().join(PID_FILE), "999999999")
|
|
|
|
|
.await
|
|
|
|
|
.unwrap();
|
2026-03-12 00:19:30 +00:00
|
|
|
let result = check_for_crash(tmp.path()).await.unwrap();
|
|
|
|
|
// No snapshot file → crash detected but no containers to recover
|
|
|
|
|
assert!(result.is_none());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn test_crash_with_snapshot() {
|
|
|
|
|
let tmp = TempDir::new().unwrap();
|
|
|
|
|
// Write PID file
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
fs::write(tmp.path().join(PID_FILE), "999999999")
|
|
|
|
|
.await
|
|
|
|
|
.unwrap();
|
2026-03-12 00:19:30 +00:00
|
|
|
// Write container snapshot
|
|
|
|
|
let snapshot = ContainerSnapshot {
|
|
|
|
|
timestamp: 1000,
|
|
|
|
|
containers: vec![
|
|
|
|
|
RunningContainerRecord {
|
|
|
|
|
name: "archy-bitcoin-knots".to_string(),
|
|
|
|
|
image: "bitcoin-knots:27.1".to_string(),
|
|
|
|
|
},
|
|
|
|
|
RunningContainerRecord {
|
|
|
|
|
name: "archy-mempool-web".to_string(),
|
|
|
|
|
image: "mempool/frontend:3.0".to_string(),
|
|
|
|
|
},
|
|
|
|
|
],
|
|
|
|
|
};
|
|
|
|
|
let json = serde_json::to_string(&snapshot).unwrap();
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
fs::write(tmp.path().join(CONTAINER_STATE_FILE), json)
|
|
|
|
|
.await
|
|
|
|
|
.unwrap();
|
2026-03-12 00:19:30 +00:00
|
|
|
|
|
|
|
|
let result = check_for_crash(tmp.path()).await.unwrap();
|
|
|
|
|
assert!(result.is_some());
|
|
|
|
|
let containers = result.unwrap();
|
|
|
|
|
assert_eq!(containers.len(), 2);
|
|
|
|
|
assert_eq!(containers[0].name, "archy-bitcoin-knots");
|
|
|
|
|
assert_eq!(containers[1].name, "archy-mempool-web");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn test_write_and_remove_pid_marker() {
|
|
|
|
|
let tmp = TempDir::new().unwrap();
|
|
|
|
|
write_pid_marker(tmp.path()).await.unwrap();
|
|
|
|
|
assert!(tmp.path().join(PID_FILE).exists());
|
|
|
|
|
|
|
|
|
|
remove_pid_marker(tmp.path()).await;
|
|
|
|
|
assert!(!tmp.path().join(PID_FILE).exists());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn test_pid_marker_contains_current_pid() {
|
|
|
|
|
let tmp = TempDir::new().unwrap();
|
|
|
|
|
write_pid_marker(tmp.path()).await.unwrap();
|
|
|
|
|
let content = fs::read_to_string(tmp.path().join(PID_FILE)).await.unwrap();
|
|
|
|
|
let saved_pid: u32 = content.parse().unwrap();
|
|
|
|
|
assert_eq!(saved_pid, std::process::id());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn test_clean_shutdown_no_crash_on_restart() {
|
|
|
|
|
let tmp = TempDir::new().unwrap();
|
|
|
|
|
|
|
|
|
|
// Simulate: startup → write PID → clean shutdown → remove PID
|
|
|
|
|
write_pid_marker(tmp.path()).await.unwrap();
|
|
|
|
|
remove_pid_marker(tmp.path()).await;
|
|
|
|
|
|
|
|
|
|
// Next startup should detect no crash
|
|
|
|
|
let result = check_for_crash(tmp.path()).await.unwrap();
|
|
|
|
|
assert!(result.is_none());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn test_corrupt_snapshot_handled() {
|
|
|
|
|
let tmp = TempDir::new().unwrap();
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
fs::write(tmp.path().join(PID_FILE), "999999999")
|
|
|
|
|
.await
|
|
|
|
|
.unwrap();
|
|
|
|
|
fs::write(tmp.path().join(CONTAINER_STATE_FILE), "not valid json")
|
|
|
|
|
.await
|
|
|
|
|
.unwrap();
|
2026-03-12 00:19:30 +00:00
|
|
|
|
|
|
|
|
// Should not crash, returns None (no recoverable containers)
|
|
|
|
|
let result = check_for_crash(tmp.path()).await.unwrap();
|
|
|
|
|
assert!(result.is_none());
|
|
|
|
|
}
|
2026-05-05 11:29:18 -04:00
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn generic_boot_recovery_skips_manifest_owned_and_legacy_stacks() {
|
|
|
|
|
assert!(should_auto_start_stopped_container("filebrowser"));
|
|
|
|
|
assert!(should_auto_start_stopped_container("nostr-rs-relay"));
|
|
|
|
|
assert!(!should_auto_start_stopped_container("bitcoin-knots"));
|
|
|
|
|
assert!(!should_auto_start_stopped_container("lnd"));
|
|
|
|
|
assert!(!should_auto_start_stopped_container("indeedhub-postgres"));
|
|
|
|
|
}
|
2026-03-12 00:19:30 +00:00
|
|
|
}
|