Merge branch 'iso-feedback-fixes-2026-07-02' into merge-iso-feedback

# Conflicts:
#	core/archipelago/src/api/rpc/middleware.rs
This commit is contained in:
archipelago 2026-07-02 08:03:25 -04:00
commit f5d2479605
64 changed files with 1513 additions and 208 deletions

View File

@ -1,7 +1,7 @@
app:
id: archy-btcpay-db
name: BTCPay Postgres
version: 15.17
version: "15.17"
description: Postgres backend for BTCPay and NBXplorer.
container:

View File

@ -17,6 +17,13 @@ app:
# the IBD sweet spot - 4GB on full nodes, 1GB on pruned. Container
# --memory=8g (config.rs::get_memory_limit) leaves headroom for
# mempool + connections.
#
# -printtoconsole=0: foreground bitcoind defaults console logging ON,
# which pushed every IBD "UpdateTip" line through conmon into journald
# (>1 GB/day on a fresh node). bitcoind still writes debug.log in the
# datadir (/var/lib/archipelago/bitcoin/debug.log, self-shrunk on
# restart) — use that for deep debugging; podman logs only carries
# entrypoint/startup errors.
- >-
BITCOIND="$(command -v bitcoind || true)";
if [ -z "$BITCOIND" ]; then
@ -36,9 +43,9 @@ app:
RPC_TXRELAY_FLAGS="$RPC_TXRELAY_FLAGS -rpcauth=$RPC_TXRELAY_AUTH -rpcwhitelist=txrelay:sendrawtransaction,submitpackage,testmempoolaccept,getmempoolinfo,getrawmempool,getmempoolentry,getnetworkinfo,getblockchaininfo,getblockcount,getblockhash,getblock,getblockheader,getrawtransaction,gettxout,gettxspendingprevout,decoderawtransaction,decodescript,estimatesmartfee,uptime,ping,getconnectioncount,getpeerinfo,getindexinfo,getdeploymentinfo,getchaintips";
fi;
if [ "${DISK_GB_VALUE:-0}" -lt 1000 ]; then
exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=1024 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS";
exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -printtoconsole=0 -server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=1024 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS";
else
exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS";
exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -printtoconsole=0 -server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS";
fi
derived_env:
- key: DISK_GB

View File

@ -17,6 +17,13 @@ app:
# the IBD sweet spot - 4GB on full nodes, 1GB on pruned. Container
# --memory=8g (config.rs::get_memory_limit) leaves headroom for
# mempool + connections.
#
# -printtoconsole=0: foreground bitcoind defaults console logging ON,
# which pushed every IBD "UpdateTip" line through conmon into journald
# (>1 GB/day on a fresh node). bitcoind still writes debug.log in the
# datadir (/var/lib/archipelago/bitcoin/debug.log, self-shrunk on
# restart) — use that for deep debugging; podman logs only carries
# entrypoint/startup errors.
- >-
BITCOIND="$(command -v bitcoind || true)";
if [ -z "$BITCOIND" ]; then
@ -36,9 +43,9 @@ app:
RPC_TXRELAY_FLAGS="$RPC_TXRELAY_FLAGS -rpcauth=$RPC_TXRELAY_AUTH -rpcwhitelist=txrelay:sendrawtransaction,submitpackage,testmempoolaccept,getmempoolinfo,getrawmempool,getmempoolentry,getnetworkinfo,getblockchaininfo,getblockcount,getblockhash,getblock,getblockheader,getrawtransaction,gettxout,gettxspendingprevout,decoderawtransaction,decodescript,estimatesmartfee,uptime,ping,getconnectioncount,getpeerinfo,getindexinfo,getdeploymentinfo,getchaintips";
fi;
if [ "${DISK_GB_VALUE:-0}" -lt 1000 ]; then
exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=2048 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS";
exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -printtoconsole=0 -server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=2048 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS";
else
exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS";
exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -printtoconsole=0 -server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS";
fi
derived_env:
- key: DISK_GB

View File

@ -8,6 +8,13 @@ app:
image: 146.59.87.168:3000/lfg2025/lnd:v0.18.4-beta
pull_policy: if-not-present
network: archy-net
# BITCOIND_HOST must follow the node's actual Bitcoin container — Knots or
# Core — resolved at apply time from host facts. Hardcoding either breaks
# LND's chain backend connection on the other (lnd.conf is likewise
# resolved in lnd::ensure_config).
derived_env:
- key: BITCOIND_HOST
template: "{{BITCOIN_HOST}}"
secret_env:
- key: BITCOIND_RPCPASS
secret_file: bitcoin-rpc-password
@ -45,7 +52,6 @@ app:
options: [rw]
environment:
- BITCOIND_HOST=bitcoin-knots
- BITCOIND_RPCUSER=archipelago
- NETWORK=mainnet

View File

@ -39,6 +39,17 @@ impl ApiHandler {
let (mut tx, mut rx) = ws_stream.split();
// Subscribe BEFORE taking the initial snapshot. Messages are full
// data dumps keyed by a monotonic revision, so a broadcast that
// races the snapshot is at worst a harmless duplicate/newer dump
// delivered right after — but subscribing after the snapshot send
// (the old order) let any update in that window vanish forever,
// since a tokio broadcast channel never delivers sends that
// predate subscribe(). That silently stuck clients (e.g. a fresh
// install's post-boot container scan) on a stale initial snapshot
// until a full page reload opened a new connection past the race.
let mut state_rx = state_manager.subscribe();
let initial_msg = state_manager.get_initial_message().await;
if let Ok(json_msg) = serde_json::to_string(&initial_msg) {
if let Err(e) = tx.send(Message::Text(json_msg)).await {
@ -47,8 +58,6 @@ impl ApiHandler {
}
debug!("Sent initial data dump at revision {}", initial_msg.rev);
}
let mut state_rx = state_manager.subscribe();
let ping_interval = tokio::time::interval(tokio::time::Duration::from_secs(30));
tokio::pin!(ping_interval);
let mut last_client_activity = Instant::now();

View File

@ -141,6 +141,19 @@ impl RpcHandler {
self.auth_manager.setup_user(password).await?;
tracing::info!("[onboarding] user setup complete");
// Persist the pending onboarding seed as the encrypted backup now that
// a passphrase (the login password) finally exists — otherwise "Reveal
// recovery phrase" has nothing to decrypt on this node, ever.
// Best-effort: a failure here must not break password setup.
match super::seed_rpc::save_pending_seed_encrypted(&self.config.data_dir, password).await {
Ok(true) => tracing::info!("[onboarding] encrypted seed backup saved"),
Ok(false) => tracing::info!(
"[onboarding] no pending mnemonic to back up (restored earlier or legacy node)"
),
Err(e) => tracing::warn!("[onboarding] encrypted seed backup failed: {e:#}"),
}
Ok(serde_json::json!(true))
}

View File

@ -77,6 +77,19 @@ pub(super) fn sanitize_error_message(msg: &str) -> String {
"No wireless radio",
"WiFi radio enabled but",
"Missing required field",
// seed.reveal / auth flows — user-actionable, no internals to leak.
// Without these the sanitizer collapsed every reveal failure into
// "Operation failed. Check server logs." (which isn't even a crash).
"Incorrect",
"This node has no encrypted seed",
"A 2FA code is required",
"2FA is enabled but",
"Could not decrypt the saved seed",
"Could not unlock 2FA",
"No mnemonic available",
"No pending seed generation",
"Submitted words",
"Already set up",
];
for prefix in &user_facing_prefixes {
if msg.starts_with(prefix) {
@ -96,6 +109,43 @@ pub(super) fn sanitize_error_message(msg: &str) -> String {
"Operation failed. Check server logs for details.".to_string()
}
#[cfg(test)]
mod sanitize_tests {
    use super::sanitize_error_message;

    /// The catch-all text the sanitizer returns for messages it does not
    /// recognize as user-facing.
    const GENERIC: &str = "Operation failed. Check server logs for details.";

    /// User-actionable seed.reveal / auth failures. Each one must reach the
    /// user verbatim instead of being collapsed into [`GENERIC`].
    const USER_ACTIONABLE: &[&str] = &[
        "Incorrect password",
        "This node has no encrypted seed backup, so the recovery phrase cannot be shown. It was only displayed once during setup.",
        "A 2FA code is required to reveal the recovery phrase",
        "2FA is enabled but no TOTP data found",
        "Could not decrypt the saved seed. If you set a separate backup passphrase during setup, enter that passphrase.",
        "Could not unlock 2FA with this password",
        "No mnemonic available. Generate or restore a seed first.",
        "Submitted words do not match generated seed",
        "Already set up. Use auth.changePassword to change.",
    ];

    /// Masking these as "Check server logs" sent a real user hunting a crash
    /// that never happened, so none of them may come back as the generic text.
    #[test]
    fn seed_reveal_errors_pass_through() {
        for msg in USER_ACTIONABLE.iter().copied() {
            let sanitized = sanitize_error_message(msg);
            assert_ne!(sanitized, GENERIC, "masked: {msg}");
        }
    }

    /// A message that matches no user-facing prefix is still replaced by the
    /// generic text.
    #[test]
    fn internal_errors_stay_generic() {
        let sanitized = sanitize_error_message("thread panicked at src/foo.rs:42");
        assert_eq!(sanitized, GENERIC);
    }
}
/// Derive a CSRF token from the session token via HMAC.
/// Deterministic: same session token always produces the same CSRF token.
/// Survives backend restarts because it depends only on the session token

View File

@ -114,6 +114,31 @@ impl RpcHandler {
Err(e) => {
error!("package.install {} failed: {:#}", package_id_spawn, e);
install_log(&format!("INSTALL FAIL: {}{:#}", package_id_spawn, e)).await;
// Dependency-gate rejections happen BEFORE any resource
// (container/image/data dir) exists for this package, so
// keeping the optimistic entry would leave a phantom
// "Stopped" tile whose Start fails with `no such object`
// (the log-confirmed LND fresh-install failure). Remove
// the entry so the card reverts to installable, and
// surface the reason as a notification instead.
if let Some(gate) = e.downcast_ref::<super::dependencies::DependencyGateError>()
{
let (mut data, _) = handler.state_manager.get_snapshot().await;
data.package_data.remove(&package_id_spawn);
data.notifications.push(crate::data_model::Notification {
id: format!("install-deps-{package_id_spawn}"),
level: crate::data_model::NotificationLevel::Error,
title: format!("Could not install {package_id_spawn}"),
message: gate.to_string(),
timestamp: chrono::Utc::now().to_rfc3339(),
app_id: Some(package_id_spawn.clone()),
});
while data.notifications.len() > 20 {
data.notifications.remove(0);
}
handler.state_manager.update_data(data).await;
return;
}
// Don't remove the entry — that's what made the card
// vanish from My Apps mid-install / between retry-loop
// attempts (e.g. tailscale's entrypoint failure). Leave

View File

@ -707,12 +707,17 @@ pub(super) async fn get_app_config(
// effectively pinned at 2 by --cpus=2 (now removed).
// -maxconnections=125 — default but explicit, so ops can
// tune downward on bandwidth-constrained nodes.
// Log volume: -printtoconsole=0 — bitcoind already writes
// debug.log in the datadir (self-shrunk on restart); echoing it
// to stdout too pushed every IBD "UpdateTip" line through
// conmon into journald (>1 GB/day on a fresh node). Deep
// debugging uses /var/lib/archipelago/bitcoin/debug.log.
Some(vec![
"-server=1".to_string(),
"-rpcbind=0.0.0.0".to_string(),
"-rpcallowip=0.0.0.0/0".to_string(),
"-rpcport=8332".to_string(),
"-printtoconsole=1".to_string(),
"-printtoconsole=0".to_string(),
"-datadir=/home/bitcoin/.bitcoin".to_string(),
format!("-dbcache={}", bitcoin_dbcache_mb()),
"-par=0".to_string(),

View File

@ -58,6 +58,7 @@ fn archival_bitcoin_required_message(package_id: &str) -> String {
}
/// Snapshot of which dependency services are currently running.
#[derive(Debug)]
pub(super) struct RunningDeps {
pub has_bitcoin: bool,
pub has_electrumx: bool,
@ -227,6 +228,190 @@ pub(super) fn check_install_deps(package_id: &str, deps: &RunningDeps) -> Result
}
}
// ---------------------------------------------------------------------------
// Bounded dependency wait (install race fix)
// ---------------------------------------------------------------------------
//
// Confirmed race on fresh nodes: the user clicks "Install LND" while
// bitcoin-knots is itself still installing/starting. `check_install_deps`
// rejected instantly ("LND requires a running Bitcoin node…") even though
// Bitcoin came up 55s later. The fix: when the dependency is INSTALLED
// (container exists in `podman ps -a`, or the package state knows about it)
// but not Running yet, poll for up to DEP_WAIT_MAX_ATTEMPTS × DEP_WAIT_INTERVAL
// (~3 minutes) before failing, surfacing "Waiting for X to start…" via the
// install-progress message. If the dependency is not installed at all, fail
// fast with the canonical `check_install_deps` message — waiting can't help.
/// Poll interval while waiting for an installed dependency to start.
/// Passed as the `interval` argument of `wait_for_install_deps` (the sleep
/// between two consecutive probes).
pub(super) const DEP_WAIT_INTERVAL: std::time::Duration = std::time::Duration::from_secs(5);
/// Maximum number of waiting rounds before the gate gives up.
/// 36 × 5s = 3 minutes of bounded waiting.
pub(super) const DEP_WAIT_MAX_ATTEMPTS: u32 = 36;
/// Marker error raised when the dependency gate rejects an install BEFORE
/// any resource (container, image, data dir) has been created for the
/// package. The async install wrapper (`async_lifecycle.rs`) downcasts to
/// this type so it can drop the optimistic `Installing` state entry rather
/// than leave a phantom "Stopped" tile whose Start fails with
/// `no such object`.
///
/// The wrapped `String` is the user-facing rejection reason; `Display`
/// prints it unchanged.
#[derive(Debug)]
pub(in crate::api::rpc) struct DependencyGateError(pub String);

impl std::error::Error for DependencyGateError {}

impl std::fmt::Display for DependencyGateError {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(reason) = self;
        formatter.write_str(reason)
    }
}
/// One unsatisfied install dependency: a user-facing label plus the container
/// name variants that would satisfy it.
struct MissingDep {
// Human-readable name used when building "Waiting for … to start…" and
// timeout messages.
label: &'static str,
// Container names, any one of which counts as "this dependency is
// installed" when it appears in the probe's list of existing containers.
containers: &'static [&'static str],
}
/// Lists the dependencies that `check_install_deps` would reject
/// `package_id` over, given the currently running services in `deps`.
///
/// Must stay in lockstep with the match arms in `check_install_deps` (the
/// wait loop re-runs `check_install_deps` for the canonical error message).
///
/// The result is ordered Bitcoin first, then ElectrumX; it is empty for
/// packages without a dependency gate.
fn missing_install_deps(package_id: &str, deps: &RunningDeps) -> Vec<MissingDep> {
    // Step 1: what does this package require at all?
    let (wants_bitcoin, wants_electrum) = match package_id {
        "electrumx" | "mempool-electrs" | "electrs" | "lnd" | "btcpay-server"
        | "btcpayserver" => (true, false),
        "mempool" | "mempool-web" => (true, true),
        // fedimint deliberately absent: check_install_deps allows it without
        // a local Bitcoin node (remote RPC configured in guardian setup).
        _ => (false, false),
    };

    // Step 2: keep only the requirements that are not currently running.
    let mut unmet = Vec::new();
    if wants_bitcoin && !deps.has_bitcoin {
        unmet.push(MissingDep {
            label: "Bitcoin",
            containers: BITCOIN_NAMES,
        });
    }
    if wants_electrum && !deps.has_electrumx {
        unmet.push(MissingDep {
            label: "ElectrumX",
            containers: ELECTRUM_NAMES,
        });
    }
    unmet
}
/// Joins the user-facing labels of `missing` with " and ", in slice order
/// (e.g. "Bitcoin and ElectrumX"). Returns an empty string for an empty slice.
fn join_dep_labels(missing: &[MissingDep]) -> String {
    let mut joined = String::new();
    for (position, dep) in missing.iter().enumerate() {
        if position > 0 {
            joined.push_str(" and ");
        }
        joined.push_str(dep.label);
    }
    joined
}
/// One snapshot of the dependency world, fed to [`wait_for_install_deps`].
pub(super) struct DepProbe {
/// Which dependency services are currently Running.
pub running: RunningDeps,
/// Container/package names that EXIST in any state — installed, but
/// possibly not running yet (e.g. the names listed by `podman ps -a`).
/// The wait loop compares these by exact string equality against each
/// missing dependency's container-name variants.
/// NOTE(review): the earlier wording also mentioned package-state entries —
/// confirm whether any caller merges those into this list.
pub existing: Vec<String>,
}
/// All container names known to podman in any state (`podman ps -a`).
///
/// Returns one entry per non-empty output line, trimmed of surrounding
/// whitespace.
///
/// Conservative on probe failure: a spawn error, a non-zero exit status, or
/// hitting the 30-second timeout all yield an empty list, which makes the
/// wait loop fall back to the pre-fix fail-fast behavior.
pub(super) async fn detect_existing_containers() -> Vec<String> {
    let out = tokio::time::timeout(
        std::time::Duration::from_secs(30),
        tokio::process::Command::new("podman")
            .args(["ps", "-a", "--format", "{{.Names}}"])
            // When the timeout fires, only the future is dropped. Without
            // kill_on_drop the podman child would keep running in the
            // background, and the wait loop could pile up one stuck process
            // per polling attempt on a congested Podman API.
            .kill_on_drop(true)
            .output(),
    )
    .await;
    match out {
        Ok(Ok(o)) if o.status.success() => String::from_utf8_lossy(&o.stdout)
            .lines()
            .map(|l| l.trim().to_string())
            .filter(|l| !l.is_empty())
            .collect(),
        // Timeout, spawn failure, or non-zero exit: report "nothing exists".
        _ => Vec::new(),
    }
}
/// Bounded dependency gate. Returns the (satisfied) `RunningDeps` snapshot,
/// or a [`DependencyGateError`]:
/// - immediately, when a missing dependency is not installed at all
///   (canonical `check_install_deps` message), or
/// - after `max_attempts × interval`, when an installed dependency never
///   reached Running.
///
/// Parameters:
/// - `package_id`: the package being installed.
/// - `probe`: called once per loop iteration to obtain a fresh [`DepProbe`];
///   an `Err` from it is propagated unchanged (not gate-marked).
/// - `on_waiting`: called once per waiting round with the
///   "Waiting for X to start…" text.
/// - `max_attempts`: number of waiting rounds allowed; the loop probes one
///   final time after the last round before reporting the timeout.
/// - `interval`: sleep between two probes.
///
/// `probe` and `on_waiting` are injected so unit tests can drive the loop
/// without a podman runtime; production wires them to
/// `RpcHandler::dep_probe_for_install` / `set_install_message`.
pub(super) async fn wait_for_install_deps<P, PF, L, LF>(
    package_id: &str,
    mut probe: P,
    mut on_waiting: L,
    max_attempts: u32,
    interval: std::time::Duration,
) -> Result<RunningDeps>
where
    P: FnMut() -> PF,
    PF: std::future::Future<Output = Result<DepProbe>>,
    L: FnMut(String) -> LF,
    LF: std::future::Future<Output = ()>,
{
    // Total wait budget quoted in the log line and the timeout error.
    // Multiplying the Duration (not its whole-second part) keeps sub-second
    // intervals from being truncated to zero before the multiplication.
    let budget_secs = interval.saturating_mul(max_attempts).as_secs();

    let mut waited_attempts = 0u32;
    loop {
        let DepProbe { running, existing } = probe().await?;
        let missing = missing_install_deps(package_id, &running);
        if missing.is_empty() {
            // Keep behavior in lockstep with the canonical gate (covers any
            // future arm added there but not mirrored in missing_install_deps).
            // NOTE(review): a rejection on this path is returned as-is, i.e.
            // NOT wrapped in DependencyGateError — confirm that is intended.
            check_install_deps(package_id, &running)?;
            return Ok(running);
        }

        // Fail fast if any missing dependency has no installed container
        // under any name variant — waiting cannot satisfy it.
        let some_dep_not_installed = missing
            .iter()
            .any(|dep| !dep.containers.iter().any(|c| existing.iter().any(|e| e == c)));
        if some_dep_not_installed {
            let msg = match check_install_deps(package_id, &running) {
                Err(e) => e.to_string(),
                Ok(()) => format!("{package_id} dependencies are not running"),
            };
            return Err(anyhow::Error::new(DependencyGateError(msg)));
        }

        // Everything missing is installed but not running: wait, within budget.
        let labels = join_dep_labels(&missing);
        if waited_attempts >= max_attempts {
            // Verb agrees with the label count: "Bitcoin is …" versus
            // "Bitcoin and ElectrumX are …".
            let verb = if missing.len() == 1 { "is" } else { "are" };
            return Err(anyhow::Error::new(DependencyGateError(format!(
                "{labels} {verb} installed but did not reach the running state within \
                 {budget_secs} seconds. Start {labels}, then install {package_id} again."
            ))));
        }

        waited_attempts += 1;
        if waited_attempts == 1 {
            // Log once, on the first waiting round only.
            info!(
                "Install {package_id}: dependency {labels} installed but not running yet — \
                 waiting up to {budget_secs}s for it to start"
            );
        }
        on_waiting(format!("Waiting for {labels} to start…")).await;
        tokio::time::sleep(interval).await;
    }
}
/// ElectrumX and Mempool's Electrum backend need historical blocks from an
/// unpruned node while building their indexes. A pruned Bitcoin node can be
/// running and RPC-reachable but still leave them stuck with closed ports.
@ -625,6 +810,218 @@ mod tests {
assert!(!manifest_declares_archival_bitcoin("does-not-exist"));
}
// Unit tests for `wait_for_install_deps`. Every test injects an in-memory
// probe closure and a label-collecting sink, and uses `Duration::ZERO` as the
// interval, so no podman runtime is needed and no real time passes.
mod dep_wait {
use super::super::{wait_for_install_deps, DepProbe, DependencyGateError, RunningDeps};
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Duration;
// Builds a `RunningDeps` with the given Bitcoin/ElectrumX flags; LND is
// always reported as not running.
fn deps(has_bitcoin: bool, has_electrumx: bool) -> RunningDeps {
RunningDeps {
has_bitcoin,
has_electrumx,
has_lnd: false,
}
}
// Builds one `DepProbe`: the running flags plus the list of container
// names that exist in any state.
fn probe(has_bitcoin: bool, has_electrumx: bool, existing: &[&str]) -> DepProbe {
DepProbe {
running: deps(has_bitcoin, has_electrumx),
existing: existing.iter().map(|s| s.to_string()).collect(),
}
}
/// Collects "Waiting for X to start…" labels emitted during the wait.
// Returns the shared label vector together with the `on_waiting` closure
// that appends to it.
fn label_sink() -> (Arc<Mutex<Vec<String>>>, impl FnMut(String) -> std::future::Ready<()>)
{
let labels = Arc::new(Mutex::new(Vec::new()));
let sink = {
let labels = Arc::clone(&labels);
move |msg: String| {
labels.lock().unwrap().push(msg);
std::future::ready(())
}
};
(labels, sink)
}
// Dependency already running: the gate returns Ok without emitting any
// waiting label.
#[tokio::test]
async fn passes_immediately_when_dependency_is_running() {
let (labels, sink) = label_sink();
let result = wait_for_install_deps(
"lnd",
|| async { Ok(probe(true, false, &["bitcoin-knots"])) },
sink,
3,
Duration::ZERO,
)
.await;
assert!(result.is_ok());
assert!(labels.lock().unwrap().is_empty(), "no waiting expected");
}
// No Bitcoin container exists under any name: the gate must reject after
// a single probe instead of polling.
#[tokio::test]
async fn fails_fast_when_dependency_not_installed_at_all() {
let calls = AtomicU32::new(0);
let (labels, sink) = label_sink();
let err = wait_for_install_deps(
"lnd",
|| {
calls.fetch_add(1, Ordering::SeqCst);
async { Ok(probe(false, false, &["uptime-kuma"])) }
},
sink,
36,
Duration::ZERO,
)
.await
.unwrap_err();
// Single probe — no polling when waiting cannot help.
assert_eq!(calls.load(Ordering::SeqCst), 1);
assert!(labels.lock().unwrap().is_empty());
// Canonical check_install_deps message, wrapped in the gate marker
// so async_lifecycle removes the optimistic Installing entry.
assert!(err.downcast_ref::<DependencyGateError>().is_some());
assert!(
err.to_string().contains("LND requires a running Bitcoin node"),
"unexpected message: {err}"
);
}
// Installed-but-starting dependency: two waiting rounds, then success on
// the third probe.
#[tokio::test]
async fn waits_while_installed_dependency_starts_then_passes() {
// Bitcoin container exists (installing/starting) but only reports
// Running from the 3rd probe onward — the log-confirmed LND race.
let calls = Arc::new(AtomicU32::new(0));
let (labels, sink) = label_sink();
let probe_calls = Arc::clone(&calls);
let result = wait_for_install_deps(
"lnd",
move || {
let n = probe_calls.fetch_add(1, Ordering::SeqCst);
async move { Ok(probe(n >= 2, false, &["bitcoin-knots"])) }
},
sink,
36,
Duration::ZERO,
)
.await;
assert!(result.is_ok(), "{result:?}");
assert_eq!(calls.load(Ordering::SeqCst), 3);
let labels = labels.lock().unwrap();
assert_eq!(labels.len(), 2, "one waiting label per polling attempt");
assert!(labels.iter().all(|l| l == "Waiting for Bitcoin to start…"));
}
// Dependency installed but never running: after `max_attempts` (4) waiting
// rounds the gate returns the timeout error. The message reads "0 seconds"
// because the interval is `Duration::ZERO`.
#[tokio::test]
async fn times_out_when_installed_dependency_never_runs() {
let (labels, sink) = label_sink();
let err = wait_for_install_deps(
"lnd",
|| async { Ok(probe(false, false, &["bitcoin-knots"])) },
sink,
4,
Duration::ZERO,
)
.await
.unwrap_err();
assert!(err.downcast_ref::<DependencyGateError>().is_some());
assert!(
err.to_string()
.contains("did not reach the running state within 0 seconds"),
"unexpected message: {err}"
);
assert_eq!(labels.lock().unwrap().len(), 4);
}
// Mempool needs both services: the waiting label names every dependency
// that is still missing on that round.
#[tokio::test]
async fn mempool_waits_on_both_bitcoin_and_electrumx() {
let calls = Arc::new(AtomicU32::new(0));
let (labels, sink) = label_sink();
let probe_calls = Arc::clone(&calls);
let result = wait_for_install_deps(
"mempool",
move || {
let n = probe_calls.fetch_add(1, Ordering::SeqCst);
// Bitcoin comes up on probe 2, electrumx on probe 3.
async move { Ok(probe(n >= 1, n >= 2, &["bitcoin-knots", "electrumx"])) }
},
sink,
36,
Duration::ZERO,
)
.await;
assert!(result.is_ok(), "{result:?}");
let labels = labels.lock().unwrap();
assert_eq!(
labels.as_slice(),
&[
"Waiting for Bitcoin and ElectrumX to start…".to_string(),
"Waiting for ElectrumX to start…".to_string(),
]
);
}
// One of two required dependencies is not installed: no waiting at all.
#[tokio::test]
async fn mempool_fails_fast_when_one_dep_is_not_installed() {
// Bitcoin is installed (waiting could help) but ElectrumX is not
// installed at all — waiting can never satisfy the gate, so fail
// fast with the canonical message.
let (labels, sink) = label_sink();
let err = wait_for_install_deps(
"mempool",
|| async { Ok(probe(false, false, &["bitcoin-knots"])) },
sink,
36,
Duration::ZERO,
)
.await
.unwrap_err();
assert!(err.downcast_ref::<DependencyGateError>().is_some());
assert!(labels.lock().unwrap().is_empty());
assert!(
err.to_string().contains("Mempool requires"),
"unexpected message: {err}"
);
}
// An alternative container name for the same dependency must also take the
// wait path rather than the fail-fast path.
#[tokio::test]
async fn variant_container_names_count_as_installed() {
// bitcoin-core (not just bitcoin-knots) satisfies the "installed"
// check for the wait path.
let calls = Arc::new(AtomicU32::new(0));
let (_labels, sink) = label_sink();
let probe_calls = Arc::clone(&calls);
let result = wait_for_install_deps(
"electrumx",
move || {
let n = probe_calls.fetch_add(1, Ordering::SeqCst);
async move { Ok(probe(n >= 1, false, &["bitcoin-core"])) }
},
sink,
36,
Duration::ZERO,
)
.await;
assert!(result.is_ok(), "{result:?}");
}
// A package with no dependency gate passes even when nothing is running or
// installed.
#[tokio::test]
async fn apps_without_dependency_gate_pass_untouched() {
let (labels, sink) = label_sink();
let result = wait_for_install_deps(
"uptime-kuma",
|| async { Ok(probe(false, false, &[])) },
sink,
36,
Duration::ZERO,
)
.await;
assert!(result.is_ok());
assert!(labels.lock().unwrap().is_empty());
}
}
#[test]
fn mempool_api_is_directly_installable_and_covered_by_the_archival_gate() {
// `mempool-api` is a legitimate direct `package.install` target

View File

@ -3,9 +3,10 @@ use super::config::{
is_readonly_compatible, is_valid_docker_image,
};
use super::dependencies::{
check_bitcoin_pruning_compatibility, check_install_deps, configure_fedimint_lnd,
check_bitcoin_pruning_compatibility, configure_fedimint_lnd, detect_existing_containers,
detect_running_deps, detect_running_deps_from_package_data, log_optional_dep_info,
needs_archy_net, RunningDeps,
needs_archy_net, wait_for_install_deps, DepProbe, RunningDeps, DEP_WAIT_INTERVAL,
DEP_WAIT_MAX_ATTEMPTS,
};
use super::progress::parse_pull_progress;
use super::validation::validate_app_id;
@ -265,8 +266,7 @@ impl RpcHandler {
.await;
if matches!(package_id, "mempool" | "mempool-web") {
let deps = self.running_deps_for_install(package_id).await?;
check_install_deps(package_id, &deps)?;
self.gate_install_deps(package_id).await?;
check_bitcoin_pruning_compatibility(package_id).await?;
}
@ -289,9 +289,11 @@ impl RpcHandler {
// Dependency checks. Prefer the scanner's cached package state so a
// congested Podman API does not turn an already-running dependency into
// a false install failure. Fall back to a bounded direct Podman probe
// only when the cache does not show the dependency.
let deps = self.running_deps_for_install(package_id).await?;
check_install_deps(package_id, &deps)?;
// only when the cache does not show the dependency. When the dependency
// is installed but not Running yet (the "clicked Install LND 55s before
// Bitcoin was up" race), wait up to ~3 minutes for it instead of
// failing instantly.
let deps = self.gate_install_deps(package_id).await?;
check_bitcoin_pruning_compatibility(package_id).await?;
log_optional_dep_info(package_id, &deps);
let repaired_bitcoin_conf =
@ -945,6 +947,27 @@ impl RpcHandler {
}
}
/// Bounded dependency gate for installs: passes immediately when deps are
/// running, fails fast (with the phantom-tile marker) when a dependency
/// isn't installed at all, and otherwise waits up to
/// `DEP_WAIT_MAX_ATTEMPTS × DEP_WAIT_INTERVAL` for an installed-but-
/// starting dependency, surfacing "Waiting for X to start…" on the card.
///
/// Returns the `RunningDeps` snapshot from the probe that satisfied the
/// gate, so callers can reuse it without probing again.
pub(super) async fn gate_install_deps(&self, package_id: &str) -> Result<RunningDeps> {
wait_for_install_deps(
package_id,
// Probe closure: builds a fresh snapshot each time the wait loop calls
// it. A failure from `running_deps_for_install` is propagated out of the
// gate via `?`; `detect_existing_containers` returns a plain Vec.
|| async {
Ok(DepProbe {
running: self.running_deps_for_install(package_id).await?,
existing: detect_existing_containers().await,
})
},
// Waiting hook: forwards the loop's status text to the package's install
// progress message.
|msg| async move { self.set_install_message(package_id, &msg).await },
DEP_WAIT_MAX_ATTEMPTS,
DEP_WAIT_INTERVAL,
)
.await
}
// -- Private helpers for install --
/// Pull the image from a registry or verify a local image exists.
@ -1295,6 +1318,11 @@ impl RpcHandler {
// Default to full archive — operators with 2TB+ drives shouldn't be
// silently pruned down to 550 MB. Users who want a pruned node can
// set `prune=N` in bitcoin.conf themselves after install.
//
// printtoconsole=0: bitcoind already writes debug.log in the datadir
// (self-shrunk on restart); duplicating it to stdout pushed every IBD
// "UpdateTip" line through conmon into journald (>1 GB/day). Deep
// debugging uses /var/lib/archipelago/bitcoin/debug.log.
let bitcoin_conf = format!(
"\
# rpcauth: salted hash only - no plaintext password in config or CLI\n\
@ -1304,7 +1332,7 @@ rpcallowip=0.0.0.0/0\n\
listen=1\n\
rpcthreads=16\n\
rpcworkqueue=256\n\
printtoconsole=1\n",
printtoconsole=0\n",
rpcauth_line
);
tokio::fs::create_dir_all(bitcoin_dir)

View File

@ -61,6 +61,31 @@ impl RpcHandler {
self.state_manager.update_data(data).await;
}
/// Publish a user-facing install status line (e.g. "Waiting for Bitcoin
/// to start…") for `package_id`, keeping whatever phase and byte counters
/// the entry already carries.
///
/// If the package has no state entry yet, an installing entry is created.
/// An entry in the `Updating` state keeps that state; any other state is
/// set to `Installing`. When no progress record exists, the counters
/// start at zero with no phase.
pub(super) async fn set_install_message(&self, package_id: &str, message: &str) {
    let (mut snapshot, _) = self.state_manager.get_snapshot().await;

    let pkg = snapshot
        .package_data
        .entry(package_id.to_string())
        .or_insert_with(|| create_installing_entry(package_id));

    let is_updating = pkg.state == PackageState::Updating;
    if !is_updating {
        pkg.state = PackageState::Installing;
    }

    // Carry over the existing counters so only the message text changes.
    let (size, downloaded, phase) = match pkg.install_progress.as_ref() {
        Some(previous) => (previous.size, previous.downloaded, previous.phase),
        None => (0, 0, None),
    };
    let text = message.to_string();
    pkg.install_progress = Some(InstallProgress {
        size,
        downloaded,
        phase,
        message: Some(text),
    });

    self.state_manager.update_data(snapshot).await;
}
/// Clear install progress after pull completes or fails.
pub(super) async fn clear_install_progress(&self, package_id: &str) {
let (mut data, _rev) = self.state_manager.get_snapshot().await;

View File

@ -1009,9 +1009,9 @@ impl RpcHandler {
return Ok(adopted);
}
// Dependency check: Bitcoin must be running
let deps = super::dependencies::detect_running_deps().await?;
super::dependencies::check_install_deps("btcpay-server", &deps)?;
// Dependency check: Bitcoin must be running. Bounded wait covers the
// "installed but still starting" race instead of failing instantly.
self.gate_install_deps("btcpay-server").await?;
install_log("INSTALL START: btcpay-server (stack: postgres + nbxplorer + btcpay)").await;

View File

@ -26,6 +26,36 @@ impl Drop for OnboardingMnemonicState {
const MNEMONIC_TTL: std::time::Duration = std::time::Duration::from_secs(600); // 10 minutes
/// Write the pending onboarding mnemonic to `identity/master_seed.enc`,
/// encrypted with `passphrase`. `auth.setup` calls this at the first moment
/// a user password exists, so "Reveal recovery phrase" works after
/// onboarding without the frontend having to remember a separate save step
/// (it never did, which left every onboarded node with no encrypted seed
/// backup).
///
/// MNEMONIC_TTL is deliberately not consulted: the mnemonic stays in memory
/// until overwritten regardless, so using it here widens nothing, and
/// onboarding legitimately takes longer than 10 minutes when the user
/// carefully writes down 24 words. On success the in-memory copy is
/// cleared — password setup is the end of onboarding, so the plaintext no
/// longer needs to linger. On failure it is left in place.
///
/// Returns Ok(true) if a seed was saved, Ok(false) if none was pending.
/// Errors if the stored words do not parse as a mnemonic or the encrypted
/// save fails.
pub(in crate::api::rpc) async fn save_pending_seed_encrypted(
    data_dir: &std::path::Path,
    passphrase: &str,
) -> Result<bool> {
    // The lock is held for the whole operation, including the save.
    let mut slot = ONBOARDING_MNEMONIC.lock().await;

    let mnemonic = match slot.as_ref() {
        None => return Ok(false),
        Some(pending) => pending
            .words
            .parse::<bip39::Mnemonic>()
            .context("Invalid mnemonic in memory")?,
    };

    crate::seed::save_seed_encrypted(data_dir, &mnemonic, passphrase).await?;

    // Only reached after a successful save.
    *slot = None;
    Ok(true)
}
/// Best-effort: install fips.yaml + start archipelago-fips.service after the
/// seed onboarding has written the fips_key to disk. Runs in a detached task
/// so the user-facing RPC returns immediately — the systemctl calls can take
@ -208,6 +238,17 @@ impl RpcHandler {
let phrase = words.join(" ");
let (_mnemonic, seed) = crate::seed::MasterSeed::from_mnemonic_words(&phrase)?;
// Stash the restored words like seed.generate does, so auth.setup can
// persist the encrypted backup once the user's password exists and
// "Reveal recovery phrase" works on restored nodes too.
{
let mut state = ONBOARDING_MNEMONIC.lock().await;
*state = Some(OnboardingMnemonicState {
words: phrase.clone(),
created_at: std::time::Instant::now(),
});
}
// Derive and write node Ed25519 key.
let identity_dir = self.config.data_dir.join("identity");
crate::identity::NodeIdentity::from_seed(&identity_dir, &seed).await?;

View File

@ -101,19 +101,45 @@ fn friendly_transient_error(has_cached_state: bool, err_msg: &str) -> String {
.trim_end_matches('.');
let lower = detail.to_lowercase();
let state = if lower.contains("verifying blocks") {
"verifying blocks after restart"
Some("verifying blocks after restart")
} else if lower.contains("connection reset") {
Some("starting up and not yet accepting RPC connections")
} else if lower.contains("connection refused") || lower.contains("tcp connect error") {
"waiting for the Bitcoin RPC listener"
Some("waiting for the Bitcoin RPC listener")
} else if lower.contains("timed out") || lower.contains("timeout") {
"busy and not answering RPC before the timeout"
Some("busy and not answering RPC before the timeout")
} else {
"starting or busy syncing"
None
};
if has_cached_state {
format!("Bitcoin node is {state}; showing last known state and retrying. Detail: {detail}")
// Recognized transient causes get a clean human sentence only — the raw
// transport error (URLs, repeated "os error 104" chains) is operator
// noise that was ending up verbatim on the app card. Unrecognized errors
// keep a bounded detail so a genuinely new failure stays diagnosable.
let (state, detail) = match state {
Some(state) => (state, None),
None => (
"starting or busy syncing",
Some(if detail.len() > 120 {
let mut cut = 120;
while !detail.is_char_boundary(cut) {
cut -= 1;
}
format!("{}", &detail[..cut])
} else {
detail.to_string()
}),
),
};
let base = if has_cached_state {
format!("Bitcoin node is {state}; showing last known state and retrying.")
} else {
format!("Bitcoin node is {state}; retrying automatically. Detail: {detail}")
format!("Bitcoin node is {state}; retrying automatically.")
};
match detail {
Some(detail) => format!("{base} Detail: {detail}"),
None => base,
}
}
@ -278,4 +304,39 @@ mod tests {
assert!(msg.contains("busy and not answering RPC before the timeout"));
}
#[test]
fn connection_reset_gets_clean_message_without_raw_detail() {
    // Regression input: the verbatim reqwest error chain a fresh install put
    // on the app card. Neither the URL nor the repeated "os error 104"
    // fragments may reach the user-facing message.
    let raw = "getblockchaininfo: Bitcoin RPC request failed: error sending request for url (http://127.0.0.1:8332/): connection error: Connection reset by peer (os error 104): connection error: Connection reset by peer (os error 104): Connection reset by peer (os error 104)";

    let msg = friendly_transient_error(false, raw);

    // The recognized cause is rendered as a human sentence...
    assert!(msg.contains("starting up and not yet accepting RPC connections"));
    // ...and none of the transport noise leaks through.
    assert!(!msg.contains("os error"));
    assert!(!msg.contains("127.0.0.1"));
    assert!(!msg.contains("Detail:"));
}
#[test]
fn recognized_causes_omit_detail_entirely() {
    // One sample per recognized transient cause: refused connection,
    // timeout, and bitcoind's "verifying blocks" RPC error.
    let recognized = [
        "x: Connection refused (os error 111)",
        "x: operation timed out",
        r#"x: {"error":{"code":-28,"message":"Verifying blocks..."}}"#,
    ];

    for raw in recognized {
        let msg = friendly_transient_error(false, raw);
        assert!(!msg.contains("Detail:"), "leaked detail for: {raw}");
    }
}
#[test]
fn unknown_errors_keep_bounded_detail() {
    // An unrecognized failure far longer than the detail cap.
    let mut long = String::from("weird new failure ");
    long.push_str(&"x".repeat(300));

    let msg = friendly_transient_error(false, &long);

    // The start of the detail survives so the failure stays diagnosable...
    assert!(msg.contains("Detail: weird new failure"));
    // ...but the overall message is bounded.
    assert!(msg.len() < 260);
}
}

View File

@ -39,6 +39,16 @@ const KIOSK_LAUNCHER: &str =
const KIOSK_SERVICE_PATH: &str = "/etc/systemd/system/archipelago-kiosk.service";
const KIOSK_LAUNCHER_PATH: &str = "/usr/local/bin/archipelago-kiosk-launcher";
// Journald log-volume policy (size cap + per-service rate limit). Fresh ISOs
// write the identical file at build time (image-recipe/_archived/
// build-auto-installer-iso.sh); this heals already-deployed nodes via OTA.
// A fresh node produced >1 GB/day of journal (bitcoind IBD console spam plus
// debug-level backend logging) — the cap bounds disk use and the rate limit
// keeps one chatty service from drowning everything else.
const JOURNALD_DROPIN: &str =
include_str!("../../../image-recipe/configs/journald-archipelago.conf");
const JOURNALD_DROPIN_PATH: &str = "/etc/systemd/journald.conf.d/10-archipelago-persistent.conf";
const NGINX_CONF_PATH: &str = "/etc/nginx/sites-available/archipelago";
const NGINX_ENABLED_CONF_PATH: &str = "/etc/nginx/sites-enabled/archipelago";
/// Per-app proxy snippet included by the HTTPS (:443) server block. Carries its
@ -120,6 +130,11 @@ pub async fn ensure_doctor_installed() {
Ok(false) => debug!("Bitcoin RPC bind settings already usable"),
Err(e) => warn!("Bitcoin RPC repair failed (non-fatal): {:#}", e),
}
match run_journald_dropin().await {
Ok(true) => info!("Installed journald log-volume policy drop-in"),
Ok(false) => debug!("journald log-volume policy already in place"),
Err(e) => warn!("journald drop-in bootstrap failed (non-fatal): {:#}", e),
}
match tighten_secrets_dir().await {
Ok(n) if n > 0 => info!(tightened = n, "Tightened mode on secret files"),
Ok(_) => debug!("Secrets directory already at expected mode"),
@ -408,6 +423,14 @@ ensure_line() {
ensure_line server=1
ensure_line rpcallowip=0.0.0.0/0
ensure_line listen=1
# Log-volume fix: printtoconsole=1 duplicated every log line (incl. per-block
# IBD "UpdateTip" spam) into journald via conmon on top of the datadir
# debug.log bitcoind already writes. Console off; debug.log stays (bitcoind
# self-shrinks it on restart).
#
# The grep guard and the sed rewrite deliberately use the SAME anchored
# pattern. With an unanchored guard, a line such as "printtoconsole=1 "
# (trailing whitespace / CR) or "printtoconsole=10" would set changed=1
# without sed touching it, so this script would report a change (exit 2) on
# every run. Trailing whitespace is tolerated so such lines are healed too.
if grep -q '^printtoconsole=1[[:space:]]*$' "$conf"; then
  sed -i 's/^printtoconsole=1[[:space:]]*$/printtoconsole=0/' "$conf"
  changed=1
fi
[ "$changed" -eq 0 ] && exit 0
exit 2
"#;
@ -428,6 +451,44 @@ exit 2
}
}
/// Install the journald log-volume policy drop-in (JOURNALD_DROPIN) so nodes
/// deployed before the ISO shipped it get the size cap + rate limit via OTA.
/// Idempotent; restarts journald only when the file actually changed (safe:
/// the sockets are held by pid1, so at most a few messages queue briefly).
///
/// # Returns
/// * `Ok(true)`  — the drop-in was written (new or updated) and a journald
///   restart was attempted.
/// * `Ok(false)` — nothing was written: either the file was already
///   up to date or a dev-box guard skipped the bootstrap.
///
/// # Errors
/// Fails if creating the drop-in directory or writing the file fails. A
/// failed journald restart is logged but never turned into an error: the
/// file is already in place and takes effect on the next journald start.
async fn run_journald_dropin() -> Result<bool> {
    // Same dev-box guards as the doctor bootstrap: never touch /etc on
    // contributors' laptops (symlinked or absent /home/archipelago/archy).
    let home_archy = Path::new("/home/archipelago/archy");
    if fs::symlink_metadata(home_archy)
        .await
        .map(|m| m.file_type().is_symlink())
        .unwrap_or(false)
    {
        debug!("/home/archipelago/archy is a symlink — skipping journald bootstrap (dev box)");
        return Ok(false);
    }
    if fs::metadata(home_archy).await.is_err() {
        debug!("/home/archipelago/archy missing — skipping journald bootstrap");
        return Ok(false);
    }

    let dropin_dir = "/etc/systemd/journald.conf.d";
    let status = host_sudo(&["mkdir", "-p", dropin_dir])
        .await
        .with_context(|| format!("mkdir {}", dropin_dir))?;
    if !status.success() {
        anyhow::bail!("mkdir {} exited with {}", dropin_dir, status);
    }

    let changed = write_root_if_needed(JOURNALD_DROPIN_PATH, JOURNALD_DROPIN).await?;
    if changed {
        // Best-effort restart. Both failure shapes are surfaced in the log:
        // the command could not be run at all (Err), or it ran and exited
        // non-zero (previously ignored silently).
        match host_sudo(&["systemctl", "restart", "systemd-journald"]).await {
            Ok(status) if status.success() => {}
            Ok(status) => warn!(
                "journald restart after drop-in update exited with {}",
                status
            ),
            Err(e) => warn!("journald restart after drop-in update failed: {:#}", e),
        }
    }
    Ok(changed)
}
async fn run() -> Result<bool> {
// Dev-box guard: on contributors' laptops `/home/archipelago/archy` is
// typically a symlink into the git checkout, and writing through it

View File

@ -43,7 +43,11 @@ pub enum EnsureOutcome {
Unchanged,
}
pub async fn ensure_config(paths: &EnsurePaths, rpc_pass: &str) -> Result<EnsureOutcome> {
pub async fn ensure_config(
paths: &EnsurePaths,
rpc_pass: &str,
bitcoin_host: &str,
) -> Result<EnsureOutcome> {
fs::create_dir_all(&paths.data_dir)
.await
.with_context(|| format!("creating {}", paths.data_dir.display()))?;
@ -52,7 +56,7 @@ pub async fn ensure_config(paths: &EnsurePaths, rpc_pass: &str) -> Result<Ensure
let existing = fs::read_to_string(&paths.conf_path)
.await
.with_context(|| format!("reading {}", paths.conf_path.display()))?;
if has_required_lnd_flags(&existing, rpc_pass) {
if has_required_lnd_flags(&existing, rpc_pass, bitcoin_host) {
return Ok(EnsureOutcome::Unchanged);
}
}
@ -68,12 +72,11 @@ restlisten=0.0.0.0:8080\n\
bitcoin.active=true\n\
bitcoin.mainnet=true\n\
bitcoin.node=bitcoind\n\
bitcoind.rpchost=bitcoin-knots:8332\n\
bitcoind.rpchost={bitcoin_host}:8332\n\
bitcoind.rpcuser=archipelago\n\
bitcoind.rpcpass={}\n\
bitcoind.rpcpass={rpc_pass}\n\
bitcoind.rpcpolling=true\n\
bitcoind.estimatemode=ECONOMICAL\n",
rpc_pass
bitcoind.estimatemode=ECONOMICAL\n"
);
write_config_atomically(paths, &conf).await?;
@ -653,13 +656,14 @@ fn shell_quote(s: &str) -> String {
s.replace('\'', "'\\''")
}
fn has_required_lnd_flags(conf: &str, rpc_pass: &str) -> bool {
fn has_required_lnd_flags(conf: &str, rpc_pass: &str, bitcoin_host: &str) -> bool {
let rpc_pass_line = format!("bitcoind.rpcpass={rpc_pass}");
let rpc_host_line = format!("bitcoind.rpchost={bitcoin_host}:8332");
[
"bitcoin.active=true",
"bitcoin.mainnet=true",
"bitcoin.node=bitcoind",
"bitcoind.rpchost=bitcoin-knots:8332",
rpc_host_line.as_str(),
rpc_pass_line.as_str(),
]
.iter()
@ -678,7 +682,7 @@ mod tests {
conf_path: tmp.path().join("lnd/lnd.conf"),
};
let out = ensure_config(&paths, "secret").await.unwrap();
let out = ensure_config(&paths, "secret", "bitcoin-knots").await.unwrap();
assert_eq!(out, EnsureOutcome::Written);
let conf = fs::read_to_string(&paths.conf_path).await.unwrap();
assert!(conf.contains("bitcoin.active=true"));
@ -697,17 +701,46 @@ mod tests {
};
assert_eq!(
ensure_config(&paths, "first").await.unwrap(),
ensure_config(&paths, "first", "bitcoin-knots").await.unwrap(),
EnsureOutcome::Written
);
assert_eq!(
ensure_config(&paths, "second").await.unwrap(),
ensure_config(&paths, "second", "bitcoin-knots").await.unwrap(),
EnsureOutcome::Written
);
let conf = fs::read_to_string(&paths.conf_path).await.unwrap();
assert!(conf.contains("bitcoind.rpcpass=second"));
}
#[tokio::test]
async fn ensure_config_repairs_bitcoin_host_drift() {
    // Scenario: lnd.conf was generated for bitcoin-knots, then the node's
    // Bitcoin variant becomes bitcoin-core. The conf must be regenerated;
    // otherwise LND dials a hostname that doesn't exist on archy-net and
    // dies on startup.
    let tmp = tempfile::TempDir::new().unwrap();
    let root = tmp.path();
    let paths = EnsurePaths {
        data_dir: root.join("lnd"),
        conf_path: root.join("lnd/lnd.conf"),
    };

    // Initial write against Knots.
    let first = ensure_config(&paths, "pw", "bitcoin-knots").await.unwrap();
    assert_eq!(first, EnsureOutcome::Written);

    // Same password, different host: drift must trigger a rewrite.
    let drifted = ensure_config(&paths, "pw", "bitcoin-core").await.unwrap();
    assert_eq!(drifted, EnsureOutcome::Written);

    // The rewritten conf points at Core only.
    let conf = fs::read_to_string(&paths.conf_path).await.unwrap();
    assert!(conf.contains("bitcoind.rpchost=bitcoin-core:8332"));
    assert!(!conf.contains("bitcoind.rpchost=bitcoin-knots:8332"));

    // A repeat call with unchanged inputs is a no-op.
    let settled = ensure_config(&paths, "pw", "bitcoin-core").await.unwrap();
    assert_eq!(settled, EnsureOutcome::Unchanged);
}
#[tokio::test]
async fn ensure_config_repairs_incomplete_existing_config() {
let tmp = tempfile::TempDir::new().unwrap();
@ -721,7 +754,7 @@ mod tests {
.unwrap();
assert_eq!(
ensure_config(&paths, "repaired").await.unwrap(),
ensure_config(&paths, "repaired", "bitcoin-knots").await.unwrap(),
EnsureOutcome::Written
);
let conf = fs::read_to_string(&paths.conf_path).await.unwrap();

View File

@ -1368,6 +1368,7 @@ impl ProdContainerOrchestrator {
.list_containers()
.await
.context("list_containers during adoption")?;
let user_stopped = crate::crash_recovery::load_user_stopped(&self.data_dir).await;
let state = self.state.read().await;
let mut report = AdoptionReport::default();
for (app_id, lm) in state.manifests.iter() {
@ -1377,6 +1378,21 @@ impl ProdContainerOrchestrator {
.any(|c| c.name == expected || c.name == format!("/{expected}"))
{
report.adopted.push(app_id.clone());
// Adopted apps will be (re)started by boot recovery, the first
// reconcile pass, or the doctor — whichever reaches them first
// can be minutes away. Register them as pending boot-starts now
// so the scanner shows "Restarting" (not "Stopped") from the
// very first post-boot scan. Cleared per-app by the first
// reconcile pass, so a genuinely failed start surfaces.
if !state.disabled.contains(app_id)
&& !user_stopped.contains(app_id)
&& !user_stopped.contains(&expected)
{
crate::crash_recovery::pending_boot_starts_add([
app_id.clone(),
expected.clone(),
]);
}
}
}
Ok(report)
@ -1425,8 +1441,19 @@ impl ProdContainerOrchestrator {
};
let mut report = ReconcileReport::default();
let disk_gb = self.disk_gb();
// Register every candidate before the (sequential, possibly slow)
// pass so the scanner overlays queued-but-down apps as Restarting
// instead of Stopped. Each app is deregistered as its turn finishes,
// so a start that genuinely failed shows its real state again.
crate::crash_recovery::pending_boot_starts_add(manifests.iter().flat_map(|lm| {
[
lm.manifest.app.id.clone(),
compute_container_name(&lm.manifest),
]
}));
for lm in manifests {
let app_id = lm.manifest.app.id.clone();
let container_name = compute_container_name(&lm.manifest);
if mode == ReconcileMode::ExistingOnly
&& requires_archival_bitcoin(&app_id)
&& disk_gb < ARCHIVAL_BITCOIN_DISK_GB
@ -1435,6 +1462,8 @@ impl ProdContainerOrchestrator {
&app_id,
ReconcileAction::Left("requires-archival-bitcoin".into()),
);
crate::crash_recovery::pending_boot_start_done(&app_id);
crate::crash_recovery::pending_boot_start_done(&container_name);
continue;
}
match self.ensure_running_with_mode(&lm, mode).await {
@ -2559,7 +2588,8 @@ impl ProdContainerOrchestrator {
}
.read("bitcoin-rpc-password")
.context("lnd pre-start: read bitcoin RPC password")?;
let outcome = lnd::ensure_config(&self.lnd_paths, &rpc_pass)
let bitcoin_host = self.bitcoin_host();
let outcome = lnd::ensure_config(&self.lnd_paths, &rpc_pass, &bitcoin_host)
.await
.context("lnd pre-start: ensure lnd.conf")?;
Ok(Some(match outcome {
@ -2571,6 +2601,30 @@ impl ProdContainerOrchestrator {
self.ensure_btcpay_stack_dirs().await?;
Ok(Some(HookOutcome::Unchanged))
}
"fedimint-clientd" => {
// First-boot (root context) created /var/lib/archipelago/fmcd
// as root:root, but the rootless container's uid 0 maps to
// host 1000 — fmcd then crash-loops with "Permission denied
// (os error 13)". Repair ownership on every start so nodes
// installed before the first-boot fix self-heal too. (The
// generic running-container ownership sweep can't catch this:
// fmcd exits within seconds, so it's never Running when the
// sweep probes.)
let dir = "/var/lib/archipelago/fmcd";
let mkdir = host_sudo(&["mkdir", "-p", dir])
.await
.with_context(|| format!("mkdir {dir}"))?;
if !mkdir.success() {
return Err(anyhow::anyhow!("mkdir -p {dir} failed with status {mkdir}"));
}
let chown = host_sudo(&["chown", "-R", "1000:1000", dir])
.await
.with_context(|| format!("chown {dir}"))?;
if !chown.success() {
return Err(anyhow::anyhow!("chown {dir} failed with status {chown}"));
}
Ok(Some(HookOutcome::Unchanged))
}
"grafana" => {
self.cleanup_stale_grafana_port().await;
Ok(Some(HookOutcome::Unchanged))

View File

@ -49,6 +49,46 @@ pub fn is_recovery_complete() -> bool {
RECOVERY_COMPLETE.load(Ordering::SeqCst)
}
// ── Pending boot-start tracking ─────────────────────────────────────────
// Names of containers that boot recovery or the reconciler has queued to
// start (or is starting at this moment). The package scanner reports these
// as `Restarting` rather than podman's raw `Stopped`/`Exited`, so that a
// freshly rebooted node doesn't claim apps are "Stopped" while the
// sequential recovery pass (3s stagger + up to minutes for heavyweights
// like bitcoin) is still draining its queue. A pass registers names when it
// begins and removes each one as soon as that start attempt ends, whatever
// the outcome — so a container that genuinely failed shows its real state
// again on the next scan.
static PENDING_BOOT_STARTS: std::sync::LazyLock<std::sync::RwLock<std::collections::HashSet<String>>> =
    std::sync::LazyLock::new(|| std::sync::RwLock::new(Default::default()));

/// Add every name in `names` to the pending set on behalf of a recovery or
/// reconcile pass that intends to start them.
///
/// If the lock is poisoned the call does nothing (and `names` is not
/// consumed).
pub fn pending_boot_starts_add<I: IntoIterator<Item = String>>(names: I) {
    let Ok(mut pending) = PENDING_BOOT_STARTS.write() else {
        return;
    };
    for name in names {
        pending.insert(name);
    }
}

/// Remove `name` from the pending set: its start attempt is over, whether it
/// succeeded or failed, so it should no longer be overlaid.
///
/// Removing a name that was never registered is a no-op, as is a poisoned
/// lock.
pub fn pending_boot_start_done(name: &str) {
    match PENDING_BOOT_STARTS.write() {
        Ok(mut pending) => {
            pending.remove(name);
        }
        Err(_) => {}
    }
}

/// Report whether `name` — either a container name or a scanner app id — is
/// currently queued for a boot/reconcile start.
///
/// The scanner strips an `archy-` prefix when it derives app ids from
/// container names, so both `name` and `archy-{name}` are looked up.
/// Returns `false` if the lock is poisoned.
pub fn is_pending_boot_start(name: &str) -> bool {
    PENDING_BOOT_STARTS
        .read()
        .map(|pending| pending.contains(name) || pending.contains(&format!("archy-{name}")))
        .unwrap_or(false)
}
// ── User-stopped tracking ───────────────────────────────────────────────
// When a user explicitly stops a container via the UI, we record it here
// so crash recovery and health monitor don't auto-restart it.
@ -178,10 +218,17 @@ pub async fn check_for_crash(data_dir: &Path) -> Result<Option<Vec<RunningContai
old_pid
);
// Check if that PID is actually still running (zombie/stuck process)
// Check if that PID is actually still running (zombie/stuck process).
// Guard against PID reuse: after a reboot the old PID often belongs to an
// unrelated process (or, before the main.rs ordering fix, to OURSELVES) —
// only treat it as "previous instance still alive" if it's a live process
// that is not us and whose cmdline looks like the archipelago binary.
if !old_pid.is_empty() {
if let Ok(pid) = old_pid.parse::<u32>() {
if is_process_running(pid) {
if pid != std::process::id()
&& is_process_running(pid)
&& process_is_archipelago(pid)
{
warn!(
"Previous process (PID {}) is still running — not a crash, skipping recovery",
pid
@ -311,6 +358,8 @@ pub async fn recover_containers(containers: &[RunningContainerRecord]) -> Recove
failed: Vec::new(),
};
pending_boot_starts_add(containers.iter().map(|r| r.name.clone()));
for (i, record) in containers.iter().enumerate() {
info!(
"Recovering container: {} (image: {})",
@ -373,6 +422,7 @@ pub async fn recover_containers(containers: &[RunningContainerRecord]) -> Recove
if !started {
report.failed.push(record.name.clone());
}
pending_boot_start_done(&record.name);
}
report
@ -391,6 +441,16 @@ fn is_process_running(pid: u32) -> bool {
std::path::Path::new(&format!("/proc/{}", pid)).exists()
}
/// Heuristic check that the process with id `pid` is an archipelago instance.
///
/// Lets the crash check distinguish "the previous instance really is still
/// running" from the old PID having been reused by an unrelated process
/// after a reboot.
///
/// The test is a plain substring match for `archipelago` over the whole of
/// `/proc/{pid}/cmdline` (decoded lossily), so any argument containing that
/// word counts — not only the executable name. Returns `false` when the
/// file cannot be read (e.g. the process does not exist).
fn process_is_archipelago(pid: u32) -> bool {
    std::fs::read(format!("/proc/{pid}/cmdline"))
        .map(|raw| String::from_utf8_lossy(&raw).contains("archipelago"))
        .unwrap_or(false)
}
/// Start all stopped containers that were previously installed.
/// Runs on every startup to ensure containers come back after clean reboots.
/// The crash recovery (PID-based) handles dirty shutdowns; this handles clean ones.
@ -425,16 +485,34 @@ async fn start_stopped_app_stacks(data_dir: &Path) -> RecoveryReport {
);
repair_stack_network_aliases(stack).await;
// Register the whole stack up front: the per-member dependency waits
// below can take minutes, and the UI should say "Restarting", not
// "Stopped", for members still queued behind them.
pending_boot_starts_add(
stack
.containers
.iter()
.filter(|c| !user_stopped.contains(**c))
.map(|c| (*c).to_string()),
);
for container in stack.containers {
if user_stopped.contains(*container) {
info!("Skipping user-stopped container: {}", container);
continue;
}
match container_state(container).await {
Some(state) if state == "running" => continue,
let state = container_state(container).await;
match state {
Some(state) if state == "running" => {
pending_boot_start_done(container);
continue;
}
Some(_) => {}
None => continue,
None => {
pending_boot_start_done(container);
continue;
}
}
repair_stack_network_aliases(stack).await;
@ -446,6 +524,7 @@ async fn start_stopped_app_stacks(data_dir: &Path) -> RecoveryReport {
} else {
report.failed.push((*container).to_string());
}
pending_boot_start_done(container);
}
}

View File

@ -98,11 +98,15 @@ async fn main() -> Result<()> {
let startup_start = std::time::Instant::now();
crash_recovery::init_start_time();
// Initialize tracing
// Initialize tracing. Default to `info`: production units don't set
// RUST_LOG, and the old `archipelago=debug` default flooded journald
// with per-request debug lines ("RPC method: …", cookie-flag notes) —
// part of a >1 GB/day journal on a fresh node. Set RUST_LOG (e.g.
// RUST_LOG=archipelago=debug) to get debug logs back when debugging.
tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "archipelago=debug,info".into()),
.unwrap_or_else(|_| "info".into()),
)
.init();
@ -149,13 +153,18 @@ async fn main() -> Result<()> {
);
}
// Write PID marker early so we can detect crashes on next startup
// Check for a crash marker BEFORE writing our own. The old order wrote
// the marker first, so the check always read the CURRENT process's PID,
// found it alive, and skipped recovery — on every boot, forever.
let crash_containers = crash_recovery::check_for_crash(&config.data_dir).await;
// Now mark this instance as running so the next startup can detect a crash.
crash_recovery::write_pid_marker(&config.data_dir).await?;
// Run crash recovery before starting the manifest reconciler. Both paths
// mutate Podman; running them concurrently can corrupt transient runtime
// state and leave netavark/conmon unable to start containers.
match crash_recovery::check_for_crash(&config.data_dir).await {
match crash_containers {
Ok(Some(containers)) => {
info!(
"🔧 Recovering {} containers from previous crash...",

View File

@ -1203,6 +1203,21 @@ fn merge_preserving_transitional(
}
}
/// Ids of packages whose `Restarting` state came from the scanner's
/// pending-boot-start overlay rather than from an RPC restart task. The scan
/// owns these entries: as soon as podman reports a settled state and the id
/// is no longer queued for a boot start, the fresh state replaces the
/// overlay at once instead of being held for the transitional-stuck timeout.
static SCANNER_RESTARTING: std::sync::LazyLock<std::sync::Mutex<std::collections::HashSet<String>>> =
    std::sync::LazyLock::new(|| std::sync::Mutex::new(Default::default()));

/// Remove `id` from `SCANNER_RESTARTING` and report whether it was present.
///
/// Returns `true` exactly once per recorded overlay; `false` if the id was
/// not recorded or the lock is poisoned.
fn take_scanner_restarting(id: &str) -> bool {
    match SCANNER_RESTARTING.lock() {
        Ok(mut owned) => owned.remove(id),
        Err(_) => false,
    }
}
fn is_podman_scan_timeout(error: &anyhow::Error) -> bool {
let msg = format!("{:#}", error);
msg.contains("podman ps") && msg.contains("timed out")
@ -1223,6 +1238,25 @@ async fn scan_and_update_packages(
pkg.state = crate::data_model::PackageState::Stopped;
pkg.exit_code = None;
}
// A down container that boot recovery / the reconciler is queued to
// start is "Restarting", not "Stopped" — after a reboot the sequential
// recovery pass can take minutes to reach heavyweights, and telling
// the user their app stopped when it's about to come back is wrong.
// Ids overlaid here are recorded in SCANNER_RESTARTING so the merge
// below knows this Restarting is scanner-authored (resolve it as soon
// as podman reports a settled state) and not owned by an RPC restart
// task (whose transitional state must be preserved).
if matches!(
pkg.state,
crate::data_model::PackageState::Stopped | crate::data_model::PackageState::Exited
) && crate::crash_recovery::is_pending_boot_start(id)
{
pkg.state = crate::data_model::PackageState::Restarting;
pkg.exit_code = None;
if let Ok(mut set) = SCANNER_RESTARTING.lock() {
set.insert(id.clone());
}
}
}
normalize_reachable_package_health(&mut packages).await;
@ -1273,6 +1307,19 @@ async fn scan_and_update_packages(
absence_tracker.remove(id);
let existing = merged.get(id);
let overwrite = match existing {
// Scanner-authored Restarting (the pending-boot-start overlay)
// resolves as soon as the fresh scan reports anything else: the
// scan is its owner — no RPC task will ever write a final state
// back. Without this, a successfully recovered container would
// sit wedged in "Restarting" until the 20-minute stuck timeout.
Some(existing_entry)
if existing_entry.state == crate::data_model::PackageState::Restarting
&& pkg.state != crate::data_model::PackageState::Restarting
&& take_scanner_restarting(id) =>
{
transitional_since.remove(id);
true
}
Some(existing_entry) if is_transitional(&existing_entry.state) => {
let entered = *transitional_since.entry(id.clone()).or_insert(now);
let timeout = transitional_stuck_timeout(&existing_entry.state);

View File

@ -0,0 +1,142 @@
# Handover — fresh-ISO feedback bug-bash (2026-07-02)
**For: the agent building the next ISO + fleet deploy.** All fixes below are
**uncommitted in this working tree** (per the user's flow: you audit, build the
ISO, deploy). Source feedback: user's fresh ISO install on a Framework
(11th-gen Tiger Lake) machine, node `192.168.1.81` (SSH `archipelago` /
`archipelago`). Diagnostic bundle: `/home/archipelago/incoming-logs/node-logs-192.168.1.81/`.
## ⚠️ Outstanding user request for the deploy
- **Change .81's web-UI password to `ThisIsWeb54321@`** — the user forgot the
current one. Node was unreachable from .116 during this session (flaky WiFi
AP, IP flapped .68↔.81). Do this during deploy (SSH works from the user's
machine; `archipelago`/`archipelago`).
## What changed (by file)
### Backend (core/archipelago/src) — builds clean, targeted tests pass
- `api/handler/websocket.rs` — **subscribe BEFORE initial snapshot** (the
"everything needs ctrl-r" root cause: broadcasts in the snapshot→subscribe
gap were silently lost; a stale client never learned containers-scanned).
- `main.rs` — crash check now runs BEFORE writing the PID marker (**crash
recovery had never run on any node** — it always saw its own PID and
skipped); tracing default demoted debug→info (journal volume).
- `crash_recovery.rs` — PID-reuse guard (`process_is_archipelago`); new
**pending-boot-starts registry** (names queued for recovery/reconcile) with
writers in `recover_containers` + stack recovery.
- `server.rs` — scanner overlays Stopped/Exited → **Restarting** for
pending-boot-start ids (user ask: "status should be restarting if they are
being restarted"); `SCANNER_RESTARTING` ownership set so scanner-authored
Restarting resolves immediately instead of wedging in the 20-min
transitional-preserve.
- `container/prod_orchestrator.rs` — reconcile pass + `adopt_existing`
register/deregister pending boot-starts; LND pre-start hook passes detected
`bitcoin_host()` (Knots vs Core) into `lnd::ensure_config`; new
`fedimint-clientd` pre-start hook (mkdir + chown 1000:1000 of
`/var/lib/archipelago/fmcd` — self-heals the crash-loop).
- `container/lnd.rs` — `ensure_config(paths, rpc_pass, bitcoin_host)`;
bitcoind.rpchost no longer hardcoded `bitcoin-knots`; drift check rewrites
host changes; +unit test `ensure_config_repairs_bitcoin_host_drift`.
- `api/rpc/package/dependencies.rs` — bounded **dependency wait**
(`wait_for_install_deps`, 36×5s): installed-but-starting deps wait with
"Waiting for Bitcoin to start…" on the card; not-installed deps fail fast
with `DependencyGateError` marker; +5 unit tests.
- `api/rpc/package/install.rs`, `stacks.rs` — call sites wired to
`gate_install_deps` (lnd/electrumx/mempool/btcpay).
- `api/rpc/package/async_lifecycle.rs` — `DependencyGateError` removes the
optimistic entry (**no more phantom "Stopped" LND tile**) + pushes an Error
notification with the reason.
- `api/rpc/package/progress.rs` — `set_install_message` helper.
- `api/rpc/seed_rpc.rs` — `save_pending_seed_encrypted`; seed.restore also
stashes the mnemonic; `auth.rs` — **auth.setup persists the encrypted seed
backup** (recovery-phrase reveal previously failed on EVERY node because
nothing ever wrote `master_seed.enc`).
- `api/rpc/middleware.rs` — sanitizer allowlist extended (seed/2FA/auth
errors reach the user instead of "Check server logs"); +2 tests.
- `bitcoin_status.rs` — friendly status for "connection reset" (bitcoind
starting); raw URL/os-error chains no longer shown; +3 tests.
- `bootstrap.rs` — journald drop-in self-heal (OTA nodes get log caps);
bitcoin.conf printtoconsole heal. (Log-spam agent's work; verified.)
- `api/rpc/package/config.rs` — bitcoin args `-printtoconsole=0`.
### Manifests / scripts / configs
- `apps/lnd/manifest.yml` — BITCOIND_HOST now `derived_env {{BITCOIN_HOST}}`.
- `apps/bitcoin-knots/manifest.yml`, `apps/bitcoin-core/manifest.yml` —
`-printtoconsole=0` (90.6% of the journal was IBD UpdateTip spam;
debug.log in the datadir keeps full logs).
- `scripts/first-boot-containers.sh` — chown 1000:1000 of
`/var/lib/archipelago/fmcd` in BOTH fmcd blocks (root-owned dir was the
fedimint-clientd "Permission denied os error 13" crash-loop);
printtoconsole=0.
- `scripts/container-doctor.sh`, `scripts/reconcile-containers.sh` —
printtoconsole=0.
- `image-recipe/configs/journald-archipelago.conf` (NEW) — SystemMaxUse=500M,
rate limits; baked by ISO builder + bootstrap self-heal.
- `image-recipe/configs/nginx-archipelago.conf` — `/assets/` 404s no longer
cacheable (the `always` immutable header could pin a missing background for
a YEAR); HTTPS block gained the missing `/assets/` location (was silently
serving index.html as images).
- `image-recipe/configs/archipelago-kiosk.service` — MemoryMax 1500→2800M,
MemoryHigh 1200→2200M (kiosk was riding reclaim-throttle = the lag).
- `image-recipe/_archived/build-auto-installer-iso.sh` — kiosk launcher/service
now spliced from `image-recipe/configs/` at build time (was a stale inline
heredoc that force-disabled GPU); **+ `firmware-intel-graphics` +
`firmware-amd-graphics`** (Debian trixie split the i915 DMC blobs out of
firmware-misc-nonfree; the .81 kernel logged tgl_dmc missing).
### Frontend (neode-ui) — vue-tsc clean, vitest green
- `views/Login.vue` — Enter in field 1 → focus confirm; Enter in confirm →
submit; submit button always clickable (shows inline mismatch/length error
instead of being silently disabled); errors clear on input; **Restart
Onboarding needs a confirming second click** (5s window) — this button is
the likely cause of the "onboarding restarted after mismatch" report.
+`login.restartConfirm` key in en/es locales.
- `stores/sync.ts` — 30s staleness reconciliation (server.get-state) while
connected; already-connected fast path now refetches too.
- `composables/useContainersScanTimeout.ts` (NEW, +tests) — 20s escape hatch;
wired into `Apps.vue` / `Discover.vue` / `Marketplace.vue`; fresh empty node
reaches the real "no apps yet" empty state; "Checking…" can never persist.
- Backgrounds: 10 heaviest bg JPEGs → **WebP q90** (9.4MB→6.6MB; refs updated
in OnboardingWrapper/Dashboard/useRouteTransitions); 7 remaining images
stayed JPEG (WebP came out LARGER on those — noisy sources; deliberate).
- `public/assets/video/video-intro.mp4` — re-encoded CRF20 (SSIM 0.988) with
**+faststart** (moov was at EOF → browser had to download all 15MB before
playing = the intro lag). 12.7MB now, streams immediately.
- LND icon: stale dist artifact; any fresh `npm run build` ships
`app-icons/lnd.png` correctly.
## Verification done here
- `cargo build -p archipelago` + `cargo check` clean; targeted tests
(bitcoin_status, middleware sanitize, dep_wait, lnd, crash_recovery,
boot_reconciler, bitcoin_host, prod_orchestrator lnd hooks): **52 passed,
0 failed**. Full suite: **898 passed, 0 failed, 1 ignored** (22s).
- `npm run build` green; dist verified: 10 bg-*.webp present, `lnd.png`
icon present, `restartConfirm` string in bundle, optimized faststart
video (12,740,782 bytes) in place. Note: main had a latent build breaker
(unused template ref in `Web5ConnectedNodes.vue` from commit 8256fde1,
vue-tsc TS6133) — fixed here by removing the dead ref/binding; without
this fix `npm run build` fails on current main.
- vitest: new composable tests + related suites pass.
- `bash -n` clean on all touched scripts; nginx conf live-verified by agent
(200/404/cache headers on both HTTP+HTTPS blocks).
- ISO kiosk splice byte-verified against configs/ by agent simulation.
## NOT done / left for you
1. **Full test-suite run + gate**: run the complete `cargo test` and (after
deploy) `tests/lifecycle/run-gate.sh` ON .228 per CLAUDE.md before any tag.
2. **Frontend bundle grep before shipping** (per memory/feedback): verify new
strings (e.g. `restartConfirm`, `bg-home.webp`) in the built tarball.
3. **Diagnostics collector** (`data-dir-listing.txt` = 15MB of podman overlay
internals; dmidecode empty) — collector script wasn't found in this repo
(likely lives on-node or in the user's collection script); fix when found.
4. **podman healthcheck cgroup EPERM spam** (1,250 journal errors, healthchecks
unreliable fleet-wide) — real open bug, Quadlet-phase territory, NOT fixed.
5. **DP link-training failures on .81** (display corruption) — likely
cable/dock/port hardware; firmware fix may help; tell user to try another
cable/port if corruption recurs.
6. **LoRa/RNode onboarding surface** — never scoped; user may want it as a
feature (mesh device-found modal exists only on Mesh page post-login).
7. The concurrent audit agent's files (`docs/1.8.0-RELEASE-HARDENING-PLAN.md`,
`core/.../trust/*`, parts of `bootstrap.rs`) are ALSO uncommitted here —
coordinate before committing; don't mix attribution.

View File

@ -354,6 +354,8 @@ RUN apt-get update && apt-get -y full-upgrade && apt-get install -y --no-install
firmware-iwlwifi \
firmware-misc-nonfree \
firmware-linux-nonfree \
firmware-intel-graphics \
firmware-amd-graphics \
intel-microcode \
amd64-microcode \
xorg \
@ -528,11 +530,15 @@ RUN mkdir -p /var/lib/archipelago/data /var/lib/archipelago/config /var/lib/arch
# Persist journalctl across reboots — without /var/log/journal systemd
# journal uses tmpfs and everything before the last boot is lost. We
# need the full history to diagnose first-boot / install / onboarding
# issues after the fact. Size cap keeps it from eating the disk.
# issues after the fact. Size cap keeps it from eating the disk, and the
# explicit rate limit stops a single chatty service (e.g. a container
# spamming conmon->journald during Bitcoin IBD) from drowning the journal.
# Keep this byte-identical to image-recipe/configs/journald-archipelago.conf —
# the backend self-heals the same file onto deployed nodes (bootstrap.rs).
RUN mkdir -p /var/log/journal && \
systemd-tmpfiles --create --prefix /var/log/journal 2>/dev/null || true && \
install -d -m 0755 /etc/systemd/journald.conf.d && \
printf '[Journal]\nStorage=persistent\nSystemMaxUse=500M\nRuntimeMaxUse=100M\nForwardToSyslog=no\n' > /etc/systemd/journald.conf.d/10-archipelago-persistent.conf
printf '[Journal]\nStorage=persistent\nSystemMaxUse=500M\nRuntimeMaxUse=100M\nForwardToSyslog=no\nRateLimitIntervalSec=30s\nRateLimitBurst=10000\n' > /etc/systemd/journald.conf.d/10-archipelago-persistent.conf
# Clean up
RUN apt-get clean && \
@ -2651,98 +2657,48 @@ RestartSec=5
WantedBy=multi-user.target
CLAUDESVC
# Kiosk mode — X11 + Chromium fullscreen on attached display
# Not enabled by default; toggle via: sudo archipelago-kiosk enable/disable
cat > /mnt/target/usr/local/bin/archipelago-kiosk-launcher <<'KIOSKLAUNCHER'
#!/bin/bash
# Start X server on VT7 (VT1 stays on MOTD/console)
/usr/bin/Xorg :0 vt7 -nolisten tcp -keeptty &
XPID=$!
sleep 3
INSTALLER_SCRIPT
# Switch to kiosk display
chvt 7 2>/dev/null || true
if ! kill -0 $XPID 2>/dev/null; then
echo 'ERROR: Xorg failed to start'
# -----------------------------------------------------------------------------
# Kiosk launcher + systemd service: spliced into auto-install.sh at BUILD time
# from image-recipe/configs/ — the single source of truth (the same files are
# embedded in the Rust binary via include_str! in core/archipelago/src/bootstrap.rs
# and self-healed onto nodes by ensure_kiosk_hardened()).
#
# A previous inline heredoc copy here had silently diverged (unconditional
# --disable-gpu, no CPU/memory limits, VT7 scheme) and shipped stale kiosk
# behavior on fresh ISOs. Never re-inline these payloads.
# -----------------------------------------------------------------------------
KIOSK_LAUNCHER_SRC="$SCRIPT_DIR/../configs/archipelago-kiosk-launcher.sh"
KIOSK_SERVICE_SRC="$SCRIPT_DIR/../configs/archipelago-kiosk.service"
for _kiosk_src in "$KIOSK_LAUNCHER_SRC" "$KIOSK_SERVICE_SRC"; do
if [ ! -f "$_kiosk_src" ]; then
echo "ERROR: kiosk config file missing: $_kiosk_src" >&2
echo " The ISO must ship the maintained kiosk launcher/service from" >&2
echo " image-recipe/configs/ — refusing to build (no stale fallback)." >&2
exit 1
fi
done
# Guard: payloads must not contain the heredoc terminators we wrap them in.
if grep -qx 'KIOSKLAUNCHER' "$KIOSK_LAUNCHER_SRC" || grep -qx 'KIOSKSVC' "$KIOSK_SERVICE_SRC"; then
echo "ERROR: kiosk config contains a reserved heredoc terminator line (KIOSKLAUNCHER/KIOSKSVC)" >&2
exit 1
fi
{
echo "# Kiosk mode — X11 + Chromium fullscreen on attached display"
echo "# Not enabled by default; toggle via: sudo archipelago-kiosk enable/disable"
echo "# Payloads spliced at ISO-build time from image-recipe/configs/ (source of truth)."
echo "cat > /mnt/target/usr/local/bin/archipelago-kiosk-launcher <<'KIOSKLAUNCHER'"
cat "$KIOSK_LAUNCHER_SRC"
echo "KIOSKLAUNCHER"
echo "chmod +x /mnt/target/usr/local/bin/archipelago-kiosk-launcher"
echo ""
echo "cat > /mnt/target/etc/systemd/system/archipelago-kiosk.service <<'KIOSKSVC'"
cat "$KIOSK_SERVICE_SRC"
echo "KIOSKSVC"
} >> "$ARCH_DIR/auto-install.sh"
export DISPLAY=:0
export HOME=/home/archipelago
xhost +SI:localuser:archipelago 2>/dev/null
xset s off 2>/dev/null
xset -dpms 2>/dev/null
xset s noblank 2>/dev/null
unclutter -idle 3 -root &
while true; do
# Get screen resolution for window sizing
SCREEN_RES=$(xdpyinfo 2>/dev/null | awk '/dimensions:/{print $2}')
SCREEN_RES=${SCREEN_RES:-1920x1080}
sudo -u archipelago env DISPLAY=:0 HOME=/home/archipelago chromium \
--kiosk \
--start-fullscreen \
--start-maximized \
--window-position=0,0 \
--window-size=${SCREEN_RES/x/,} \
--app=http://localhost/kiosk \
--noerrdialogs \
--disable-infobars \
--disable-translate \
--no-first-run \
--check-for-update-interval=31536000 \
--disable-features=TranslateUI,PasswordManagerOnboarding,AutofillServerCommunication,PasswordManagerEnabled \
--disable-session-crashed-bubble \
--disable-save-password-bubble \
--disable-suggestions-service \
--password-store=basic \
--disable-component-update \
--credentials_enable_service=false \
--disable-gpu \
--disable-breakpad \
--disable-metrics \
--disable-metrics-reporting \
--metrics-recording-only \
--disable-domain-reliability \
--disable-background-networking \
--disable-background-timer-throttling \
--disable-backgrounding-occluded-windows \
--user-data-dir=/var/lib/archipelago/chromium-kiosk
sleep 3
done
kill $XPID 2>/dev/null
KIOSKLAUNCHER
chmod +x /mnt/target/usr/local/bin/archipelago-kiosk-launcher
cat > /mnt/target/etc/systemd/system/archipelago-kiosk.service <<'KIOSKSVC'
[Unit]
Description=Archipelago Kiosk (X11 + Chromium)
After=archipelago.service systemd-user-sessions.service network-online.target
Wants=archipelago.service network-online.target
ConditionPathExists=/usr/local/bin/archipelago-kiosk-launcher
Conflicts=getty@tty1.service
[Service]
Type=simple
# First-boot health-poll window is 300s (150 × 2s). Slow hardware
# (e.g. the atom-class box at .198) was blowing past the old 60s /
# 120s window, so Chromium launched against a not-yet-ready backend
# and showed a blank window that only recovered on reboot. At 300s
# even the unbundled-FileBrowser-pull + archipelago state sync + frontend
# settle fits with headroom. TimeoutStartSec is bumped in lockstep.
ExecStartPre=/bin/bash -c 'for i in $(seq 1 150); do curl -sf http://localhost/health >/dev/null 2>&1 && exit 0; sleep 2; done; exit 0'
ExecStart=/usr/local/bin/archipelago-kiosk-launcher
TimeoutStartSec=360
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
KIOSKSVC
cat >> "$ARCH_DIR/auto-install.sh" <<'INSTALLER_SCRIPT'
# Toggle script: sudo archipelago-kiosk enable|disable|status
cat > /mnt/target/usr/local/bin/archipelago-kiosk <<'KIOSKTOGGLE'
@ -2790,8 +2746,8 @@ case "${1:-status}" in
echo " status — Show current mode"
echo ""
echo "Keyboard shortcuts (from terminal):"
echo " Ctrl+Alt+F7 — Switch to kiosk display"
echo " Ctrl+Alt+F1 — Switch to terminal"
echo " Ctrl+Alt+F1 — Kiosk display (when enabled; console login when disabled)"
echo " Ctrl+Alt+F2 — Text console"
exit 1
;;
esac

View File

@ -27,8 +27,12 @@ RestartSec=5
# also binds the chromium/Xorg children in this unit's cgroup.
Delegate=yes
CPUQuota=75%
MemoryMax=1500M
MemoryHigh=1200M
# Raised from 1500M/1200M: a Framework (Tiger Lake) kiosk sat at 806M used /
# 1.1G peak, riding the old MemoryHigh reclaim-throttle line — the throttling
# itself was the perceived UI lag. Keep Max well above real peaks; High stays
# the soft reclaim line so a runaway kiosk still can't take the machine down.
MemoryMax=2800M
MemoryHigh=2200M
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,7 @@
[Journal]
Storage=persistent
SystemMaxUse=500M
RuntimeMaxUse=100M
ForwardToSyslog=no
RateLimitIntervalSec=30s
RateLimitBurst=10000

View File

@ -115,11 +115,19 @@ server {
# Versioned Vite assets must never fall through to index.html. During OTA
# a browser can keep an old HTML shell that references now-removed hashed
# chunks; returning HTML for /assets/*.js triggers strict MIME failures.
# A real 404 plus immutable/no-cache split lets the app/browser recover on
# refresh without caching the wrong content type.
# The immutable header must NOT use `always`: with `always` a transient
# 404 (e.g. mid web-ui swap on first boot) gets cached by the browser for
# a year and the asset stays "missing" until a hard cache clear. Without
# `always` the header applies only to 2xx/3xx; 404s are routed to a
# named location that marks them no-store so the browser retries.
location /assets/ {
try_files $uri =404;
add_header Cache-Control "public, max-age=31536000, immutable" always;
add_header Cache-Control "public, max-age=31536000, immutable";
error_page 404 = @asset_missing;
}
location @asset_missing {
add_header Cache-Control "no-store" always;
return 404;
}
location ~* ^/(registerSW\.js|sw\.js|workbox-[^/]+\.js)$ {
@ -994,6 +1002,19 @@ server {
try_files $uri =404;
}
# Versioned Vite assets must never fall through to index.html (mirrors the
# HTTP block). No `always` on the immutable header: a transient 404 must
# not be cached for a year — 404s go to @asset_missing (no-store) instead.
location /assets/ {
try_files $uri =404;
add_header Cache-Control "public, max-age=31536000, immutable";
error_page 404 = @asset_missing;
}
location @asset_missing {
add_header Cache-Control "no-store" always;
return 404;
}
location / {
try_files $uri $uri/ /index.html;
}

View File

@ -31,13 +31,13 @@ To change the intro splash and dashboard tab backgrounds **without touching any
| Filename | Tab |
|----------|-----|
| **`bg-home.jpg`** | Home |
| **`bg-home.webp`** | Home |
| **`bg-web5.jpg`** | Web5 |
| **`bg-network.jpg`** | Server / Network |
| **`bg-settings.jpg`** | Settings |
| **`bg-myapps.jpg`** | My Apps |
| **`bg-appstore.jpg`** | App Store / Marketplace |
| **`bg-cloud.jpg`** | Cloud |
| **`bg-settings.webp`** | Settings |
| **`bg-myapps.webp`** | My Apps |
| **`bg-appstore.webp`** | App Store / Marketplace |
| **`bg-cloud.webp`** | Cloud |
| **`bg-intro.jpg`** | Default (also intro) |
| **`bg-intro-3.jpg`** | Alternate layer during transitions |
@ -47,12 +47,12 @@ To change the intro splash and dashboard tab backgrounds **without touching any
| Filename | Used for |
|----------|----------|
| **`bg-intro-1.jpg`** | Onboarding done, login |
| **`bg-intro-1.webp`** | Onboarding done, login |
| **`bg-intro-2.jpg`** | Onboarding verify |
| **`bg-intro-3.jpg`** | Onboarding path, dashboard transition layer |
| **`bg-intro-4.jpg`** | Onboarding options |
| **`bg-intro-5.jpg`** | Onboarding did |
| **`bg-intro-6.jpg`** | Onboarding backup |
| **`bg-intro-4.webp`** | Onboarding options |
| **`bg-intro-5.webp`** | Onboarding did |
| **`bg-intro-6.webp`** | Onboarding backup |
---
@ -62,16 +62,16 @@ To change the intro splash and dashboard tab backgrounds **without touching any
|-------|-----------|
| Intro image | `neode-ui/public/assets/img/bg-intro.jpg` |
| Intro video | `neode-ui/public/assets/video/video-intro.mp4` |
| Home | `neode-ui/public/assets/img/bg-home.jpg` |
| Home | `neode-ui/public/assets/img/bg-home.webp` |
| Web5 | `neode-ui/public/assets/img/bg-web5.jpg` |
| Network | `neode-ui/public/assets/img/bg-network.jpg` |
| Settings | `neode-ui/public/assets/img/bg-settings.jpg` |
| My Apps | `neode-ui/public/assets/img/bg-myapps.jpg` |
| App Store | `neode-ui/public/assets/img/bg-appstore.jpg` |
| Cloud | `neode-ui/public/assets/img/bg-cloud.jpg` |
| Settings | `neode-ui/public/assets/img/bg-settings.webp` |
| My Apps | `neode-ui/public/assets/img/bg-myapps.webp` |
| App Store | `neode-ui/public/assets/img/bg-appstore.webp` |
| Cloud | `neode-ui/public/assets/img/bg-cloud.webp` |
| Default | `neode-ui/public/assets/img/bg-intro.jpg` |
| Transition | `neode-ui/public/assets/img/bg-intro-3.jpg` |
| Intro 16 | `neode-ui/public/assets/img/bg-intro-1.jpg` … `bg-intro-6.jpg` |
| Intro 1–6 | `neode-ui/public/assets/img/bg-intro-1.webp` … `bg-intro-6.webp` (intro-2 and intro-3 remain `.jpg` — WebP came out larger for those) |
---

Binary file not shown.

Before

Width:  |  Height:  |  Size: 965 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 799 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 954 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 510 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 943 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 434 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 954 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 510 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 854 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 555 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 956 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 688 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 952 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 674 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 778 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 528 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 965 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 799 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 919 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 915 KiB

View File

@ -0,0 +1,63 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
import { ref, nextTick } from 'vue'
import { useContainersScanTimeout } from '../useContainersScanTimeout'
// Spec for the containers-scan escape hatch. Fake timers drive the timeout so
// each case can step the clock precisely instead of waiting in real time.
describe('useContainersScanTimeout', () => {
  const TIMEOUT_MS = 20_000

  beforeEach(() => {
    vi.useFakeTimers()
  })

  afterEach(() => {
    vi.useRealTimers()
  })

  it('reflects the real scanned flag when it arrives before the timeout', async () => {
    const scanFlag = ref(false)
    const dataLoaded = ref(true)
    const hatch = useContainersScanTimeout(scanFlag, dataLoaded, TIMEOUT_MS)
    expect(hatch.effectiveContainersScanned.value).toBe(false)

    // The server signal lands well inside the timeout window.
    scanFlag.value = true
    await nextTick()

    expect(hatch.effectiveContainersScanned.value).toBe(true)
    expect(hatch.scanTimedOut.value).toBe(false)
  })

  it('does not start the timeout until initial data has loaded', async () => {
    const scanFlag = ref(false)
    const dataLoaded = ref(false)
    const hatch = useContainersScanTimeout(scanFlag, dataLoaded, TIMEOUT_MS)

    // Three full timeout periods pass, but nothing was armed yet.
    vi.advanceTimersByTime(60_000)
    expect(hatch.effectiveContainersScanned.value).toBe(false)

    // Loading the initial data is what starts the clock.
    dataLoaded.value = true
    await nextTick()
    vi.advanceTimersByTime(20_000)
    expect(hatch.effectiveContainersScanned.value).toBe(true)
  })

  it('falls through after the timeout even if the flag never arrives', async () => {
    const scanFlag = ref(false)
    const dataLoaded = ref(true)
    const hatch = useContainersScanTimeout(scanFlag, dataLoaded, TIMEOUT_MS)

    // One millisecond short of the deadline: still gated.
    vi.advanceTimersByTime(19_999)
    expect(hatch.effectiveContainersScanned.value).toBe(false)

    // Crossing the deadline opens the hatch.
    vi.advanceTimersByTime(1)
    expect(hatch.effectiveContainersScanned.value).toBe(true)
    expect(hatch.scanTimedOut.value).toBe(true)
  })

  it('cancels the escape hatch when the real flag arrives', async () => {
    const scanFlag = ref(false)
    const dataLoaded = ref(true)
    const hatch = useContainersScanTimeout(scanFlag, dataLoaded, TIMEOUT_MS)

    vi.advanceTimersByTime(10_000)
    scanFlag.value = true
    await nextTick()

    // Long after the original deadline the timeout must not have fired.
    vi.advanceTimersByTime(60_000)
    expect(hatch.scanTimedOut.value).toBe(false)
  })
})

View File

@ -0,0 +1,56 @@
// Escape hatch for the "Checking containers…" / "Checking..." states.
//
// If the server never flips `containers-scanned` to true (e.g. the UI missed
// a websocket broadcast), views gating on it would spin forever. This starts
// a timeout once initial data has loaded and, if the scan flag is still false
// when it fires, treats the scan as complete so the UI falls through to its
// real empty/install states. A periodic store-level resync exists too — this
// is the belt-and-suspenders guarantee that the spinner is always bounded.
import { computed, getCurrentInstance, onBeforeUnmount, ref, watch, type Ref } from 'vue'
const DEFAULT_SCAN_TIMEOUT_MS = 20_000
/**
 * Bound the "containers scanned" gate with a timeout.
 *
 * @param containersScanned    Server-reported scan flag (already defaulted to
 *                             `false` by the caller when the field is missing).
 * @param hasLoadedInitialData True once an initial state snapshot is present.
 *                             The timeout is armed only while this is true.
 * @param timeoutMs            Delay before the escape hatch opens. Defaults to
 *                             `DEFAULT_SCAN_TIMEOUT_MS`.
 * @returns `effectiveContainersScanned` — true once the real flag is set OR
 *          the timeout has elapsed; `scanTimedOut` — true only when the
 *          timeout (not the server) opened the gate.
 */
export function useContainersScanTimeout(
  containersScanned: Ref<boolean>,
  hasLoadedInitialData: Ref<boolean>,
  timeoutMs: number = DEFAULT_SCAN_TIMEOUT_MS,
) {
  const scanTimedOut = ref(false)
  let timer: ReturnType<typeof setTimeout> | undefined

  /** Cancel the pending timeout, if any. Safe to call repeatedly. */
  function clearTimer(): void {
    if (timer !== undefined) {
      clearTimeout(timer)
      timer = undefined
    }
  }

  watch(
    [containersScanned, hasLoadedInitialData],
    ([scanned, loaded]) => {
      if (scanned || !loaded) {
        // Either the real signal arrived, or the loaded state went away again
        // (e.g. the store was reset on logout). In both cases the hatch must
        // be disarmed and un-latched: a timer armed for data that no longer
        // exists must not fire later, and a stale "timed out" latch must not
        // skip the spinner for the next load.
        clearTimer()
        scanTimedOut.value = false
        return
      }
      // Loaded but not scanned: arm once. Do not re-arm after it has fired.
      if (timer === undefined && !scanTimedOut.value) {
        timer = setTimeout(() => {
          scanTimedOut.value = true
          timer = undefined
        }, timeoutMs)
      }
    },
    { immediate: true },
  )

  // Outside a component (e.g. unit tests) there is no instance to hook into.
  if (getCurrentInstance()) onBeforeUnmount(clearTimer)

  /** True once the server reports the scan done OR the timeout has elapsed. */
  const effectiveContainersScanned = computed(
    () => containersScanned.value || scanTimedOut.value,
  )

  return { effectiveContainersScanned, scanTimedOut }
}

View File

@ -96,6 +96,7 @@
"serverStarting": "Server starting up...",
"replayIntro": "Replay Intro",
"onboarding": "Onboarding",
"restartConfirm": "Are you sure? This wipes onboarding progress — click again to confirm",
"resetting": "Resetting...",
"recoveryNote": "Password recovery requires SSH access to the server.",
"errorMinLength": "Password must be at least 8 characters",

View File

@ -96,6 +96,7 @@
"serverStarting": "El servidor est\u00e1 iniciando...",
"replayIntro": "Repetir introducci\u00f3n",
"onboarding": "Configuraci\u00f3n inicial",
"restartConfirm": "\u00bfSeguro? Esto borra el progreso de la configuraci\u00f3n inicial \u2014 haga clic de nuevo para confirmar",
"resetting": "Restableciendo...",
"recoveryNote": "La recuperaci\u00f3n de contrase\u00f1a requiere acceso SSH al servidor.",
"errorMinLength": "La contrase\u00f1a debe tener al menos 8 caracteres",

View File

@ -14,6 +14,9 @@ export const useSyncStore = defineStore('sync', () => {
const hasLoadedInitialData = ref(false)
let isWsSubscribed = false
let isWsConnecting = false
let isRefreshingState = false
let stalenessTimer: ReturnType<typeof setInterval> | null = null
const STALENESS_INTERVAL_MS = 30_000
// Computed
const serverInfo = computed(() => data.value?.['server-info'])
@ -21,6 +24,47 @@ export const useSyncStore = defineStore('sync', () => {
const peerHealth = computed<Record<string, boolean>>(() => data.value?.['peer-health'] || {})
const uiData = computed(() => data.value?.ui)
/**
 * Pull the complete state snapshot over RPC (`server.get-state`) and install
 * it as the store's data, marking initial data as loaded. Only one fetch runs
 * at a time: a call made while another is in flight returns immediately.
 * Failures are swallowed (logged in DEV only) because live WebSocket patches
 * keep working without the snapshot.
 */
async function refreshStateFromServer(): Promise<void> {
  // Re-entrancy guard: never stack snapshot requests.
  if (isRefreshingState) return
  isRefreshingState = true

  try {
    const response = await rpcClient.call<{ data: DataModel }>({ method: 'server.get-state' })
    const snapshot = response?.data
    if (snapshot) {
      data.value = snapshot
      hasLoadedInitialData.value = true
    }
  } catch {
    // Non-fatal: WebSocket patches will still work
    if (import.meta.env.DEV) console.warn('[Store] Failed to refresh state from server')
  } finally {
    // Always release the guard, even when the RPC threw.
    isRefreshingState = false
  }
}
/**
 * Start the periodic full-state resync (idempotent). Every
 * `STALENESS_INTERVAL_MS` the snapshot is refetched, but only while the
 * WebSocket reports itself connected, so a missed broadcast cannot leave the
 * UI on stale data indefinitely.
 */
function startStalenessReconciliation(): void {
  // Already running: keep the existing interval.
  if (stalenessTimer) return

  const resyncIfConnected = (): void => {
    if (!wsClient.isConnected()) return
    void refreshStateFromServer()
  }
  stalenessTimer = setInterval(resyncIfConnected, STALENESS_INTERVAL_MS)
}
/** Stop the periodic resync if it is running; a no-op otherwise. */
function stopStalenessReconciliation(): void {
  if (!stalenessTimer) return
  clearInterval(stalenessTimer)
  stalenessTimer = null
}
// Actions
async function connectWebSocket(): Promise<void> {
// Prevent concurrent connection attempts
@ -82,6 +126,10 @@ export const useSyncStore = defineStore('sync', () => {
if (import.meta.env.DEV) console.log('[Store] WebSocket already connected')
isConnected.value = true
isReconnecting.value = false
// Re-entrant call while already connected (e.g. after onboarding
// completes): resync state in case a broadcast was missed.
void refreshStateFromServer()
startStalenessReconciliation()
return
}
@ -89,16 +137,8 @@ export const useSyncStore = defineStore('sync', () => {
if (import.meta.env.DEV) console.log('[Store] WebSocket connected')
// Fetch fresh state after reconnect to avoid stale patch application
try {
const freshState = await rpcClient.call<{ data: DataModel }>({ method: 'server.get-state' })
if (freshState?.data) {
data.value = freshState.data
hasLoadedInitialData.value = true
}
} catch {
// Non-fatal: WebSocket patches will still work
if (import.meta.env.DEV) console.warn('[Store] Failed to fetch fresh state after reconnect')
}
await refreshStateFromServer()
startStalenessReconciliation()
// Connection state will be updated via the callback
if (wsClient.isConnected()) {
@ -158,6 +198,7 @@ export const useSyncStore = defineStore('sync', () => {
/** Reset sync state on logout — called by auth store */
function resetOnLogout(): void {
stopStalenessReconciliation()
data.value = null
hasLoadedInitialData.value = false
isWsSubscribed = false

View File

@ -397,6 +397,7 @@ import { useAppsActions } from './apps/useAppsActions'
import { validateSideloadRequest } from './apps/sideloadValidation'
import { useMarketplaceApp } from '@/composables/useMarketplaceApp'
import { useCollapsingHeaderTabs } from '@/composables/useCollapsingHeaderTabs'
import { useContainersScanTimeout } from '@/composables/useContainersScanTimeout'
import {
type AppsTab, filterEntriesForTab, isWebOnlyApp, isWebsitePackage, opensInTab, resolveRuntimeLaunchUrl,
WEB_ONLY_APPS, WEB_ONLY_APP_URLS, buildAllCategories, useCategoriesWithApps,
@ -461,7 +462,14 @@ const ALL_CATEGORIES = computed(() => buildAllCategories(t))
const SERVICE_CATEGORIES = computed(() => buildServiceCategories(t))
const livePackages = computed(() => store.packages || {})
const containersScanned = computed(() => store.data?.['server-info']?.['status-info']?.['containers-scanned'] !== false)
// Field missing from server data = not scanned yet (consistent with Discover/Marketplace)
const containersScannedRaw = computed(() => store.data?.['server-info']?.['status-info']?.['containers-scanned'] ?? false)
// Escape hatch: never show "Checking containers" forever — after a timeout,
// fall through to the real (empty) state even if the scanned flag never arrives.
const { effectiveContainersScanned: containersScanned } = useContainersScanTimeout(
containersScannedRaw,
computed(() => store.hasLoadedInitialData),
)
const {
packages: stablePackages,
isUsingLastKnownPackages,

View File

@ -178,12 +178,12 @@ const backgroundImage = computed(() => {
if (mapped) return mapped
// Detail/sub pages inherit their parent tab's background so they stay
// visually "inside" the section instead of snapping to the home backdrop.
if (route.path.startsWith('/dashboard/cloud/')) return 'bg-cloud.jpg'
if (route.path.startsWith('/dashboard/cloud/')) return 'bg-cloud.webp'
if (route.path.startsWith('/dashboard/web5/')) return 'bg-web5.jpg'
if (route.path.startsWith('/dashboard/server/')) return 'bg-web5.jpg'
if (route.path.startsWith('/dashboard/settings/')) return 'bg-settings.jpg'
if (route.path.startsWith('/dashboard/settings/')) return 'bg-settings.webp'
if (isDetailRoute(route.path)) return 'bg-intro.jpg'
return 'bg-home.jpg'
return 'bg-home.webp'
})
const isDarkRoute = computed(() => {

View File

@ -235,6 +235,7 @@ import { useMarketplaceApp } from '@/composables/useMarketplaceApp'
import { useAppLauncherStore } from '@/stores/appLauncher'
import { useToast } from '@/composables/useToast'
import { useCollapsingHeaderTabs } from '@/composables/useCollapsingHeaderTabs'
import { useContainersScanTimeout } from '@/composables/useContainersScanTimeout'
import { APP_STORE_SECTIONS } from './appStoreCategories'
import DiscoverHero from './discover/DiscoverHero.vue'
import FeaturedApps from './discover/FeaturedApps.vue'
@ -335,7 +336,13 @@ function retryNostr() {
}
const installedPackages = computed(() => store.data?.['package-data'] || {})
const containersScanned = computed(() => store.data?.['server-info']?.['status-info']?.['containers-scanned'] ?? false)
const containersScannedRaw = computed(() => store.data?.['server-info']?.['status-info']?.['containers-scanned'] ?? false)
// Escape hatch: never leave app cards on "Checking..." forever — after a
// timeout, treat the scan as done so cards render their normal install state.
const { effectiveContainersScanned: containersScanned } = useContainersScanTimeout(
containersScannedRaw,
computed(() => store.hasLoadedInitialData),
)
const allApps = computed(() => {

View File

@ -66,7 +66,8 @@
data-form-type="other"
class="w-full px-4 py-3 bg-transparent border border-white/20 rounded-lg text-white placeholder-white/40 focus:outline-none focus:border-white/40 focus:ring-1 focus:ring-white/20 transition-colors"
:placeholder="t('login.enterPasswordSetup')"
@keydown.enter="handleSetupWithSound"
@keydown.enter="confirmPasswordInputRef?.focus()"
@input="error = null"
:disabled="loading || formDisabled"
/>
</div>
@ -77,6 +78,7 @@
</label>
<input
id="setup-confirm-password"
ref="confirmPasswordInputRef"
v-model="confirmPassword"
type="password"
autocomplete="new-password"
@ -84,14 +86,16 @@
class="w-full px-4 py-3 bg-transparent border border-white/20 rounded-lg text-white placeholder-white/40 focus:outline-none focus:border-white/40 focus:ring-1 focus:ring-white/20 transition-colors"
:placeholder="t('login.confirmPasswordPlaceholder')"
@keydown.enter="handleSetupWithSound"
@input="error = null"
:disabled="loading || formDisabled"
/>
</div>
<button
@click="handleSetupWithSound"
:disabled="loading || formDisabled || !password || password.length < 8 || password !== confirmPassword"
:disabled="loading || formDisabled"
class="w-full glass-button px-6 py-3 rounded-lg font-medium transition-all hover:bg-black/70 hover:border-white/30 disabled:opacity-50 disabled:cursor-not-allowed"
:class="{ 'opacity-60': !password || password.length < 8 || password !== confirmPassword }"
>
<span v-if="!loading">{{ t('login.setupButton') }}</span>
<span v-else class="flex items-center justify-center">
@ -213,9 +217,10 @@
<button
@click="restartOnboarding"
:disabled="isResettingOnboarding"
class="text-xs text-white/50 hover:text-white/70 transition-colors underline-offset-2 hover:underline disabled:opacity-50 disabled:cursor-not-allowed"
class="text-xs transition-colors underline-offset-2 hover:underline disabled:opacity-50 disabled:cursor-not-allowed"
:class="confirmingRestartOnboarding ? 'text-orange-400 hover:text-orange-300' : 'text-white/50 hover:text-white/70'"
>
{{ isResettingOnboarding ? t('login.resetting') : t('login.onboarding') }}
{{ isResettingOnboarding ? t('login.resetting') : (confirmingRestartOnboarding ? t('login.restartConfirm') : t('login.onboarding')) }}
</button>
</template>
</div>
@ -260,6 +265,7 @@ const requiresTotp = ref(false)
const totpCode = ref('')
const useBackupCode = ref(false)
const totpInputRef = ref<HTMLInputElement | null>(null)
const confirmPasswordInputRef = ref<HTMLInputElement | null>(null)
// Server startup state
const serverReady = ref(false)
@ -338,6 +344,7 @@ onBeforeUnmount(() => {
removeUnlockListeners()
if (startupPollTimer) clearTimeout(startupPollTimer)
if (startupProgressInterval) clearInterval(startupProgressInterval)
if (confirmRestartTimer) clearTimeout(confirmRestartTimer)
})
onMounted(async () => {
@ -536,9 +543,26 @@ function replayIntro() {
}
const isResettingOnboarding = ref(false)
const confirmingRestartOnboarding = ref(false)
let confirmRestartTimer: ReturnType<typeof setTimeout> | null = null
async function restartOnboarding() {
if (isResettingOnboarding.value) return
// First click arms a confirmation state; only a second explicit click restarts.
if (!confirmingRestartOnboarding.value) {
confirmingRestartOnboarding.value = true
if (confirmRestartTimer) clearTimeout(confirmRestartTimer)
confirmRestartTimer = setTimeout(() => {
confirmingRestartOnboarding.value = false
confirmRestartTimer = null
}, 5000)
return
}
if (confirmRestartTimer) {
clearTimeout(confirmRestartTimer)
confirmRestartTimer = null
}
confirmingRestartOnboarding.value = false
isResettingOnboarding.value = true
// Local-only reset — no RPC needed since the user isn't logged in.
// Onboarding pages are all public, so clearing localStorage is enough.

View File

@ -170,6 +170,7 @@ import { useMarketplaceApp } from '@/composables/useMarketplaceApp'
import { useAppLauncherStore } from '@/stores/appLauncher'
import { useToast } from '@/composables/useToast'
import { useCollapsingHeaderTabs } from '@/composables/useCollapsingHeaderTabs'
import { useContainersScanTimeout } from '@/composables/useContainersScanTimeout'
import { APP_STORE_CATEGORIES, APP_STORE_SECTIONS } from './appStoreCategories'
import MarketplaceAppCard from './marketplace/MarketplaceAppCard.vue'
import {
@ -286,9 +287,15 @@ const installedPackages = computed(() => {
return store.data?.['package-data'] || {}
})
const containersScanned = computed(() => {
const containersScannedRaw = computed(() => {
return store.data?.['server-info']?.['status-info']?.['containers-scanned'] ?? false
})
// Escape hatch: never leave app cards on "Checking..." forever — after a
// timeout, treat the scan as done so cards render their normal install state.
const { effectiveContainersScanned: containersScanned } = useContainersScanTimeout(
containersScannedRaw,
computed(() => store.hasLoadedInitialData),
)
// Combine curated apps with Nostr relay-discovered apps
const allApps = computed(() => {

View File

@ -109,16 +109,16 @@ const useVideoBackground = computed(() => {
// Note: bg-intro.jpg is used for splash and /onboarding/intro for seamless transition
const routeBackgrounds: Record<string, string> = {
'/onboarding/intro': 'bg-intro.jpg', // Video will be used instead
'/onboarding/options': 'bg-intro-4.jpg',
'/onboarding/options': 'bg-intro-4.webp',
'/onboarding/path': 'bg-intro-3.jpg',
'/onboarding/seed': 'bg-intro-1.jpg',
'/onboarding/seed-verify': 'bg-intro-1.jpg',
'/onboarding/seed-restore': 'bg-intro-1.jpg',
'/onboarding/did': 'bg-intro-4.jpg',
'/onboarding/identity': 'bg-intro-1.jpg',
'/onboarding/backup': 'bg-intro-6.jpg',
'/onboarding/seed': 'bg-intro-1.webp',
'/onboarding/seed-verify': 'bg-intro-1.webp',
'/onboarding/seed-restore': 'bg-intro-1.webp',
'/onboarding/did': 'bg-intro-4.webp',
'/onboarding/identity': 'bg-intro-1.webp',
'/onboarding/backup': 'bg-intro-6.webp',
'/onboarding/verify': 'bg-intro-2.jpg',
'/onboarding/done': 'bg-intro-1.jpg',
'/onboarding/done': 'bg-intro-1.webp',
'/login': 'bg-intro.jpg' // Video loops from splash (same as intro)
}
@ -126,12 +126,12 @@ const routeBackgrounds: Record<string, string> = {
// identical on every logout. Cycles through bg-intro-1..6 using a
// counter persisted to localStorage so subsequent visits advance.
const LOGIN_BACKGROUNDS = [
'bg-intro-1.jpg',
'bg-intro-1.webp',
'bg-intro-2.jpg',
'bg-intro-3.jpg',
'bg-intro-4.jpg',
'bg-intro-5.jpg',
'bg-intro-6.jpg',
'bg-intro-4.webp',
'bg-intro-5.webp',
'bg-intro-6.webp',
]
function pickNextLoginBackground(): string {
try {
@ -307,7 +307,7 @@ watch(() => route.path, (newPath, oldPath) => {
// Login route: set background immediately, no zoom, no transition (glitch is always-on)
if (newPath === '/login') {
currentBackground.value = 'bg-intro-1.jpg'
currentBackground.value = 'bg-intro-1.webp'
isTransitioning.value = false
isGlitching.value = false
return

View File

@ -19,19 +19,19 @@ const WEB5_TAB_ORDER = ['/dashboard/web5', '/dashboard/cloud', '/dashboard/serve
/** Route-to-background image mapping */
export const ROUTE_BACKGROUNDS: Record<string, string> = {
'/dashboard': 'bg-home.jpg',
'/dashboard/': 'bg-home.jpg',
'/dashboard/apps': 'bg-myapps.jpg',
'/dashboard/discover': 'bg-appstore.jpg',
'/dashboard/marketplace': 'bg-appstore.jpg',
'/dashboard/cloud': 'bg-cloud.jpg',
'/dashboard/mesh': 'bg-mesh.jpg',
'/dashboard': 'bg-home.webp',
'/dashboard/': 'bg-home.webp',
'/dashboard/apps': 'bg-myapps.webp',
'/dashboard/discover': 'bg-appstore.webp',
'/dashboard/marketplace': 'bg-appstore.webp',
'/dashboard/cloud': 'bg-cloud.webp',
'/dashboard/mesh': 'bg-mesh.webp',
'/dashboard/server': 'bg-network.jpg',
'/dashboard/web5': 'bg-web5.jpg',
'/dashboard/server/federation': 'bg-web5.jpg',
'/dashboard/monitoring': 'bg-web5.jpg',
'/dashboard/fleet': 'bg-web5.jpg',
'/dashboard/settings': 'bg-settings.jpg',
'/dashboard/settings': 'bg-settings.webp',
'/dashboard/chat': 'bg-aiui.jpg',
}

View File

@ -269,7 +269,7 @@ rpcpassword=$BTC_RPC_PASS
rpcallowip=127.0.0.1/32
rpcallowip=10.88.0.0/16
listen=1
printtoconsole=1
printtoconsole=0
BCONF
log "Updated bitcoin.conf with full RPC settings"
fi

View File

@ -518,7 +518,7 @@ deploy_node() {
if [ -f /var/lib/archipelago/bitcoin/bitcoin.conf ]; then
if grep -q 'rpcbind' /var/lib/archipelago/bitcoin/bitcoin.conf 2>/dev/null; then
echo ' Cleaning old bitcoin.conf (conflicting rpcbind)...'
printf 'printtoconsole=1\n' | sudo tee /var/lib/archipelago/bitcoin/bitcoin.conf > /dev/null
printf 'printtoconsole=0\n' | sudo tee /var/lib/archipelago/bitcoin/bitcoin.conf > /dev/null
sudo chown 100101:100101 /var/lib/archipelago/bitcoin/bitcoin.conf 2>/dev/null
fi
fi

View File

@ -180,6 +180,10 @@ FBEOF
mkdir -p /var/lib/archipelago/fmcd
FMCD_PW_FILE=/var/lib/archipelago/fmcd/password
[ -s "$FMCD_PW_FILE" ] || head -c 24 /dev/urandom | base64 | tr -dc 'A-Za-z0-9' > "$FMCD_PW_FILE"
# This script runs as root but the container is rootless (container
# uid 0 → host 1000): a root-owned /data leaves fmcd crash-looping
# with "Permission denied (os error 13)". Match manifest data_uid.
chown -R 1000:1000 /var/lib/archipelago/fmcd
FMCD_PW="$(cat "$FMCD_PW_FILE")"
FMCD_DEFAULT_INVITE="fed11qgqyj3mfwfhksw309uuxywtxxfjrjc35xuexverpxdsnxcnrxucxvenzveskgc3kvvun2c34xp3k2ep38yunzdpexcekxe3hvd3rvvmx8pnrvdenx5mnzvtzqqqjqt0t6pc3s5z0ynqjw9s4njf6svwgu59kweawc0vvrddcjeemw6yyn4pcdp"
pull_with_fallback "${FMCD_IMAGE}"
@ -366,7 +370,9 @@ rpcauth=${RPCAUTH}
server=1
rpcallowip=0.0.0.0/0
listen=1
printtoconsole=1
# printtoconsole=0: journald log-volume fix — bitcoind's datadir debug.log
# already has everything; console duplication spammed journald during IBD.
printtoconsole=0
# ZMQ publishers for LND and other services that need real-time block/tx notifications
zmqpubrawblock=tcp://0.0.0.0:28332
zmqpubrawtx=tcp://0.0.0.0:28333
@ -649,7 +655,7 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -qE 'bitcoin-knots|arch
-v /var/lib/archipelago/bitcoin:/home/bitcoin/.bitcoin \
"${BITCOIN_KNOTS_IMAGE}" \
$BTC_EXTRA_ARGS \
-printtoconsole=1 -dbcache=$BTC_DBCACHE -par=0 -maxconnections=125 -rpcthreads=16 -rpcworkqueue=256 2>>"$LOG"; then
-printtoconsole=0 -dbcache=$BTC_DBCACHE -par=0 -maxconnections=125 -rpcthreads=16 -rpcworkqueue=256 2>>"$LOG"; then
log "Bitcoin Knots started"
else
log "Bitcoin Knots failed (may already exist)"
@ -1055,6 +1061,9 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q '^fedimint-clientd$'
mkdir -p /var/lib/archipelago/fmcd
FMCD_PW_FILE=/var/lib/archipelago/fmcd/password
[ -s "$FMCD_PW_FILE" ] || head -c 24 /dev/urandom | base64 | tr -dc 'A-Za-z0-9' > "$FMCD_PW_FILE"
# Root-created /data breaks the rootless container (uid 0 → host 1000):
# fmcd crash-loops with "Permission denied (os error 13)". Match data_uid.
chown -R 1000:1000 /var/lib/archipelago/fmcd
FMCD_PW="$(cat "$FMCD_PW_FILE")"
FMCD_DEFAULT_INVITE="fed11qgqyj3mfwfhksw309uuxywtxxfjrjc35xuexverpxdsnxcnrxucxvenzveskgc3kvvun2c34xp3k2ep38yunzdpexcekxe3hvd3rvvmx8pnrvdenx5mnzvtzqqqjqt0t6pc3s5z0ynqjw9s4njf6svwgu59kweawc0vvrddcjeemw6yyn4pcdp"
$DOCKER run -d --name fedimint-clientd --restart unless-stopped \

View File

@ -722,10 +722,11 @@ ensure_bitcoin_conf() {
hash=$(echo -n "$BITCOIN_RPC_PASS" | openssl dgst -sha256 -hmac "$salt" -hex 2>/dev/null | awk '{print $NF}')
rpcauth="${BITCOIN_RPC_USER}:${salt}\$${hash}"
# Only rpcauth + printtoconsole here — all other options are in SPEC_CUSTOM_ARGS
# to avoid duplicate bind conflicts
# to avoid duplicate bind conflicts. printtoconsole=0: datadir debug.log
# already has everything; console duplication spammed journald during IBD.
sudo tee "$BITCOIN_CONF" >/dev/null << BTCEOF
rpcauth=${rpcauth}
printtoconsole=1
printtoconsole=0
BTCEOF
info "Generated bitcoin.conf"
fi