diff --git a/apps/archy-btcpay-db/manifest.yml b/apps/archy-btcpay-db/manifest.yml index 2d75cc37..5fce05d8 100644 --- a/apps/archy-btcpay-db/manifest.yml +++ b/apps/archy-btcpay-db/manifest.yml @@ -1,7 +1,7 @@ app: id: archy-btcpay-db name: BTCPay Postgres - version: 15.17 + version: "15.17" description: Postgres backend for BTCPay and NBXplorer. container: diff --git a/apps/bitcoin-core/manifest.yml b/apps/bitcoin-core/manifest.yml index 6493ce93..7bd9d9f1 100644 --- a/apps/bitcoin-core/manifest.yml +++ b/apps/bitcoin-core/manifest.yml @@ -17,6 +17,13 @@ app: # the IBD sweet spot - 4GB on full nodes, 1GB on pruned. Container # --memory=8g (config.rs::get_memory_limit) leaves headroom for # mempool + connections. + # + # -printtoconsole=0: foreground bitcoind defaults console logging ON, + # which pushed every IBD "UpdateTip" line through conmon into journald + # (>1 GB/day on a fresh node). bitcoind still writes debug.log in the + # datadir (/var/lib/archipelago/bitcoin/debug.log, self-shrunk on + # restart) — use that for deep debugging; podman logs only carries + # entrypoint/startup errors. 
- >- BITCOIND="$(command -v bitcoind || true)"; if [ -z "$BITCOIND" ]; then @@ -36,9 +43,9 @@ app: RPC_TXRELAY_FLAGS="$RPC_TXRELAY_FLAGS -rpcauth=$RPC_TXRELAY_AUTH -rpcwhitelist=txrelay:sendrawtransaction,submitpackage,testmempoolaccept,getmempoolinfo,getrawmempool,getmempoolentry,getnetworkinfo,getblockchaininfo,getblockcount,getblockhash,getblock,getblockheader,getrawtransaction,gettxout,gettxspendingprevout,decoderawtransaction,decodescript,estimatesmartfee,uptime,ping,getconnectioncount,getpeerinfo,getindexinfo,getdeploymentinfo,getchaintips"; fi; if [ "${DISK_GB_VALUE:-0}" -lt 1000 ]; then - exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=1024 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS"; + exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -printtoconsole=0 -server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=1024 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS"; else - exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS"; + exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -printtoconsole=0 -server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS"; fi derived_env: - key: DISK_GB diff --git a/apps/bitcoin-knots/manifest.yml b/apps/bitcoin-knots/manifest.yml index 7cf9fa0f..a54e0a54 100644 --- a/apps/bitcoin-knots/manifest.yml +++ b/apps/bitcoin-knots/manifest.yml @@ -17,6 +17,13 @@ app: # the IBD sweet 
spot - 4GB on full nodes, 1GB on pruned. Container # --memory=8g (config.rs::get_memory_limit) leaves headroom for # mempool + connections. + # + # -printtoconsole=0: foreground bitcoind defaults console logging ON, + # which pushed every IBD "UpdateTip" line through conmon into journald + # (>1 GB/day on a fresh node). bitcoind still writes debug.log in the + # datadir (/var/lib/archipelago/bitcoin/debug.log, self-shrunk on + # restart) — use that for deep debugging; podman logs only carries + # entrypoint/startup errors. - >- BITCOIND="$(command -v bitcoind || true)"; if [ -z "$BITCOIND" ]; then @@ -36,9 +43,9 @@ app: RPC_TXRELAY_FLAGS="$RPC_TXRELAY_FLAGS -rpcauth=$RPC_TXRELAY_AUTH -rpcwhitelist=txrelay:sendrawtransaction,submitpackage,testmempoolaccept,getmempoolinfo,getrawmempool,getmempoolentry,getnetworkinfo,getblockchaininfo,getblockcount,getblockhash,getblock,getblockheader,getrawtransaction,gettxout,gettxspendingprevout,decoderawtransaction,decodescript,estimatesmartfee,uptime,ping,getconnectioncount,getpeerinfo,getindexinfo,getdeploymentinfo,getchaintips"; fi; if [ "${DISK_GB_VALUE:-0}" -lt 1000 ]; then - exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=2048 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS"; + exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -printtoconsole=0 -server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=2048 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS"; else - exec "$BITCOIND" -datadir=/home/bitcoin/.bitcoin -noconf -server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS"; + exec "$BITCOIND" 
-datadir=/home/bitcoin/.bitcoin -noconf -printtoconsole=0 -server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 $RPC_HEADROOM $RPC_TXRELAY_FLAGS -rpcuser="$RPC_USER" -rpcpassword="$RPC_PASS"; fi derived_env: - key: DISK_GB diff --git a/apps/lnd/manifest.yml b/apps/lnd/manifest.yml index ec5e2ee4..446e34e8 100644 --- a/apps/lnd/manifest.yml +++ b/apps/lnd/manifest.yml @@ -8,6 +8,13 @@ app: image: 146.59.87.168:3000/lfg2025/lnd:v0.18.4-beta pull_policy: if-not-present network: archy-net + # BITCOIND_HOST must follow the node's actual Bitcoin container — Knots or + # Core — resolved at apply time from host facts. Hardcoding either breaks + # LND's chain backend connection on the other (lnd.conf is likewise + # resolved in lnd::ensure_config). + derived_env: + - key: BITCOIND_HOST + template: "{{BITCOIN_HOST}}" secret_env: - key: BITCOIND_RPCPASS secret_file: bitcoin-rpc-password @@ -45,7 +52,6 @@ app: options: [rw] environment: - - BITCOIND_HOST=bitcoin-knots - BITCOIND_RPCUSER=archipelago - NETWORK=mainnet diff --git a/core/archipelago/src/api/handler/websocket.rs b/core/archipelago/src/api/handler/websocket.rs index 7b11e724..5ea6990d 100644 --- a/core/archipelago/src/api/handler/websocket.rs +++ b/core/archipelago/src/api/handler/websocket.rs @@ -39,6 +39,17 @@ impl ApiHandler { let (mut tx, mut rx) = ws_stream.split(); + // Subscribe BEFORE taking the initial snapshot. Messages are full + // data dumps keyed by a monotonic revision, so a broadcast that + // races the snapshot is at worst a harmless duplicate/newer dump + // delivered right after — but subscribing after the snapshot send + // (the old order) let any update in that window vanish forever, + // since a tokio broadcast channel never delivers sends that + // predate subscribe(). That silently stuck clients (e.g. 
a fresh + // install's post-boot container scan) on a stale initial snapshot + // until a full page reload opened a new connection past the race. + let mut state_rx = state_manager.subscribe(); + let initial_msg = state_manager.get_initial_message().await; if let Ok(json_msg) = serde_json::to_string(&initial_msg) { if let Err(e) = tx.send(Message::Text(json_msg)).await { @@ -47,8 +58,6 @@ impl ApiHandler { } debug!("Sent initial data dump at revision {}", initial_msg.rev); } - - let mut state_rx = state_manager.subscribe(); let ping_interval = tokio::time::interval(tokio::time::Duration::from_secs(30)); tokio::pin!(ping_interval); let mut last_client_activity = Instant::now(); diff --git a/core/archipelago/src/api/rpc/auth.rs b/core/archipelago/src/api/rpc/auth.rs index 649a8d2a..5b275ce4 100644 --- a/core/archipelago/src/api/rpc/auth.rs +++ b/core/archipelago/src/api/rpc/auth.rs @@ -141,6 +141,19 @@ impl RpcHandler { self.auth_manager.setup_user(password).await?; tracing::info!("[onboarding] user setup complete"); + + // Persist the pending onboarding seed as the encrypted backup now that + // a passphrase (the login password) finally exists — otherwise "Reveal + // recovery phrase" has nothing to decrypt on this node, ever. + // Best-effort: a failure here must not break password setup. 
+ match super::seed_rpc::save_pending_seed_encrypted(&self.config.data_dir, password).await { + Ok(true) => tracing::info!("[onboarding] encrypted seed backup saved"), + Ok(false) => tracing::info!( + "[onboarding] no pending mnemonic to back up (restored earlier or legacy node)" + ), + Err(e) => tracing::warn!("[onboarding] encrypted seed backup failed: {e:#}"), + } + Ok(serde_json::json!(true)) } diff --git a/core/archipelago/src/api/rpc/middleware.rs b/core/archipelago/src/api/rpc/middleware.rs index ff007f27..0c9bd6ae 100644 --- a/core/archipelago/src/api/rpc/middleware.rs +++ b/core/archipelago/src/api/rpc/middleware.rs @@ -77,6 +77,19 @@ pub(super) fn sanitize_error_message(msg: &str) -> String { "No wireless radio", "WiFi radio enabled but", "Missing required field", + // seed.reveal / auth flows — user-actionable, no internals to leak. + // Without these the sanitizer collapsed every reveal failure into + // "Operation failed. Check server logs." (which isn't even a crash). + "Incorrect", + "This node has no encrypted seed", + "A 2FA code is required", + "2FA is enabled but", + "Could not decrypt the saved seed", + "Could not unlock 2FA", + "No mnemonic available", + "No pending seed generation", + "Submitted words", + "Already set up", ]; for prefix in &user_facing_prefixes { if msg.starts_with(prefix) { @@ -96,6 +109,43 @@ pub(super) fn sanitize_error_message(msg: &str) -> String { "Operation failed. Check server logs for details.".to_string() } +#[cfg(test)] +mod sanitize_tests { + use super::sanitize_error_message; + + #[test] + fn seed_reveal_errors_pass_through() { + // Every user-actionable seed.reveal failure must reach the user — + // masking them as "Check server logs" sent a real user hunting a + // crash that never happened. + for msg in [ + "Incorrect password", + "This node has no encrypted seed backup, so the recovery phrase cannot be shown. 
It was only displayed once during setup.", + "A 2FA code is required to reveal the recovery phrase", + "2FA is enabled but no TOTP data found", + "Could not decrypt the saved seed. If you set a separate backup passphrase during setup, enter that passphrase.", + "Could not unlock 2FA with this password", + "No mnemonic available. Generate or restore a seed first.", + "Submitted words do not match generated seed", + "Already set up. Use auth.changePassword to change.", + ] { + assert_ne!( + sanitize_error_message(msg), + "Operation failed. Check server logs for details.", + "masked: {msg}" + ); + } + } + + #[test] + fn internal_errors_stay_generic() { + assert_eq!( + sanitize_error_message("thread panicked at src/foo.rs:42"), + "Operation failed. Check server logs for details." + ); + } +} + /// Derive a CSRF token from the session token via HMAC. /// Deterministic: same session token always produces the same CSRF token. /// Survives backend restarts because it depends only on the session token diff --git a/core/archipelago/src/api/rpc/package/async_lifecycle.rs b/core/archipelago/src/api/rpc/package/async_lifecycle.rs index 7d6ad6b9..17dd0d51 100644 --- a/core/archipelago/src/api/rpc/package/async_lifecycle.rs +++ b/core/archipelago/src/api/rpc/package/async_lifecycle.rs @@ -114,6 +114,31 @@ impl RpcHandler { Err(e) => { error!("package.install {} failed: {:#}", package_id_spawn, e); install_log(&format!("INSTALL FAIL: {} — {:#}", package_id_spawn, e)).await; + // Dependency-gate rejections happen BEFORE any resource + // (container/image/data dir) exists for this package, so + // keeping the optimistic entry would leave a phantom + // "Stopped" tile whose Start fails with `no such object` + // (the log-confirmed LND fresh-install failure). Remove + // the entry so the card reverts to installable, and + // surface the reason as a notification instead. 
+ if let Some(gate) = e.downcast_ref::() + { + let (mut data, _) = handler.state_manager.get_snapshot().await; + data.package_data.remove(&package_id_spawn); + data.notifications.push(crate::data_model::Notification { + id: format!("install-deps-{package_id_spawn}"), + level: crate::data_model::NotificationLevel::Error, + title: format!("Could not install {package_id_spawn}"), + message: gate.to_string(), + timestamp: chrono::Utc::now().to_rfc3339(), + app_id: Some(package_id_spawn.clone()), + }); + while data.notifications.len() > 20 { + data.notifications.remove(0); + } + handler.state_manager.update_data(data).await; + return; + } // Don't remove the entry — that's what made the card // vanish from My Apps mid-install / between retry-loop // attempts (e.g. tailscale's entrypoint failure). Leave diff --git a/core/archipelago/src/api/rpc/package/config.rs b/core/archipelago/src/api/rpc/package/config.rs index 2b6240b1..214e3ec6 100644 --- a/core/archipelago/src/api/rpc/package/config.rs +++ b/core/archipelago/src/api/rpc/package/config.rs @@ -707,12 +707,17 @@ pub(super) async fn get_app_config( // effectively pinned at 2 by --cpus=2 (now removed). // -maxconnections=125 — default but explicit, so ops can // tune downward on bandwidth-constrained nodes. + // Log volume: -printtoconsole=0 — bitcoind already writes + // debug.log in the datadir (self-shrunk on restart); echoing it + // to stdout too pushed every IBD "UpdateTip" line through + // conmon into journald (>1 GB/day on a fresh node). Deep + // debugging uses /var/lib/archipelago/bitcoin/debug.log. 
Some(vec![ "-server=1".to_string(), "-rpcbind=0.0.0.0".to_string(), "-rpcallowip=0.0.0.0/0".to_string(), "-rpcport=8332".to_string(), - "-printtoconsole=1".to_string(), + "-printtoconsole=0".to_string(), "-datadir=/home/bitcoin/.bitcoin".to_string(), format!("-dbcache={}", bitcoin_dbcache_mb()), "-par=0".to_string(), diff --git a/core/archipelago/src/api/rpc/package/dependencies.rs b/core/archipelago/src/api/rpc/package/dependencies.rs index a04a50ee..05a89fa7 100644 --- a/core/archipelago/src/api/rpc/package/dependencies.rs +++ b/core/archipelago/src/api/rpc/package/dependencies.rs @@ -58,6 +58,7 @@ fn archival_bitcoin_required_message(package_id: &str) -> String { } /// Snapshot of which dependency services are currently running. +#[derive(Debug)] pub(super) struct RunningDeps { pub has_bitcoin: bool, pub has_electrumx: bool, @@ -227,6 +228,190 @@ pub(super) fn check_install_deps(package_id: &str, deps: &RunningDeps) -> Result } } +// --------------------------------------------------------------------------- +// Bounded dependency wait (install race fix) +// --------------------------------------------------------------------------- +// +// Confirmed race on fresh nodes: the user clicks "Install LND" while +// bitcoin-knots is itself still installing/starting. `check_install_deps` +// rejected instantly ("LND requires a running Bitcoin node…") even though +// Bitcoin came up 55s later. The fix: when the dependency is INSTALLED +// (container exists in `podman ps -a`, or the package state knows about it) +// but not Running yet, poll for up to DEP_WAIT_MAX_ATTEMPTS × DEP_WAIT_INTERVAL +// (~3 minutes) before failing, surfacing "Waiting for X to start…" via the +// install-progress message. If the dependency is not installed at all, fail +// fast with the canonical `check_install_deps` message — waiting can't help. + +/// Poll interval while waiting for an installed dependency to start. 
+pub(super) const DEP_WAIT_INTERVAL: std::time::Duration = std::time::Duration::from_secs(5); +/// 36 × 5s = 3 minutes of bounded waiting. +pub(super) const DEP_WAIT_MAX_ATTEMPTS: u32 = 36; + +/// Marker error: the install was rejected by the dependency gate BEFORE any +/// resource (container, image, data dir) was created for the package. The +/// async install wrapper (`async_lifecycle.rs`) downcasts to this to remove +/// the optimistic `Installing` state entry instead of leaving a phantom +/// "Stopped" tile whose Start fails with `no such object`. +#[derive(Debug)] +pub(in crate::api::rpc) struct DependencyGateError(pub String); + +impl std::fmt::Display for DependencyGateError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + +impl std::error::Error for DependencyGateError {} + +/// One unsatisfied install dependency: a user-facing label plus the container +/// name variants that would satisfy it. +struct MissingDep { + label: &'static str, + containers: &'static [&'static str], +} + +/// Which dependencies `check_install_deps` would reject `package_id` over. +/// Must stay in lockstep with the match arms in `check_install_deps` (the +/// wait loop re-runs `check_install_deps` for the canonical error message). 
+fn missing_install_deps(package_id: &str, deps: &RunningDeps) -> Vec { + const BITCOIN: MissingDep = MissingDep { + label: "Bitcoin", + containers: BITCOIN_NAMES, + }; + const ELECTRUM: MissingDep = MissingDep { + label: "ElectrumX", + containers: ELECTRUM_NAMES, + }; + let mut missing = Vec::new(); + match package_id { + "electrumx" | "mempool-electrs" | "electrs" | "lnd" | "btcpay-server" | "btcpayserver" => { + if !deps.has_bitcoin { + missing.push(BITCOIN); + } + } + "mempool" | "mempool-web" => { + if !deps.has_bitcoin { + missing.push(BITCOIN); + } + if !deps.has_electrumx { + missing.push(ELECTRUM); + } + } + // fedimint deliberately absent: check_install_deps allows it without + // a local Bitcoin node (remote RPC configured in guardian setup). + _ => {} + } + missing +} + +fn join_dep_labels(missing: &[MissingDep]) -> String { + missing + .iter() + .map(|d| d.label) + .collect::>() + .join(" and ") +} + +/// One snapshot of the dependency world, fed to [`wait_for_install_deps`]. +pub(super) struct DepProbe { + /// Which dependency services are currently Running. + pub running: RunningDeps, + /// Container/package names that EXIST in any state — installed, but + /// possibly not running yet (`podman ps -a` ∪ package-state entries). + pub existing: Vec, +} + +/// All container names known to podman in any state (`podman ps -a`). +/// Conservative on probe failure: returns an empty list, which makes the +/// wait loop fall back to the pre-fix fail-fast behavior. +pub(super) async fn detect_existing_containers() -> Vec { + let out = tokio::time::timeout( + std::time::Duration::from_secs(30), + tokio::process::Command::new("podman") + .args(["ps", "-a", "--format", "{{.Names}}"]) + .output(), + ) + .await; + match out { + Ok(Ok(o)) if o.status.success() => String::from_utf8_lossy(&o.stdout) + .lines() + .map(|l| l.trim().to_string()) + .filter(|l| !l.is_empty()) + .collect(), + _ => Vec::new(), + } +} + +/// Bounded dependency gate. 
Returns the (satisfied) `RunningDeps` snapshot, +/// or a [`DependencyGateError`]: +/// - immediately, when a missing dependency is not installed at all +/// (canonical `check_install_deps` message), or +/// - after `max_attempts × interval`, when an installed dependency never +/// reached Running. +/// +/// `probe` and `on_waiting` are injected so unit tests can drive the loop +/// without a podman runtime; production wires them to +/// `RpcHandler::dep_probe_for_install` / `set_install_message`. +pub(super) async fn wait_for_install_deps( + package_id: &str, + mut probe: P, + mut on_waiting: L, + max_attempts: u32, + interval: std::time::Duration, +) -> Result +where + P: FnMut() -> PF, + PF: std::future::Future>, + L: FnMut(String) -> LF, + LF: std::future::Future, +{ + let mut waited_attempts = 0u32; + loop { + let DepProbe { running, existing } = probe().await?; + let missing = missing_install_deps(package_id, &running); + if missing.is_empty() { + // Keep behavior in lockstep with the canonical gate (covers any + // future arm added there but not mirrored in missing_install_deps). + check_install_deps(package_id, &running)?; + return Ok(running); + } + + // Fail fast if any missing dependency has no installed container + // under any name variant — waiting cannot satisfy it. + let some_dep_not_installed = missing + .iter() + .any(|dep| !dep.containers.iter().any(|c| existing.iter().any(|e| e == c))); + if some_dep_not_installed { + let msg = match check_install_deps(package_id, &running) { + Err(e) => e.to_string(), + Ok(()) => format!("{package_id} dependencies are not running"), + }; + return Err(anyhow::Error::new(DependencyGateError(msg))); + } + + if waited_attempts >= max_attempts { + let labels = join_dep_labels(&missing); + return Err(anyhow::Error::new(DependencyGateError(format!( + "{labels} is installed but did not reach the running state within \ + {} seconds. 
Start {labels}, then install {package_id} again.", + u64::from(max_attempts) * interval.as_secs() + )))); + } + waited_attempts += 1; + + let labels = join_dep_labels(&missing); + if waited_attempts == 1 { + info!( + "Install {package_id}: dependency {labels} installed but not running yet — \ + waiting up to {}s for it to start", + u64::from(max_attempts) * interval.as_secs() + ); + } + on_waiting(format!("Waiting for {labels} to start…")).await; + tokio::time::sleep(interval).await; + } +} + /// ElectrumX and Mempool's Electrum backend need historical blocks from an /// unpruned node while building their indexes. A pruned Bitcoin node can be /// running and RPC-reachable but still leave them stuck with closed ports. @@ -625,6 +810,218 @@ mod tests { assert!(!manifest_declares_archival_bitcoin("does-not-exist")); } + mod dep_wait { + use super::super::{wait_for_install_deps, DepProbe, DependencyGateError, RunningDeps}; + use std::sync::atomic::{AtomicU32, Ordering}; + use std::sync::{Arc, Mutex}; + use std::time::Duration; + + fn deps(has_bitcoin: bool, has_electrumx: bool) -> RunningDeps { + RunningDeps { + has_bitcoin, + has_electrumx, + has_lnd: false, + } + } + + fn probe(has_bitcoin: bool, has_electrumx: bool, existing: &[&str]) -> DepProbe { + DepProbe { + running: deps(has_bitcoin, has_electrumx), + existing: existing.iter().map(|s| s.to_string()).collect(), + } + } + + /// Collects "Waiting for X to start…" labels emitted during the wait. 
+ fn label_sink() -> (Arc>>, impl FnMut(String) -> std::future::Ready<()>) + { + let labels = Arc::new(Mutex::new(Vec::new())); + let sink = { + let labels = Arc::clone(&labels); + move |msg: String| { + labels.lock().unwrap().push(msg); + std::future::ready(()) + } + }; + (labels, sink) + } + + #[tokio::test] + async fn passes_immediately_when_dependency_is_running() { + let (labels, sink) = label_sink(); + let result = wait_for_install_deps( + "lnd", + || async { Ok(probe(true, false, &["bitcoin-knots"])) }, + sink, + 3, + Duration::ZERO, + ) + .await; + assert!(result.is_ok()); + assert!(labels.lock().unwrap().is_empty(), "no waiting expected"); + } + + #[tokio::test] + async fn fails_fast_when_dependency_not_installed_at_all() { + let calls = AtomicU32::new(0); + let (labels, sink) = label_sink(); + let err = wait_for_install_deps( + "lnd", + || { + calls.fetch_add(1, Ordering::SeqCst); + async { Ok(probe(false, false, &["uptime-kuma"])) } + }, + sink, + 36, + Duration::ZERO, + ) + .await + .unwrap_err(); + // Single probe — no polling when waiting cannot help. + assert_eq!(calls.load(Ordering::SeqCst), 1); + assert!(labels.lock().unwrap().is_empty()); + // Canonical check_install_deps message, wrapped in the gate marker + // so async_lifecycle removes the optimistic Installing entry. + assert!(err.downcast_ref::().is_some()); + assert!( + err.to_string().contains("LND requires a running Bitcoin node"), + "unexpected message: {err}" + ); + } + + #[tokio::test] + async fn waits_while_installed_dependency_starts_then_passes() { + // Bitcoin container exists (installing/starting) but only reports + // Running from the 3rd probe onward — the log-confirmed LND race. 
+ let calls = Arc::new(AtomicU32::new(0)); + let (labels, sink) = label_sink(); + let probe_calls = Arc::clone(&calls); + let result = wait_for_install_deps( + "lnd", + move || { + let n = probe_calls.fetch_add(1, Ordering::SeqCst); + async move { Ok(probe(n >= 2, false, &["bitcoin-knots"])) } + }, + sink, + 36, + Duration::ZERO, + ) + .await; + assert!(result.is_ok(), "{result:?}"); + assert_eq!(calls.load(Ordering::SeqCst), 3); + let labels = labels.lock().unwrap(); + assert_eq!(labels.len(), 2, "one waiting label per polling attempt"); + assert!(labels.iter().all(|l| l == "Waiting for Bitcoin to start…")); + } + + #[tokio::test] + async fn times_out_when_installed_dependency_never_runs() { + let (labels, sink) = label_sink(); + let err = wait_for_install_deps( + "lnd", + || async { Ok(probe(false, false, &["bitcoin-knots"])) }, + sink, + 4, + Duration::ZERO, + ) + .await + .unwrap_err(); + assert!(err.downcast_ref::().is_some()); + assert!( + err.to_string() + .contains("did not reach the running state within 0 seconds"), + "unexpected message: {err}" + ); + assert_eq!(labels.lock().unwrap().len(), 4); + } + + #[tokio::test] + async fn mempool_waits_on_both_bitcoin_and_electrumx() { + let calls = Arc::new(AtomicU32::new(0)); + let (labels, sink) = label_sink(); + let probe_calls = Arc::clone(&calls); + let result = wait_for_install_deps( + "mempool", + move || { + let n = probe_calls.fetch_add(1, Ordering::SeqCst); + // Bitcoin comes up on probe 2, electrumx on probe 3. 
+ async move { Ok(probe(n >= 1, n >= 2, &["bitcoin-knots", "electrumx"])) } + }, + sink, + 36, + Duration::ZERO, + ) + .await; + assert!(result.is_ok(), "{result:?}"); + let labels = labels.lock().unwrap(); + assert_eq!( + labels.as_slice(), + &[ + "Waiting for Bitcoin and ElectrumX to start…".to_string(), + "Waiting for ElectrumX to start…".to_string(), + ] + ); + } + + #[tokio::test] + async fn mempool_fails_fast_when_one_dep_is_not_installed() { + // Bitcoin is installed (waiting could help) but ElectrumX is not + // installed at all — waiting can never satisfy the gate, so fail + // fast with the canonical message. + let (labels, sink) = label_sink(); + let err = wait_for_install_deps( + "mempool", + || async { Ok(probe(false, false, &["bitcoin-knots"])) }, + sink, + 36, + Duration::ZERO, + ) + .await + .unwrap_err(); + assert!(err.downcast_ref::().is_some()); + assert!(labels.lock().unwrap().is_empty()); + assert!( + err.to_string().contains("Mempool requires"), + "unexpected message: {err}" + ); + } + + #[tokio::test] + async fn variant_container_names_count_as_installed() { + // bitcoin-core (not just bitcoin-knots) satisfies the "installed" + // check for the wait path. 
+ let calls = Arc::new(AtomicU32::new(0)); + let (_labels, sink) = label_sink(); + let probe_calls = Arc::clone(&calls); + let result = wait_for_install_deps( + "electrumx", + move || { + let n = probe_calls.fetch_add(1, Ordering::SeqCst); + async move { Ok(probe(n >= 1, false, &["bitcoin-core"])) } + }, + sink, + 36, + Duration::ZERO, + ) + .await; + assert!(result.is_ok(), "{result:?}"); + } + + #[tokio::test] + async fn apps_without_dependency_gate_pass_untouched() { + let (labels, sink) = label_sink(); + let result = wait_for_install_deps( + "uptime-kuma", + || async { Ok(probe(false, false, &[])) }, + sink, + 36, + Duration::ZERO, + ) + .await; + assert!(result.is_ok()); + assert!(labels.lock().unwrap().is_empty()); + } + } + #[test] fn mempool_api_is_directly_installable_and_covered_by_the_archival_gate() { // `mempool-api` is a legitimate direct `package.install` target diff --git a/core/archipelago/src/api/rpc/package/install.rs b/core/archipelago/src/api/rpc/package/install.rs index d4f506a5..b8b9dc1c 100644 --- a/core/archipelago/src/api/rpc/package/install.rs +++ b/core/archipelago/src/api/rpc/package/install.rs @@ -3,9 +3,10 @@ use super::config::{ is_readonly_compatible, is_valid_docker_image, }; use super::dependencies::{ - check_bitcoin_pruning_compatibility, check_install_deps, configure_fedimint_lnd, + check_bitcoin_pruning_compatibility, configure_fedimint_lnd, detect_existing_containers, detect_running_deps, detect_running_deps_from_package_data, log_optional_dep_info, - needs_archy_net, RunningDeps, + needs_archy_net, wait_for_install_deps, DepProbe, RunningDeps, DEP_WAIT_INTERVAL, + DEP_WAIT_MAX_ATTEMPTS, }; use super::progress::parse_pull_progress; use super::validation::validate_app_id; @@ -265,8 +266,7 @@ impl RpcHandler { .await; if matches!(package_id, "mempool" | "mempool-web") { - let deps = self.running_deps_for_install(package_id).await?; - check_install_deps(package_id, &deps)?; + self.gate_install_deps(package_id).await?; 
check_bitcoin_pruning_compatibility(package_id).await?; } @@ -289,9 +289,11 @@ impl RpcHandler { // Dependency checks. Prefer the scanner's cached package state so a // congested Podman API does not turn an already-running dependency into // a false install failure. Fall back to a bounded direct Podman probe - // only when the cache does not show the dependency. - let deps = self.running_deps_for_install(package_id).await?; - check_install_deps(package_id, &deps)?; + // only when the cache does not show the dependency. When the dependency + // is installed but not Running yet (the "clicked Install LND 55s before + // Bitcoin was up" race), wait up to ~3 minutes for it instead of + // failing instantly. + let deps = self.gate_install_deps(package_id).await?; check_bitcoin_pruning_compatibility(package_id).await?; log_optional_dep_info(package_id, &deps); let repaired_bitcoin_conf = @@ -945,6 +947,27 @@ impl RpcHandler { } } + /// Bounded dependency gate for installs: passes immediately when deps are + /// running, fails fast (with the phantom-tile marker) when a dependency + /// isn't installed at all, and otherwise waits up to + /// `DEP_WAIT_MAX_ATTEMPTS × DEP_WAIT_INTERVAL` for an installed-but- + /// starting dependency, surfacing "Waiting for X to start…" on the card. + pub(super) async fn gate_install_deps(&self, package_id: &str) -> Result { + wait_for_install_deps( + package_id, + || async { + Ok(DepProbe { + running: self.running_deps_for_install(package_id).await?, + existing: detect_existing_containers().await, + }) + }, + |msg| async move { self.set_install_message(package_id, &msg).await }, + DEP_WAIT_MAX_ATTEMPTS, + DEP_WAIT_INTERVAL, + ) + .await + } + // -- Private helpers for install -- /// Pull the image from a registry or verify a local image exists. @@ -1295,6 +1318,11 @@ impl RpcHandler { // Default to full archive — operators with 2TB+ drives shouldn't be // silently pruned down to 550 MB. 
Users who want a pruned node can // set `prune=N` in bitcoin.conf themselves after install. + // + // printtoconsole=0: bitcoind already writes debug.log in the datadir + // (self-shrunk on restart); duplicating it to stdout pushed every IBD + // "UpdateTip" line through conmon into journald (>1 GB/day). Deep + // debugging uses /var/lib/archipelago/bitcoin/debug.log. let bitcoin_conf = format!( "\ # rpcauth: salted hash only - no plaintext password in config or CLI\n\ @@ -1304,7 +1332,7 @@ rpcallowip=0.0.0.0/0\n\ listen=1\n\ rpcthreads=16\n\ rpcworkqueue=256\n\ -printtoconsole=1\n", +printtoconsole=0\n", rpcauth_line ); tokio::fs::create_dir_all(bitcoin_dir) diff --git a/core/archipelago/src/api/rpc/package/progress.rs b/core/archipelago/src/api/rpc/package/progress.rs index 671651f3..47767e5a 100644 --- a/core/archipelago/src/api/rpc/package/progress.rs +++ b/core/archipelago/src/api/rpc/package/progress.rs @@ -61,6 +61,31 @@ impl RpcHandler { self.state_manager.update_data(data).await; } + /// Set a user-facing install status message (e.g. "Waiting for Bitcoin + /// to start…") without disturbing the current phase/byte counters. + pub(super) async fn set_install_message(&self, package_id: &str, message: &str) { + let (mut data, _rev) = self.state_manager.get_snapshot().await; + let entry = data + .package_data + .entry(package_id.to_string()) + .or_insert_with(|| create_installing_entry(package_id)); + if entry.state != PackageState::Updating { + entry.state = PackageState::Installing; + } + let (size, downloaded, phase) = entry + .install_progress + .as_ref() + .map(|p| (p.size, p.downloaded, p.phase)) + .unwrap_or((0, 0, None)); + entry.install_progress = Some(InstallProgress { + size, + downloaded, + phase, + message: Some(message.to_string()), + }); + self.state_manager.update_data(data).await; + } + /// Clear install progress after pull completes or fails. 
pub(super) async fn clear_install_progress(&self, package_id: &str) { let (mut data, _rev) = self.state_manager.get_snapshot().await; diff --git a/core/archipelago/src/api/rpc/package/stacks.rs b/core/archipelago/src/api/rpc/package/stacks.rs index 75974076..505f8700 100644 --- a/core/archipelago/src/api/rpc/package/stacks.rs +++ b/core/archipelago/src/api/rpc/package/stacks.rs @@ -1009,9 +1009,9 @@ impl RpcHandler { return Ok(adopted); } - // Dependency check: Bitcoin must be running - let deps = super::dependencies::detect_running_deps().await?; - super::dependencies::check_install_deps("btcpay-server", &deps)?; + // Dependency check: Bitcoin must be running. Bounded wait covers the + // "installed but still starting" race instead of failing instantly. + self.gate_install_deps("btcpay-server").await?; install_log("INSTALL START: btcpay-server (stack: postgres + nbxplorer + btcpay)").await; diff --git a/core/archipelago/src/api/rpc/seed_rpc.rs b/core/archipelago/src/api/rpc/seed_rpc.rs index 13c6870d..1a5dc52e 100644 --- a/core/archipelago/src/api/rpc/seed_rpc.rs +++ b/core/archipelago/src/api/rpc/seed_rpc.rs @@ -26,6 +26,36 @@ impl Drop for OnboardingMnemonicState { const MNEMONIC_TTL: std::time::Duration = std::time::Duration::from_secs(600); // 10 minutes +/// Persist the pending onboarding mnemonic as `identity/master_seed.enc`, +/// encrypted with `passphrase`. Called from `auth.setup` — the first moment a +/// user password exists — so "Reveal recovery phrase" works after onboarding +/// without the frontend having to remember a separate save step (it never +/// did, which left every onboarded node with no encrypted seed backup). +/// +/// Deliberately ignores MNEMONIC_TTL: the mnemonic stays in memory until +/// overwritten regardless, so using it here widens nothing, and onboarding +/// legitimately takes longer than 10 minutes when the user carefully writes +/// down 24 words. 
Clears the in-memory copy on success — password setup is +/// the end of onboarding, so the plaintext no longer needs to linger. +/// +/// Returns Ok(true) if a seed was saved, Ok(false) if none was pending. +pub(in crate::api::rpc) async fn save_pending_seed_encrypted( + data_dir: &std::path::Path, + passphrase: &str, +) -> Result { + let mut state = ONBOARDING_MNEMONIC.lock().await; + let Some(pending) = state.as_ref() else { + return Ok(false); + }; + let mnemonic: bip39::Mnemonic = pending + .words + .parse() + .context("Invalid mnemonic in memory")?; + crate::seed::save_seed_encrypted(data_dir, &mnemonic, passphrase).await?; + *state = None; + Ok(true) +} + /// Best-effort: install fips.yaml + start archipelago-fips.service after the /// seed onboarding has written the fips_key to disk. Runs in a detached task /// so the user-facing RPC returns immediately — the systemctl calls can take @@ -208,6 +238,17 @@ impl RpcHandler { let phrase = words.join(" "); let (_mnemonic, seed) = crate::seed::MasterSeed::from_mnemonic_words(&phrase)?; + // Stash the restored words like seed.generate does, so auth.setup can + // persist the encrypted backup once the user's password exists and + // "Reveal recovery phrase" works on restored nodes too. + { + let mut state = ONBOARDING_MNEMONIC.lock().await; + *state = Some(OnboardingMnemonicState { + words: phrase.clone(), + created_at: std::time::Instant::now(), + }); + } + // Derive and write node Ed25519 key. 
let identity_dir = self.config.data_dir.join("identity"); crate::identity::NodeIdentity::from_seed(&identity_dir, &seed).await?; diff --git a/core/archipelago/src/bitcoin_status.rs b/core/archipelago/src/bitcoin_status.rs index 4107b7bd..f53ae4a4 100644 --- a/core/archipelago/src/bitcoin_status.rs +++ b/core/archipelago/src/bitcoin_status.rs @@ -101,19 +101,45 @@ fn friendly_transient_error(has_cached_state: bool, err_msg: &str) -> String { .trim_end_matches('.'); let lower = detail.to_lowercase(); let state = if lower.contains("verifying blocks") { - "verifying blocks after restart" + Some("verifying blocks after restart") + } else if lower.contains("connection reset") { + Some("starting up and not yet accepting RPC connections") } else if lower.contains("connection refused") || lower.contains("tcp connect error") { - "waiting for the Bitcoin RPC listener" + Some("waiting for the Bitcoin RPC listener") } else if lower.contains("timed out") || lower.contains("timeout") { - "busy and not answering RPC before the timeout" + Some("busy and not answering RPC before the timeout") } else { - "starting or busy syncing" + None }; - if has_cached_state { - format!("Bitcoin node is {state}; showing last known state and retrying. Detail: {detail}") + // Recognized transient causes get a clean human sentence only — the raw + // transport error (URLs, repeated "os error 104" chains) is operator + // noise that was ending up verbatim on the app card. Unrecognized errors + // keep a bounded detail so a genuinely new failure stays diagnosable. 
+ let (state, detail) = match state { + Some(state) => (state, None), + None => ( + "starting or busy syncing", + Some(if detail.len() > 120 { + let mut cut = 120; + while !detail.is_char_boundary(cut) { + cut -= 1; + } + format!("{}…", &detail[..cut]) + } else { + detail.to_string() + }), + ), + }; + + let base = if has_cached_state { + format!("Bitcoin node is {state}; showing last known state and retrying.") } else { - format!("Bitcoin node is {state}; retrying automatically. Detail: {detail}") + format!("Bitcoin node is {state}; retrying automatically.") + }; + match detail { + Some(detail) => format!("{base} Detail: {detail}"), + None => base, } } @@ -278,4 +304,39 @@ mod tests { assert!(msg.contains("busy and not answering RPC before the timeout")); } + + #[test] + fn connection_reset_gets_clean_message_without_raw_detail() { + // The exact string a fresh install showed on the app card: the raw + // reqwest chain (URL + repeated "os error 104") must not surface. + let msg = friendly_transient_error( + false, + "getblockchaininfo: Bitcoin RPC request failed: error sending request for url (http://127.0.0.1:8332/): connection error: Connection reset by peer (os error 104): connection error: Connection reset by peer (os error 104): Connection reset by peer (os error 104)", + ); + + assert!(msg.contains("starting up and not yet accepting RPC connections")); + assert!(!msg.contains("os error")); + assert!(!msg.contains("127.0.0.1")); + assert!(!msg.contains("Detail:")); + } + + #[test] + fn recognized_causes_omit_detail_entirely() { + for raw in [ + "x: Connection refused (os error 111)", + "x: operation timed out", + r#"x: {"error":{"code":-28,"message":"Verifying blocks..."}}"#, + ] { + let msg = friendly_transient_error(false, raw); + assert!(!msg.contains("Detail:"), "leaked detail for: {raw}"); + } + } + + #[test] + fn unknown_errors_keep_bounded_detail() { + let long = format!("weird new failure {}", "x".repeat(300)); + let msg = 
friendly_transient_error(false, &long); + assert!(msg.contains("Detail: weird new failure")); + assert!(msg.len() < 260); + } } diff --git a/core/archipelago/src/bootstrap.rs b/core/archipelago/src/bootstrap.rs index e42076d3..caa8f08d 100644 --- a/core/archipelago/src/bootstrap.rs +++ b/core/archipelago/src/bootstrap.rs @@ -39,6 +39,16 @@ const KIOSK_LAUNCHER: &str = const KIOSK_SERVICE_PATH: &str = "/etc/systemd/system/archipelago-kiosk.service"; const KIOSK_LAUNCHER_PATH: &str = "/usr/local/bin/archipelago-kiosk-launcher"; +// Journald log-volume policy (size cap + per-service rate limit). Fresh ISOs +// write the identical file at build time (image-recipe/_archived/ +// build-auto-installer-iso.sh); this heals already-deployed nodes via OTA. +// A fresh node produced >1 GB/day of journal (bitcoind IBD console spam plus +// debug-level backend logging) — the cap bounds disk use and the rate limit +// keeps one chatty service from drowning everything else. +const JOURNALD_DROPIN: &str = + include_str!("../../../image-recipe/configs/journald-archipelago.conf"); +const JOURNALD_DROPIN_PATH: &str = "/etc/systemd/journald.conf.d/10-archipelago-persistent.conf"; + const NGINX_CONF_PATH: &str = "/etc/nginx/sites-available/archipelago"; const NGINX_ENABLED_CONF_PATH: &str = "/etc/nginx/sites-enabled/archipelago"; /// Per-app proxy snippet included by the HTTPS (:443) server block. 
Carries its @@ -120,6 +130,11 @@ pub async fn ensure_doctor_installed() { Ok(false) => debug!("Bitcoin RPC bind settings already usable"), Err(e) => warn!("Bitcoin RPC repair failed (non-fatal): {:#}", e), } + match run_journald_dropin().await { + Ok(true) => info!("Installed journald log-volume policy drop-in"), + Ok(false) => debug!("journald log-volume policy already in place"), + Err(e) => warn!("journald drop-in bootstrap failed (non-fatal): {:#}", e), + } match tighten_secrets_dir().await { Ok(n) if n > 0 => info!(tightened = n, "Tightened mode on secret files"), Ok(_) => debug!("Secrets directory already at expected mode"), @@ -408,6 +423,14 @@ ensure_line() { ensure_line server=1 ensure_line rpcallowip=0.0.0.0/0 ensure_line listen=1 +# Log-volume fix: printtoconsole=1 duplicated every log line (incl. per-block +# IBD "UpdateTip" spam) into journald via conmon on top of the datadir +# debug.log bitcoind already writes. Console off; debug.log stays (bitcoind +# self-shrinks it on restart). +if grep -q '^printtoconsole=1' "$conf"; then + sed -i 's/^printtoconsole=1$/printtoconsole=0/' "$conf" + changed=1 +fi [ "$changed" -eq 0 ] && exit 0 exit 2 "#; @@ -428,6 +451,44 @@ exit 2 } } +/// Install the journald log-volume policy drop-in (JOURNALD_DROPIN) so nodes +/// deployed before the ISO shipped it get the size cap + rate limit via OTA. +/// Idempotent; restarts journald only when the file actually changed (safe: +/// the sockets are held by pid1, so at most a few messages queue briefly). +async fn run_journald_dropin() -> Result { + // Same dev-box guards as the doctor bootstrap: never touch /etc on + // contributors' laptops (symlinked or absent /home/archipelago/archy). 
+ let home_archy = Path::new("/home/archipelago/archy"); + if fs::symlink_metadata(home_archy) + .await + .map(|m| m.file_type().is_symlink()) + .unwrap_or(false) + { + debug!("/home/archipelago/archy is a symlink — skipping journald bootstrap (dev box)"); + return Ok(false); + } + if fs::metadata(home_archy).await.is_err() { + debug!("/home/archipelago/archy missing — skipping journald bootstrap"); + return Ok(false); + } + + let dropin_dir = "/etc/systemd/journald.conf.d"; + let status = host_sudo(&["mkdir", "-p", dropin_dir]) + .await + .with_context(|| format!("mkdir {}", dropin_dir))?; + if !status.success() { + anyhow::bail!("mkdir {} exited with {}", dropin_dir, status); + } + + let changed = write_root_if_needed(JOURNALD_DROPIN_PATH, JOURNALD_DROPIN).await?; + if changed { + if let Err(e) = host_sudo(&["systemctl", "restart", "systemd-journald"]).await { + warn!("journald restart after drop-in update failed: {:#}", e); + } + } + Ok(changed) +} + async fn run() -> Result { // Dev-box guard: on contributors' laptops `/home/archipelago/archy` is // typically a symlink into the git checkout, and writing through it diff --git a/core/archipelago/src/container/lnd.rs b/core/archipelago/src/container/lnd.rs index a7ce21f9..df9ef5f0 100644 --- a/core/archipelago/src/container/lnd.rs +++ b/core/archipelago/src/container/lnd.rs @@ -43,7 +43,11 @@ pub enum EnsureOutcome { Unchanged, } -pub async fn ensure_config(paths: &EnsurePaths, rpc_pass: &str) -> Result { +pub async fn ensure_config( + paths: &EnsurePaths, + rpc_pass: &str, + bitcoin_host: &str, +) -> Result { fs::create_dir_all(&paths.data_dir) .await .with_context(|| format!("creating {}", paths.data_dir.display()))?; @@ -52,7 +56,7 @@ pub async fn ensure_config(paths: &EnsurePaths, rpc_pass: &str) -> Result String { s.replace('\'', "'\\''") } -fn has_required_lnd_flags(conf: &str, rpc_pass: &str) -> bool { +fn has_required_lnd_flags(conf: &str, rpc_pass: &str, bitcoin_host: &str) -> bool { let rpc_pass_line = 
format!("bitcoind.rpcpass={rpc_pass}"); + let rpc_host_line = format!("bitcoind.rpchost={bitcoin_host}:8332"); [ "bitcoin.active=true", "bitcoin.mainnet=true", "bitcoin.node=bitcoind", - "bitcoind.rpchost=bitcoin-knots:8332", + rpc_host_line.as_str(), rpc_pass_line.as_str(), ] .iter() @@ -678,7 +682,7 @@ mod tests { conf_path: tmp.path().join("lnd/lnd.conf"), }; - let out = ensure_config(&paths, "secret").await.unwrap(); + let out = ensure_config(&paths, "secret", "bitcoin-knots").await.unwrap(); assert_eq!(out, EnsureOutcome::Written); let conf = fs::read_to_string(&paths.conf_path).await.unwrap(); assert!(conf.contains("bitcoin.active=true")); @@ -697,17 +701,46 @@ mod tests { }; assert_eq!( - ensure_config(&paths, "first").await.unwrap(), + ensure_config(&paths, "first", "bitcoin-knots").await.unwrap(), EnsureOutcome::Written ); assert_eq!( - ensure_config(&paths, "second").await.unwrap(), + ensure_config(&paths, "second", "bitcoin-knots").await.unwrap(), EnsureOutcome::Written ); let conf = fs::read_to_string(&paths.conf_path).await.unwrap(); assert!(conf.contains("bitcoind.rpcpass=second")); } + #[tokio::test] + async fn ensure_config_repairs_bitcoin_host_drift() { + // A conf written against bitcoin-knots must be rewritten when the + // node's Bitcoin variant is bitcoin-core, or LND dials a hostname + // that doesn't exist on archy-net and dies on startup. 
+ let tmp = tempfile::TempDir::new().unwrap(); + let paths = EnsurePaths { + data_dir: tmp.path().join("lnd"), + conf_path: tmp.path().join("lnd/lnd.conf"), + }; + + assert_eq!( + ensure_config(&paths, "pw", "bitcoin-knots").await.unwrap(), + EnsureOutcome::Written + ); + assert_eq!( + ensure_config(&paths, "pw", "bitcoin-core").await.unwrap(), + EnsureOutcome::Written + ); + let conf = fs::read_to_string(&paths.conf_path).await.unwrap(); + assert!(conf.contains("bitcoind.rpchost=bitcoin-core:8332")); + assert!(!conf.contains("bitcoind.rpchost=bitcoin-knots:8332")); + + assert_eq!( + ensure_config(&paths, "pw", "bitcoin-core").await.unwrap(), + EnsureOutcome::Unchanged + ); + } + #[tokio::test] async fn ensure_config_repairs_incomplete_existing_config() { let tmp = tempfile::TempDir::new().unwrap(); @@ -721,7 +754,7 @@ mod tests { .unwrap(); assert_eq!( - ensure_config(&paths, "repaired").await.unwrap(), + ensure_config(&paths, "repaired", "bitcoin-knots").await.unwrap(), EnsureOutcome::Written ); let conf = fs::read_to_string(&paths.conf_path).await.unwrap(); diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs index 43c283d1..5825afa6 100644 --- a/core/archipelago/src/container/prod_orchestrator.rs +++ b/core/archipelago/src/container/prod_orchestrator.rs @@ -1368,6 +1368,7 @@ impl ProdContainerOrchestrator { .list_containers() .await .context("list_containers during adoption")?; + let user_stopped = crate::crash_recovery::load_user_stopped(&self.data_dir).await; let state = self.state.read().await; let mut report = AdoptionReport::default(); for (app_id, lm) in state.manifests.iter() { @@ -1377,6 +1378,21 @@ impl ProdContainerOrchestrator { .any(|c| c.name == expected || c.name == format!("/{expected}")) { report.adopted.push(app_id.clone()); + // Adopted apps will be (re)started by boot recovery, the first + // reconcile pass, or the doctor — whichever reaches them first + // can be minutes away. 
Register them as pending boot-starts now + // so the scanner shows "Restarting" (not "Stopped") from the + // very first post-boot scan. Cleared per-app by the first + // reconcile pass, so a genuinely failed start surfaces. + if !state.disabled.contains(app_id) + && !user_stopped.contains(app_id) + && !user_stopped.contains(&expected) + { + crate::crash_recovery::pending_boot_starts_add([ + app_id.clone(), + expected.clone(), + ]); + } } } Ok(report) @@ -1425,8 +1441,19 @@ impl ProdContainerOrchestrator { }; let mut report = ReconcileReport::default(); let disk_gb = self.disk_gb(); + // Register every candidate before the (sequential, possibly slow) + // pass so the scanner overlays queued-but-down apps as Restarting + // instead of Stopped. Each app is deregistered as its turn finishes, + // so a start that genuinely failed shows its real state again. + crate::crash_recovery::pending_boot_starts_add(manifests.iter().flat_map(|lm| { + [ + lm.manifest.app.id.clone(), + compute_container_name(&lm.manifest), + ] + })); for lm in manifests { let app_id = lm.manifest.app.id.clone(); + let container_name = compute_container_name(&lm.manifest); if mode == ReconcileMode::ExistingOnly && requires_archival_bitcoin(&app_id) && disk_gb < ARCHIVAL_BITCOIN_DISK_GB @@ -1435,6 +1462,8 @@ impl ProdContainerOrchestrator { &app_id, ReconcileAction::Left("requires-archival-bitcoin".into()), ); + crate::crash_recovery::pending_boot_start_done(&app_id); + crate::crash_recovery::pending_boot_start_done(&container_name); continue; } match self.ensure_running_with_mode(&lm, mode).await { @@ -2559,7 +2588,8 @@ impl ProdContainerOrchestrator { } .read("bitcoin-rpc-password") .context("lnd pre-start: read bitcoin RPC password")?; - let outcome = lnd::ensure_config(&self.lnd_paths, &rpc_pass) + let bitcoin_host = self.bitcoin_host(); + let outcome = lnd::ensure_config(&self.lnd_paths, &rpc_pass, &bitcoin_host) .await .context("lnd pre-start: ensure lnd.conf")?; Ok(Some(match outcome { @@ 
-2571,6 +2601,30 @@ impl ProdContainerOrchestrator { self.ensure_btcpay_stack_dirs().await?; Ok(Some(HookOutcome::Unchanged)) } + "fedimint-clientd" => { + // First-boot (root context) created /var/lib/archipelago/fmcd + // as root:root, but the rootless container's uid 0 maps to + // host 1000 — fmcd then crash-loops with "Permission denied + // (os error 13)". Repair ownership on every start so nodes + // installed before the first-boot fix self-heal too. (The + // generic running-container ownership sweep can't catch this: + // fmcd exits within seconds, so it's never Running when the + // sweep probes.) + let dir = "/var/lib/archipelago/fmcd"; + let mkdir = host_sudo(&["mkdir", "-p", dir]) + .await + .with_context(|| format!("mkdir {dir}"))?; + if !mkdir.success() { + return Err(anyhow::anyhow!("mkdir -p {dir} failed with status {mkdir}")); + } + let chown = host_sudo(&["chown", "-R", "1000:1000", dir]) + .await + .with_context(|| format!("chown {dir}"))?; + if !chown.success() { + return Err(anyhow::anyhow!("chown {dir} failed with status {chown}")); + } + Ok(Some(HookOutcome::Unchanged)) + } "grafana" => { self.cleanup_stale_grafana_port().await; Ok(Some(HookOutcome::Unchanged)) diff --git a/core/archipelago/src/crash_recovery.rs b/core/archipelago/src/crash_recovery.rs index 90d8fda4..e65cce43 100644 --- a/core/archipelago/src/crash_recovery.rs +++ b/core/archipelago/src/crash_recovery.rs @@ -49,6 +49,46 @@ pub fn is_recovery_complete() -> bool { RECOVERY_COMPLETE.load(Ordering::SeqCst) } +// ── Pending boot-start tracking ───────────────────────────────────────── +// Containers that boot recovery / the reconciler is about to start (or is +// starting right now). 
The package scanner overlays these as `Restarting` +// instead of the raw podman `Stopped`/`Exited`, so a freshly rebooted node +// doesn't tell the user their apps are "Stopped" while the sequential +// recovery pass (3s stagger + up to minutes for heavyweights like bitcoin) +// is still working through the queue. Writers register names when a pass +// begins and remove each name once its start attempt finishes, whatever +// the outcome — a container that truly failed goes back to showing its +// real state on the next scan. + +static PENDING_BOOT_STARTS: std::sync::LazyLock>> = + std::sync::LazyLock::new(|| std::sync::RwLock::new(std::collections::HashSet::new())); + +/// Register container/app names an active recovery or reconcile pass +/// intends to start. +pub fn pending_boot_starts_add>(names: I) { + if let Ok(mut set) = PENDING_BOOT_STARTS.write() { + set.extend(names); + } +} + +/// A start attempt for `name` finished (success or failure) — stop +/// overlaying it. +pub fn pending_boot_start_done(name: &str) { + if let Ok(mut set) = PENDING_BOOT_STARTS.write() { + set.remove(name); + } +} + +/// Whether `name` (a container name or scanner app id) is queued for a +/// boot/reconcile start. Container names may carry an `archy-` prefix the +/// scanner strips when deriving app ids, so check both forms. +pub fn is_pending_boot_start(name: &str) -> bool { + let Ok(set) = PENDING_BOOT_STARTS.read() else { + return false; + }; + set.contains(name) || set.contains(&format!("archy-{name}")) +} + // ── User-stopped tracking ─────────────────────────────────────────────── // When a user explicitly stops a container via the UI, we record it here // so crash recovery and health monitor don't auto-restart it. 
@@ -178,10 +218,17 @@ pub async fn check_for_crash(data_dir: &Path) -> Result() { - if is_process_running(pid) { + if pid != std::process::id() + && is_process_running(pid) + && process_is_archipelago(pid) + { warn!( "Previous process (PID {}) is still running — not a crash, skipping recovery", pid @@ -311,6 +358,8 @@ pub async fn recover_containers(containers: &[RunningContainerRecord]) -> Recove failed: Vec::new(), }; + pending_boot_starts_add(containers.iter().map(|r| r.name.clone())); + for (i, record) in containers.iter().enumerate() { info!( "Recovering container: {} (image: {})", @@ -373,6 +422,7 @@ pub async fn recover_containers(containers: &[RunningContainerRecord]) -> Recove if !started { report.failed.push(record.name.clone()); } + pending_boot_start_done(&record.name); } report @@ -391,6 +441,16 @@ fn is_process_running(pid: u32) -> bool { std::path::Path::new(&format!("/proc/{}", pid)).exists() } +/// Whether the process at `pid` looks like an archipelago instance. Used to +/// tell "the previous instance is genuinely still alive" apart from PID +/// reuse by an unrelated process after a reboot. +fn process_is_archipelago(pid: u32) -> bool { + match std::fs::read(format!("/proc/{pid}/cmdline")) { + Ok(cmdline) => String::from_utf8_lossy(&cmdline).contains("archipelago"), + Err(_) => false, + } +} + /// Start all stopped containers that were previously installed. /// Runs on every startup to ensure containers come back after clean reboots. /// The crash recovery (PID-based) handles dirty shutdowns; this handles clean ones. @@ -425,16 +485,34 @@ async fn start_stopped_app_stacks(data_dir: &Path) -> RecoveryReport { ); repair_stack_network_aliases(stack).await; + // Register the whole stack up front: the per-member dependency waits + // below can take minutes, and the UI should say "Restarting", not + // "Stopped", for members still queued behind them. 
+ pending_boot_starts_add( + stack + .containers + .iter() + .filter(|c| !user_stopped.contains(**c)) + .map(|c| (*c).to_string()), + ); + for container in stack.containers { if user_stopped.contains(*container) { info!("Skipping user-stopped container: {}", container); continue; } - match container_state(container).await { - Some(state) if state == "running" => continue, + let state = container_state(container).await; + match state { + Some(state) if state == "running" => { + pending_boot_start_done(container); + continue; + } Some(_) => {} - None => continue, + None => { + pending_boot_start_done(container); + continue; + } } repair_stack_network_aliases(stack).await; @@ -446,6 +524,7 @@ async fn start_stopped_app_stacks(data_dir: &Path) -> RecoveryReport { } else { report.failed.push((*container).to_string()); } + pending_boot_start_done(container); } } diff --git a/core/archipelago/src/main.rs b/core/archipelago/src/main.rs index f937d058..23805fb9 100644 --- a/core/archipelago/src/main.rs +++ b/core/archipelago/src/main.rs @@ -98,11 +98,15 @@ async fn main() -> Result<()> { let startup_start = std::time::Instant::now(); crash_recovery::init_start_time(); - // Initialize tracing + // Initialize tracing. Default to `info`: production units don't set + // RUST_LOG, and the old `archipelago=debug` default flooded journald + // with per-request debug lines ("RPC method: …", cookie-flag notes) — + // part of a >1 GB/day journal on a fresh node. Set RUST_LOG (e.g. + // RUST_LOG=archipelago=debug) to get debug logs back when debugging. tracing_subscriber::fmt() .with_env_filter( tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| "archipelago=debug,info".into()), + .unwrap_or_else(|_| "info".into()), ) .init(); @@ -149,13 +153,18 @@ async fn main() -> Result<()> { ); } - // Write PID marker early so we can detect crashes on next startup + // Check for a crash marker BEFORE writing our own. 
The old order wrote + // the marker first, so the check always read the CURRENT process's PID, + // found it alive, and skipped recovery — on every boot, forever. + let crash_containers = crash_recovery::check_for_crash(&config.data_dir).await; + + // Now mark this instance as running so the next startup can detect a crash. crash_recovery::write_pid_marker(&config.data_dir).await?; // Run crash recovery before starting the manifest reconciler. Both paths // mutate Podman; running them concurrently can corrupt transient runtime // state and leave netavark/conmon unable to start containers. - match crash_recovery::check_for_crash(&config.data_dir).await { + match crash_containers { Ok(Some(containers)) => { info!( "🔧 Recovering {} containers from previous crash...", diff --git a/core/archipelago/src/server.rs b/core/archipelago/src/server.rs index b5f6de3c..093557c4 100644 --- a/core/archipelago/src/server.rs +++ b/core/archipelago/src/server.rs @@ -1203,6 +1203,21 @@ fn merge_preserving_transitional( } } +/// Package ids whose `Restarting` state was written by the scanner's +/// pending-boot-start overlay (not by an RPC restart task). For these, the +/// scan is the owner: once podman reports a settled state and the id is no +/// longer queued for a boot start, the fresh state wins immediately instead +/// of being preserved for the transitional-stuck timeout. 
+static SCANNER_RESTARTING: std::sync::LazyLock>> = + std::sync::LazyLock::new(|| std::sync::Mutex::new(std::collections::HashSet::new())); + +fn take_scanner_restarting(id: &str) -> bool { + SCANNER_RESTARTING + .lock() + .map(|mut set| set.remove(id)) + .unwrap_or(false) +} + fn is_podman_scan_timeout(error: &anyhow::Error) -> bool { let msg = format!("{:#}", error); msg.contains("podman ps") && msg.contains("timed out") @@ -1223,6 +1238,25 @@ async fn scan_and_update_packages( pkg.state = crate::data_model::PackageState::Stopped; pkg.exit_code = None; } + // A down container that boot recovery / the reconciler is queued to + // start is "Restarting", not "Stopped" — after a reboot the sequential + // recovery pass can take minutes to reach heavyweights, and telling + // the user their app stopped when it's about to come back is wrong. + // Ids overlaid here are recorded in SCANNER_RESTARTING so the merge + // below knows this Restarting is scanner-authored (resolve it as soon + // as podman reports a settled state) and not owned by an RPC restart + // task (whose transitional state must be preserved). + if matches!( + pkg.state, + crate::data_model::PackageState::Stopped | crate::data_model::PackageState::Exited + ) && crate::crash_recovery::is_pending_boot_start(id) + { + pkg.state = crate::data_model::PackageState::Restarting; + pkg.exit_code = None; + if let Ok(mut set) = SCANNER_RESTARTING.lock() { + set.insert(id.clone()); + } + } } normalize_reachable_package_health(&mut packages).await; @@ -1273,6 +1307,19 @@ async fn scan_and_update_packages( absence_tracker.remove(id); let existing = merged.get(id); let overwrite = match existing { + // Scanner-authored Restarting (the pending-boot-start overlay) + // resolves as soon as the fresh scan reports anything else: the + // scan is its owner — no RPC task will ever write a final state + // back. 
Without this, a successfully recovered container would + // sit wedged in "Restarting" until the 20-minute stuck timeout. + Some(existing_entry) + if existing_entry.state == crate::data_model::PackageState::Restarting + && pkg.state != crate::data_model::PackageState::Restarting + && take_scanner_restarting(id) => + { + transitional_since.remove(id); + true + } Some(existing_entry) if is_transitional(&existing_entry.state) => { let entered = *transitional_since.entry(id.clone()).or_insert(now); let timeout = transitional_stuck_timeout(&existing_entry.state); diff --git a/docs/HANDOVER-2026-07-02-iso-feedback.md b/docs/HANDOVER-2026-07-02-iso-feedback.md new file mode 100644 index 00000000..ca259937 --- /dev/null +++ b/docs/HANDOVER-2026-07-02-iso-feedback.md @@ -0,0 +1,142 @@ +# Handover — fresh-ISO feedback bug-bash (2026-07-02) + +**For: the agent building the next ISO + fleet deploy.** All fixes below are +**uncommitted in this working tree** (per the user's flow: you audit, build the +ISO, deploy). Source feedback: user's fresh ISO install on a Framework +(11th-gen Tiger Lake) machine, node `192.168.1.81` (SSH `archipelago` / +`archipelago`). Diagnostic bundle: `/home/archipelago/incoming-logs/node-logs-192.168.1.81/`. + +## ⚠️ Outstanding user request for the deploy + +- **Change .81's web-UI password to `ThisIsWeb54321@`** — the user forgot the + current one. Node was unreachable from .116 during this session (flaky WiFi + AP, IP flapped .68↔.81). Do this during deploy (SSH works from the user's + machine; `archipelago`/`archipelago`). + +## What changed (by file) + +### Backend (core/archipelago/src) — builds clean, targeted tests pass +- `api/handler/websocket.rs` — **subscribe BEFORE initial snapshot** (the + "everything needs ctrl-r" root cause: broadcasts in the snapshot→subscribe + gap were silently lost; a stale client never learned containers-scanned). 
+- `main.rs` — crash check now runs BEFORE writing the PID marker (**crash + recovery had never run on any node** — it always saw its own PID and + skipped); tracing default demoted debug→info (journal volume). +- `crash_recovery.rs` — PID-reuse guard (`process_is_archipelago`); new + **pending-boot-starts registry** (names queued for recovery/reconcile) with + writers in `recover_containers` + stack recovery. +- `server.rs` — scanner overlays Stopped/Exited → **Restarting** for + pending-boot-start ids (user ask: "status should be restarting if they are + being restarted"); `SCANNER_RESTARTING` ownership set so scanner-authored + Restarting resolves immediately instead of wedging in the 20-min + transitional-preserve. +- `container/prod_orchestrator.rs` — reconcile pass + `adopt_existing` + register/deregister pending boot-starts; LND pre-start hook passes detected + `bitcoin_host()` (Knots vs Core) into `lnd::ensure_config`; new + `fedimint-clientd` pre-start hook (mkdir + chown 1000:1000 of + `/var/lib/archipelago/fmcd` — self-heals the crash-loop). +- `container/lnd.rs` — `ensure_config(paths, rpc_pass, bitcoin_host)`; + bitcoind.rpchost no longer hardcoded `bitcoin-knots`; drift check rewrites + host changes; +unit test `ensure_config_repairs_bitcoin_host_drift`. +- `api/rpc/package/dependencies.rs` — bounded **dependency wait** + (`wait_for_install_deps`, 36×5s): installed-but-starting deps wait with + "Waiting for Bitcoin to start…" on the card; not-installed deps fail fast + with `DependencyGateError` marker; +5 unit tests. +- `api/rpc/package/install.rs`, `stacks.rs` — call sites wired to + `gate_install_deps` (lnd/electrumx/mempool/btcpay). +- `api/rpc/package/async_lifecycle.rs` — `DependencyGateError` removes the + optimistic entry (**no more phantom "Stopped" LND tile**) + pushes an Error + notification with the reason. +- `api/rpc/package/progress.rs` — `set_install_message` helper. 
+- `api/rpc/seed_rpc.rs` — `save_pending_seed_encrypted`; seed.restore also + stashes the mnemonic; `auth.rs` — **auth.setup persists the encrypted seed + backup** (recovery-phrase reveal previously failed on EVERY node because + nothing ever wrote `master_seed.enc`). +- `api/rpc/middleware.rs` — sanitizer allowlist extended (seed/2FA/auth + errors reach the user instead of "Check server logs"); +2 tests. +- `bitcoin_status.rs` — friendly status for "connection reset" (bitcoind + starting); raw URL/os-error chains no longer shown; +3 tests. +- `bootstrap.rs` — journald drop-in self-heal (OTA nodes get log caps); + bitcoin.conf printtoconsole heal. (Log-spam agent's work; verified.) +- `api/rpc/package/config.rs` — bitcoin args `-printtoconsole=0`. + +### Manifests / scripts / configs +- `apps/lnd/manifest.yml` — BITCOIND_HOST now `derived_env {{BITCOIN_HOST}}`. +- `apps/bitcoin-knots/manifest.yml`, `apps/bitcoin-core/manifest.yml` — + `-printtoconsole=0` (90.6% of the journal was IBD UpdateTip spam; + debug.log in the datadir keeps full logs). +- `scripts/first-boot-containers.sh` — chown 1000:1000 of + `/var/lib/archipelago/fmcd` in BOTH fmcd blocks (root-owned dir was the + fedimint-clientd "Permission denied os error 13" crash-loop); + printtoconsole=0. +- `scripts/container-doctor.sh`, `scripts/reconcile-containers.sh` — + printtoconsole=0. +- `image-recipe/configs/journald-archipelago.conf` (NEW) — SystemMaxUse=500M, + rate limits; baked by ISO builder + bootstrap self-heal. +- `image-recipe/configs/nginx-archipelago.conf` — `/assets/` 404s no longer + cacheable (the `always` immutable header could pin a missing background for + a YEAR); HTTPS block gained the missing `/assets/` location (was silently + serving index.html as images). +- `image-recipe/configs/archipelago-kiosk.service` — MemoryMax 1500→2800M, + MemoryHigh 1200→2200M (kiosk was riding reclaim-throttle = the lag). 
+- `image-recipe/_archived/build-auto-installer-iso.sh` — kiosk launcher/service + now spliced from `image-recipe/configs/` at build time (was a stale inline + heredoc that force-disabled GPU); **+ `firmware-intel-graphics` + + `firmware-amd-graphics`** (Debian trixie split the i915 DMC blobs out of + firmware-misc-nonfree; the .81 kernel logged tgl_dmc missing). + +### Frontend (neode-ui) — vue-tsc clean, vitest green +- `views/Login.vue` — Enter in field 1 → focus confirm; Enter in confirm → + submit; submit button always clickable (shows inline mismatch/length error + instead of being silently disabled); errors clear on input; **Restart + Onboarding needs a confirming second click** (5s window) — this button is + the likely cause of the "onboarding restarted after mismatch" report. + +`login.restartConfirm` key in en/es locales. +- `stores/sync.ts` — 30s staleness reconciliation (server.get-state) while + connected; already-connected fast path now refetches too. +- `composables/useContainersScanTimeout.ts` (NEW, +tests) — 20s escape hatch; + wired into `Apps.vue` / `Discover.vue` / `Marketplace.vue`; fresh empty node + reaches the real "no apps yet" empty state; "Checking…" can never persist. +- Backgrounds: 10 heaviest bg JPEGs → **WebP q90** (9.4MB→6.6MB; refs updated + in OnboardingWrapper/Dashboard/useRouteTransitions); 7 remaining images + stayed JPEG (WebP came out LARGER on those — noisy sources; deliberate). +- `public/assets/video/video-intro.mp4` — re-encoded CRF20 (SSIM 0.988) with + **+faststart** (moov was at EOF → browser had to download all 15MB before + playing = the intro lag). 12.7MB now, streams immediately. +- LND icon: stale dist artifact; any fresh `npm run build` ships + `app-icons/lnd.png` correctly. 
+ +## Verification done here +- `cargo build -p archipelago` + `cargo check` clean; targeted tests + (bitcoin_status, middleware sanitize, dep_wait, lnd, crash_recovery, + boot_reconciler, bitcoin_host, prod_orchestrator lnd hooks): **52 passed, + 0 failed**. Full suite: **898 passed, 0 failed, 1 ignored** (22s). +- `npm run build` green; dist verified: 10 bg-*.webp present, `lnd.png` + icon present, `restartConfirm` string in bundle, optimized faststart + video (12,740,782 bytes) in place. Note: main had a latent build breaker + (unused template ref in `Web5ConnectedNodes.vue` from commit 8256fde1, + vue-tsc TS6133) — fixed here by removing the dead ref/binding; without + this fix `npm run build` fails on current main. +- vitest: new composable tests + related suites pass. +- `bash -n` clean on all touched scripts; nginx conf live-verified by agent + (200/404/cache headers on both HTTP+HTTPS blocks). +- ISO kiosk splice byte-verified against configs/ by agent simulation. + +## NOT done / left for you +1. **Full test-suite run + gate**: run the complete `cargo test` and (after + deploy) `tests/lifecycle/run-gate.sh` ON .228 per CLAUDE.md before any tag. +2. **Frontend bundle grep before shipping** (per memory/feedback): verify new + strings (e.g. `restartConfirm`, `bg-home.webp`) in the built tarball. +3. **Diagnostics collector** (`data-dir-listing.txt` = 15MB of podman overlay + internals; dmidecode empty) — collector script wasn't found in this repo + (likely lives on-node or in the user's collection script); fix when found. +4. **podman healthcheck cgroup EPERM spam** (1,250 journal errors, healthchecks + unreliable fleet-wide) — real open bug, Quadlet-phase territory, NOT fixed. +5. **DP link-training failures on .81** (display corruption) — likely + cable/dock/port hardware; firmware fix may help; tell user to try another + cable/port if corruption recurs. +6. 
**LoRa/RNode onboarding surface** — never scoped; user may want it as a + feature (mesh device-found modal exists only on Mesh page post-login). +7. The concurrent audit agent's files (`docs/1.8.0-RELEASE-HARDENING-PLAN.md`, + `core/.../trust/*`, parts of `bootstrap.rs`) are ALSO uncommitted here — + coordinate before committing; don't mix attribution. diff --git a/image-recipe/_archived/build-auto-installer-iso.sh b/image-recipe/_archived/build-auto-installer-iso.sh index 05b9bdeb..3868dabb 100755 --- a/image-recipe/_archived/build-auto-installer-iso.sh +++ b/image-recipe/_archived/build-auto-installer-iso.sh @@ -354,6 +354,8 @@ RUN apt-get update && apt-get -y full-upgrade && apt-get install -y --no-install firmware-iwlwifi \ firmware-misc-nonfree \ firmware-linux-nonfree \ + firmware-intel-graphics \ + firmware-amd-graphics \ intel-microcode \ amd64-microcode \ xorg \ @@ -528,11 +530,15 @@ RUN mkdir -p /var/lib/archipelago/data /var/lib/archipelago/config /var/lib/arch # Persist journalctl across reboots — without /var/log/journal systemd # journal uses tmpfs and everything before the last boot is lost. We # need the full history to diagnose first-boot / install / onboarding -# issues after the fact. Size cap keeps it from eating the disk. +# issues after the fact. Size cap keeps it from eating the disk, and the +# explicit rate limit stops a single chatty service (e.g. a container +# spamming conmon->journald during Bitcoin IBD) from drowning the journal. +# Keep this byte-identical to image-recipe/configs/journald-archipelago.conf — +# the backend self-heals the same file onto deployed nodes (bootstrap.rs). 
RUN mkdir -p /var/log/journal && \ systemd-tmpfiles --create --prefix /var/log/journal 2>/dev/null || true && \ install -d -m 0755 /etc/systemd/journald.conf.d && \ - printf '[Journal]\nStorage=persistent\nSystemMaxUse=500M\nRuntimeMaxUse=100M\nForwardToSyslog=no\n' > /etc/systemd/journald.conf.d/10-archipelago-persistent.conf + printf '[Journal]\nStorage=persistent\nSystemMaxUse=500M\nRuntimeMaxUse=100M\nForwardToSyslog=no\nRateLimitIntervalSec=30s\nRateLimitBurst=10000\n' > /etc/systemd/journald.conf.d/10-archipelago-persistent.conf # Clean up RUN apt-get clean && \ @@ -2651,98 +2657,48 @@ RestartSec=5 WantedBy=multi-user.target CLAUDESVC -# Kiosk mode — X11 + Chromium fullscreen on attached display -# Not enabled by default; toggle via: sudo archipelago-kiosk enable/disable -cat > /mnt/target/usr/local/bin/archipelago-kiosk-launcher <<'KIOSKLAUNCHER' -#!/bin/bash -# Start X server on VT7 (VT1 stays on MOTD/console) -/usr/bin/Xorg :0 vt7 -nolisten tcp -keeptty & -XPID=$! -sleep 3 +INSTALLER_SCRIPT -# Switch to kiosk display -chvt 7 2>/dev/null || true - -if ! kill -0 $XPID 2>/dev/null; then - echo 'ERROR: Xorg failed to start' +# ----------------------------------------------------------------------------- +# Kiosk launcher + systemd service: spliced into auto-install.sh at BUILD time +# from image-recipe/configs/ — the single source of truth (the same files are +# embedded in the Rust binary via include_str! in core/archipelago/src/bootstrap.rs +# and self-healed onto nodes by ensure_kiosk_hardened()). +# +# A previous inline heredoc copy here had silently diverged (unconditional +# --disable-gpu, no CPU/memory limits, VT7 scheme) and shipped stale kiosk +# behavior on fresh ISOs. Never re-inline these payloads. 
+# ----------------------------------------------------------------------------- +KIOSK_LAUNCHER_SRC="$SCRIPT_DIR/../configs/archipelago-kiosk-launcher.sh" +KIOSK_SERVICE_SRC="$SCRIPT_DIR/../configs/archipelago-kiosk.service" +for _kiosk_src in "$KIOSK_LAUNCHER_SRC" "$KIOSK_SERVICE_SRC"; do + if [ ! -f "$_kiosk_src" ]; then + echo "ERROR: kiosk config file missing: $_kiosk_src" >&2 + echo " The ISO must ship the maintained kiosk launcher/service from" >&2 + echo " image-recipe/configs/ — refusing to build (no stale fallback)." >&2 + exit 1 + fi +done +# Guard: payloads must not contain the heredoc terminators we wrap them in. +if grep -qx 'KIOSKLAUNCHER' "$KIOSK_LAUNCHER_SRC" || grep -qx 'KIOSKSVC' "$KIOSK_SERVICE_SRC"; then + echo "ERROR: kiosk config contains a reserved heredoc terminator line (KIOSKLAUNCHER/KIOSKSVC)" >&2 exit 1 fi +{ + echo "# Kiosk mode — X11 + Chromium fullscreen on attached display" + echo "# Not enabled by default; toggle via: sudo archipelago-kiosk enable/disable" + echo "# Payloads spliced at ISO-build time from image-recipe/configs/ (source of truth)." 
+ echo "cat > /mnt/target/usr/local/bin/archipelago-kiosk-launcher <<'KIOSKLAUNCHER'" + awk 1 "$KIOSK_LAUNCHER_SRC" # awk 1 (not cat): guarantees a final newline so the heredoc terminator below lands on its own line + echo "KIOSKLAUNCHER" + echo "chmod +x /mnt/target/usr/local/bin/archipelago-kiosk-launcher" + echo "" + echo "cat > /mnt/target/etc/systemd/system/archipelago-kiosk.service <<'KIOSKSVC'" + awk 1 "$KIOSK_SERVICE_SRC" # awk 1 (not cat): same trailing-newline guarantee for the unit file + echo "KIOSKSVC" +} >> "$ARCH_DIR/auto-install.sh" -export DISPLAY=:0 -export HOME=/home/archipelago - -xhost +SI:localuser:archipelago 2>/dev/null -xset s off 2>/dev/null -xset -dpms 2>/dev/null -xset s noblank 2>/dev/null - -unclutter -idle 3 -root & - -while true; do - # Get screen resolution for window sizing - SCREEN_RES=$(xdpyinfo 2>/dev/null | awk '/dimensions:/{print $2}') - SCREEN_RES=${SCREEN_RES:-1920x1080} - sudo -u archipelago env DISPLAY=:0 HOME=/home/archipelago chromium \ - --kiosk \ - --start-fullscreen \ - --start-maximized \ - --window-position=0,0 \ - --window-size=${SCREEN_RES/x/,} \ - --app=http://localhost/kiosk \ - --noerrdialogs \ - --disable-infobars \ - --disable-translate \ - --no-first-run \ - --check-for-update-interval=31536000 \ - --disable-features=TranslateUI,PasswordManagerOnboarding,AutofillServerCommunication,PasswordManagerEnabled \ - --disable-session-crashed-bubble \ - --disable-save-password-bubble \ - --disable-suggestions-service \ - --password-store=basic \ - --disable-component-update \ - --credentials_enable_service=false \ - --disable-gpu \ - --disable-breakpad \ - --disable-metrics \ - --disable-metrics-reporting \ - --metrics-recording-only \ - --disable-domain-reliability \ - --disable-background-networking \ - --disable-background-timer-throttling \ - --disable-backgrounding-occluded-windows \ - --user-data-dir=/var/lib/archipelago/chromium-kiosk - sleep 3 -done - -kill $XPID 2>/dev/null -KIOSKLAUNCHER -chmod +x /mnt/target/usr/local/bin/archipelago-kiosk-launcher - -cat > /mnt/target/etc/systemd/system/archipelago-kiosk.service <<'KIOSKSVC' -[Unit] -Description=Archipelago Kiosk (X11 +
Chromium) -After=archipelago.service systemd-user-sessions.service network-online.target -Wants=archipelago.service network-online.target -ConditionPathExists=/usr/local/bin/archipelago-kiosk-launcher -Conflicts=getty@tty1.service - -[Service] -Type=simple -# First-boot health-poll window is 300s (150 × 2s). Slow hardware -# (e.g. the atom-class box at .198) was blowing past the old 60s / -# 120s window, so Chromium launched against a not-yet-ready backend -# and showed a blank window that only recovered on reboot. At 300s -# even the unbundled-FileBrowser-pull + archipelago state sync + frontend -# settle fits with headroom. TimeoutStartSec is bumped in lockstep. -ExecStartPre=/bin/bash -c 'for i in $(seq 1 150); do curl -sf http://localhost/health >/dev/null 2>&1 && exit 0; sleep 2; done; exit 0' -ExecStart=/usr/local/bin/archipelago-kiosk-launcher -TimeoutStartSec=360 -Restart=always -RestartSec=5 - -[Install] -WantedBy=multi-user.target -KIOSKSVC +cat >> "$ARCH_DIR/auto-install.sh" <<'INSTALLER_SCRIPT' # Toggle script: sudo archipelago-kiosk enable|disable|status cat > /mnt/target/usr/local/bin/archipelago-kiosk <<'KIOSKTOGGLE' @@ -2790,8 +2746,8 @@ case "${1:-status}" in echo " status — Show current mode" echo "" echo "Keyboard shortcuts (from terminal):" - echo " Ctrl+Alt+F7 — Switch to kiosk display" - echo " Ctrl+Alt+F1 — Switch to terminal" + echo " Ctrl+Alt+F1 — Kiosk display (when enabled; console login when disabled)" + echo " Ctrl+Alt+F2 — Text console" exit 1 ;; esac diff --git a/image-recipe/configs/archipelago-kiosk.service b/image-recipe/configs/archipelago-kiosk.service index 5db41f3c..ad996448 100644 --- a/image-recipe/configs/archipelago-kiosk.service +++ b/image-recipe/configs/archipelago-kiosk.service @@ -27,8 +27,12 @@ RestartSec=5 # also binds the chromium/Xorg children in this unit's cgroup. 
Delegate=yes CPUQuota=75% -MemoryMax=1500M -MemoryHigh=1200M +# Raised from 1500M/1200M: a Framework (Tiger Lake) kiosk sat at 806M used / +# 1.1G peak, riding the old MemoryHigh reclaim-throttle line — the throttling +# itself was the perceived UI lag. Keep Max well above real peaks; High stays +# the soft reclaim line so a runaway kiosk still can't take the machine down. +MemoryMax=2800M +MemoryHigh=2200M [Install] WantedBy=multi-user.target diff --git a/image-recipe/configs/journald-archipelago.conf b/image-recipe/configs/journald-archipelago.conf new file mode 100644 index 00000000..889c86eb --- /dev/null +++ b/image-recipe/configs/journald-archipelago.conf @@ -0,0 +1,7 @@ +[Journal] +Storage=persistent +SystemMaxUse=500M +RuntimeMaxUse=100M +ForwardToSyslog=no +RateLimitIntervalSec=30s +RateLimitBurst=10000 diff --git a/image-recipe/configs/nginx-archipelago.conf b/image-recipe/configs/nginx-archipelago.conf index 013bfc4e..8ed58f45 100644 --- a/image-recipe/configs/nginx-archipelago.conf +++ b/image-recipe/configs/nginx-archipelago.conf @@ -115,11 +115,19 @@ server { # Versioned Vite assets must never fall through to index.html. During OTA # a browser can keep an old HTML shell that references now-removed hashed # chunks; returning HTML for /assets/*.js triggers strict MIME failures. - # A real 404 plus immutable/no-cache split lets the app/browser recover on - # refresh without caching the wrong content type. + # The immutable header must NOT use `always`: with `always` a transient + # 404 (e.g. mid web-ui swap on first boot) gets cached by the browser for + # a year and the asset stays "missing" until a hard cache clear. Without + # `always` the header applies only to 2xx/3xx; 404s are routed to a + # named location that marks them no-store so the browser retries. 
location /assets/ { try_files $uri =404; - add_header Cache-Control "public, max-age=31536000, immutable" always; + add_header Cache-Control "public, max-age=31536000, immutable"; + error_page 404 = @asset_missing; + } + location @asset_missing { + add_header Cache-Control "no-store" always; + return 404; } location ~* ^/(registerSW\.js|sw\.js|workbox-[^/]+\.js)$ { @@ -994,6 +1002,19 @@ server { try_files $uri =404; } + # Versioned Vite assets must never fall through to index.html (mirrors the + # HTTP block). No `always` on the immutable header: a transient 404 must + # not be cached for a year — 404s go to @asset_missing (no-store) instead. + location /assets/ { + try_files $uri =404; + add_header Cache-Control "public, max-age=31536000, immutable"; + error_page 404 = @asset_missing; + } + location @asset_missing { + add_header Cache-Control "no-store" always; + return 404; + } + location / { try_files $uri $uri/ /index.html; } diff --git a/neode-ui/public/assets/INTRO-ASSETS-REPLACE.md b/neode-ui/public/assets/INTRO-ASSETS-REPLACE.md index ca541919..8d3d4624 100644 --- a/neode-ui/public/assets/INTRO-ASSETS-REPLACE.md +++ b/neode-ui/public/assets/INTRO-ASSETS-REPLACE.md @@ -31,13 +31,13 @@ To change the intro splash and dashboard tab backgrounds **without touching any | Filename | Tab | |----------|-----| -| **`bg-home.jpg`** | Home | +| **`bg-home.webp`** | Home | | **`bg-web5.jpg`** | Web5 | | **`bg-network.jpg`** | Server / Network | -| **`bg-settings.jpg`** | Settings | -| **`bg-myapps.jpg`** | My Apps | -| **`bg-appstore.jpg`** | App Store / Marketplace | -| **`bg-cloud.jpg`** | Cloud | +| **`bg-settings.webp`** | Settings | +| **`bg-myapps.webp`** | My Apps | +| **`bg-appstore.webp`** | App Store / Marketplace | +| **`bg-cloud.webp`** | Cloud | | **`bg-intro.jpg`** | Default (also intro) | | **`bg-intro-3.jpg`** | Alternate layer during transitions | @@ -47,12 +47,12 @@ To change the intro splash and dashboard tab backgrounds **without touching any | 
Filename | Used for | |----------|----------| -| **`bg-intro-1.jpg`** | Onboarding done, login | +| **`bg-intro-1.webp`** | Onboarding done, login | | **`bg-intro-2.jpg`** | Onboarding verify | | **`bg-intro-3.jpg`** | Onboarding path, dashboard transition layer | -| **`bg-intro-4.jpg`** | Onboarding options | -| **`bg-intro-5.jpg`** | Onboarding did | -| **`bg-intro-6.jpg`** | Onboarding backup | +| **`bg-intro-4.webp`** | Onboarding options | +| **`bg-intro-5.webp`** | Onboarding did | +| **`bg-intro-6.webp`** | Onboarding backup | --- @@ -62,16 +62,16 @@ To change the intro splash and dashboard tab backgrounds **without touching any |-------|-----------| | Intro image | `neode-ui/public/assets/img/bg-intro.jpg` | | Intro video | `neode-ui/public/assets/video/video-intro.mp4` | -| Home | `neode-ui/public/assets/img/bg-home.jpg` | +| Home | `neode-ui/public/assets/img/bg-home.webp` | | Web5 | `neode-ui/public/assets/img/bg-web5.jpg` | | Network | `neode-ui/public/assets/img/bg-network.jpg` | -| Settings | `neode-ui/public/assets/img/bg-settings.jpg` | -| My Apps | `neode-ui/public/assets/img/bg-myapps.jpg` | -| App Store | `neode-ui/public/assets/img/bg-appstore.jpg` | -| Cloud | `neode-ui/public/assets/img/bg-cloud.jpg` | +| Settings | `neode-ui/public/assets/img/bg-settings.webp` | +| My Apps | `neode-ui/public/assets/img/bg-myapps.webp` | +| App Store | `neode-ui/public/assets/img/bg-appstore.webp` | +| Cloud | `neode-ui/public/assets/img/bg-cloud.webp` | | Default | `neode-ui/public/assets/img/bg-intro.jpg` | | Transition | `neode-ui/public/assets/img/bg-intro-3.jpg` | -| Intro 1–6 | `neode-ui/public/assets/img/bg-intro-1.jpg` … `bg-intro-6.jpg` | +| Intro 1–6 | `neode-ui/public/assets/img/bg-intro-1.webp` … `bg-intro-6.webp` (intro-2 and intro-3 remain `.jpg` — WebP came out larger for those) | --- diff --git a/neode-ui/public/assets/img/bg-appstore.jpg b/neode-ui/public/assets/img/bg-appstore.jpg deleted file mode 100644 index b85cdc4a..00000000 Binary files 
a/neode-ui/public/assets/img/bg-appstore.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-appstore.webp b/neode-ui/public/assets/img/bg-appstore.webp new file mode 100644 index 00000000..c31ee7d3 Binary files /dev/null and b/neode-ui/public/assets/img/bg-appstore.webp differ diff --git a/neode-ui/public/assets/img/bg-cloud.jpg b/neode-ui/public/assets/img/bg-cloud.jpg deleted file mode 100644 index fc361ebe..00000000 Binary files a/neode-ui/public/assets/img/bg-cloud.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-cloud.webp b/neode-ui/public/assets/img/bg-cloud.webp new file mode 100644 index 00000000..a2c228e8 Binary files /dev/null and b/neode-ui/public/assets/img/bg-cloud.webp differ diff --git a/neode-ui/public/assets/img/bg-home.jpg b/neode-ui/public/assets/img/bg-home.jpg deleted file mode 100644 index 6956650d..00000000 Binary files a/neode-ui/public/assets/img/bg-home.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-home.webp b/neode-ui/public/assets/img/bg-home.webp new file mode 100644 index 00000000..42f1eeec Binary files /dev/null and b/neode-ui/public/assets/img/bg-home.webp differ diff --git a/neode-ui/public/assets/img/bg-intro-1.jpg b/neode-ui/public/assets/img/bg-intro-1.jpg deleted file mode 100644 index fc361ebe..00000000 Binary files a/neode-ui/public/assets/img/bg-intro-1.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-intro-1.webp b/neode-ui/public/assets/img/bg-intro-1.webp new file mode 100644 index 00000000..a2c228e8 Binary files /dev/null and b/neode-ui/public/assets/img/bg-intro-1.webp differ diff --git a/neode-ui/public/assets/img/bg-intro-4.jpg b/neode-ui/public/assets/img/bg-intro-4.jpg deleted file mode 100644 index 36e87e80..00000000 Binary files a/neode-ui/public/assets/img/bg-intro-4.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-intro-4.webp b/neode-ui/public/assets/img/bg-intro-4.webp new file mode 100644 index 00000000..130fe3e7 
Binary files /dev/null and b/neode-ui/public/assets/img/bg-intro-4.webp differ diff --git a/neode-ui/public/assets/img/bg-intro-5.jpg b/neode-ui/public/assets/img/bg-intro-5.jpg deleted file mode 100644 index e59bdc09..00000000 Binary files a/neode-ui/public/assets/img/bg-intro-5.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-intro-5.webp b/neode-ui/public/assets/img/bg-intro-5.webp new file mode 100644 index 00000000..428f174f Binary files /dev/null and b/neode-ui/public/assets/img/bg-intro-5.webp differ diff --git a/neode-ui/public/assets/img/bg-intro-6.jpg b/neode-ui/public/assets/img/bg-intro-6.jpg deleted file mode 100644 index 156362b0..00000000 Binary files a/neode-ui/public/assets/img/bg-intro-6.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-intro-6.webp b/neode-ui/public/assets/img/bg-intro-6.webp new file mode 100644 index 00000000..5aa31709 Binary files /dev/null and b/neode-ui/public/assets/img/bg-intro-6.webp differ diff --git a/neode-ui/public/assets/img/bg-mesh.jpg b/neode-ui/public/assets/img/bg-mesh.jpg deleted file mode 100644 index 6bcb1968..00000000 Binary files a/neode-ui/public/assets/img/bg-mesh.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-mesh.webp b/neode-ui/public/assets/img/bg-mesh.webp new file mode 100644 index 00000000..4453f9e2 Binary files /dev/null and b/neode-ui/public/assets/img/bg-mesh.webp differ diff --git a/neode-ui/public/assets/img/bg-myapps.jpg b/neode-ui/public/assets/img/bg-myapps.jpg deleted file mode 100644 index b85cdc4a..00000000 Binary files a/neode-ui/public/assets/img/bg-myapps.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-myapps.webp b/neode-ui/public/assets/img/bg-myapps.webp new file mode 100644 index 00000000..c31ee7d3 Binary files /dev/null and b/neode-ui/public/assets/img/bg-myapps.webp differ diff --git a/neode-ui/public/assets/img/bg-settings.jpg b/neode-ui/public/assets/img/bg-settings.jpg deleted file mode 100644 index 
f0a0ae58..00000000 Binary files a/neode-ui/public/assets/img/bg-settings.jpg and /dev/null differ diff --git a/neode-ui/public/assets/img/bg-settings.webp b/neode-ui/public/assets/img/bg-settings.webp new file mode 100644 index 00000000..49860647 Binary files /dev/null and b/neode-ui/public/assets/img/bg-settings.webp differ diff --git a/neode-ui/public/assets/video/video-intro.mp4 b/neode-ui/public/assets/video/video-intro.mp4 index 3cf7151a..4ceb62d6 100644 Binary files a/neode-ui/public/assets/video/video-intro.mp4 and b/neode-ui/public/assets/video/video-intro.mp4 differ diff --git a/neode-ui/src/composables/__tests__/useContainersScanTimeout.test.ts b/neode-ui/src/composables/__tests__/useContainersScanTimeout.test.ts new file mode 100644 index 00000000..8826df30 --- /dev/null +++ b/neode-ui/src/composables/__tests__/useContainersScanTimeout.test.ts @@ -0,0 +1,63 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' +import { ref, nextTick } from 'vue' +import { useContainersScanTimeout } from '../useContainersScanTimeout' + +describe('useContainersScanTimeout', () => { + beforeEach(() => { + vi.useFakeTimers() + }) + + afterEach(() => { + vi.useRealTimers() + }) + + it('reflects the real scanned flag when it arrives before the timeout', async () => { + const scanned = ref(false) + const loaded = ref(true) + const { effectiveContainersScanned, scanTimedOut } = useContainersScanTimeout(scanned, loaded, 20_000) + + expect(effectiveContainersScanned.value).toBe(false) + scanned.value = true + await nextTick() + expect(effectiveContainersScanned.value).toBe(true) + expect(scanTimedOut.value).toBe(false) + }) + + it('does not start the timeout until initial data has loaded', async () => { + const scanned = ref(false) + const loaded = ref(false) + const { effectiveContainersScanned } = useContainersScanTimeout(scanned, loaded, 20_000) + + vi.advanceTimersByTime(60_000) + expect(effectiveContainersScanned.value).toBe(false) + + loaded.value = 
true + await nextTick() + vi.advanceTimersByTime(20_000) + expect(effectiveContainersScanned.value).toBe(true) + }) + + it('falls through after the timeout even if the flag never arrives', async () => { + const scanned = ref(false) + const loaded = ref(true) + const { effectiveContainersScanned, scanTimedOut } = useContainersScanTimeout(scanned, loaded, 20_000) + + vi.advanceTimersByTime(19_999) + expect(effectiveContainersScanned.value).toBe(false) + vi.advanceTimersByTime(1) + expect(effectiveContainersScanned.value).toBe(true) + expect(scanTimedOut.value).toBe(true) + }) + + it('cancels the escape hatch when the real flag arrives', async () => { + const scanned = ref(false) + const loaded = ref(true) + const { scanTimedOut } = useContainersScanTimeout(scanned, loaded, 20_000) + + vi.advanceTimersByTime(10_000) + scanned.value = true + await nextTick() + vi.advanceTimersByTime(60_000) + expect(scanTimedOut.value).toBe(false) + }) +}) diff --git a/neode-ui/src/composables/useContainersScanTimeout.ts b/neode-ui/src/composables/useContainersScanTimeout.ts new file mode 100644 index 00000000..982ee06c --- /dev/null +++ b/neode-ui/src/composables/useContainersScanTimeout.ts @@ -0,0 +1,56 @@ +// Escape hatch for the "Checking containers…" / "Checking..." states. +// +// If the server never flips `containers-scanned` to true (e.g. the UI missed +// a websocket broadcast), views gating on it would spin forever. This starts +// a timeout once initial data has loaded and, if the scan flag is still false +// when it fires, treats the scan as complete so the UI falls through to its +// real empty/install states. A periodic store-level resync exists too — this +// is the belt-and-suspenders guarantee that the spinner is always bounded. 
+ + import { computed, getCurrentInstance, onBeforeUnmount, ref, watch, type Ref } from 'vue' + + const DEFAULT_SCAN_TIMEOUT_MS = 20_000 + + export function useContainersScanTimeout( + containersScanned: Ref<boolean>, // real scan flag; when true it cancels the timer and clears scanTimedOut + hasLoadedInitialData: Ref<boolean>, // the timeout is only armed once this is true + timeoutMs: number = DEFAULT_SCAN_TIMEOUT_MS, // delay before the escape hatch fires +) { + const scanTimedOut = ref(false) + let timer: ReturnType<typeof setTimeout> | undefined + + function clearTimer(): void { + if (timer !== undefined) { + clearTimeout(timer) + timer = undefined + } + } + + watch( + [containersScanned, hasLoadedInitialData], + ([scanned, loaded]) => { + if (scanned) { + // Real signal arrived — cancel the escape hatch. + clearTimer() + scanTimedOut.value = false + return + } + if (loaded && timer === undefined && !scanTimedOut.value) { + timer = setTimeout(() => { + scanTimedOut.value = true + timer = undefined + }, timeoutMs) + } + }, + { immediate: true }, + ) + + if (getCurrentInstance()) onBeforeUnmount(clearTimer) + + /** True once the server reports the scan done OR the timeout has elapsed. */ + const effectiveContainersScanned = computed( + () => containersScanned.value || scanTimedOut.value, + ) + + return { effectiveContainersScanned, scanTimedOut } +} diff --git a/neode-ui/src/locales/en.json b/neode-ui/src/locales/en.json index bc632719..00aeddfd 100644 --- a/neode-ui/src/locales/en.json +++ b/neode-ui/src/locales/en.json @@ -96,6 +96,7 @@ "serverStarting": "Server starting up...", "replayIntro": "Replay Intro", "onboarding": "Onboarding", + "restartConfirm": "Are you sure?
This wipes onboarding progress — click again to confirm", "resetting": "Resetting...", "recoveryNote": "Password recovery requires SSH access to the server.", "errorMinLength": "Password must be at least 8 characters", diff --git a/neode-ui/src/locales/es.json b/neode-ui/src/locales/es.json index b4fd127b..f94ff178 100644 --- a/neode-ui/src/locales/es.json +++ b/neode-ui/src/locales/es.json @@ -96,6 +96,7 @@ "serverStarting": "El servidor est\u00e1 iniciando...", "replayIntro": "Repetir introducci\u00f3n", "onboarding": "Configuraci\u00f3n inicial", + "restartConfirm": "\u00bfSeguro? Esto borra el progreso de la configuraci\u00f3n inicial \u2014 haz clic de nuevo para confirmar", "resetting": "Restableciendo...", "recoveryNote": "La recuperaci\u00f3n de contrase\u00f1a requiere acceso SSH al servidor.", "errorMinLength": "La contrase\u00f1a debe tener al menos 8 caracteres", diff --git a/neode-ui/src/stores/sync.ts b/neode-ui/src/stores/sync.ts index 38284333..fcf1cd1f 100644 --- a/neode-ui/src/stores/sync.ts +++ b/neode-ui/src/stores/sync.ts @@ -14,6 +14,9 @@ export const useSyncStore = defineStore('sync', () => { const hasLoadedInitialData = ref(false) let isWsSubscribed = false let isWsConnecting = false + let isRefreshingState = false + let stalenessTimer: ReturnType<typeof setInterval> | null = null + const STALENESS_INTERVAL_MS = 30_000 // Computed const serverInfo = computed(() => data.value?.['server-info']) @@ -21,6 +24,47 @@ export const useSyncStore = defineStore('sync', () => { const peerHealth = computed>(() => data.value?.['peer-health'] || {}) const uiData = computed(() => data.value?.ui) + /** + * Refetch the full state snapshot via RPC and apply it through the same + * path as the post-connect fetch (revision/hasLoadedInitialData handling + * stays identical). Guarded against overlapping fetches.
+ */ + async function refreshStateFromServer(): Promise<void> { + if (isRefreshingState) return + isRefreshingState = true + try { + const freshState = await rpcClient.call<{ data: DataModel }>({ method: 'server.get-state' }) + if (freshState?.data) { + data.value = freshState.data + hasLoadedInitialData.value = true + } + } catch { + // Non-fatal: WebSocket patches will still work + if (import.meta.env.DEV) console.warn('[Store] Failed to refresh state from server') + } finally { + isRefreshingState = false + } + } + + /** + * Belt-and-suspenders against missed broadcasts: while the WebSocket is + * connected, periodically resync the full state so a dropped patch can + * never permanently strand the UI on stale data. + */ + function startStalenessReconciliation(): void { + if (stalenessTimer) return + stalenessTimer = setInterval(() => { + if (wsClient.isConnected()) void refreshStateFromServer() + }, STALENESS_INTERVAL_MS) + } + + function stopStalenessReconciliation(): void { + if (stalenessTimer) { + clearInterval(stalenessTimer) + stalenessTimer = null + } + } + // Actions async function connectWebSocket(): Promise<void> { // Prevent concurrent connection attempts @@ -82,6 +126,10 @@ export const useSyncStore = defineStore('sync', () => { if (import.meta.env.DEV) console.log('[Store] WebSocket already connected') isConnected.value = true isReconnecting.value = false + // Re-entrant call while already connected (e.g. after onboarding + // completes): resync state in case a broadcast was missed.
+ void refreshStateFromServer() + startStalenessReconciliation() return } @@ -89,16 +137,8 @@ export const useSyncStore = defineStore('sync', () => { if (import.meta.env.DEV) console.log('[Store] WebSocket connected') // Fetch fresh state after reconnect to avoid stale patch application - try { - const freshState = await rpcClient.call<{ data: DataModel }>({ method: 'server.get-state' }) - if (freshState?.data) { - data.value = freshState.data - hasLoadedInitialData.value = true - } - } catch { - // Non-fatal: WebSocket patches will still work - if (import.meta.env.DEV) console.warn('[Store] Failed to fetch fresh state after reconnect') - } + await refreshStateFromServer() + startStalenessReconciliation() // Connection state will be updated via the callback if (wsClient.isConnected()) { @@ -158,6 +198,7 @@ export const useSyncStore = defineStore('sync', () => { /** Reset sync state on logout — called by auth store */ function resetOnLogout(): void { + stopStalenessReconciliation() data.value = null hasLoadedInitialData.value = false isWsSubscribed = false diff --git a/neode-ui/src/views/Apps.vue b/neode-ui/src/views/Apps.vue index 413b1a0f..8612d3d2 100644 --- a/neode-ui/src/views/Apps.vue +++ b/neode-ui/src/views/Apps.vue @@ -397,6 +397,7 @@ import { useAppsActions } from './apps/useAppsActions' import { validateSideloadRequest } from './apps/sideloadValidation' import { useMarketplaceApp } from '@/composables/useMarketplaceApp' import { useCollapsingHeaderTabs } from '@/composables/useCollapsingHeaderTabs' +import { useContainersScanTimeout } from '@/composables/useContainersScanTimeout' import { type AppsTab, filterEntriesForTab, isWebOnlyApp, isWebsitePackage, opensInTab, resolveRuntimeLaunchUrl, WEB_ONLY_APPS, WEB_ONLY_APP_URLS, buildAllCategories, useCategoriesWithApps, @@ -461,7 +462,14 @@ const ALL_CATEGORIES = computed(() => buildAllCategories(t)) const SERVICE_CATEGORIES = computed(() => buildServiceCategories(t)) const livePackages = computed(() => 
store.packages || {}) -const containersScanned = computed(() => store.data?.['server-info']?.['status-info']?.['containers-scanned'] !== false) +// Field missing from server data = not scanned yet (consistent with Discover/Marketplace) +const containersScannedRaw = computed(() => store.data?.['server-info']?.['status-info']?.['containers-scanned'] ?? false) +// Escape hatch: never show "Checking containers…" forever — after a timeout, +// fall through to the real (empty) state even if the scanned flag never arrives. +const { effectiveContainersScanned: containersScanned } = useContainersScanTimeout( + containersScannedRaw, + computed(() => store.hasLoadedInitialData), +) const { packages: stablePackages, isUsingLastKnownPackages, diff --git a/neode-ui/src/views/Dashboard.vue b/neode-ui/src/views/Dashboard.vue index e909cb0e..faaf9887 100644 --- a/neode-ui/src/views/Dashboard.vue +++ b/neode-ui/src/views/Dashboard.vue @@ -178,12 +178,12 @@ const backgroundImage = computed(() => { if (mapped) return mapped // Detail/sub pages inherit their parent tab's background so they stay // visually "inside" the section instead of snapping to the home backdrop. 
- if (route.path.startsWith('/dashboard/cloud/')) return 'bg-cloud.jpg' + if (route.path.startsWith('/dashboard/cloud/')) return 'bg-cloud.webp' if (route.path.startsWith('/dashboard/web5/')) return 'bg-web5.jpg' if (route.path.startsWith('/dashboard/server/')) return 'bg-web5.jpg' - if (route.path.startsWith('/dashboard/settings/')) return 'bg-settings.jpg' + if (route.path.startsWith('/dashboard/settings/')) return 'bg-settings.webp' if (isDetailRoute(route.path)) return 'bg-intro.jpg' - return 'bg-home.jpg' + return 'bg-home.webp' }) const isDarkRoute = computed(() => { diff --git a/neode-ui/src/views/Discover.vue b/neode-ui/src/views/Discover.vue index 7e5cc663..e23b6b5f 100644 --- a/neode-ui/src/views/Discover.vue +++ b/neode-ui/src/views/Discover.vue @@ -235,6 +235,7 @@ import { useMarketplaceApp } from '@/composables/useMarketplaceApp' import { useAppLauncherStore } from '@/stores/appLauncher' import { useToast } from '@/composables/useToast' import { useCollapsingHeaderTabs } from '@/composables/useCollapsingHeaderTabs' +import { useContainersScanTimeout } from '@/composables/useContainersScanTimeout' import { APP_STORE_SECTIONS } from './appStoreCategories' import DiscoverHero from './discover/DiscoverHero.vue' import FeaturedApps from './discover/FeaturedApps.vue' @@ -335,7 +336,13 @@ function retryNostr() { } const installedPackages = computed(() => store.data?.['package-data'] || {}) -const containersScanned = computed(() => store.data?.['server-info']?.['status-info']?.['containers-scanned'] ?? false) +const containersScannedRaw = computed(() => store.data?.['server-info']?.['status-info']?.['containers-scanned'] ?? false) +// Escape hatch: never leave app cards on "Checking..." forever — after a +// timeout, treat the scan as done so cards render their normal install state. 
+const { effectiveContainersScanned: containersScanned } = useContainersScanTimeout( + containersScannedRaw, + computed(() => store.hasLoadedInitialData), +) const allApps = computed(() => { diff --git a/neode-ui/src/views/Login.vue b/neode-ui/src/views/Login.vue index 63aff776..1b192c74 100644 --- a/neode-ui/src/views/Login.vue +++ b/neode-ui/src/views/Login.vue @@ -66,7 +66,8 @@ data-form-type="other" class="w-full px-4 py-3 bg-transparent border border-white/20 rounded-lg text-white placeholder-white/40 focus:outline-none focus:border-white/40 focus:ring-1 focus:ring-white/20 transition-colors" :placeholder="t('login.enterPasswordSetup')" - @keydown.enter="handleSetupWithSound" + @keydown.enter="confirmPasswordInputRef?.focus()" + @input="error = null" :disabled="loading || formDisabled" /> @@ -77,6 +78,7 @@ @@ -260,6 +265,7 @@ const requiresTotp = ref(false) const totpCode = ref('') const useBackupCode = ref(false) const totpInputRef = ref(null) +const confirmPasswordInputRef = ref(null) // Server startup state const serverReady = ref(false) @@ -338,6 +344,7 @@ onBeforeUnmount(() => { removeUnlockListeners() if (startupPollTimer) clearTimeout(startupPollTimer) if (startupProgressInterval) clearInterval(startupProgressInterval) + if (confirmRestartTimer) clearTimeout(confirmRestartTimer) }) onMounted(async () => { @@ -536,9 +543,26 @@ function replayIntro() { } const isResettingOnboarding = ref(false) +const confirmingRestartOnboarding = ref(false) +let confirmRestartTimer: ReturnType | null = null async function restartOnboarding() { if (isResettingOnboarding.value) return + // First click arms a confirmation state; only a second explicit click restarts. 
+ if (!confirmingRestartOnboarding.value) { + confirmingRestartOnboarding.value = true + if (confirmRestartTimer) clearTimeout(confirmRestartTimer) + confirmRestartTimer = setTimeout(() => { + confirmingRestartOnboarding.value = false + confirmRestartTimer = null + }, 5000) + return + } + if (confirmRestartTimer) { + clearTimeout(confirmRestartTimer) + confirmRestartTimer = null + } + confirmingRestartOnboarding.value = false isResettingOnboarding.value = true // Local-only reset — no RPC needed since user isn't logged in. // Onboarding pages are all public, so clearing localStorage is enough. diff --git a/neode-ui/src/views/Marketplace.vue b/neode-ui/src/views/Marketplace.vue index 3625a856..faffc006 100644 --- a/neode-ui/src/views/Marketplace.vue +++ b/neode-ui/src/views/Marketplace.vue @@ -170,6 +170,7 @@ import { useMarketplaceApp } from '@/composables/useMarketplaceApp' import { useAppLauncherStore } from '@/stores/appLauncher' import { useToast } from '@/composables/useToast' import { useCollapsingHeaderTabs } from '@/composables/useCollapsingHeaderTabs' +import { useContainersScanTimeout } from '@/composables/useContainersScanTimeout' import { APP_STORE_CATEGORIES, APP_STORE_SECTIONS } from './appStoreCategories' import MarketplaceAppCard from './marketplace/MarketplaceAppCard.vue' import { @@ -286,9 +287,15 @@ const installedPackages = computed(() => { return store.data?.['package-data'] || {} }) -const containersScanned = computed(() => { +const containersScannedRaw = computed(() => { return store.data?.['server-info']?.['status-info']?.['containers-scanned'] ?? false }) +// Escape hatch: never leave app cards on "Checking..." forever — after a +// timeout, treat the scan as done so cards render their normal install state. 
+const { effectiveContainersScanned: containersScanned } = useContainersScanTimeout( + containersScannedRaw, + computed(() => store.hasLoadedInitialData), +) // Combine curated apps with Nostr relay-discovered apps const allApps = computed(() => { diff --git a/neode-ui/src/views/OnboardingWrapper.vue b/neode-ui/src/views/OnboardingWrapper.vue index 358d1546..0a51e7fb 100644 --- a/neode-ui/src/views/OnboardingWrapper.vue +++ b/neode-ui/src/views/OnboardingWrapper.vue @@ -109,16 +109,16 @@ const useVideoBackground = computed(() => { // Note: bg-intro.jpg is used for splash and /onboarding/intro for seamless transition const routeBackgrounds: Record = { '/onboarding/intro': 'bg-intro.jpg', // Video will be used instead - '/onboarding/options': 'bg-intro-4.jpg', + '/onboarding/options': 'bg-intro-4.webp', '/onboarding/path': 'bg-intro-3.jpg', - '/onboarding/seed': 'bg-intro-1.jpg', - '/onboarding/seed-verify': 'bg-intro-1.jpg', - '/onboarding/seed-restore': 'bg-intro-1.jpg', - '/onboarding/did': 'bg-intro-4.jpg', - '/onboarding/identity': 'bg-intro-1.jpg', - '/onboarding/backup': 'bg-intro-6.jpg', + '/onboarding/seed': 'bg-intro-1.webp', + '/onboarding/seed-verify': 'bg-intro-1.webp', + '/onboarding/seed-restore': 'bg-intro-1.webp', + '/onboarding/did': 'bg-intro-4.webp', + '/onboarding/identity': 'bg-intro-1.webp', + '/onboarding/backup': 'bg-intro-6.webp', '/onboarding/verify': 'bg-intro-2.jpg', - '/onboarding/done': 'bg-intro-1.jpg', + '/onboarding/done': 'bg-intro-1.webp', '/login': 'bg-intro.jpg' // Video loops from splash (same as intro) } @@ -126,12 +126,12 @@ const routeBackgrounds: Record = { // identical on every logout. Cycles through bg-intro-1..6 using a // counter persisted to localStorage so subsequent visits advance. 
const LOGIN_BACKGROUNDS = [ - 'bg-intro-1.jpg', + 'bg-intro-1.webp', 'bg-intro-2.jpg', 'bg-intro-3.jpg', - 'bg-intro-4.jpg', - 'bg-intro-5.jpg', - 'bg-intro-6.jpg', + 'bg-intro-4.webp', + 'bg-intro-5.webp', + 'bg-intro-6.webp', ] function pickNextLoginBackground(): string { try { @@ -307,7 +307,7 @@ watch(() => route.path, (newPath, oldPath) => { // Login route: set background immediately, no zoom, no transition (glitch is always-on) if (newPath === '/login') { - currentBackground.value = 'bg-intro-1.jpg' + currentBackground.value = 'bg-intro-1.webp' isTransitioning.value = false isGlitching.value = false return diff --git a/neode-ui/src/views/dashboard/useRouteTransitions.ts b/neode-ui/src/views/dashboard/useRouteTransitions.ts index c10e0c7a..1a127489 100644 --- a/neode-ui/src/views/dashboard/useRouteTransitions.ts +++ b/neode-ui/src/views/dashboard/useRouteTransitions.ts @@ -19,19 +19,19 @@ const WEB5_TAB_ORDER = ['/dashboard/web5', '/dashboard/cloud', '/dashboard/serve /** Route-to-background image mapping */ export const ROUTE_BACKGROUNDS: Record = { - '/dashboard': 'bg-home.jpg', - '/dashboard/': 'bg-home.jpg', - '/dashboard/apps': 'bg-myapps.jpg', - '/dashboard/discover': 'bg-appstore.jpg', - '/dashboard/marketplace': 'bg-appstore.jpg', - '/dashboard/cloud': 'bg-cloud.jpg', - '/dashboard/mesh': 'bg-mesh.jpg', + '/dashboard': 'bg-home.webp', + '/dashboard/': 'bg-home.webp', + '/dashboard/apps': 'bg-myapps.webp', + '/dashboard/discover': 'bg-appstore.webp', + '/dashboard/marketplace': 'bg-appstore.webp', + '/dashboard/cloud': 'bg-cloud.webp', + '/dashboard/mesh': 'bg-mesh.webp', '/dashboard/server': 'bg-network.jpg', '/dashboard/web5': 'bg-web5.jpg', '/dashboard/server/federation': 'bg-web5.jpg', '/dashboard/monitoring': 'bg-web5.jpg', '/dashboard/fleet': 'bg-web5.jpg', - '/dashboard/settings': 'bg-settings.jpg', + '/dashboard/settings': 'bg-settings.webp', '/dashboard/chat': 'bg-aiui.jpg', } diff --git a/scripts/container-doctor.sh 
b/scripts/container-doctor.sh index 60963682..322c8e94 100755 --- a/scripts/container-doctor.sh +++ b/scripts/container-doctor.sh @@ -269,7 +269,7 @@ rpcpassword=$BTC_RPC_PASS rpcallowip=127.0.0.1/32 rpcallowip=10.88.0.0/16 listen=1 -printtoconsole=1 +printtoconsole=0 BCONF log "Updated bitcoin.conf with full RPC settings" fi diff --git a/scripts/deploy-tailscale.sh b/scripts/deploy-tailscale.sh index cc85eb77..51853135 100755 --- a/scripts/deploy-tailscale.sh +++ b/scripts/deploy-tailscale.sh @@ -518,7 +518,7 @@ deploy_node() { if [ -f /var/lib/archipelago/bitcoin/bitcoin.conf ]; then if grep -q 'rpcbind' /var/lib/archipelago/bitcoin/bitcoin.conf 2>/dev/null; then echo ' Cleaning old bitcoin.conf (conflicting rpcbind)...' - printf 'printtoconsole=1\n' | sudo tee /var/lib/archipelago/bitcoin/bitcoin.conf > /dev/null + printf 'printtoconsole=0\n' | sudo tee /var/lib/archipelago/bitcoin/bitcoin.conf > /dev/null sudo chown 100101:100101 /var/lib/archipelago/bitcoin/bitcoin.conf 2>/dev/null fi fi diff --git a/scripts/first-boot-containers.sh b/scripts/first-boot-containers.sh index 5840c415..d7446f3e 100755 --- a/scripts/first-boot-containers.sh +++ b/scripts/first-boot-containers.sh @@ -180,6 +180,10 @@ FBEOF mkdir -p /var/lib/archipelago/fmcd FMCD_PW_FILE=/var/lib/archipelago/fmcd/password [ -s "$FMCD_PW_FILE" ] || head -c 24 /dev/urandom | base64 | tr -dc 'A-Za-z0-9' > "$FMCD_PW_FILE" + # This script runs as root but the container is rootless (container + # uid 0 → host 1000): a root-owned /data leaves fmcd crash-looping + # with "Permission denied (os error 13)". Match manifest data_uid. 
+ chown -R 1000:1000 /var/lib/archipelago/fmcd FMCD_PW="$(cat "$FMCD_PW_FILE")" FMCD_DEFAULT_INVITE="fed11qgqyj3mfwfhksw309uuxywtxxfjrjc35xuexverpxdsnxcnrxucxvenzveskgc3kvvun2c34xp3k2ep38yunzdpexcekxe3hvd3rvvmx8pnrvdenx5mnzvtzqqqjqt0t6pc3s5z0ynqjw9s4njf6svwgu59kweawc0vvrddcjeemw6yyn4pcdp" pull_with_fallback "${FMCD_IMAGE}" @@ -366,7 +370,9 @@ rpcauth=${RPCAUTH} server=1 rpcallowip=0.0.0.0/0 listen=1 -printtoconsole=1 +# printtoconsole=0: journald log-volume fix — bitcoind's datadir debug.log +# already has everything; console duplication spammed journald during IBD. +printtoconsole=0 # ZMQ publishers for LND and other services that need real-time block/tx notifications zmqpubrawblock=tcp://0.0.0.0:28332 zmqpubrawtx=tcp://0.0.0.0:28333 @@ -649,7 +655,7 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -qE 'bitcoin-knots|arch -v /var/lib/archipelago/bitcoin:/home/bitcoin/.bitcoin \ "${BITCOIN_KNOTS_IMAGE}" \ $BTC_EXTRA_ARGS \ - -printtoconsole=1 -dbcache=$BTC_DBCACHE -par=0 -maxconnections=125 -rpcthreads=16 -rpcworkqueue=256 2>>"$LOG"; then + -printtoconsole=0 -dbcache=$BTC_DBCACHE -par=0 -maxconnections=125 -rpcthreads=16 -rpcworkqueue=256 2>>"$LOG"; then log "Bitcoin Knots started" else log "Bitcoin Knots failed (may already exist)" @@ -1055,6 +1061,9 @@ if ! $DOCKER ps --format '{{.Names}}' 2>/dev/null | grep -q '^fedimint-clientd$' mkdir -p /var/lib/archipelago/fmcd FMCD_PW_FILE=/var/lib/archipelago/fmcd/password [ -s "$FMCD_PW_FILE" ] || head -c 24 /dev/urandom | base64 | tr -dc 'A-Za-z0-9' > "$FMCD_PW_FILE" + # Root-created /data breaks the rootless container (uid 0 → host 1000): + # fmcd crash-loops with "Permission denied (os error 13)". Match data_uid. 
+ chown -R 1000:1000 /var/lib/archipelago/fmcd FMCD_PW="$(cat "$FMCD_PW_FILE")" FMCD_DEFAULT_INVITE="fed11qgqyj3mfwfhksw309uuxywtxxfjrjc35xuexverpxdsnxcnrxucxvenzveskgc3kvvun2c34xp3k2ep38yunzdpexcekxe3hvd3rvvmx8pnrvdenx5mnzvtzqqqjqt0t6pc3s5z0ynqjw9s4njf6svwgu59kweawc0vvrddcjeemw6yyn4pcdp" $DOCKER run -d --name fedimint-clientd --restart unless-stopped \ diff --git a/scripts/reconcile-containers.sh b/scripts/reconcile-containers.sh index 8b9b2c34..5ac09c76 100755 --- a/scripts/reconcile-containers.sh +++ b/scripts/reconcile-containers.sh @@ -722,10 +722,11 @@ ensure_bitcoin_conf() { hash=$(echo -n "$BITCOIN_RPC_PASS" | openssl dgst -sha256 -hmac "$salt" -hex 2>/dev/null | awk '{print $NF}') rpcauth="${BITCOIN_RPC_USER}:${salt}\$${hash}" # Only rpcauth + printtoconsole here — all other options are in SPEC_CUSTOM_ARGS - # to avoid duplicate bind conflicts + # to avoid duplicate bind conflicts. printtoconsole=0: datadir debug.log + # already has everything; console duplication spammed journald during IBD. sudo tee "$BITCOIN_CONF" >/dev/null << BTCEOF rpcauth=${rpcauth} -printtoconsole=1 +printtoconsole=0 BTCEOF info "Generated bitcoin.conf" fi