From 992b673b205f915bbcdba050f5a4a53e03342dae Mon Sep 17 00:00:00 2001 From: archipelago Date: Wed, 29 Apr 2026 14:50:33 -0400 Subject: [PATCH] chore: release v1.7.46-alpha MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to v1.7.45-alpha closing the remaining tasks identified by the resilience sweeps + the new bitcoin orphan / install-fail-vanish bugs. User-visible: - Health monitor: stop paging on orphaned containers from variant switches - Install fail: card stays visible (was vanishing) with error message - Stack pull progress: interpolate 20→70% (was stuck at 20%) - docker.io → lfg2025 mirror: bitcoin/gitea/nextcloud/valkey Internal: - Resilience harness — install-wait uses expected_containers_for, ui+auth probes retry with 60s backoff, dep-snapshot fix - InstallProgress gains optional `message` field (frontend renders it when phase is None) binary $(stat -c %s releases/v1.7.46-alpha/archipelago) sha256:$(sha256sum releases/v1.7.46-alpha/archipelago | awk '{print $1}') tarball $(stat -c %s releases/v1.7.46-alpha/archipelago-frontend-1.7.46-alpha.tar.gz) sha256:$(sha256sum releases/v1.7.46-alpha/archipelago-frontend-1.7.46-alpha.tar.gz | awk '{print $1}') Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 8 +++ app-catalog/catalog.json | 6 +-- core/Cargo.lock | 2 +- core/archipelago/Cargo.toml | 2 +- .../src/api/rpc/package/async_lifecycle.rs | 25 +++++++-- .../src/api/rpc/package/progress.rs | 3 ++ .../archipelago/src/api/rpc/package/stacks.rs | 4 +- core/archipelago/src/data_model.rs | 6 +++ core/archipelago/src/health_monitor.rs | 14 +++++ neode-ui/package.json | 2 +- neode-ui/public/catalog.json | 6 +-- neode-ui/src/stores/server.ts | 30 ++++++++++- neode-ui/src/types/api.ts | 3 ++ scripts/resilience/resilience.sh | 53 +++++++++++++++---- 14 files changed, 137 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2f887c2..0acc0a83 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## v1.7.46-alpha (2026-04-29) + +- Health monitor no longer pages "Auto-restart failed" for orphaned containers. After a variant switch (bitcoin-core ↔ bitcoin-knots) the previous variant's container could survive uninstall and the health monitor would try restarting it forever. Now skipped silently with a debug log. +- Apps no longer disappear from My Apps when an install fails. The card stays visible with state=Stopped so the user can retry or uninstall, with the failure reason surfaced via the new install_progress.message field. +- "Downloading…" progress now actually advances during multi-image stack pulls. Was sticking at 20% until all pulls finished; now interpolates 20%→70% based on which image of N has landed. +- Pulled four docker.io images (bitcoin, gitea, nextcloud, valkey) into the lfg2025 registries on OVH and tx1138. Removes a docker.io dependency from first-boot installs. +- Resilience harness improvements: install-fail entries no longer vanish, install/uninstall/probe cells are timing-tolerant (60s retry on ui_probe and auth_probe), dep snapshots no longer leak companion containers into the dependent app's "new containers" set. + ## v1.7.45-alpha (2026-04-29) - Bitcoin RPC auth is durable. The dashboard reliably connects across container restart, image update, and reboot. Was failing on registry-pulled images that shipped a stale baked-in password. 
diff --git a/app-catalog/catalog.json b/app-catalog/catalog.json index 99428db1..5fee9801 100644 --- a/app-catalog/catalog.json +++ b/app-catalog/catalog.json @@ -31,7 +31,7 @@ "author": "Bitcoin Core contributors", "category": "money", "tier": "optional", - "dockerImage": "docker.io/bitcoin/bitcoin:28.4", + "dockerImage": "146.59.87.168:3000/lfg2025/bitcoin:28.4", "repoUrl": "https://github.com/bitcoin/bitcoin" }, { @@ -125,7 +125,7 @@ "icon": "/assets/img/app-icons/gitea.svg", "author": "Gitea", "category": "development", - "dockerImage": "docker.io/gitea/gitea:1.23", + "dockerImage": "146.59.87.168:3000/lfg2025/gitea:1.23", "repoUrl": "https://gitea.com" }, { @@ -263,7 +263,7 @@ "icon": "/assets/img/app-icons/nextcloud.webp", "author": "Nextcloud", "category": "data", - "dockerImage": "docker.io/nextcloud:28", + "dockerImage": "146.59.87.168:3000/lfg2025/nextcloud:28", "repoUrl": "https://github.com/nextcloud/server" } ] diff --git a/core/Cargo.lock b/core/Cargo.lock index c7b02f5b..fe457273 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -80,7 +80,7 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "archipelago" -version = "1.7.45-alpha" +version = "1.7.46-alpha" dependencies = [ "anyhow", "archipelago-container", diff --git a/core/archipelago/Cargo.toml b/core/archipelago/Cargo.toml index 1e2cbe0f..e73ebde9 100644 --- a/core/archipelago/Cargo.toml +++ b/core/archipelago/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "archipelago" -version = "1.7.45-alpha" +version = "1.7.46-alpha" edition = "2021" description = "Archipelago Bitcoin Node OS - Native backend" authors = ["Archipelago Team"] diff --git a/core/archipelago/src/api/rpc/package/async_lifecycle.rs b/core/archipelago/src/api/rpc/package/async_lifecycle.rs index 20fa5c1f..cac8be94 100644 --- a/core/archipelago/src/api/rpc/package/async_lifecycle.rs +++ b/core/archipelago/src/api/rpc/package/async_lifecycle.rs @@ -113,11 +113,26 @@ impl RpcHandler { 
Err(e) => { error!("package.install {} failed: {:#}", package_id_spawn, e); install_log(&format!("INSTALL FAIL: {} — {:#}", package_id_spawn, e)).await; - // No pre-state to revert to — remove the entry entirely so - // the UI shows the app as not installed. The next package - // scan will re-create it only if podman actually has a - // container for it (partial install recovery). - remove_package_entry(&handler.state_manager, &package_id_spawn).await; + // Don't remove the entry — that's what made the card + // vanish from My Apps mid-install / between retry-loop + // attempts (e.g. tailscale's entrypoint failure). Leave + // the entry visible with state=Stopped + the install + // error in install_progress.message so the user can see + // what went wrong and decide whether to retry or + // uninstall. clear_install_progress would erase the + // message, so we set it explicitly here instead. + let err_msg = format!("Install failed: {:#}", e); + let (mut data, _) = handler.state_manager.get_snapshot().await; + if let Some(entry) = data.package_data.get_mut(&package_id_spawn) { + entry.state = PackageState::Stopped; + entry.install_progress = Some(crate::data_model::InstallProgress { + size: 0, + downloaded: 0, + phase: None, + message: Some(err_msg), + }); + handler.state_manager.update_data(data).await; + } } } }); diff --git a/core/archipelago/src/api/rpc/package/progress.rs b/core/archipelago/src/api/rpc/package/progress.rs index 3ef0e89e..671651f3 100644 --- a/core/archipelago/src/api/rpc/package/progress.rs +++ b/core/archipelago/src/api/rpc/package/progress.rs @@ -25,6 +25,7 @@ impl RpcHandler { size, downloaded, phase: existing_phase, + message: None, }); self.state_manager.update_data(data).await; } @@ -55,6 +56,7 @@ impl RpcHandler { size, downloaded, phase: Some(phase), + message: None, }); self.state_manager.update_data(data).await; } @@ -97,6 +99,7 @@ impl RpcHandler { size: total, downloaded, phase: existing_phase, + message: None, }); 
state_manager.update_data(data).await; } diff --git a/core/archipelago/src/api/rpc/package/stacks.rs b/core/archipelago/src/api/rpc/package/stacks.rs index 10f5d668..5c4b9400 100644 --- a/core/archipelago/src/api/rpc/package/stacks.rs +++ b/core/archipelago/src/api/rpc/package/stacks.rs @@ -201,7 +201,7 @@ impl RpcHandler { let images = [ "146.59.87.168:3000/lfg2025/immich-postgres:14-vectorchord0.4.3-pgvectors0.2.0", - "docker.io/valkey/valkey:7-alpine", + "146.59.87.168:3000/lfg2025/valkey:7-alpine", "146.59.87.168:3000/lfg2025/immich-server:release", ]; self.set_install_phase("immich", InstallPhase::PullingImage) @@ -300,7 +300,7 @@ impl RpcHandler { "--health-cmd=valkey-cli ping || exit 1", "--health-interval=30s", "--health-retries=3", - "docker.io/valkey/valkey:7-alpine", + "146.59.87.168:3000/lfg2025/valkey:7-alpine", ]) .output() .await; diff --git a/core/archipelago/src/data_model.rs b/core/archipelago/src/data_model.rs index 7f3c4444..2eadd12d 100644 --- a/core/archipelago/src/data_model.rs +++ b/core/archipelago/src/data_model.rs @@ -255,6 +255,12 @@ pub struct InstallProgress { /// a fixed UI percentage and a descriptive label. #[serde(default, skip_serializing_if = "Option::is_none")] pub phase: Option<InstallPhase>, + /// Optional explicit message — used to surface install failures so + /// the UI can keep the app card visible with an error description + /// instead of silently removing the entry on fail. UI's PHASE_INFO + /// label takes precedence when phase is set. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub message: Option<String>, } /// Phases of the install / update pipeline, surfaced to the UI so users diff --git a/core/archipelago/src/health_monitor.rs b/core/archipelago/src/health_monitor.rs index d506c05b..1aadbf7c 100644 --- a/core/archipelago/src/health_monitor.rs +++ b/core/archipelago/src/health_monitor.rs @@ -539,6 +539,20 @@ pub fn spawn_health_monitor(state: Arc, data_dir: PathBuf) { debug!("Skipping uninstalled container: {}", container.name); continue; } + } else { + // Orphan: container exists in podman but archipelago has + // no package_data entry for it. Common after a variant + // switch (bitcoin-core ↔ bitcoin-knots) where the + // uninstall removed the package entry but the prior + // variant's container survived in stopped state. Without + // this guard the health monitor pages every minute with + // "Auto-restart failed (attempt N/10)" for an app the + // user can no longer see in the dashboard. + debug!( + "Skipping orphan container (not in package_data): {}", + container.name + ); + continue; } if container.healthy { diff --git a/neode-ui/package.json b/neode-ui/package.json index 84aed1d8..7b1b7f04 100644 --- a/neode-ui/package.json +++ b/neode-ui/package.json @@ -1,7 +1,7 @@ { "name": "neode-ui", "private": true, - "version": "1.7.45-alpha", + "version": "1.7.46-alpha", "type": "module", "scripts": { "start": "./start-dev.sh", diff --git a/neode-ui/public/catalog.json b/neode-ui/public/catalog.json index 99428db1..5fee9801 100644 --- a/neode-ui/public/catalog.json +++ b/neode-ui/public/catalog.json @@ -31,7 +31,7 @@ "author": "Bitcoin Core contributors", "category": "money", "tier": "optional", - "dockerImage": "docker.io/bitcoin/bitcoin:28.4", + "dockerImage": "146.59.87.168:3000/lfg2025/bitcoin:28.4", "repoUrl": "https://github.com/bitcoin/bitcoin" }, { @@ -125,7 +125,7 @@ "icon": "/assets/img/app-icons/gitea.svg", "author": "Gitea", "category": "development", - "dockerImage": 
"docker.io/gitea/gitea:1.23", + "dockerImage": "146.59.87.168:3000/lfg2025/gitea:1.23", "repoUrl": "https://gitea.com" }, { @@ -263,7 +263,7 @@ "icon": "/assets/img/app-icons/nextcloud.webp", "author": "Nextcloud", "category": "data", - "dockerImage": "docker.io/nextcloud:28", + "dockerImage": "146.59.87.168:3000/lfg2025/nextcloud:28", "repoUrl": "https://github.com/nextcloud/server" } ] diff --git a/neode-ui/src/stores/server.ts b/neode-ui/src/stores/server.ts index 78a9c29f..3b794665 100644 --- a/neode-ui/src/stores/server.ts +++ b/neode-ui/src/stores/server.ts @@ -63,18 +63,44 @@ export const useServerStore = defineStore('server', () => { if (progress.phase) { const info = PHASE_INFO[progress.phase] if (info) { + // Within the PullingImage band (20→70%), interpolate the + // bar based on how many images / bytes have landed so far. + // Without this, multi-container stacks (indeedhub: 7, + // mempool: 3, btcpay: 4) just sit at 20% for the entire + // pull duration — exactly what the user reported as + // "Downloading sticks at 20% mostly". X-of-N progress + // comes from set_install_progress(i, n) in stacks.rs. + let bandProgress = info.progress + if (progress.phase === 'pulling-image' && progress.size > 0) { + const fraction = Math.min(progress.downloaded / progress.size, 1) + // PullingImage band: 20% → 70%, so 50pp to interpolate over. + bandProgress = 20 + Math.round(fraction * 50) + } // Only advance forward — never let the bar step backward // between patches (can happen briefly during scan merges). - const nextProgress = Math.max(current.progress, info.progress) + const nextProgress = Math.max(current.progress, bandProgress) + // Show explicit message when set (e.g. install-fail descriptions + // surfaced via install_progress.message) — otherwise PHASE_INFO label. 
+ const label = progress.message || info.message installingApps.value.set(appId, { ...current, status: info.status, progress: nextProgress, - message: info.message, + message: label, }) continue } } + // No phase but message is set (install-fail path) — show the message + // even if PHASE_INFO doesn't apply. Status stays whatever the watcher + // currently has. + if (progress.message) { + installingApps.value.set(appId, { + ...current, + message: progress.message, + }) + continue + } // Fallback: byte counters (rare — podman usually doesn't // emit parseable progress on a piped stderr). const pct = progress.size > 0 ? Math.round((progress.downloaded / progress.size) * 100) : 0 diff --git a/neode-ui/src/types/api.ts b/neode-ui/src/types/api.ts index 8f787164..3f0773ff 100644 --- a/neode-ui/src/types/api.ts +++ b/neode-ui/src/types/api.ts @@ -166,6 +166,9 @@ export interface InstallProgress { * counters — podman pull doesn't emit parseable progress when * stderr is piped, so byte counters are usually (0,0). */ phase?: InstallPhase + /** Optional explicit message — surfaced on install failures so the + * UI can show what went wrong instead of silently removing the card. */ + message?: string } // RPC Request/Response types diff --git a/scripts/resilience/resilience.sh b/scripts/resilience/resilience.sh index 9ca86638..a9780ae5 100755 --- a/scripts/resilience/resilience.sh +++ b/scripts/resilience/resilience.sh @@ -129,19 +129,34 @@ snapshot_containers() { ssh_run "podman ps -a --format '{{.Names}}' | sort" } -# Whether $app currently has any of its expected containers running. Uses +# Whether $app currently has ALL of its expected containers running. Uses # the per-app metadata table in lib.sh (expected_containers_for) so variant # apps (bitcoin-knots/bitcoin-core sharing slots) and stacks are detected # correctly. Falls back to name-prefix match for apps the table doesn't know. +# +# Returns true only when every expected container is present. 
Earlier +# versions returned true on ANY match — that caused dep installs (e.g. +# bitcoin-knots required by btcpay) to be declared "installed" as soon as +# the backend container appeared, before the UI companion (archy-bitcoin-ui) +# was up. The before-snapshot then missed the companion, the after-snapshot +# caught it, and it leaked into the dependent app's "new containers" set, +# false-positive-FAILing stop/uninstall when the companion (correctly) did +# not respond to the dependent app's package.stop. app_already_installed() { local app="$1" local snap; snap=$(snapshot_containers) local expected expected=$(expected_containers_for "$app") - local c - for c in $expected; do - echo "$snap" | grep -qxF "$c" && return 0 - done + if [ -n "$expected" ] && [ "$expected" != "$app" ]; then + local c missing=0 + for c in $expected; do + echo "$snap" | grep -qxF "$c" || missing=1 + done + [ "$missing" -eq 0 ] && return 0 + # Fall through to prefix match if the expected_containers list has + # gaps; a partial install still counts as "installed enough" for + # preclean purposes. + fi # Generic prefix fallback for apps not in the expected_containers_for table. echo "$snap" | grep -qE "^(${app}|${app}-|archy-${app}|archy-${app}-)" } @@ -291,8 +306,18 @@ run_app_matrix() { fi # ── 02 ui_probe ────────────────────────────────────────────── + # Retry with backoff — install just finished, but the app's backend + # (fedimint, immich, mempool stack) may take 30+s to be ready to serve + # HTTP. Probing immediately false-positive-FAILed those apps; pass on + # first 2xx/3xx within 60s. local code - code=$(probe_app_proxy "$app") + local ui_deadline=$(($(date +%s) + 60)) + while :; do + code=$(probe_app_proxy "$app") + [[ "$code" =~ ^(2[0-9][0-9]|3[0-9][0-9])$ ]] && break + [ "$(date +%s)" -ge "$ui_deadline" ] && break + sleep 5 + done # Accept all 2xx/3xx — proxy reaches backend, app may redirect to login, # serve OAuth flow (307), or use 308 permanent. 
401/403 still fail because # those mean "backend reached, app rejected request" which is the @@ -300,17 +325,27 @@ run_app_matrix() { if [[ "$code" =~ ^(2[0-9][0-9]|3[0-9][0-9])$ ]]; then record "$app" ui_probe PASS "HTTP $code" else - record "$app" ui_probe FAIL "HTTP $code (expected 2xx/3xx)" + record "$app" ui_probe FAIL "HTTP $code (expected 2xx/3xx, retried 60s)" fi # ── 03 auth_probe (only for apps with a credentialed/data endpoint) ── + # Same backoff treatment: bitcoin-ui's nginx config bind-mount is + # picked up at start, but the bitcoin-core backend may not have + # accepted RPC connections yet on a fresh install. local probe_code; local pass_codes + pass_codes=$(auth_probe_pass_codes "$app") if probe_code=$(auth_probe_for "$app" 2>/dev/null) && [ -n "$probe_code" ]; then - pass_codes=$(auth_probe_pass_codes "$app") + local auth_deadline=$(($(date +%s) + 60)) + while :; do + echo " $pass_codes " | grep -qF " $probe_code " && break + [ "$(date +%s)" -ge "$auth_deadline" ] && break + sleep 5 + probe_code=$(auth_probe_for "$app" 2>/dev/null) || break + done if echo " $pass_codes " | grep -qF " $probe_code "; then record "$app" auth_probe PASS "HTTP $probe_code" else - record "$app" auth_probe FAIL "HTTP $probe_code (expected one of: $pass_codes — credential plumbing broken)" + record "$app" auth_probe FAIL "HTTP $probe_code (expected one of: $pass_codes; retried 60s — credential plumbing broken)" fi else record "$app" auth_probe SKIP "no authenticated probe defined"