fix(orchestrator,ui): stop crash-looping orphan stack members; dedupe Electrum launch overlay

- crash_recovery.rs: stack boot/runtime recovery (immich/indeedhub/netbird) now
  requires the stack's core dependency container to exist before touching any
  sibling, instead of firing on any leftover container. Fixes an infinite
  120s-interval crash loop where orphan debris from a partial/failed install
  (indeedhub-api with no indeedhub-postgres ever created) was repeatedly
  force-restarted against a dependency that doesn't exist, which also blocked
  a real reinstall via container name conflicts.
- AppSessionFrame.vue: the generic app-loading overlay and the ElectrumX
  sync-in-progress overlay could render simultaneously (same z-index) during
  launch. The sync screen is strictly more informative, so it now takes
  precedence instead of the two stacking on top of each other.

Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
This commit is contained in:
archipelago 2026-07-01 07:02:01 -04:00
parent 5b7cd5d5d0
commit d414ae3daa
2 changed files with 47 additions and 9 deletions

View File

@ -415,7 +415,7 @@ async fn start_stopped_app_stacks(data_dir: &Path) -> RecoveryReport {
};
for stack in stack_recovery_specs() {
if !stack_has_any_container(stack).await {
if !stack_anchor_container_exists(stack).await {
continue;
}
@ -619,6 +619,11 @@ struct StackRecoverySpec {
network: &'static str,
aliases: &'static [(&'static str, &'static str)],
containers: &'static [&'static str],
/// The stack's core dependency (its DB / server container) — every other
/// member depends on this being present. Used to distinguish "a genuinely
/// installed stack has a crashed member" from "orphan debris from a
/// partial/failed install" (see `stack_anchor_container_exists`).
anchor: &'static str,
}
fn stack_recovery_specs() -> &'static [StackRecoverySpec] {
@ -632,6 +637,7 @@ fn stack_recovery_specs() -> &'static [StackRecoverySpec] {
("immich_server", "immich_server"),
],
containers: &["immich_postgres", "immich_redis", "immich_server"],
anchor: "immich_postgres",
},
StackRecoverySpec {
name: "indeedhub",
@ -653,6 +659,7 @@ fn stack_recovery_specs() -> &'static [StackRecoverySpec] {
"indeedhub-ffmpeg",
"indeedhub",
],
anchor: "indeedhub-postgres",
},
StackRecoverySpec {
name: "netbird",
@ -663,17 +670,20 @@ fn stack_recovery_specs() -> &'static [StackRecoverySpec] {
("netbird", "netbird"),
],
containers: &["netbird-server", "netbird-dashboard", "netbird"],
anchor: "netbird-server",
},
]
}
async fn stack_has_any_container(stack: &StackRecoverySpec) -> bool {
for container in stack.containers {
if container_state(container).await.is_some() {
return true;
}
}
false
/// Returns `true` when the stack's anchor container (its core dependency)
/// exists, whether or not it is currently running — only existence is
/// checked, not health.
///
/// A `false` result means any sibling containers still present are orphan
/// debris from a partial or already-uninstalled install rather than a
/// legitimately installed stack with a crashed member. Blindly restarting
/// such siblings only crash-loops them forever against a dependency that was
/// never created (indeedhub-api on `.116`, 2026-07-01: retried every 120s
/// against a nonexistent indeedhub-postgres).
async fn stack_anchor_container_exists(stack: &StackRecoverySpec) -> bool {
    let anchor_state = container_state(stack.anchor).await;
    anchor_state.is_some()
}
async fn repair_stack_network_aliases(stack: &StackRecoverySpec) {
@ -1059,4 +1069,27 @@ mod tests {
true
));
}
#[test]
fn stack_recovery_anchor_is_the_stacks_own_core_dependency() {
    let specs = stack_recovery_specs();

    // An anchor has to name one of the stack's own containers (typically the
    // DB/server the others depend on). A misspelled anchor would silently
    // switch off orphan-debris protection for that stack.
    for spec in specs {
        assert!(
            spec.containers.contains(&spec.anchor),
            "{}: anchor {} not among its own containers",
            spec.name,
            spec.anchor
        );
    }

    // Pin the indeedhub anchor to its expected container name.
    let indeedhub = specs
        .iter()
        .find(|spec| spec.name == "indeedhub")
        .unwrap();
    assert_eq!(indeedhub.anchor, "indeedhub-postgres");
}
}

View File

@ -1,7 +1,12 @@
<template>
<div class="relative flex-1 min-h-0 bg-black/40 overflow-hidden app-session-frame-safe">
<Transition name="content-fade">
<AppLoadingScreen v-if="loading" :icon="appIcon" :title="appTitle" :progress="loadProgress" />
<!-- Suppressed while the ElectrumX sync overlay below is showing: both
conditions can be true at once during launch (generic loader fires
first, then sync status arrives), and the sync screen is strictly
more informative, so it takes precedence instead of the two
rendering on top of each other. -->
<AppLoadingScreen v-if="loading && !(electrsSync && !electrsSync.stale)" :icon="appIcon" :title="appTitle" :progress="loadProgress" />
</Transition>
<!-- ElectrumX sync screen shown before the real UI while the on-chain