test(gate): fix two false-failing lifecycle tests (not product bugs)

- immich restart: bump wait 120s->240s. Restart = ordered stop+start of the 3-container stack (postgres->redis->server w/ DB migrations), so it needs at least as long as the start test (180s) — the old 120s was inconsistent and false-failed on loaded nodes. immich does return to running.
- fedimint orphan check: the unanchored 'total' regex (^fedimint, no trailing $) counts the legitimate fedimint-clientd (dual-ecash bridge), but the anchored 'known' regex omitted it -> total > known, a false orphan on every node running fedimint-clientd. Add fedimint-clientd to known.

Both run as LOCAL podman/systemctl on the gate runner, so they test the runner node (.116), not the RPC target — surfaced while driving the .228 gate green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-22 14:11:35 -04:00 · 2026-06-22 14:11:35 -04:00 · 53b8e47f1d
commit 53b8e47f1d
parent f4727bfdb3
2 changed files with 10 additions and 2 deletions
--- a/tests/lifecycle/bats/fedimint.bats
+++ b/tests/lifecycle/bats/fedimint.bats
@ -45,8 +45,12 @@ fedimint_skip_if_absent() {
  local total known
  total=$(podman ps -a --format '{{.Names}}' \
    | grep -Ec '^(fedimint|fedimintd|fedimint-gateway)' || true)
+  # `fedimint-clientd` (the dual-ecash HTTP bridge) is a legitimate, known
+  # container — and the unanchored `total` regex above counts it (it starts
+  # with "fedimint"). It must therefore be in the known set too, or every node
+  # running fedimint-clientd false-fails this orphan check.
  known=$(podman ps -a --format '{{.Names}}' \
-    | grep -Ec '^(fedimint|fedimint-gateway)$' || true)
+    | grep -Ec '^(fedimint|fedimint-clientd|fedimint-gateway)$' || true)
  [ "$total" -eq "$known" ]
 }

--- a/tests/lifecycle/bats/immich.bats
+++ b/tests/lifecycle/bats/immich.bats
@ -78,7 +78,11 @@ teardown_file() {
  [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
  run rpc_result package.restart '{"id":"immich"}'
  [ "$status" -eq 0 ]
-  run wait_for_container_status immich running 120
+  # Restart = ordered stop+start of the whole 3-container stack (postgres→redis→
+  # server, with the server doing DB-readiness + migrations on boot), so it needs
+  # at least as long as `start` (180s) — more, since it stops first. The old 120s
+  # was inconsistent with the start test and false-failed on heavily-loaded nodes.
+  run wait_for_container_status immich running 240
  [ "$status" -eq 0 ]
 }