From 53b8e47f1de4e82e480724d293c4d7e9201014f5 Mon Sep 17 00:00:00 2001 From: archipelago Date: Mon, 22 Jun 2026 14:11:35 -0400 Subject: [PATCH] test(gate): fix two false-failing lifecycle tests (not product bugs) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - immich restart: bump wait 120s->240s. Restart = ordered stop+start of the 3-container stack (postgres->redis->server w/ DB migrations), so it needs at least as long as the start test (180s) — the old 120s was inconsistent and false-failed on loaded nodes. immich does return to running. - fedimint orphan check: the prefix-matching 'total' regex (^fedimint, no trailing $) counts the legitimate fedimint-clientd (dual-ecash bridge) but the fully anchored 'known' regex omitted it -> total > known, i.e. a false orphan on every node running fedimint-clientd. Add fedimint-clientd to known. Both run as LOCAL podman/systemctl on the gate runner, so they test the runner node (.116), not the RPC target — surfaced while driving the .228 gate green. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/lifecycle/bats/fedimint.bats | 6 +++++- tests/lifecycle/bats/immich.bats | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/lifecycle/bats/fedimint.bats b/tests/lifecycle/bats/fedimint.bats index 74d9c877..1c6338ae 100644 --- a/tests/lifecycle/bats/fedimint.bats +++ b/tests/lifecycle/bats/fedimint.bats @@ -45,8 +45,12 @@ fedimint_skip_if_absent() { local total known total=$(podman ps -a --format '{{.Names}}' \ | grep -Ec '^(fedimint|fedimintd|fedimint-gateway)' || true) + # `fedimint-clientd` (the dual-ecash HTTP bridge) is a legitimate, known + # container — and the unanchored `total` regex above counts it (it starts + # with "fedimint"). It must therefore be in the known set too, or every node + # running fedimint-clientd false-fails this orphan check. 
known=$(podman ps -a --format '{{.Names}}' \ - | grep -Ec '^(fedimint|fedimint-gateway)$' || true) + | grep -Ec '^(fedimint|fedimint-clientd|fedimint-gateway)$' || true) [ "$total" -eq "$known" ] } diff --git a/tests/lifecycle/bats/immich.bats b/tests/lifecycle/bats/immich.bats index 3ee6b60e..b3779875 100644 --- a/tests/lifecycle/bats/immich.bats +++ b/tests/lifecycle/bats/immich.bats @@ -78,7 +78,11 @@ teardown_file() { [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set" run rpc_result package.restart '{"id":"immich"}' [ "$status" -eq 0 ] - run wait_for_container_status immich running 120 + # Restart = ordered stop+start of the whole 3-container stack (postgres→redis→ + # server, with the server doing DB-readiness + migrations on boot), so it needs + # at least as long as `start` (180s) — more, since it stops first. The old 120s + # was inconsistent with the start test and false-failed on heavily-loaded nodes. + run wait_for_container_status immich running 240 [ "$status" -eq 0 ] }