From daa8fb4891dd40c235ea46c5e749966ee66431f6 Mon Sep 17 00:00:00 2001
From: archipelago
Date: Wed, 1 Jul 2026 13:59:06 -0400
Subject: [PATCH] fix(tests): make required-stack-destructive.bats portable across app rosters

Same class of bug as required-stack.bats: hardcoded required_containers
included mempool/mempool-api unconditionally, so a node without the
mempool stack (e.g. .5) hard-fails restarting a container that was never
installed, and waits out full 180-240s timeouts probing endpoints that
will never come up. Likely explains .5's abnormally long (2216s)
iteration 1 runtime during the current multinode-pass run.

Same skip-if-absent fix as the prior commit.

Co-Authored-By: Claude Sonnet 5
---
 .../bats/required-stack-destructive.bats      | 54 +++++++++++++------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/tests/lifecycle/bats/required-stack-destructive.bats b/tests/lifecycle/bats/required-stack-destructive.bats
index b2666ee4..9d89c113 100755
--- a/tests/lifecycle/bats/required-stack-destructive.bats
+++ b/tests/lifecycle/bats/required-stack-destructive.bats
@@ -12,6 +12,20 @@ required_containers=(
   "mempool-api"
 )
 
+container_installed() {
+  podman ps -a --format '{{.Names}}' | grep -Fx "$1" >/dev/null
+}
+
+# Print the required_containers that are installed on this node, one per
+# line, so a node without an optional stack (e.g. mempool) is not hard-failed.
+# Returns 0 even when the last entry is absent: bats fails on non-zero $(...).
+installed_required_containers() {
+  local c
+  for c in "${required_containers[@]}"; do
+    if container_installed "$c"; then printf '%s\n' "$c"; fi
+  done
+}
+
 wait_running() {
   local name="$1"
   local timeout="${2:-120}"
@@ -60,19 +74,23 @@ restart_with_retry() {
 @test "restart each required service container and verify it recovers" {
   [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
 
-  for c in "${required_containers[@]}"; do
+  local targets; targets="$(installed_required_containers)"
+  [[ -n "$targets" ]] || skip "none of required_containers installed on this node"
+  while IFS= read -r c; do
     run restart_with_retry "$c" 4
     [ "$status" -eq 0 ]
     run wait_running "$c" 180
     [ "$status" -eq 0 ]
-  done
+  done <<< "$targets"
 }
 
 @test "required endpoints still respond after restarts" {
   [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
 
-  run wait_http_ok "http://127.0.0.1:8334/" 180
-  [ "$status" -eq 0 ]
+  if container_installed archy-bitcoin-ui; then
+    run wait_http_ok "http://127.0.0.1:8334/" 180
+    [ "$status" -eq 0 ]
+  fi
 
   # :8081 is nginx-proxy-manager — an OPTIONAL app (not in required_containers).
   # Only assert it when NPM is actually installed on this node; otherwise the
@@ -82,17 +100,23 @@ restart_with_retry() {
     [ "$status" -eq 0 ]
   fi
 
-  run wait_http_ok "http://127.0.0.1:4080/" 180
-  [ "$status" -eq 0 ]
+  if container_installed mempool; then
+    run wait_http_ok "http://127.0.0.1:4080/" 180
+    [ "$status" -eq 0 ]
+  fi
 
-  run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 240
-  [ "$status" -eq 0 ]
+  if container_installed mempool-api; then
+    run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 240
+    [ "$status" -eq 0 ]
+  fi
 
-  # lnd RPC readiness lags container 'running' (wallet unlock + graph sync) —
-  # retry rather than single-shot. See lnd.bats.
-  run sh -lc 'for i in $(seq 1 60); do
-    podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null 2>&1 && exit 0
-    sleep 3
-  done; exit 1'
-  [ "$status" -eq 0 ]
+  if container_installed lnd; then
+    # lnd RPC readiness lags container 'running' (wallet unlock + graph sync) —
+    # retry rather than single-shot. See lnd.bats.
+    run sh -lc 'for i in $(seq 1 60); do
+      podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null 2>&1 && exit 0
+      sleep 3
+    done; exit 1'
+    [ "$status" -eq 0 ]
+  fi
 }