diff --git a/tests/lifecycle/bats/lnd.bats b/tests/lifecycle/bats/lnd.bats
index ddd9d3e5..a3635d62 100644
--- a/tests/lifecycle/bats/lnd.bats
+++ b/tests/lifecycle/bats/lnd.bats
@@ -110,8 +110,10 @@ teardown_file() {
   [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
 
   # lnd takes longer than bitcoind to accept RPC after cold restart because
-  # the wallet has to be unlocked first. Give it 90s.
-  local deadline=$(( $(date +%s) + 90 ))
+  # the wallet has to be unlocked first, then it reconnects to bitcoind and
+  # re-syncs the graph. On a loaded node this exceeds 90s (observed ~2min on
+  # .228, then synced_to_chain:true). Give it 240s.
+  local deadline=$(( $(date +%s) + 240 ))
   while (( $(date +%s) < deadline )); do
     if sh -lc 'podman exec lnd lncli \
       --tlscertpath /root/.lnd/tls.cert \
diff --git a/tests/lifecycle/bats/required-stack-destructive.bats b/tests/lifecycle/bats/required-stack-destructive.bats
index 0d042c63..b2666ee4 100755
--- a/tests/lifecycle/bats/required-stack-destructive.bats
+++ b/tests/lifecycle/bats/required-stack-destructive.bats
@@ -74,8 +74,13 @@ restart_with_retry() {
   run wait_http_ok "http://127.0.0.1:8334/" 180
   [ "$status" -eq 0 ]
 
-  run wait_http_ok "http://127.0.0.1:8081/" 180
-  [ "$status" -eq 0 ]
+  # :8081 is nginx-proxy-manager — an OPTIONAL app (not in required_containers).
+  # Only assert it when NPM is installed on this node (container exists, running
+  # or not), so a failed restart still fails here but NPM-less nodes are skipped.
+  if podman container exists nginx-proxy-manager; then
+    run wait_http_ok "http://127.0.0.1:8081/" 180
+    [ "$status" -eq 0 ]
+  fi
 
   run wait_http_ok "http://127.0.0.1:4080/" 180
   [ "$status" -eq 0 ]
@@ -83,6 +88,11 @@ restart_with_retry() {
   run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 240
   [ "$status" -eq 0 ]
 
-  run sh -lc 'podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null'
+  # lnd RPC readiness lags container 'running' (wallet unlock + graph sync) —
+  # retry rather than single-shot. See lnd.bats.
+  run sh -lc 'for i in $(seq 1 60); do
+    podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null 2>&1 && exit 0
+    sleep 3
+  done; exit 1'
   [ "$status" -eq 0 ]
 }
diff --git a/tests/lifecycle/bats/required-stack.bats b/tests/lifecycle/bats/required-stack.bats
index 33474ba3..8674c0c3 100644
--- a/tests/lifecycle/bats/required-stack.bats
+++ b/tests/lifecycle/bats/required-stack.bats
@@ -123,7 +123,10 @@ PY
 }
 
 @test "bitcoin ui responds" {
-  run curl -fsS "http://127.0.0.1:8334/"
+  # The companion (archy-bitcoin-ui) may have just been recreated by an earlier
+  # companion-survives test; its nginx takes a moment to serve. Retry 40 times,
+  # 3s apart; --max-time stops one hung connection from stalling the whole loop.
+  run sh -lc 'for i in $(seq 1 40); do curl -fsS --max-time 5 -o /dev/null "http://127.0.0.1:8334/" && exit 0; sleep 3; done; exit 1'
   [ "$status" -eq 0 ]
 }
 
diff --git a/tests/lifecycle/lib/ui-probes.bash b/tests/lifecycle/lib/ui-probes.bash
index a480b7cf..038c6892 100644
--- a/tests/lifecycle/lib/ui-probes.bash
+++ b/tests/lifecycle/lib/ui-probes.bash
@@ -65,6 +65,16 @@ probe_app_url() {
   if ! probe_container_running "$container"; then
     skip "$label: backing container '$container' is not running"
   fi
+  # An app's proxy/UI takes time to serve 200 after a (re)start — the backend
+  # may still be unlocking/syncing (lnd) and the companion nginx reloading.
+  # Retry up to ~90s rather than single-shot, so a readiness race isn't a fail.
+  local deadline=$(( $(date +%s) + 90 ))
+  while (( $(date +%s) < deadline )); do
+    if probe_https_200 "$url" "$label"; then
+      return 0
+    fi
+    sleep 3
+  done
   run probe_https_200 "$url" "$label"
   [ "$status" -eq 0 ]
 }