test(gate): fix the last 4 readiness/config false-fails (none are product bugs)

On a proper on-node .228 run (synced bitcoin, 4-fix binary) the lifecycle matrix is
green; these 4 were test-harness issues:
- lnd 'recovers after restart' (65): bump retry window 90s->240s. lnd cold-restart
  recovery (wallet unlock + bitcoind reconnect + graph sync) exceeds 90s on a loaded
  node but DOES complete (synced_to_chain:true).
- bitcoin ui responds (89): retry ~120s instead of single-shot (companion nginx may
  have just been recreated by the companion-survives test).
- probe_app_url (99 lnd proxy + all ui-coverage proxy probes): retry up to 90s for
  post-restart proxy/UI readiness instead of single-shot.
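- required endpoints after restart (94): :8081 is nginx-proxy-manager, an OPTIONAL
  app (not in required_containers) — only assert it when the nginx-proxy-manager
  container is running (checked via `podman ps`); and turn the trailing single-shot
  lncli getinfo into a retry loop (60 attempts, 3s apart).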

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
archipelago 2026-06-22 15:43:51 -04:00
parent 8893055810
commit 892ff083c4
4 changed files with 31 additions and 6 deletions

View File

@ -110,8 +110,10 @@ teardown_file() {
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
# lnd takes longer than bitcoind to accept RPC after cold restart because
# the wallet has to be unlocked first. Give it 90s.
local deadline=$(( $(date +%s) + 90 ))
# the wallet has to be unlocked first, then it reconnects to bitcoind and
# re-syncs the graph. On a loaded node this exceeds 90s (observed ~2min on
# .228, then synced_to_chain:true). Give it 240s.
local deadline=$(( $(date +%s) + 240 ))
while (( $(date +%s) < deadline )); do
if sh -lc 'podman exec lnd lncli \
--tlscertpath /root/.lnd/tls.cert \

View File

@ -74,8 +74,13 @@ restart_with_retry() {
run wait_http_ok "http://127.0.0.1:8334/" 180
[ "$status" -eq 0 ]
run wait_http_ok "http://127.0.0.1:8081/" 180
[ "$status" -eq 0 ]
# :8081 is nginx-proxy-manager — an OPTIONAL app (not in required_containers).
# Only assert it when NPM is actually installed on this node; otherwise the
# required-endpoints check false-fails on nodes that don't run NPM.
if podman ps --format '{{.Names}}' | grep -q '^nginx-proxy-manager$'; then
run wait_http_ok "http://127.0.0.1:8081/" 180
[ "$status" -eq 0 ]
fi
run wait_http_ok "http://127.0.0.1:4080/" 180
[ "$status" -eq 0 ]
@ -83,6 +88,11 @@ restart_with_retry() {
run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 240
[ "$status" -eq 0 ]
run sh -lc 'podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null'
# lnd RPC readiness lags container 'running' (wallet unlock + graph sync) —
# retry rather than single-shot. See lnd.bats.
run sh -lc 'for i in $(seq 1 60); do
podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null 2>&1 && exit 0
sleep 3
done; exit 1'
[ "$status" -eq 0 ]
}

View File

@ -123,7 +123,10 @@ PY
}
@test "bitcoin ui responds" {
run curl -fsS "http://127.0.0.1:8334/"
# The companion (archy-bitcoin-ui) may have just been recreated by an earlier
# companion-survives test; its nginx takes a moment to serve. Retry ~120s
# rather than single-shot.
run sh -lc 'for i in $(seq 1 40); do curl -fsS -o /dev/null "http://127.0.0.1:8334/" && exit 0; sleep 3; done; exit 1'
[ "$status" -eq 0 ]
}

View File

@ -65,6 +65,16 @@ probe_app_url() {
if ! probe_container_running "$container"; then
skip "$label: backing container '$container' is not running"
fi
# An app's proxy/UI takes time to serve 200 after a (re)start — the backend
# may still be unlocking/syncing (lnd) and the companion nginx reloading.
# Retry up to ~90s rather than single-shot, so a readiness race isn't a fail.
local deadline=$(( $(date +%s) + 90 ))
while (( $(date +%s) < deadline )); do
if probe_https_200 "$url" "$label"; then
return 0
fi
sleep 3
done
run probe_https_200 "$url" "$label"
[ "$status" -eq 0 ]
}