test(gate): fix the last 4 readiness/config false-fails (none are product bugs)
On a proper on-node .228 run (synced bitcoin, 4-fix binary) the lifecycle matrix is green; these 4 were test-harness issues:

- lnd 'recovers after restart' (65): bump retry window 90s->240s. lnd cold-restart recovery (wallet unlock + bitcoind reconnect + graph sync) exceeds 90s on a loaded node but DOES complete (synced_to_chain:true).
- bitcoin ui responds (89): retry ~120s instead of single-shot (companion nginx may have just been recreated by the companion-survives test).
- probe_app_url (99 lnd proxy + all ui-coverage proxy probes): retry up to 90s for post-restart proxy/UI readiness instead of single-shot.
- required endpoints after restart (94): :8081 is nginx-proxy-manager, an OPTIONAL app (not in required_containers) — only assert it when NPM is installed; and make the trailing lncli getinfo a retry.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8893055810
commit
892ff083c4
@ -110,8 +110,10 @@ teardown_file() {
|
||||
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
||||
|
||||
# lnd takes longer than bitcoind to accept RPC after cold restart because
|
||||
# the wallet has to be unlocked first. Give it 90s.
|
||||
local deadline=$(( $(date +%s) + 90 ))
|
||||
# the wallet has to be unlocked first, then it reconnects to bitcoind and
|
||||
# re-syncs the graph. On a loaded node this exceeds 90s (observed ~2min on
|
||||
# .228, then synced_to_chain:true). Give it 240s.
|
||||
local deadline=$(( $(date +%s) + 240 ))
|
||||
while (( $(date +%s) < deadline )); do
|
||||
if sh -lc 'podman exec lnd lncli \
|
||||
--tlscertpath /root/.lnd/tls.cert \
|
||||
|
||||
@ -74,8 +74,13 @@ restart_with_retry() {
|
||||
run wait_http_ok "http://127.0.0.1:8334/" 180
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:8081/" 180
|
||||
[ "$status" -eq 0 ]
|
||||
# :8081 is nginx-proxy-manager — an OPTIONAL app (not in required_containers).
|
||||
# Only assert it when NPM is actually installed on this node; otherwise the
|
||||
# required-endpoints check false-fails on nodes that don't run NPM.
|
||||
if podman ps --format '{{.Names}}' | grep -q '^nginx-proxy-manager$'; then
|
||||
run wait_http_ok "http://127.0.0.1:8081/" 180
|
||||
[ "$status" -eq 0 ]
|
||||
fi
|
||||
|
||||
run wait_http_ok "http://127.0.0.1:4080/" 180
|
||||
[ "$status" -eq 0 ]
|
||||
@ -83,6 +88,11 @@ restart_with_retry() {
|
||||
run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 240
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
run sh -lc 'podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null'
|
||||
# lnd RPC readiness lags container 'running' (wallet unlock + graph sync) —
|
||||
# retry rather than single-shot. See lnd.bats.
|
||||
run sh -lc 'for i in $(seq 1 60); do
|
||||
podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null 2>&1 && exit 0
|
||||
sleep 3
|
||||
done; exit 1'
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
@ -123,7 +123,10 @@ PY
|
||||
}
|
||||
|
||||
@test "bitcoin ui responds" {
|
||||
run curl -fsS "http://127.0.0.1:8334/"
|
||||
# The companion (archy-bitcoin-ui) may have just been recreated by an earlier
|
||||
# companion-survives test; its nginx takes a moment to serve. Retry ~120s
|
||||
# rather than single-shot.
|
||||
run sh -lc 'for i in $(seq 1 40); do curl -fsS -o /dev/null "http://127.0.0.1:8334/" && exit 0; sleep 3; done; exit 1'
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
|
||||
@ -65,6 +65,16 @@ probe_app_url() {
|
||||
if ! probe_container_running "$container"; then
|
||||
skip "$label: backing container '$container' is not running"
|
||||
fi
|
||||
# An app's proxy/UI takes time to serve 200 after a (re)start — the backend
|
||||
# may still be unlocking/syncing (lnd) and the companion nginx reloading.
|
||||
# Retry up to ~90s rather than single-shot, so a readiness race isn't a fail.
|
||||
local deadline=$(( $(date +%s) + 90 ))
|
||||
while (( $(date +%s) < deadline )); do
|
||||
if probe_https_200 "$url" "$label"; then
|
||||
return 0
|
||||
fi
|
||||
sleep 3
|
||||
done
|
||||
run probe_https_200 "$url" "$label"
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user