On a proper on-node .228 run (synced bitcoin, 4-fix binary) the lifecycle matrix is green; these 4 were test-harness issues: - lnd 'recovers after restart' (65): bump retry window 90s->240s. lnd cold-restart recovery (wallet unlock + bitcoind reconnect + graph sync) exceeds 90s on a loaded node but DOES complete (synced_to_chain:true). - bitcoin ui responds (89): retry ~120s instead of single-shot (companion nginx may have just been recreated by the companion-survives test). - probe_app_url (99 lnd proxy + all ui-coverage proxy probes): retry up to 90s for post-restart proxy/UI readiness instead of single-shot. - required endpoints after restart (94): :8081 is nginx-proxy-manager, an OPTIONAL app (not in required_containers) — only assert it when NPM is installed; and make the trailing lncli getinfo a retry. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
99 lines
2.7 KiB
Bash
Executable File
99 lines
2.7 KiB
Bash
Executable File
#!/usr/bin/env bats
|
|
# tests/lifecycle/bats/required-stack-destructive.bats
#
# Controlled destructive lifecycle checks for the required stack containers.
# Every test skips itself unless ARCHY_ALLOW_DESTRUCTIVE=1.
|
|
|
# Containers the destructive tests restart and expect to come back up.
# Optional apps (for example nginx-proxy-manager) are intentionally not listed.
required_containers=(
  "archy-bitcoin-ui"
  "archy-lnd-ui"
  "archy-electrs-ui"
  "mempool"
  "mempool-api"
)
|
|
|
|
# wait_running NAME [TIMEOUT]
#
# Poll `podman inspect` every 2 seconds until container NAME reports
# .State.Running == true.
#
# Arguments:
#   $1 - container name
#   $2 - timeout in seconds (default: 120)
# Returns:
#   0 as soon as the container is running, 1 once the timeout has elapsed.
#
# The state is always checked before the deadline is evaluated, so the
# container is polled at least once (even with a timeout of 0) and gets one
# final check after the last sleep instead of being failed unobserved.
wait_running() {
  local name="$1"
  local timeout="${2:-120}"
  local deadline
  local running
  deadline=$(( $(date +%s) + timeout ))
  while :; do
    # inspect fails while the container does not exist; `|| true` turns that
    # into an empty result, i.e. "not running yet", instead of an error.
    running=$(podman inspect --format '{{.State.Running}}' "$name" 2>/dev/null || true)
    if [[ "$running" == "true" ]]; then
      return 0
    fi
    (( $(date +%s) < deadline )) || return 1
    sleep 2
  done
}
|
|
|
|
# wait_http_ok URL [TIMEOUT]
#
# Poll URL with curl every 2 seconds until a request succeeds. `curl -f`
# treats HTTP error statuses (>= 400) as failures, so only a successful
# response counts.
#
# Arguments:
#   $1 - URL to probe
#   $2 - overall timeout in seconds (default: 180)
# Returns:
#   0 on the first successful request, 1 once the timeout has elapsed.
#
# The probe always runs before the deadline is evaluated, so the URL is tried
# at least once and gets a final attempt after the last sleep.
wait_http_ok() {
  local url="$1"
  local timeout="${2:-180}"
  local deadline
  deadline=$(( $(date +%s) + timeout ))
  while :; do
    # --max-time bounds each attempt so a hung connection cannot block the
    # loop far beyond the overall timeout.
    if curl -fsS --max-time 10 "$url" >/dev/null 2>&1; then
      return 0
    fi
    (( $(date +%s) < deadline )) || return 1
    sleep 2
  done
}
|
|
|
|
# restart_with_retry NAME [ATTEMPTS]
#
# Run `podman restart NAME`, retrying on failure with a 3 second pause
# between attempts. podman's output is discarded.
#
# Arguments:
#   $1 - container name
#   $2 - maximum number of attempts (default: 3)
# Returns:
#   0 as soon as one restart succeeds, 1 if every attempt failed.
restart_with_retry() {
  local name="$1"
  local attempts="${2:-3}"
  local i
  for ((i = 1; i <= attempts; i++)); do
    if podman restart "$name" >/dev/null 2>&1; then
      return 0
    fi
    # Pause only when another attempt follows; sleeping after the final
    # failure would just delay reporting it.
    if (( i < attempts )); then
      sleep 3
    fi
  done
  return 1
}
|
|
|
|
@test "required-stack destructive gate enabled" {
|
|
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
|
}
|
|
|
|
@test "restart each required service container and verify it recovers" {
|
|
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
|
|
|
for c in "${required_containers[@]}"; do
|
|
run restart_with_retry "$c" 4
|
|
[ "$status" -eq 0 ]
|
|
run wait_running "$c" 180
|
|
[ "$status" -eq 0 ]
|
|
done
|
|
}
|
|
|
|
@test "required endpoints still respond after restarts" {
|
|
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
|
|
|
|
run wait_http_ok "http://127.0.0.1:8334/" 180
|
|
[ "$status" -eq 0 ]
|
|
|
|
# :8081 is nginx-proxy-manager — an OPTIONAL app (not in required_containers).
|
|
# Only assert it when NPM is actually installed on this node; otherwise the
|
|
# required-endpoints check false-fails on nodes that don't run NPM.
|
|
if podman ps --format '{{.Names}}' | grep -q '^nginx-proxy-manager$'; then
|
|
run wait_http_ok "http://127.0.0.1:8081/" 180
|
|
[ "$status" -eq 0 ]
|
|
fi
|
|
|
|
run wait_http_ok "http://127.0.0.1:4080/" 180
|
|
[ "$status" -eq 0 ]
|
|
|
|
run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 240
|
|
[ "$status" -eq 0 ]
|
|
|
|
# lnd RPC readiness lags container 'running' (wallet unlock + graph sync) —
|
|
# retry rather than single-shot. See lnd.bats.
|
|
run sh -lc 'for i in $(seq 1 60); do
|
|
podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null 2>&1 && exit 0
|
|
sleep 3
|
|
done; exit 1'
|
|
[ "$status" -eq 0 ]
|
|
}
|