archy/tests/lifecycle/bats/required-stack-destructive.bats
archipelago 892ff083c4 test(gate): fix the last 4 readiness/config false-fails (none are product bugs)
On a proper on-node .228 run (synced bitcoin, 4-fix binary) the lifecycle matrix is
green; these 4 were test-harness issues:
- lnd 'recovers after restart' (65): bump retry window 90s->240s. lnd cold-restart
  recovery (wallet unlock + bitcoind reconnect + graph sync) exceeds 90s on a loaded
  node but DOES complete (synced_to_chain:true).
- bitcoin ui responds (89): retry ~120s instead of single-shot (companion nginx may
  have just been recreated by the companion-survives test).
- probe_app_url (99 lnd proxy + all ui-coverage proxy probes): retry up to 90s for
  post-restart proxy/UI readiness instead of single-shot.
- required endpoints after restart (94): :8081 is nginx-proxy-manager, an OPTIONAL
  app (not in required_containers) — only assert it when NPM is installed; and make
  the trailing lncli getinfo a retry.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-22 15:43:51 -04:00

99 lines
2.7 KiB
Bash
Executable File

#!/usr/bin/env bats
# tests/lifecycle/bats/required-stack-destructive.bats
#
# Controlled destructive lifecycle checks for required stack containers.
# Runs only when ARCHY_ALLOW_DESTRUCTIVE=1.
# Containers the destructive tests restart, in the order they are restarted.
# Kept as a plain assignment (no `declare`) so the array stays visible to the
# test functions regardless of the scope this file is sourced in.
required_containers=(
  archy-bitcoin-ui archy-lnd-ui archy-electrs-ui  # archy-*-ui containers
  mempool mempool-api                             # mempool containers
)
#######################################
# Poll podman until a container reports State.Running == true.
# Arguments:
#   $1 - container name
#   $2 - timeout in seconds (default 120)
# Returns:
#   0 as soon as the container is reported running,
#   1 if it is still not running once the timeout has elapsed.
#######################################
wait_running() {
  local name="$1"
  local timeout="${2:-120}"
  local deadline running
  # Assigned separately from `local` so the declaration does not mask the
  # status of the command substitution.
  deadline=$(( $(date +%s) + timeout ))
  while true; do
    # Probe before looking at the clock: even a zero (or already expired)
    # timeout inspects the container at least once.
    # A failing inspect (e.g. container not created yet) counts as "not running".
    running=$(podman inspect --format '{{.State.Running}}' "$name" 2>/dev/null || true)
    if [[ "$running" == "true" ]]; then
      return 0
    fi
    (( $(date +%s) < deadline )) || return 1
    sleep 2
  done
}
#######################################
# Poll a URL with curl until it answers successfully.
# `curl -f` treats HTTP error responses (>= 400) as failures, so only a
# successful response ends the wait.
# Arguments:
#   $1 - URL to probe
#   $2 - timeout in seconds (default 180)
# Returns:
#   0 as soon as one request succeeds,
#   1 if no request has succeeded once the timeout has elapsed.
#######################################
wait_http_ok() {
  local url="$1"
  local timeout="${2:-180}"
  local deadline budget
  # Assigned separately from `local` so the declaration does not mask the
  # status of the command substitution.
  deadline=$(( $(date +%s) + timeout ))
  while true; do
    # Bound each request by the time left so a connection that is accepted but
    # never answered cannot block past the caller's timeout. Clamp to 1s:
    # curl treats --max-time 0 as "no limit".
    budget=$(( deadline - $(date +%s) ))
    (( budget >= 1 )) || budget=1
    # Probe before checking the deadline so a zero timeout still tries once.
    if curl -fsS --max-time "$budget" "$url" >/dev/null 2>&1; then
      return 0
    fi
    (( $(date +%s) < deadline )) || return 1
    sleep 2
  done
}
#######################################
# Restart a container with `podman restart`, retrying on failure.
# Arguments:
#   $1 - container name
#   $2 - maximum number of attempts (default 3)
# Returns:
#   0 as soon as one restart command succeeds,
#   1 if every attempt fails (or if the attempt count is < 1).
#######################################
restart_with_retry() {
  local name="$1"
  local attempts="${2:-3}"
  local i
  for ((i = 1; i <= attempts; i++)); do
    if podman restart "$name" >/dev/null 2>&1; then
      return 0
    fi
    # Pause only when another attempt follows; sleeping after the final
    # failure would just delay reporting it.
    if ((i < attempts)); then
      sleep 3
    fi
  done
  return 1
}
# Gate marker: reported as skipped unless the operator opted in with
# ARCHY_ALLOW_DESTRUCTIVE=1; passes trivially when the gate is open.
@test "required-stack destructive gate enabled" {
  if [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" != "1" ]]; then
    skip "ARCHY_ALLOW_DESTRUCTIVE not set"
  fi
}
# Restarts every container listed in required_containers, one at a time, and
# requires each to report running again before moving on to the next.
# Skipped unless ARCHY_ALLOW_DESTRUCTIVE=1.
@test "restart each required service container and verify it recovers" {
  [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
  # Keep the loop variable scoped to this test.
  local c
  for c in "${required_containers[@]}"; do
    # Bats prints a test's output only when the test fails. These progress
    # lines name the container being handled, which the failing line number
    # alone cannot do because every iteration shares the same assertions.
    echo "restarting container: $c"
    run restart_with_retry "$c" 4
    [ "$status" -eq 0 ]
    echo "waiting for container to report running: $c"
    run wait_running "$c" 180
    [ "$status" -eq 0 ]
  done
}
# Verifies that the local HTTP endpoints and the lnd RPC answer again after the
# restarts. Each probe is executed through Bats `run`, and the assertion that
# follows it fails the test when the probe's exit status is non-zero.
# Skipped unless ARCHY_ALLOW_DESTRUCTIVE=1.
@test "required endpoints still respond after restarts" {
[[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || skip "ARCHY_ALLOW_DESTRUCTIVE not set"
# Wait up to 180s for a successful HTTP response on port 8334.
run wait_http_ok "http://127.0.0.1:8334/" 180
[ "$status" -eq 0 ]
# :8081 is nginx-proxy-manager — an OPTIONAL app (not in required_containers).
# Only assert it when an NPM container is actually running on this node
# (`podman ps` lists only running containers by default); otherwise the
# required-endpoints check false-fails on nodes that don't run NPM.
if podman ps --format '{{.Names}}' | grep -q '^nginx-proxy-manager$'; then
run wait_http_ok "http://127.0.0.1:8081/" 180
[ "$status" -eq 0 ]
fi
# Wait up to 180s for a successful HTTP response on port 4080.
run wait_http_ok "http://127.0.0.1:4080/" 180
[ "$status" -eq 0 ]
# The backend-info API on port 8999 is given a longer window (240s).
run wait_http_ok "http://127.0.0.1:8999/api/v1/backend-info" 240
[ "$status" -eq 0 ]
# lnd RPC readiness lags container 'running' (wallet unlock + graph sync) —
# retry rather than single-shot. See lnd.bats.
# The inline script tries `lncli getinfo` inside the `lnd` container up to 60
# times, sleeping 3s after each failure (roughly 180s in total); it exits 0 on
# the first success and 1 if every attempt fails.
run sh -lc 'for i in $(seq 1 60); do
podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null 2>&1 && exit 0
sleep 3
done; exit 1'
[ "$status" -eq 0 ]
}