archy/tests/lifecycle/bats/required-stack.bats
archipelago 98f4fa44a8 test(gate): harden readiness for sustained 5x churn + inter-iteration settle
The 1x gate is green; the 5x failed iters 1-2 on readiness-under-churn (apps DO
recover — lnd synced, mempool just mid-restart when probed — but slower than the
windows when restarted back-to-back). Hardening:
- run-20x.sh: best-effort settle_stack() before each iteration (wait for
  mempool-api/frontend + lnd RPC healthy, 180s, on-node, never fails the run).
- required containers present/running (80/81): wait-loops (180s) not single-shot.
- mempool api/frontend (87/88): retry ~180s not single-shot.
- mempool queryable (74): 60s->180s. lnd restart-running (64): 120s->240s.
  lnd getinfo (60): 90s->240s retry.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-22 17:11:15 -04:00

155 lines
4.8 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bats
# tests/lifecycle/bats/required-stack.bats
#
# Read-only release-gate checks for the required Bitcoin stack on .116.
#
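# This suite is intentionally non-destructive: it only reads state. The Bitcoin
# RPC checks authenticate with the node-local RPC password file but issue only
# getblockchaininfo, so the suite can run anytime as a health gate during long
# sync/reindex windows.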
# Container names the presence/running gate tests iterate over.
required_containers=(
  bitcoin-knots
  electrumx
  lnd
  archy-mempool-db
  mempool-api
  mempool
  filebrowser
  archy-bitcoin-ui
  archy-lnd-ui
  archy-electrs-ui
)
# Print the container names reported by `podman ps`, one per line.
# NOTE(review): `podman ps` is invoked without -a, so stopped containers are
# presumably not listed — confirm this is what callers expect.
# Outputs: container names on stdout
# Returns: podman's exit status
podman_names() {
  local -a ps_args=(ps --format '{{.Names}}')
  podman "${ps_args[@]}"
}
# Print the .State.Running value podman reports for one container.
# Arguments: $1 - container name
# Outputs:   podman's formatted value on stdout; podman's stderr is discarded
# Returns:   podman's exit status
container_running() {
  local target="$1"
  local -a inspect_args=(inspect --format '{{.State.Running}}' "$target")
  podman "${inspect_args[@]}" 2>/dev/null
}
# Call getblockchaininfo on the local Bitcoin JSON-RPC endpoint.
# Arguments: none
# Outputs:   the raw JSON-RPC response on stdout; diagnostics on stderr
# Returns:   0 on success; 1 if the RPC password file cannot be read;
#            otherwise curl's exit status (-f makes HTTP errors non-zero)
bitcoin_rpc() {
  local secret_file=/var/lib/archipelago/secrets/bitcoin-rpc-password
  local rpc_password

  # Read the secret up front: a failing $(cat ...) embedded in the curl
  # argument would be ignored and the request sent with an empty password,
  # hiding the real cause behind an HTTP auth failure.
  if ! rpc_password="$(cat -- "$secret_file" 2>/dev/null)"; then
    echo "bitcoin_rpc: cannot read RPC password file: $secret_file" >&2
    return 1
  fi

  curl -fsS --max-time 60 \
    --user "archipelago:${rpc_password}" \
    --data-binary '{"jsonrpc":"1.0","id":"required-stack","method":"getblockchaininfo","params":[]}' \
    -H 'content-type: text/plain;' \
    http://127.0.0.1:8332/
}
# Extract one field from a JSON-RPC response supplied on stdin.
# Arguments: $1 - key to look up inside the response's "result" object
# Outputs:   the value as rendered by Python's print(), so JSON booleans
#            come out as True/False
# Returns:   python3's exit status (non-zero on invalid JSON or a missing key)
bitcoin_json() {
  local field="$1"
  python3 -c 'import json,sys; r=json.load(sys.stdin)["result"]; print(r[sys.argv[1]])' "$field"
}
@test "required containers are present" {
  # Under sustained 5x churn an app may still be mid-restart when this runs;
  # poll every 3s for up to 180s until the whole required set is listed,
  # rather than probing once.
  local deadline names missing="" c
  deadline=$(( $(date +%s) + 180 ))

  while (( $(date +%s) < deadline )); do
    # A transient podman error counts as "nothing listed yet" and is retried,
    # instead of aborting the wait loop on the first failed listing.
    names="$(podman_names)" || names=""
    missing=""
    for c in "${required_containers[@]}"; do
      grep -Fxq -- "$c" <<<"$names" || missing="$missing $c"
    done
    if [[ -z "$missing" ]]; then
      return 0
    fi
    sleep 3
  done

  # Plain echo + return 1 so the diagnostic does not depend on a `fail`
  # helper, which this file never loads.
  echo "required containers never all present; missing:$missing" >&2
  return 1
}
@test "required containers are running" {
  # Poll every 3s for up to 180s until every required container reports
  # .State.Running == true; a container that is mid-restart gets time to
  # come back instead of failing a single probe.
  local deadline notrunning="" c
  deadline=$(( $(date +%s) + 180 ))

  while (( $(date +%s) < deadline )); do
    notrunning=""
    for c in "${required_containers[@]}"; do
      # An inspect failure yields empty output, which counts as not running.
      [[ "$(container_running "$c" 2>/dev/null)" == "true" ]] || notrunning="$notrunning $c"
    done
    if [[ -z "$notrunning" ]]; then
      return 0
    fi
    sleep 3
  done

  # Plain echo + return 1 so the diagnostic does not depend on a `fail`
  # helper, which this file never loads.
  echo "required containers never all running; not-running:$notrunning" >&2
  return 1
}
@test "bitcoin-knots RPC responds" {
  # The RPC call must succeed and report the main chain with a non-negative
  # block height; the Python assert fails the test otherwise.
  run bitcoin_rpc
  [ "$status" -eq 0 ]
  python3 -c 'import json,sys; r=json.load(sys.stdin)["result"]; assert r["chain"] == "main" and r["blocks"] >= 0' <<<"$output"
}
@test "bitcoin backend is synced archival for electrumx/lnd gate" {
  # Fails when the node reports pruned=true or initialblockdownload=true;
  # blocks/headers are fetched only to enrich the failure message.
  run bitcoin_rpc
  [ "$status" -eq 0 ]

  local response="$output"
  local pruned ibd blocks headers
  pruned="$(bitcoin_json pruned <<<"$response")"
  ibd="$(bitcoin_json initialblockdownload <<<"$response")"
  blocks="$(bitcoin_json blocks <<<"$response")"
  headers="$(bitcoin_json headers <<<"$response")"

  # bitcoin_json prints Python booleans ("True"); the lowercase spelling is
  # accepted as well.
  case "$pruned" in
    True | true)
      echo "bitcoin is pruned (blocks=$blocks headers=$headers); electrumx cannot index pruned historical blocks"
      return 1
      ;;
  esac

  case "$ibd" in
    True | true)
      echo "bitcoin is still in initial block download (blocks=$blocks headers=$headers)"
      return 1
      ;;
  esac
}
@test "electrumx TCP port accepts connections" {
  # Single TCP connect to 127.0.0.1:50001 with a 3s timeout; the socket is
  # closed again immediately and no protocol data is exchanged.
  run python3 - <<'PY'
import socket

with socket.create_connection(("127.0.0.1", 50001), 3):
    pass
print("ok")
PY
  [ "$status" -eq 0 ]
}
@test "lnd CLI getinfo succeeds" {
  # lnd RPC readiness lags the container "running" state (wallet auto-unlock
  # on start), so poll rather than probe once: up to 30 attempts, each capped
  # at 20s by `timeout`, with a 3s pause between attempts.
  local probe
  probe='for i in $(seq 1 30); do
timeout 20 podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon --rpcserver localhost:10009 getinfo >/dev/null 2>&1 && exit 0
sleep 3
done; exit 1'
  run sh -lc "$probe"
  [ "$status" -eq 0 ]
}
@test "lnd REST port accepts connections" {
  # Single TCP connect to 127.0.0.1:18080 with a 3s timeout; the socket is
  # closed again immediately and no HTTP request is sent.
  run python3 - <<'PY'
import socket

with socket.create_connection(("127.0.0.1", 18080), 3):
    pass
print("ok")
PY
  [ "$status" -eq 0 ]
}
@test "mempool api endpoint responds" {
  # mempool-api reconnects to electrumx after a stack restart, so poll:
  # up to 60 attempts, 5s per-request cap, 3s pause between attempts.
  local probe='for i in $(seq 1 60); do curl -fsS -m 5 -o /dev/null "http://127.0.0.1:8999/api/v1/backend-info" && exit 0; sleep 3; done; exit 1'
  run sh -lc "$probe"
  [ "$status" -eq 0 ]
}
@test "mempool frontend responds" {
  # Poll the frontend root page: up to 60 attempts, 5s per-request cap,
  # 3s pause between attempts.
  local probe='for i in $(seq 1 60); do curl -fsS -m 5 -o /dev/null "http://127.0.0.1:4080/" && exit 0; sleep 3; done; exit 1'
  run sh -lc "$probe"
  [ "$status" -eq 0 ]
}
@test "bitcoin ui responds" {
  # The companion (archy-bitcoin-ui) may have just been recreated by an earlier
  # companion-survives test; its nginx takes a moment to serve. Retry up to 40
  # times with a 3s pause rather than single-shot. Each attempt is capped at 5s
  # (-m 5) so a connection that is accepted but never answered cannot stall
  # the loop indefinitely.
  run sh -lc 'for i in $(seq 1 40); do curl -fsS -m 5 -o /dev/null "http://127.0.0.1:8334/" && exit 0; sleep 3; done; exit 1'
  [ "$status" -eq 0 ]
}
@test "lnd ui responds" {
  # Single-shot probe of the LND UI root page. --max-time bounds the request
  # so a server that accepts the connection but never answers fails the test
  # instead of hanging the gate.
  run curl -fsS --max-time 10 "http://127.0.0.1:18083/"
  [ "$status" -eq 0 ]
}
@test "filebrowser responds" {
  # Single-shot probe of the filebrowser root page. --max-time bounds the
  # request so a server that accepts the connection but never answers fails
  # the test instead of hanging the gate.
  run curl -fsS --max-time 10 "http://127.0.0.1:8083/"
  [ "$status" -eq 0 ]
}