#!/usr/bin/env bash
# tests/lifecycle/run-gate.sh — loop the lifecycle harness N times (default 5×, the release gate).
#
# Each iteration: setup-teardown → run.sh (with the same args you'd pass
# to run.sh) → setup-teardown. Tallies pass/fail per iteration and prints a
# summary at the end. Returns non-zero if any iteration failed.
#
# Env:
#   ARCHY_ITERATIONS        (default: 5)
#   ARCHY_FAIL_FAST=1       stop on first failed iteration
#   ARCHY_GATE_CASCADE=1    after the 5× loop, run ONE cascade pass
#                           (uninstall→no-ghost→reinstall a throwaway
#                           app); requires ARCHY_ALLOW_DESTRUCTIVE=1
#   ARCHY_SETTLE=0          disable the pre-iteration stack settle
#   ARCHY_SETTLE_SECS       cap on the settle wait, in seconds (default: 300)
#   plus everything run.sh / lib/rpc.bash respects
#   (ARCHY_PASSWORD, ARCHY_HOST, ARCHY_SCHEME, ARCHY_ALLOW_DESTRUCTIVE,
#    ARCHY_ALLOW_CASCADE_DESTRUCTIVE, ARCHY_ALLOW_NOAUTH)
#
# Usage:
#   tests/lifecycle/run-gate.sh                      # 5× full bats/ suite
#   ARCHY_ITERATIONS=5 tests/lifecycle/run-gate.sh   # 5× full suite
#   tests/lifecycle/run-gate.sh bitcoin-knots        # 5× a single suite
#
# Suggested release-gate invocation:
#   ARCHY_PASSWORD=password123 ARCHY_ALLOW_DESTRUCTIVE=1 \
#     tests/lifecycle/run-gate.sh
#
# Release-gate WITH the cascade tier (uninstall/reinstall regression guard):
#   ARCHY_PASSWORD=password123 ARCHY_ALLOW_DESTRUCTIVE=1 ARCHY_GATE_CASCADE=1 \
#     tests/lifecycle/run-gate.sh

# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages. Must be a statement of its own — trailing on the shebang line it is
# just comment text and never takes effect.
set -euo pipefail

# Run from the script's own directory so the relative ./run.sh and
# ./setup-teardown.sh invocations resolve no matter where the gate is
# launched from.
HERE="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
cd "$HERE"

# Iteration count: a positive decimal integer with no leading zero. Anything
# else is a usage error (exit 2), reported on stderr before any work starts.
ITER="${ARCHY_ITERATIONS:-5}"
if ! [[ "$ITER" =~ ^[1-9][0-9]*$ ]]; then
  echo "ARCHY_ITERATIONS must be a positive integer, got: $ITER" >&2
  exit 2
fi

# Tally state consumed by the summary at the end of the script.
passed=0
failed=0
failures=()   # labels of the failed passes, in the order they failed
start=$(date +%s)

# Best-effort settle: wait for the backend stack to be healthy before an
# iteration starts, so back-to-back destructive iterations don't compound
# restart churn (lnd wallet-unlock + the 4-container mempool stack reconnect
# need time to recover). On-node gate only (localhost probes); never fails the
# run — just delays up to the deadline. Disable with ARCHY_SETTLE=0.
settle_stack() {
  # Contract
  #   Env:     ARCHY_SETTLE (default 1; any other value disables the settle),
  #            ARCHY_ALLOW_DESTRUCTIVE (the settle only runs when this is 1),
  #            ARCHY_SETTLE_SECS (wait cap in seconds, default 300).
  #   Output:  one status line on stdout whenever the settle was active.
  #   Returns: always 0 — this is a delay, not a gate.
  [[ "${ARCHY_SETTLE:-1}" == "1" && "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || return 0

  # 300s (not 180s): on heavy nodes the immich stack's recovery after the prior
  # iteration's archipelago-restart test (crash_recovery retries on a ~120s
  # cadence) can take several minutes, and the next iteration's read-only
  # lan_address probe false-fails if immich is still mid-boot. The settle is a
  # cap, not a fixed wait — it returns the instant every probe is green.
  local budget="${ARCHY_SETTLE_SECS:-300}"
  if ! [[ "$budget" =~ ^[0-9]+$ ]]; then
    # A typo here must not kill the gate: under `set -u` a non-numeric word
    # inside $(( )) is an unbound-variable error that aborts the script.
    echo " (ignoring invalid ARCHY_SETTLE_SECS='$budget', using 300)" >&2
    budget=300
  fi

  # Declared separately from the command substitution so `local` does not
  # mask its exit status.
  local now deadline ok
  now=$(date +%s) || return 0
  # 10# forces base 10 so a value such as "08" is not parsed as bad octal.
  deadline=$(( now + 10#$budget ))

  while (( $(date +%s) < deadline )); do
    ok=1
    # mempool-api + frontend + bitcoin-ui = good proxies for "stack reconnected"
    curl -fsS -m 4 -o /dev/null "http://127.0.0.1:8999/api/v1/backend-info" 2>/dev/null || ok=0
    curl -fsS -m 4 -o /dev/null "http://127.0.0.1:4080/" 2>/dev/null || ok=0
    podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert \
      --macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon \
      --rpcserver localhost:10009 getinfo >/dev/null 2>&1 || ok=0
    # Only gate on immich where it's actually installed (heavy nodes). Its web
    # port is the same signal test 64 checks, so settling here keeps the next
    # iteration's read-only immich probe from racing a still-recovering stack.
    if podman container exists immich_server 2>/dev/null; then
      curl -fsS -m 4 -o /dev/null "http://127.0.0.1:2283/" 2>/dev/null || ok=0
    fi
    if (( ok == 1 )); then
      echo " (stack settled)"
      return 0
    fi
    sleep 4
  done
  echo " (stack settle deadline reached — proceeding anyway)"
}

# One initial teardown so a previous run's cookies don't poison iteration 1.
./setup-teardown.sh

ran=0             # loop iterations actually executed (cascade pass excluded)
stopped_early=0   # 1 once ARCHY_FAIL_FAST has cut the loop short
cascade_result="" # empty unless ARCHY_GATE_CASCADE=1 was requested

for (( i = 1; i <= ITER; i++ )); do
  echo
  echo "═══ iteration $i / $ITER ═══"
  iter_start=$(date +%s)
  settle_stack
  ran=$((ran + 1))
  if ./run.sh "$@"; then
    iter_end=$(date +%s)
    passed=$((passed + 1))
    echo "── iteration $i: PASS ($((iter_end - iter_start))s) ──"
  else
    # Must be the first command of the branch: $? still holds run.sh's status.
    rc=$?
    iter_end=$(date +%s)
    failed=$((failed + 1))
    failures+=("$i")
    echo "── iteration $i: FAIL (exit=$rc, $((iter_end - iter_start))s) ──"
    if [[ "${ARCHY_FAIL_FAST:-0}" == "1" ]]; then
      echo "ARCHY_FAIL_FAST=1, stopping early"
      stopped_early=1
      # Deliberately leaves without the teardown below; the failed iteration's
      # session state stays in place, and the next gate run starts with its
      # own teardown.
      break
    fi
  fi
  # Teardown between iterations so iteration N+1 starts with a clean
  # session-cookie state regardless of what iteration N did.
  ./setup-teardown.sh
done

# Optional CASCADE pass — uninstall → no-ghost → reinstall of a throwaway app
# (default grafana, via cascade-uninstall.bats). Run ONCE, not folded into the
# 5× loop on purpose: uninstall/reinstall every iteration would balloon runtime
# and re-pull images. One pass gates the #13 ghost / #14 reinstall-stop /
# uninstall-hang class (the bug fixed in 71cc9ac4). Opt-in so default gate
# behavior is unchanged; counts into the pass/fail tally.
#
# Two situations skip the pass, each with a stderr note instead of silence:
#   - ARCHY_ALLOW_DESTRUCTIVE is not 1 (the documented prerequisite), so a
#     green summary cannot be mistaken for "cascade covered";
#   - ARCHY_FAIL_FAST already stopped the loop: "stop" includes not launching
#     an uninstall/reinstall on top of a failed, un-torn-down iteration.
if [[ "${ARCHY_GATE_CASCADE:-0}" == "1" ]]; then
  if [[ "${ARCHY_ALLOW_DESTRUCTIVE:-0}" != "1" ]]; then
    echo "ARCHY_GATE_CASCADE=1 ignored: requires ARCHY_ALLOW_DESTRUCTIVE=1" >&2
    cascade_result="skipped (ARCHY_ALLOW_DESTRUCTIVE not set to 1)"
  elif (( stopped_early )); then
    echo "ARCHY_GATE_CASCADE=1 ignored: ARCHY_FAIL_FAST stopped the gate" >&2
    cascade_result="skipped (fail-fast)"
  else
    echo
    echo "═══ CASCADE pass (1×) ═══"
    settle_stack
    if ARCHY_ALLOW_CASCADE_DESTRUCTIVE=1 ./run.sh cascade-uninstall; then
      passed=$((passed + 1))
      cascade_result="PASS"
      echo "── CASCADE: PASS ──"
    else
      failed=$((failed + 1))
      failures+=("cascade")
      cascade_result="FAIL"
      echo "── CASCADE: FAIL ──"
    fi
    ./setup-teardown.sh
  fi
fi

end=$(date +%s)
echo
echo "════════════════════════════════════════"
echo " RESULTS"
# Loop iterations only: the cascade pass is tallied in passed/failed but is not
# one of the $ITER iterations, so it must not push this past "N / N".
echo " iterations: $ran / $ITER"
if [[ -n "$cascade_result" ]]; then
  echo " cascade: $cascade_result"
fi
echo " passed: $passed"
echo " failed: $failed"
if (( failed > 0 )); then
  echo " failed at: ${failures[*]}"
fi
echo " wall time: $((end - start))s"
echo "════════════════════════════════════════"

# Non-zero exit when any pass (iteration or cascade) failed.
if (( failed > 0 )); then
  exit 1
fi