#!/bin/bash
# Release gate harness — seed of the full-system test harness.
#
# Ties together the checks that already exist in this repo (catalog drift,
# release manifest, lifecycle bats, vitest, cargo tests) plus live-node
# smoke probes, so "is this release OK?" is one command instead of folklore.
#
# Usage:
#   tests/release/run.sh                 # static + frontend + backend stages
#   tests/release/run.sh --quick         # static + frontend stages only
#   tests/release/run.sh --with-build    # also production-build the frontend
#                                        #   and verify the dist contains the
#                                        #   current version
#   tests/release/run.sh --manifest      # also validate releases/manifest.json
#                                        #   (run AFTER create-release staged it)
#   tests/release/run.sh --live [URL]    # also smoke-probe a running node
#                                        #   (default http://127.0.0.1)
#
# Flags compose. Exits non-zero on the first failing stage.
#
# CAUTION (.116 and other dev nodes): full `cargo test -p archipelago` has
# hung tool PTYs here before — every cargo invocation below is wrapped in
# `timeout`, and the test run is scoped to focused module filters.

# Treat references to unset variables as errors. `-e` is not enabled: stage()
# inspects each command's exit status itself.
set -u

# Resolve the repository root (two levels above this script) and run from
# there, so every relative path used by the stages is repo-relative.
# CDPATH is cleared for the lookup so a relative script path cannot be
# resolved against (and echoed from) some other directory.
REPO="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)" || {
  echo "cannot resolve repository root from ${BASH_SOURCE[0]}" >&2
  exit 1
}
# Abort instead of silently running every stage from the wrong directory.
cd -- "$REPO" || {
  echo "cannot cd to repository root: $REPO" >&2
  exit 1
}

# Option defaults; parse_args overrides them from the command line.
QUICK=0 WITH_BUILD=0 MANIFEST=0 LIVE=0 LIVE_URL="http://127.0.0.1"

# parse_args <flag...>
# Sets QUICK / WITH_BUILD / MANIFEST / LIVE / LIVE_URL from the given flags.
# `--live` consumes the following word as the URL only when that word starts
# with "http"; otherwise LIVE_URL keeps its current value.
# Exits 2 (after a message on stderr) on an unrecognised flag.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --quick) QUICK=1 ;;
      --with-build) WITH_BUILD=1 ;;
      --manifest) MANIFEST=1 ;;
      --live)
        LIVE=1
        if [[ "${2:-}" == http* ]]; then
          LIVE_URL="$2"
          shift
        fi
        ;;
      *)
        echo "unknown flag: $1" >&2
        exit 2
        ;;
    esac
    shift
  done
}
parse_args "$@"

# Names of the stages that passed / failed, in execution order.
PASS=() FAIL=()

# stage <name> <cmd...>
# Runs <cmd...> as one named gate stage, framed by "=== [name]" banner lines.
#   success: records <name> in PASS and returns 0.
#   failure: reports the command's exit status, records <name> in FAIL and
#            calls `summary 1`, which prints the summary and ends the run.
stage() {
  local name="$1"
  shift
  local rc=0

  echo
  echo "=== [$name] $*"
  # Capture the status explicitly so the failure line reports the command's
  # own exit code.
  "$@" || rc=$?

  if [[ $rc -eq 0 ]]; then
    echo "=== [$name] PASS"
    PASS+=("$name")
    return 0
  fi

  echo "=== [$name] FAIL (exit $rc)"
  FAIL+=("$name")
  summary 1
}

# summary [exit-code]
# Prints the gate summary — one "PASS: <stage>" line per passed stage (or
# "PASS: none" when nothing passed) and one "FAIL: <stage>" line per failed
# stage — then exits the script with <exit-code> (default 0).
summary() {
  echo
  echo "──────── release gate summary ────────"
  # printf repeats its format for every argument, giving one line per stage.
  printf 'PASS: %s\n' "${PASS[@]:-none}"
  if [[ ${#FAIL[@]} -gt 0 ]]; then
    printf 'FAIL: %s\n' "${FAIL[@]}"
  fi
  exit "${1:-0}"
}

# ── Stage 1: static ──────────────────────────────────────────────────
stage "git-diff-check" git diff --check
stage "cargo-fmt" timeout 240 cargo fmt --manifest-path core/Cargo.toml --all --check
stage "catalog-drift" python3 scripts/check-app-catalog-drift.py
# Every release must surface its CHANGELOG entry in the Settings "What's New"
# modal. The modal hardcodes a block per version and has drifted behind before
# (sat at v1.7.84 while the fleet shipped to v1.7.92). Fail if any CHANGELOG
# version is missing a block; `python3 scripts/sync-whats-new.py` inserts them.
stage "whats-new-sync" python3 scripts/sync-whats-new.py --check
# Only with --manifest: the manifest exists only after create-release staged it.
if [[ $MANIFEST -eq 1 ]]; then
  stage "release-manifest" scripts/check-release-manifest.sh
fi

# ── Stage 2: frontend ────────────────────────────────────────────────
stage "ui-type-check" bash -c 'cd neode-ui && npm run --silent type-check'
# Only the last 4 lines of vitest output are shown; the stage result is
# vitest's own exit status (PIPESTATUS[0]), not tail's.
stage "ui-unit-tests" bash -c 'cd neode-ui && npx vitest run --silent 2>&1 | tail -4; exit ${PIPESTATUS[0]}'

if [[ $WITH_BUILD -eq 1 ]]; then
  # npm run build can fail silently (vue-tsc EACCES burned us before) —
  # require the packaged output to actually contain the current version.
  VERSION=$(grep -m1 '^version' core/archipelago/Cargo.toml | cut -d'"' -f2)
  stage "ui-build" bash -c 'cd neode-ui && npm run build'
  # The version is passed as $1 and matched as a fixed string (-F): as a
  # regex its dots would match any character, and an empty VERSION would
  # match every file and pass the check vacuously — so empty fails outright.
  stage "ui-dist-version" bash -c '[ -n "$1" ] && grep -qF -- "$1" web/dist/neode-ui/assets/*.js' _ "$VERSION"
fi

# --quick stops here: print the summary and exit 0 before the backend stages.
[[ $QUICK -eq 1 ]] && summary 0

# ── Stage 3: backend ─────────────────────────────────────────────────
stage "cargo-check" timeout 580 cargo check --manifest-path core/Cargo.toml -p archipelago
# Focused suites for the subsystems this release train touched:
#   update::                   — OTA download/apply/rollback/probe (v1.7.89 hardening)
#   lnd                        — receive address + wallet readiness (v1.7.85–.89), incl. the
#                                structured receive-error reason-code classifier
#   container::image_versions  — image pinning / false-update detection
#   scanner                    — RAII in-flight guard (v1.7.84)
#   drift                      — published-port drift detection (the .116 self-heal)
#   missing_secret             — secret-resolution names the missing file (the .198 fix)
# 1500s: the non-incremental test-profile compile alone takes ~9 min on the
# .116 ThinkPad; 580s expires mid-compile (exit 124) before a single test runs.
stage "cargo-test-weekly" timeout 1500 env CARGO_INCREMENTAL=0 \
  cargo test --manifest-path core/Cargo.toml -p archipelago -- \
  update:: lnd container::image_versions scanner drift missing_secret

# ── Stage 4: live node smoke ─────────────────────────────────────────
if [[ $LIVE -eq 1 ]]; then
  # LIVE_URL reaches the inline scripts as a positional argument ($1) rather
  # than being spliced into the script text, so quotes or `$` in the URL
  # cannot break or alter the script.
  # Frontend: try the URL as given, then the same URL with http: → https:.
  stage "live-frontend" bash -c 'curl -skf -o /dev/null "$1/" || curl -skf -o /dev/null "${1/http:/https:}/"' _ "$LIVE_URL"
  stage "live-aiui" curl -sf -o /dev/null "$LIVE_URL/aiui/"
  # RPC: any JSON-RPC envelope ("result" or "error") counts as reachable.
  rpc_body='{"jsonrpc":"2.0","id":1,"method":"update.status","params":{}}'
  stage "live-rpc" bash -c 'curl -s -X POST "$1/rpc/v1" -H "Content-Type: application/json" -d "$2" | grep -qE "\"(result|error)\""' _ "$LIVE_URL" "$rpc_body"

  # Bitcoin-receive regression guard. The backend asks LND REST for a new
  # on-chain address with ?type=<AddressType>. The REST gateway parses that
  # as the proto enum (WITNESS_PUBKEY_HASH / 0), NOT the lncli aliases —
  # sending "p2wkh" returns 400 "parsing field type ... is not a valid
  # value" and bitcoin-receive silently breaks for the whole fleet (the bug
  # that slipped through v1.7.88/89 because nothing exercised LND live).
  # This hits LND REST directly and FAILS only on that exact parse-error
  # signature; a "wallet locked" / "still syncing" reply means the type was
  # accepted, which is all we're validating here.
  # Note: this probe always targets 127.0.0.1 (ports 18080 then 8080),
  # independent of LIVE_URL, and exits 0 with a SKIP line when neither answers.
  stage "live-lnd-address-type" bash -c '
    mac=$(sudo cat /var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/admin.macaroon 2>/dev/null | od -An -tx1 | tr -d " \n")
    for port in 18080 8080; do
      resp=$(curl -sk --max-time 8 "https://127.0.0.1:$port/v1/newaddress?type=WITNESS_PUBKEY_HASH" -H "Grpc-Metadata-macaroon: $mac" 2>/dev/null)
      [ -z "$resp" ] && continue
      echo "LND($port): $resp"
      echo "$resp" | grep -q "is not a valid value" && { echo "FAIL: LND rejected the address type the backend sends"; exit 1; }
      echo "OK: LND accepted the address type"; exit 0
    done
    echo "SKIP: LND REST not reachable on 18080/8080 — cannot validate address type live"; exit 0
  '
fi

# Every requested stage passed: print the summary and exit 0.
summary 0