The 5x gate is DESTRUCTIVE-only and never exercised uninstall/reinstall — where the worst field bugs lived (#13 app ghosting in My Apps after uninstall, #14 reinstall stalling on stale state). New cascade-uninstall.bats drives the full teardown path on a throwaway app (default grafana, precondition-skips if already installed so it can't destroy real data) and asserts: - fresh install reaches running via a truthful, non-silent progression - uninstall makes the entry DISAPPEAR from server.get-state package-data (the literal My Apps map) — no ghost, no stuck uninstall stage - container + (on-node) data dir are gone - reinstall returns to running - node left as found Opt-in via ARCHY_ALLOW_CASCADE_DESTRUCTIVE=1; not yet folded into the canonical gate. Verified 7/7 against .228. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
154 lines
6.3 KiB
Bash
154 lines
6.3 KiB
Bash
#!/usr/bin/env bats
|
|
# tests/lifecycle/bats/cascade-uninstall.bats
|
|
#
|
|
# CASCADE-tier regression guard for the uninstall → reinstall lifecycle — the
|
|
# exact bug class the gate's DESTRUCTIVE tier never exercised:
|
|
# #13 "uninstall ghost" — app stayed in My Apps after uninstall because the
|
|
# package state entry wasn't cleared when teardown hit
|
|
# cleanup residue (returned Err before removing it).
|
|
# #14 "reinstall stops" — a reinstall stalled partway on the stale state/data
|
|
# left behind by the broken uninstall.
|
|
#
|
|
# Uses a THROWAWAY app (default grafana — not installed on prod/test nodes, no
|
|
# user data) so it can drive the FULL teardown path (no preserve_data), which is
|
|
# where #13 actually bit. Precondition-skips if the app is already installed, so
|
|
# it can NEVER destroy real data on a populated node.
|
|
#
|
|
# "No ghost" is asserted against server.get-state's package-data (literally the
|
|
# My Apps map) — the entry must disappear, not linger with a stale state /
|
|
# stuck uninstall stage.
|
|
#
|
|
# Gated on ARCHY_ALLOW_CASCADE_DESTRUCTIVE=1. RPC-based, so it works on-node or
|
|
# against a remote ARCHY_HOST (the data-dir residue check is on-node only).
|
|
|
|
load '../lib/rpc.bash'
|
|
|
|
# Throwaway app driven through the install → uninstall → reinstall cycle.
# Each value can be overridden through the matching ARCHY_CASCADE_* variable.
CASCADE_APP="${ARCHY_CASCADE_APP:-grafana}"
CASCADE_IMAGE="${ARCHY_CASCADE_IMAGE:-docker.io/grafana/grafana:10.2.0}"

# The default JSON lives in its own variable on purpose. Written inline as
# "${VAR:-{...}}", the expansion ends at the FIRST "}" of the literal and the
# final "}" becomes trailing text: the default survives by accident, but any
# caller-supplied ARCHY_CASCADE_CONFIG gets an extra "}" appended (invalid JSON).
CASCADE_DEFAULT_CONFIG='{"ports":["3000:3000"],"volumes":["/var/lib/archipelago/grafana:/var/lib/grafana"],"env":["GF_PATHS_DATA=/var/lib/grafana","GF_USERS_ALLOW_SIGN_UP=false"]}'
CASCADE_CONFIG="${ARCHY_CASCADE_CONFIG:-${CASCADE_DEFAULT_CONFIG}}"

# Host path checked for residue after uninstall (on-node runs only).
CASCADE_DATA_DIR="${ARCHY_CASCADE_DATA_DIR:-/var/lib/archipelago/${CASCADE_APP}}"
|
|
|
|
# bats file-level setup: authenticate once before any test in this file runs.
# Requires ARCHY_PASSWORD; the shell aborts with the message below when it is
# unset or empty.
setup_file() {
  : "${ARCHY_PASSWORD:?Set ARCHY_PASSWORD env var to the UI password}"

  # ARCHY_FORCE_LOGIN is exported only for the duration of the rpc_login call
  # and removed again afterwards so it does not leak into the tests.
  # NOTE(review): presumably it makes rpc_login ignore a cached session —
  # confirm against lib/rpc.bash.
  ARCHY_FORCE_LOGIN=1
  export ARCHY_FORCE_LOGIN
  rpc_login
  unset ARCHY_FORCE_LOGIN
}
|
|
|
|
# bats file-level teardown: runs once after the last test in this file.
# NOTE(review): rpc_logout_local comes from lib/rpc.bash; presumably it drops
# the session created in setup_file — confirm there.
teardown_file() { rpc_logout_local; }
|
|
|
|
# Succeeds only when the operator opted in to this destructive tier by setting
# ARCHY_ALLOW_CASCADE_DESTRUCTIVE to exactly "1"; unset or any other value
# counts as disabled.
cascade_enabled() {
  case "${ARCHY_ALLOW_CASCADE_DESTRUCTIVE:-0}" in
    1) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
|
|
# True when CASCADE_APP has an entry in My Apps (server.get-state package-data).
|
|
app_in_my_apps() {
|
|
rpc_result server.get-state '{}' 2>/dev/null \
|
|
| jq -e --arg id "$CASCADE_APP" '.data["package-data"] | has($id)' >/dev/null 2>&1
|
|
}
|
|
|
|
# Prints the top-level "state" of CASCADE_APP's entry in server.get-state's
# package-data map, or the literal "absent" when jq finds no such value (entry
# missing, or entry without a state). Prints nothing when the RPC yields no
# output at all, so an empty result is distinguishable from "absent".
app_state() {
  local state_of_app='.data["package-data"][$id].state // "absent"'

  rpc_result server.get-state '{}' 2>/dev/null |
    jq -r --arg id "$CASCADE_APP" "$state_of_app"
}
|
|
|
|
# Poll My Apps until CASCADE_APP reaches a given state.
#
# Arguments:
#   $1 - target state, or "absent" for "entry is gone"
#   $2 - timeout in seconds (default 180)
# Outputs:
#   on timeout, one diagnostic line on stderr naming the last state observed
# Returns:
#   0 as soon as the target is observed, 1 on timeout
#
# The state is always polled at least once, and once more after the final
# sleep, so a target reached while sleeping (or a zero timeout) is not reported
# as a failure.
wait_app_state() {
  local target="$1" timeout="${2:-180}"
  local deadline current=""

  deadline=$(( $(date +%s) + timeout ))
  while :; do
    # Best effort: a failed query is just "not there yet", never fatal.
    current="$(app_state)" || true
    [[ "$current" == "$target" ]] && return 0
    (( $(date +%s) < deadline )) || break
    sleep 3
  done

  # Report the value that was actually compared; re-querying here could print a
  # state equal to the target and contradict the message.
  echo "wait_app_state: $CASCADE_APP never reached '$target' (last='$current') within ${timeout}s" >&2
  return 1
}
|
|
|
|
# ────────────────────────────────────────────────────────────────────
|
|
@test "cascade gate enabled" {
|
|
cascade_enabled || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
|
}
|
|
|
|
@test "precondition: ${CASCADE_APP} is not already installed (protects real data)" {
|
|
cascade_enabled || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
|
if app_in_my_apps; then
|
|
skip "${CASCADE_APP} already installed here — refusing to uninstall (would destroy data); set ARCHY_CASCADE_APP to an uninstalled throwaway"
|
|
fi
|
|
}
|
|
|
|
@test "install ${CASCADE_APP} (fresh) reaches running with a truthful, non-silent progression" {
|
|
cascade_enabled || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
|
app_in_my_apps && skip "already installed (precondition skip)"
|
|
|
|
run rpc_result package.install "{\"id\":\"${CASCADE_APP}\",\"dockerImage\":\"${CASCADE_IMAGE}\",\"containerConfig\":${CASCADE_CONFIG}}"
|
|
[ "$status" -eq 0 ]
|
|
|
|
# Progress truthfulness: must pass through a transitional install state (not a
|
|
# silent no-op) and land on running. A warm image cache can blow through the
|
|
# transitional states between polls, so a missed transitional is a warn, not a
|
|
# failure; reaching running is the hard assertion.
|
|
local saw_transitional=0 deadline=$(( $(date +%s) + 300 ))
|
|
while (( $(date +%s) < deadline )); do
|
|
case "$(app_state)" in
|
|
installing|pulling-image|pulling|downloading|starting|created) saw_transitional=1 ;;
|
|
running) break ;;
|
|
esac
|
|
sleep 2
|
|
done
|
|
[ "$(app_state)" == "running" ]
|
|
[ "$saw_transitional" -eq 1 ] || echo "# note: no transitional install state observed (image likely cached)" >&3
|
|
}
|
|
|
|
@test "uninstall ${CASCADE_APP} clears it from My Apps — NO ghost (#13)" {
|
|
cascade_enabled || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
|
app_in_my_apps || skip "${CASCADE_APP} not installed (install step must have failed)"
|
|
|
|
run rpc_result package.uninstall "{\"id\":\"${CASCADE_APP}\"}"
|
|
[ "$status" -eq 0 ]
|
|
|
|
# The container must go away…
|
|
run wait_for_container_status "$CASCADE_APP" absent 180
|
|
[ "$status" -eq 0 ]
|
|
|
|
# …AND the My Apps entry must be GONE — the #13 ghost was the entry lingering
|
|
# with a stale state / stuck uninstall stage. Poll: removal trails teardown.
|
|
run wait_app_state absent 120
|
|
[ "$status" -eq 0 ]
|
|
|
|
# Belt-and-suspenders: the key is truly absent from package-data.
|
|
run app_in_my_apps
|
|
[ "$status" -ne 0 ]
|
|
}
|
|
|
|
@test "uninstall removed the data dir (full teardown, no residue)" {
|
|
cascade_enabled || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
|
# Needs the local filesystem — on-node runs only.
|
|
case "${ARCHY_HOST:-127.0.0.1}" in
|
|
127.0.0.1|localhost) : ;;
|
|
*) skip "data-dir residue check is on-node only (ARCHY_HOST=${ARCHY_HOST})" ;;
|
|
esac
|
|
[[ ! -e "$CASCADE_DATA_DIR" ]]
|
|
}
|
|
|
|
@test "reinstall ${CASCADE_APP} returns to running (#14)" {
|
|
cascade_enabled || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
|
|
|
run rpc_result package.install "{\"id\":\"${CASCADE_APP}\",\"dockerImage\":\"${CASCADE_IMAGE}\",\"containerConfig\":${CASCADE_CONFIG}}"
|
|
[ "$status" -eq 0 ]
|
|
run wait_app_state running 300
|
|
[ "$status" -eq 0 ]
|
|
}
|
|
|
|
@test "cleanup: uninstall ${CASCADE_APP} to leave the node as found" {
|
|
cascade_enabled || skip "ARCHY_ALLOW_CASCADE_DESTRUCTIVE not set"
|
|
run rpc_result package.uninstall "{\"id\":\"${CASCADE_APP}\"}"
|
|
[ "$status" -eq 0 ]
|
|
run wait_for_container_status "$CASCADE_APP" absent 180
|
|
[ "$status" -eq 0 ]
|
|
run wait_app_state absent 120
|
|
[ "$status" -eq 0 ]
|
|
}
|