#!/usr/bin/env bash # tests/lifecycle/os-audit.sh — one non-destructive OS-wide health gate. # # Ties together, in a single pass with one scorecard + exit code: # A. Backend / RPC health — node is up, not wedged mid-OTA, core daemons answer # B. All-apps lifecycle audit — every catalog app: valid state, real health, # reachable launch URL, populated launch metadata # (delegates to remote-lifecycle.sh, audit-only) # C. FM-guards — the concrete failure modes that have bitten the # fleet: port-drift (FM8), secret-completeness (FM2), # orphaned container states (FM9), OTA wedge (FM12) # # Everything here is READ-ONLY: no install/stop/start/uninstall, no service bounce. # Safe to run against a live production node. It is the per-boot building block the # reboot-survival harness (L3) calls after each reboot. # # Env: # ARCHY_HOST (default 127.0.0.1) # ARCHY_SCHEME (default https; use http for .116 / nginx-:80-only nodes) # ARCHY_PASSWORD (required) # ARCHY_LOCAL (auto: 1 when ARCHY_HOST is loopback) — gates host-only podman checks # # Usage: # ARCHY_HOST=127.0.0.1 ARCHY_SCHEME=http ARCHY_PASSWORD=... tests/lifecycle/os-audit.sh # # Exit: 0 = every section green; 1 = one or more checks failed; 2 = setup/usage error. set -uo pipefail HERE="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" ARCHY_HOST="${ARCHY_HOST:-127.0.0.1}" ARCHY_SCHEME="${ARCHY_SCHEME:-https}" ARCHY_PASSWORD="${ARCHY_PASSWORD:-}" BASE_URL="${ARCHY_SCHEME}://${ARCHY_HOST}" # Host-only checks (podman sweeps) make sense only when this script runs ON the node. if [[ -z "${ARCHY_LOCAL:-}" ]]; then case "$ARCHY_HOST" in 127.0.0.1|localhost|::1) ARCHY_LOCAL=1 ;; *) ARCHY_LOCAL=0 ;; esac fi if [[ -z "$ARCHY_PASSWORD" ]]; then echo "ARCHY_PASSWORD env var must be set." >&2 exit 2 fi for tool in curl jq; do command -v "$tool" >/dev/null 2>&1 || { echo "missing required tool: $tool" >&2; exit 2; } done # ── scorecard state ─────────────────────────────────────────────────────────── PASS=0; FAIL=0; WARN=0 declare -a RESULTS=() record() { # record