#!/usr/bin/env bats
# tests/lifecycle/bats/use-quadlet-backends-install.bats
#
# Validates the post-condition of Phase 3.2's `use_quadlet_backends`
# install path. When the orchestrator routed at least one backend
# install through `install_via_quadlet`, this suite asserts that the
# resulting state has the four properties the Phase 3 design promises:
#
#   1. A `.container` unit file exists in ~/.config/containers/systemd/
#      and is well-formed (required sections + directives).
#   2. The corresponding `.service` is active under `systemctl --user`.
#   3. The container is in `podman ps` (running).
#   4. The container's cgroup is under `user.slice/...`, NOT under
#      `archipelago.service` — proving FM3 (cgroup cascade SIGKILL on
#      archipelago restart) is structurally fixed for that container.
#
# Auto-skips if no Quadlet-managed backend exists yet — so it runs as a
# no-op on nodes where `use_quadlet_backends` is still false (today's
# default), and turns into a hard regression gate as soon as anyone
# flips the flag and reinstalls.
#
# Run on a node with rootless podman + systemd-user (every alpha-fleet
# box). No env vars required for the read-only checks. The cleanup
# section at the bottom is gated by ARCHY_ALLOW_DESTRUCTIVE=1.

# Print the per-user Quadlet unit directory.
# Globals:   XDG_CONFIG_HOME (read; falls back to $HOME/.config when unset
#            or empty), HOME (read)
# Outputs:   the directory path on stdout; the directory is not created or
#            checked for existence here
quadlet_dir() {
  printf '%s\n' "${XDG_CONFIG_HOME:-$HOME/.config}/containers/systemd"
}

# List Quadlet `.container` units that correspond to backend containers
# (i.e., NOT companions like archy-*-ui, which already shipped via Quadlet
# in v1.7.41 and have their own coverage in companion-survives-archipelago-
# restart.bats). Echoes one container name per line; empty if none found.
# Returns 0 even when the directory is missing or holds no matching units.
backend_quadlet_units() {
  local dir unit_file name
  dir="$(quadlet_dir)"
  [[ -d "$dir" ]] || return 0
  for unit_file in "$dir"/*.container; do
    # An unmatched glob stays literal; skip it instead of emitting "*".
    [[ -e "$unit_file" ]] || continue
    # Strip the directory and the .container extension without forking.
    name="${unit_file##*/}"
    name="${name%.container}"
    # Filter out archy-*-ui companions.
    [[ "$name" =~ ^archy-.*-ui$ ]] && continue
    printf '%s\n' "$name"
  done
}

# Read the cgroup path of a running container's main process.
For # rootless podman the conmon-run target lands the container's pid1 in # the cgroup that owns its supervising .service. container_cgroup_path() { local name="$1" local pid pid="$(podman inspect --format '{{.State.Pid}}' "$name" 2>/dev/null)" [[ -n "$pid" && "$pid" != "0" ]] || return 1 # cgroup v2 line: "0::/path/to/cgroup" awk -F: '$1=="0"{print $3}' "/proc/$pid/cgroup" 2>/dev/null } # Per-test gate. Each @test calls this so the suite is a clean no-op on # nodes where use_quadlet_backends is still false (today's default) — # bats doesn't propagate setup-level skip semantics across @test blocks. require_quadlet_backends() { local count count="$(backend_quadlet_units | wc -l)" (( count > 0 )) || skip "no backend .container units in $(quadlet_dir) — use_quadlet_backends not enabled or no backends installed" } @test "Quadlet unit dir exists or is plausibly creatable" { local d d="$(quadlet_dir)" # Either it already exists, or its parent does (so quadlet can mkdir it). [[ -d "$d" ]] || [[ -d "$(dirname "$d")" ]] \ || skip "no XDG_CONFIG_HOME and no \$HOME/.config — not a desktop-style host" } @test "each backend Quadlet unit has the required sections + directives" { require_quadlet_backends local d d="$(quadlet_dir)" while read -r name; do [[ -z "$name" ]] && continue local body body="$(<"$d/$name.container")" # [Container] section + Image= [[ "$body" == *"[Container]"* ]] || fail "$name: missing [Container] section" [[ "$body" == *"Image="* ]] || fail "$name: missing Image= directive" # [Service] section with the Phase 3.2 backend invariant: Restart=on-failure. # Companions use Restart=always; backends use on-failure so an operator-issued # `systemctl stop` actually stays stopped. [[ "$body" == *"[Service]"* ]] || fail "$name: missing [Service] section" [[ "$body" == *"Restart=on-failure"* ]] \ || fail "$name: backend unit must use Restart=on-failure (got companion-style Restart=always)" # [Install] section so `systemctl --user enable` is well-defined. 
[[ "$body" == *"[Install]"* ]] || fail "$name: missing [Install] section" [[ "$body" == *"WantedBy="* ]] || fail "$name: missing WantedBy= in [Install]" done < <(backend_quadlet_units) } @test "Phase 3.4: any unit emitting HealthCmd= also emits Notify=healthy" { require_quadlet_backends local d d="$(quadlet_dir)" while read -r name; do [[ -z "$name" ]] && continue local body body="$(<"$d/$name.container")" if [[ "$body" == *"HealthCmd="* ]]; then [[ "$body" == *"Notify=healthy"* ]] \ || fail "$name: HealthCmd= present but Notify=healthy missing — systemctl start won't gate on health" fi done < <(backend_quadlet_units) } @test "every backend Quadlet unit's .service is active in systemctl --user" { require_quadlet_backends while read -r name; do [[ -z "$name" ]] && continue run systemctl --user is-active "$name.service" [[ "$status" -eq 0 ]] || fail "$name.service is '$output' — expected 'active'" done < <(backend_quadlet_units) } @test "every backend Quadlet unit has a running podman container" { require_quadlet_backends while read -r name; do [[ -z "$name" ]] && continue run sh -c "podman inspect --format '{{.State.Running}}' '$name'" [[ "$status" -eq 0 ]] || fail "$name not present in podman" [[ "$output" == "true" ]] || fail "$name container exists but not running (state=$output)" done < <(backend_quadlet_units) } @test "FM3 fix: backend cgroup is under user.slice, not archipelago.service" { require_quadlet_backends # The whole point of Phase 3 — verify the kernel-level invariant. while read -r name; do [[ -z "$name" ]] && continue local cg cg="$(container_cgroup_path "$name")" || skip "$name has no readable PID; container may have crashed mid-test" [[ -n "$cg" ]] || fail "$name: empty cgroup path" # Acceptable: anything under user.slice (rootless podman lands here when # quadlet-managed). Forbidden: anything under archipelago.service's tree. 
[[ "$cg" == *"user.slice"* ]] \ || fail "$name: cgroup '$cg' is not under user.slice — FM3 cascade still possible" [[ "$cg" != *"archipelago.service"* ]] \ || fail "$name: cgroup '$cg' is under archipelago.service — Phase 3 promise broken" done < <(backend_quadlet_units) }