fix(ci): QEMU boot test ignores trailing numeric arg + enforces timeout
The CI workflow calls `test-iso-qemu.sh "$ISO" 120`. The old arg parser had a `case *) ISO=...` fallthrough that silently let the second positional `120` overwrite ISO, so QEMU went looking for a file literally named "120". That's the "failed step" the user was seeing on recent ISO runs — the rest of the job succeeded because the QEMU step has `continue-on-error: true`. Changes: - Treat `--timeout=N` or a bare numeric argument as a CI timeout in seconds; the ISO path keeps the positional slot. - When a timeout is set, force `--nographic` (CI has no DISPLAY anyway) and wrap the QEMU invocation in coreutils' `timeout` so the script always returns instead of hanging. - After termination (or timeout), grep the serial log for well-known systemd/live-boot markers. Pass if the kernel reached userspace, fail if no marker appeared within the window — a more useful signal than the previous "did the VM shut itself off" proxy. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f5c581a725
commit
7655a5971b
@ -14,16 +14,31 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
SERIAL_LOG="/tmp/archipelago-qemu-serial.log"
|
||||
FORCE_BIOS=false
|
||||
NOGRAPHIC=false
|
||||
TIMEOUT=0
|
||||
ISO=""
|
||||
|
||||
# Simple arg parsing. Flags are matched first; an argument made up solely
# of digits (e.g. `120`) is taken as a boot-test timeout in seconds so CI
# can call `test-iso-qemu.sh <iso> 120` without hanging the job; any other
# argument is the ISO path (the last one given wins).
#
# The digits-only test is deliberate: a `[0-9]*` case pattern is a glob
# meaning "starts with a digit", so it would also swallow an ISO named
# e.g. `2024-05-build.iso` as the timeout. The pre-fix version had only
# `case *) ISO=...`, which silently overwrote ISO with the timeout value
# and sent QEMU looking for a file literally named "120".
#
# Globals:   FORCE_BIOS, NOGRAPHIC, TIMEOUT, ISO (written)
# Arguments: the script's command-line arguments
_parse_cli_args() {
  local arg value
  for arg in "$@"; do
    case "$arg" in
      --bios) FORCE_BIOS=true ;;
      --nographic) NOGRAPHIC=true ;;
      --timeout=*)
        value="${arg#--timeout=}"
        case "$value" in
          '' | *[!0-9]*)
            # Keep the current TIMEOUT instead of storing a value that the
            # later numeric tests would reject without any diagnostic.
            printf 'warning: ignoring non-numeric %s\n' "$arg" >&2
            ;;
          *) TIMEOUT="$value" ;;
        esac
        ;;
      # Empty, or contains at least one non-digit: a path, not a timeout.
      '' | *[!0-9]*) ISO="$arg" ;;
      # Only digits remain at this point.
      *) TIMEOUT="$arg" ;;
    esac
  done
}
_parse_cli_args "$@"
|
||||
|
||||
# A positive TIMEOUT implies headless (no DISPLAY in CI anyway) and keeps
|
||||
# the QEMU invocation wrapped in `timeout` to guarantee the job returns.
|
||||
if [ "$TIMEOUT" -gt 0 ] 2>/dev/null; then
|
||||
NOGRAPHIC=true
|
||||
fi
|
||||
|
||||
# Auto-detect ISO
|
||||
if [ -z "$ISO" ]; then
|
||||
ISO=$(ls -t "$SCRIPT_DIR"/results/archipelago-installer-unbundled-*.iso 2>/dev/null | head -1)
|
||||
@ -88,20 +103,55 @@ if [ "$FORCE_BIOS" = false ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -n "$OVMF" ]; then
|
||||
# Launch QEMU with the firmware selected earlier in the script.
#
# Arguments: optional command prefix ("$@") placed directly in front of
#            qemu-system-x86_64, e.g. `timeout 120s`. With no arguments
#            QEMU is started directly.
# Globals:   OVMF (read)      - firmware image path; empty selects legacy BIOS
#            QEMU_ARGS (read) - array holding the remaining QEMU options
# Outputs:   one "Boot: ..." line on stdout, then whatever QEMU prints
# Returns:   exit status of the launched command
run_qemu() {
  if [ -n "$OVMF" ]; then
    echo " Boot: UEFI ($OVMF)"
    "$@" qemu-system-x86_64 \
      -machine q35 \
      -drive if=pflash,format=raw,readonly=on,file="$OVMF" \
      "${QEMU_ARGS[@]}"
  else
    echo " Boot: Legacy BIOS"
    "$@" qemu-system-x86_64 \
      -machine pc \
      "${QEMU_ARGS[@]}"
  fi
}

# Wrap the QEMU invocation in `timeout` when a CI caller passed one so
# the script always returns instead of hanging on a VM that never exits
# its boot loop.
#
# `timeout` is handed to run_qemu as a prefix so that it wraps QEMU
# itself. Going through `bash -c "$(declare -f run_qemu); run_qemu"`
# does not work: the child shell does not receive the QEMU_ARGS array
# (arrays cannot be exported), and with --foreground `timeout` only
# signals its direct child, which would leave QEMU running.
#
# --preserve-status is intentionally not used: without it coreutils'
# timeout exits 124 when the window elapses and 137 when it had to
# follow up with SIGKILL (--kill-after). Both mean "VM reached the
# timeout", which for a CI boot test is success as long as the serial
# log shows a kernel reaching userspace — we inspect that after the
# QEMU process ends.
#
# `|| rc=$?` captures the status without tripping `set -e`, should the
# script enable it.
rc=0
if [ "$TIMEOUT" -gt 0 ] 2>/dev/null; then
  run_qemu timeout --foreground --kill-after=10s "${TIMEOUT}s" || rc=$?
  if [ "$rc" -eq 124 ] || [ "$rc" -eq 137 ]; then
    echo "(QEMU terminated after ${TIMEOUT}s boot-test window)"
    rc=0
  fi
else
  run_qemu || rc=$?
fi
|
||||
|
||||
# Post-run report: show where the serial log lives and its tail.
echo ""
echo "VM stopped. Serial log: $SERIAL_LOG"
echo "Last 20 lines:"
# The log may not exist if QEMU never started; say so instead of
# printing nothing.
tail -n 20 -- "$SERIAL_LOG" 2>/dev/null || echo "(serial log not available)"

# Boot-sanity check: the CI wrapper wants a non-zero exit only when the
# kernel never reached userspace. Look for a well-known marker emitted
# by live-boot/systemd early in the sequence. If the marker never
# appeared, surface the real failure; otherwise treat "timeout reached
# with a live kernel" as a pass.
#
# A missing serial log in timeout mode is a failure too: the launch step
# maps a timeout to rc=0, so falling through to the final `exit` would
# report success without any evidence that the VM booted.
if [ "$TIMEOUT" -gt 0 ] 2>/dev/null; then
  if [ ! -f "$SERIAL_LOG" ]; then
    echo " Boot sanity: FAIL — serial log $SERIAL_LOG was not created"
    exit 1
  fi
  if grep -qE "Welcome to Debian|Reached target|systemd\[1\]:" "$SERIAL_LOG"; then
    echo " Boot sanity: OK (systemd reached in serial log)"
    exit 0
  fi
  echo " Boot sanity: FAIL — no systemd markers in serial log within ${TIMEOUT}s"
  exit 1
fi

# Interactive (no-timeout) runs propagate QEMU's own exit status.
exit "${rc:-0}"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user