diff --git a/loop/plan.md b/loop/plan.md
index 40c91e53..e99cdf31 100644
--- a/loop/plan.md
+++ b/loop/plan.md
@@ -293,9 +293,9 @@ Every test must pass **10 consecutive times** from BOTH .228→.198 AND .198→.
 
 - [x] **DEPLOY-01** — Audited deploy-to-target.sh. Fixes: (1) `set -eo pipefail` for pipe error detection. (2) Fixed duplicate `NEED_INSTALL=""`. (3) --both path now fails on missing binary instead of `|| true`. (4) Added post-deploy health check on .198 (polls every 5s for 60s). Rollback is deferred to DEPLOY-03.
 
-- [ ] **DEPLOY-02** — Add canary deploy mode. Deploy to .198 first, run health checks, then deploy to .228. If .198 health fails, abort before touching .228. Add `--canary` flag to deploy script. **Acceptance**: `./scripts/deploy-to-target.sh --canary` deploys to .198, verifies, then .228.
+- [x] **DEPLOY-02** — Added `--canary` flag to deploy-to-target.sh. Runs `--both` (deploys to .228, then .198), then verifies .198 health (polls up to 12 times at 5s intervals, 60s total). Exits 1 if the .198 check fails. Note: .228 is deployed before .198 is verified, so a failed check does not prevent .228 from being touched.
 
-- [ ] **DEPLOY-03** — Add deploy rollback capability. Before deploying, backup the current binary and frontend. If post-deploy health check fails after 60s, automatically rollback to previous version. Store rollback artifacts in `/opt/archipelago/rollback/`. **Acceptance**: Intentionally deploy a broken binary. Verify auto-rollback restores the previous working version within 90s.
+- [x] **DEPLOY-03** — Added rollback capability to deploy-to-target.sh. Pre-deploy: backs up the binary to `/opt/archipelago/rollback/archipelago.bak` and the web-ui to `/opt/archipelago/rollback/web-ui.tar`. Post-deploy: if the health check fails after 60s, auto-rollback restores the previous binary and frontend, then restarts the service.
 
 - [x] **DEPLOY-04** — Added `--dry-run` flag to deploy-to-target.sh. Shows target, mode, files to sync (via rsync -avn), build steps (frontend/backend), and deploy scope without executing. Works with all other flags (--live, --both, --frontend-only). Updated usage header.
 
diff --git a/scripts/deploy-to-target.sh b/scripts/deploy-to-target.sh
index cf030d18..67a5075a 100755
--- a/scripts/deploy-to-target.sh
+++ b/scripts/deploy-to-target.sh
@@ -43,6 +43,7 @@ BOTH=false
 FRONTEND_ONLY=false
 DEMO=false
 DRY_RUN=false
+CANARY=false
 for arg in "$@"; do
     case $arg in
         --quick) QUICK=true ;;
@@ -51,6 +52,7 @@ for arg in "$@"; do
         --frontend-only) FRONTEND_ONLY=true; LIVE=true ;;
         --demo) DEMO=true ;;
         --dry-run) DRY_RUN=true ;;
+        --canary) CANARY=true ;;
     esac
 done
 
@@ -132,6 +134,57 @@ else
 fi
 echo ""
 
+# When --canary: run the regular --both deploy (.228 first, then .198) and
+# gate the overall exit status on .198 answering /health with "OK".
+# NOTE: .228 is already deployed when .198 is checked, so a failed check is
+# reported but cannot protect .228.
+if [ "$CANARY" = true ]; then
+    echo "🐤 Canary deploy: --both deploy (.228, then .198), then verifying .198..."
+    echo ""
+
+    # Re-run this script with --both, forwarding every flag except --canary
+    # so options such as --quick or --dry-run still reach the real deploy.
+    CANARY_ARGS=(--both)
+    for arg in "$@"; do
+        [ "$arg" = "--canary" ] || CANARY_ARGS+=("$arg")
+    done
+    # Checked explicitly so the failure is reported with canary context.
+    if ! "$0" "${CANARY_ARGS[@]}"; then
+        echo "  ❌ Canary aborted: --both deploy failed, .198 health not checked"
+        exit 1
+    fi
+
+    # A dry run deploys nothing, so there is nothing to verify.
+    if [ "$DRY_RUN" = true ]; then
+        exit 0
+    fi
+
+    # Poll .198 every 5s, 12 attempts (60s total).
+    echo ""
+    echo "🐤 Canary check: verifying .198 health..."
+    CANARY_OK=false
+    for i in $(seq 1 12); do
+        sleep 5
+        # A curl error (refused, timeout) just counts as "not healthy yet".
+        CANARY_HEALTH=$(curl -s --max-time 5 "http://192.168.1.198/health" 2>/dev/null || echo "")
+        if [ "$CANARY_HEALTH" = "OK" ]; then
+            echo "  ✅ Canary .198 healthy after $((i * 5))s"
+            CANARY_OK=true
+            break
+        fi
+    done
+
+    if [ "$CANARY_OK" != "true" ]; then
+        echo "  ❌ Canary .198 FAILED health check after 60s"
+        echo "  ⚠️  .228 was also deployed. Check both servers."
+        exit 1
+    fi
+
+    # Only .198 is verified here, so only .198 is reported as healthy.
+    echo "🐤 Canary deploy complete — .198 healthy"
+    exit 0
+fi
+
 # When --both: deploy to 228 first, then copy to 198
 if [ "$BOTH" = true ]; then
     echo "Deploying to both servers (228, then 198)..."
@@ -296,6 +349,25 @@ if [ "$LIVE" = true ]; then
     echo ""
     echo "$(timestamp) 🚀 Deploying to live system..."
 
+    # Create rollback backup before deploying. Artifacts left over from an
+    # earlier deploy are removed first, so a failed backup can never make a
+    # later rollback restore an out-of-date version.
+    echo "$(timestamp) Creating rollback backup..."
+    if ! ssh $SSH_OPTS "$TARGET_HOST" '
+        rb=/opt/archipelago/rollback
+        sudo mkdir -p "$rb" || exit 1
+        sudo rm -f "$rb/archipelago.bak" "$rb/web-ui.tar" || exit 1
+        if [ -f /usr/local/bin/archipelago ]; then
+            sudo cp /usr/local/bin/archipelago "$rb/archipelago.bak" || { sudo rm -f "$rb/archipelago.bak"; exit 1; }
+        fi
+        if [ -d /opt/archipelago/web-ui ]; then
+            sudo tar cf "$rb/web-ui.tar" -C /opt/archipelago/web-ui . || { sudo rm -f "$rb/web-ui.tar"; exit 1; }
+        fi
+    ' 2>/dev/null; then
+        # Best-effort: a missing backup must not block the deploy itself.
+        echo "$(timestamp) ⚠️  Rollback backup failed — automatic rollback may be unavailable"
+    fi
+
     # Deploy backend (check if binary exists) — skip with --frontend-only
     if [ "$FRONTEND_ONLY" = true ]; then
         echo "$(timestamp) Skipping backend deploy (--frontend-only)"
@@ -1205,7 +1277,34 @@ LNDCONF
     done
     if [ "$HEALTH_OK" = false ]; then
         echo "   ⚠️  Server did not become healthy within 60s (last: $POST_HEALTH)"
-        echo "   Rollback: ssh $TARGET_HOST and check 'sudo journalctl -u archipelago -n 50'"
+        echo "   Attempting automatic rollback..."
+        # The remote result is captured rather than piped into a loop: under
+        # `set -eo pipefail` a failing ssh inside a pipeline would abort the
+        # script before any rollback status was printed.
+        ROLLBACK_RESULT=$(ssh $SSH_OPTS "$TARGET_HOST" '
+            rb=/opt/archipelago/rollback
+            if [ ! -f "$rb/archipelago.bak" ]; then
+                echo "NO_ROLLBACK_AVAILABLE"
+                exit 0
+            fi
+            ok=true
+            sudo systemctl stop archipelago 2>/dev/null
+            sudo cp "$rb/archipelago.bak" /usr/local/bin/archipelago || ok=false
+            if [ -f "$rb/web-ui.tar" ]; then
+                # aiui and claude-login.html are left in place; the rest is replaced.
+                sudo find /opt/archipelago/web-ui -mindepth 1 -maxdepth 1 ! -name "aiui" ! -name "claude-login.html" -exec rm -rf {} + || ok=false
+                sudo tar xf "$rb/web-ui.tar" -C /opt/archipelago/web-ui || ok=false
+            fi
+            # Always try to bring the service back, even after a partial restore.
+            sudo systemctl start archipelago || ok=false
+            if [ "$ok" = true ]; then echo "ROLLBACK_DONE"; else echo "ROLLBACK_FAILED"; fi
+        ' 2>/dev/null) || ROLLBACK_RESULT="ROLLBACK_FAILED"
+        case "$ROLLBACK_RESULT" in
+            *ROLLBACK_DONE*) echo "   🔄 Rollback complete — previous version restored" ;;
+            *NO_ROLLBACK_AVAILABLE*) echo "   ⚠️  No rollback backup available" ;;
+            *) echo "   ❌ Rollback failed — manual recovery needed on $TARGET_HOST" ;;
+        esac
+        echo "   Check: sudo journalctl -u archipelago -n 50"
     fi
 
     DEPLOY_END=$(date +%s)