archy/scripts/test-cross-node.sh
Dorian 91cad8a9ab test: US-15 boot recovery tests — .228 passes 9/9, .198 needs CONT-02
- Add US-15 boot recovery test to test-cross-node.sh (--skip-reboot flag)
- .228: 32/32 containers survive all 3 reboots, 0 exited
- .198: sequential crash recovery blocks health for 260s
- Add federation rate limits (federation.join 5/60, peer RPCs 10/60)
- Add DWN message data size limit (10MB max)
- Known: .228 unreachable after reboot tests, needs physical access

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-14 02:54:16 +00:00

872 lines
42 KiB
Bash
Executable File

#!/usr/bin/env bash
# test-cross-node.sh — Master cross-node test suite for Archipelago
# Runs all acceptance tests from BOTH directions (.228→.198 and .198→.228)
# Usage: ./scripts/test-cross-node.sh [--iterations N] [--skip-reboot]
#
# Environment:
#   ARCHY_SUDO_PASS  sudo password used on both nodes (optional; when unset the
#                    built-in default below is used)
#
# Output: TAP format (Test Anything Protocol)
# Exit 0 only if ALL tests pass ALL iterations from BOTH directions.
set -euo pipefail
# ── Config ──────────────────────────────────────────────────────────────────
NODE_A="192.168.1.228"
NODE_B="192.168.1.198"
SSH_KEY="${HOME}/.ssh/archipelago-deploy"
# Kept as one string; the SSH helpers expand it unquoted so it word-splits.
SSH_OPTS="-i ${SSH_KEY} -o StrictHostKeyChecking=no -o ConnectTimeout=10"
ITERATIONS=10      # overridden by --iterations
SKIP_REBOOT=false  # overridden by --skip-reboot
# NOTE(security): this credential is committed in plain text. Prefer supplying
# it through ARCHY_SUDO_PASS and rotating the default.
SUDO_PASS="${ARCHY_SUDO_PASS:-EwPDR8q45l0Upx@}"
# TAP counters, updated by tap_ok / tap_fail.
PASS=0
FAIL=0
TEST_NUM=0
# ── Parse args ──────────────────────────────────────────────────────────────
# Parse the command-line flags into ITERATIONS and SKIP_REBOOT.
#   --iterations N   positive integer number of iterations per test
#   --skip-reboot    skip the US-15 boot recovery tests
# Prints a diagnostic on stderr and exits 1 on an unknown flag or on a missing
# or non-numeric --iterations value (previously a bare "unbound variable" error
# or a confusing `seq` failure much later).
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --iterations)
        if [[ $# -lt 2 || ! "$2" =~ ^[1-9][0-9]*$ ]]; then
          echo "--iterations requires a positive integer" >&2
          exit 1
        fi
        ITERATIONS="$2"
        shift 2
        ;;
      --skip-reboot)
        SKIP_REBOOT=true
        shift
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 1
        ;;
    esac
  done
}
parse_args "$@"
# ── Helpers ─────────────────────────────────────────────────────────────────
# Run a command on a node over SSH as the `archipelago` user.
#   $1   host to connect to
#   $2+  words of the remote command
# stderr of the ssh invocation is discarded; the exit status is ssh's.
ssh_cmd() {
  local target="archipelago@${1}"
  shift
  # SSH_OPTS is intentionally unquoted: it is one string holding several options.
  # shellcheck disable=SC2086
  ssh ${SSH_OPTS} "${target}" "$@" 2>/dev/null
}
# Run a command on a node under sudo, as the `archipelago` user.
#   $1   host to connect to
#   $2+  command to run; the words are joined with spaces and prefixed with
#        `sudo -S`, so in a remote pipeline only the first command is elevated
# The sudo password is sent on ssh's stdin (read by `sudo -S`) rather than being
# spliced into the remote command line. That keeps it out of process argument
# lists and works for passwords containing quotes.
# stderr of the ssh invocation is discarded; the exit status is ssh's.
ssh_sudo() {
  local host="$1"
  shift
  # SSH_OPTS is intentionally unquoted: it is one string holding several options.
  # shellcheck disable=SC2086
  ssh ${SSH_OPTS} "archipelago@${host}" "sudo -S $*" 2>/dev/null <<< "${SUDO_PASS}"
}
# Record a passing test: bump TEST_NUM and PASS, then print the TAP "ok" line.
#   $1  test description
tap_ok() {
  : "$((TEST_NUM += 1))" "$((PASS += 1))"
  printf 'ok %s - %s\n' "${TEST_NUM}" "$1"
}
# Record a failing test: bump TEST_NUM and FAIL, then print the TAP "not ok"
# line followed by a "# ..." diagnostic line.
#   $1  test description
#   $2  diagnostic text
tap_fail() {
  : "$((TEST_NUM += 1))" "$((FAIL += 1))"
  printf 'not ok %s - %s\n# %s\n' "${TEST_NUM}" "$1" "$2"
}
# Generic check: run a producer command, feed its stdout to a verifier command,
# and report the outcome through tap_ok / tap_fail.
#   $1  test description
#   $2  shell snippet whose stdout is captured (its failure is tolerated)
#   $3  shell snippet that receives the captured text on stdin; exit 0 = pass
# NOTE: both snippets go through `eval`; only pass trusted, literal strings.
run_check() {
  local desc="$1"
  local result
  result=$(eval "$2" 2>/dev/null) || true
  if ! eval "$3" <<< "$result" >/dev/null 2>&1; then
    tap_fail "$desc" "Got: ${result:-<empty>}"
    return
  fi
  tap_ok "$desc"
}
# ── Auth helper ─────────────────────────────────────────────────────────────
# Log in to a node's RPC endpoint and print the Set-Cookie response header
# lines (carriage returns are turned into newlines). Callers extract the
# `session` and `csrf_token` values from this output with sed.
#   $1  host
# Prints nothing when the login request fails or returns no cookies. Always
# returns 0: with `set -e -o pipefail` a failing curl/grep here would otherwise
# abort the whole suite at the caller's `x=$(get_session ...)` instead of
# letting the affected tests be reported as failures.
get_session() {
  local host="$1"
  {
    curl -s -D- -o/dev/null -X POST \
      -H "Content-Type: application/json" \
      -d '{"method":"auth.login","params":{"password":"password123"}}' \
      "http://${host}:5678/rpc/v1" 2>/dev/null |
      grep -i "set-cookie" | tr '\r' '\n'
  } || true
}
# POST a parameterless RPC request to a node and print the raw response body.
#   $1  host
#   $2  RPC method name (inserted verbatim into the JSON body)
#   $3  session cookie value
#   $4  CSRF token (sent both as a cookie and as the X-CSRF-Token header)
# Prints nothing when the request fails. Always returns 0 so that an
# unreachable node yields an empty response (and failing tests) rather than
# aborting the suite under `set -e` at `x=$(rpc_call ...)`.
rpc_call() {
  local host="$1"
  local method="$2"
  local session="$3"
  local csrf="$4"
  curl -s -X POST \
    -H "Content-Type: application/json" \
    -H "Cookie: session=${session}; csrf_token=${csrf}" \
    -H "X-CSRF-Token: ${csrf}" \
    -d "{\"method\":\"${method}\"}" \
    "http://${host}:5678/rpc/v1" 2>/dev/null || true
}
# TAP header plus a short description of this run, followed by a blank line.
cat <<EOF
TAP version 13
# Archipelago Cross-Node Test Suite
# Nodes: ${NODE_A} (A) ↔ ${NODE_B} (B)
# Iterations: ${ITERATIONS}
# Started: $(date -u +%Y-%m-%dT%H:%M:%SZ)

EOF
# ═══════════════════════════════════════════════════════════════════════════
# US-01: System Health
# ═══════════════════════════════════════════════════════════════════════════
# Per node and iteration, six TAP results: health endpoint, systemd services,
# available memory, load average, disk usage and exited containers.
# Remote probes end in `|| true` so that a failing probe (SSH error, inactive
# unit, ...) is reported as a failing test instead of aborting the suite under
# `set -e -o pipefail`.
echo "# --- US-01: System Health ---"
for node in "$NODE_A" "$NODE_B"; do
  if [[ "$node" == "$NODE_A" ]]; then
    node_label="A(.228)"
  else
    node_label="B(.198)"
  fi
  for i in $(seq 1 "$ITERATIONS"); do
    # Check 1: Health endpoint
    result=$(curl -s --connect-timeout 5 "http://${node}:5678/health" 2>/dev/null || echo "FAIL")
    if [[ "$result" == "OK" ]]; then
      tap_ok "US01-${node_label}-health-${i}"
    else
      tap_fail "US01-${node_label}-health-${i}" "Expected OK, got: ${result}"
    fi

    # Check 2: Services active
    # `systemctl is-active` exits non-zero when any unit is not active.
    svc_status=$(ssh_sudo "$node" "systemctl is-active archipelago nginx" 2>/dev/null | tr '\n' ' ') || true
    svc_status="${svc_status% }"
    # Exact match: a substring test for "active active" also accepts
    # "inactive active".
    if [[ "$svc_status" == "active active" ]]; then
      tap_ok "US01-${node_label}-services-${i}"
    else
      tap_fail "US01-${node_label}-services-${i}" "Services: ${svc_status}"
    fi

    # Check 3: Memory available > 500MB (relaxed from 1GB given tight memory)
    avail_kb=$(ssh_cmd "$node" "grep MemAvailable /proc/meminfo | awk '{print \$2}'" 2>/dev/null) || true
    if [[ "$avail_kb" =~ ^[0-9]+$ ]] && [[ "$avail_kb" -gt 512000 ]]; then
      tap_ok "US01-${node_label}-memory-${i} # available=${avail_kb}KB"
    else
      tap_fail "US01-${node_label}-memory-${i}" "Available: ${avail_kb:-unknown}KB (need >512000)"
    fi

    # Check 4: Load average < 2x cores
    cores=$(ssh_cmd "$node" "nproc" 2>/dev/null || echo "4")
    [[ "$cores" =~ ^[0-9]+$ ]] || cores=4
    load_1m=$(ssh_cmd "$node" "awk '{print \$1}' /proc/loadavg" 2>/dev/null) || true
    max_load=$((cores * 2))
    load_int=${load_1m%%.*} # integer part of the 1-minute load
    if [[ "$load_int" =~ ^[0-9]+$ ]] && [[ "$load_int" -lt "$max_load" ]]; then
      tap_ok "US01-${node_label}-load-${i} # load=${load_1m}, cores=${cores}"
    else
      tap_fail "US01-${node_label}-load-${i}" "Load ${load_1m} >= ${max_load} (${cores} cores x 2)"
    fi

    # Check 5: Disk usage < 85%
    disk_pct=$(ssh_cmd "$node" "df / --output=pcent | tail -1 | tr -d ' %'" 2>/dev/null) || true
    if [[ "$disk_pct" =~ ^[0-9]+$ ]] && [[ "$disk_pct" -lt 85 ]]; then
      tap_ok "US01-${node_label}-disk-${i} # ${disk_pct}%"
    else
      tap_fail "US01-${node_label}-disk-${i}" "Disk at ${disk_pct:-unknown}%"
    fi

    # Check 6: Zero exited containers
    # `grep -c` prints the count even when it is 0 (and then exits 1), so an
    # empty result means the probe itself failed; that is reported as a
    # failure rather than being counted as "0 exited".
    exited=$(ssh_sudo "$node" "podman ps -a --format '{{.State}}' | grep -c -i exited" 2>/dev/null | tail -1 | tr -d '[:space:]') || true
    if [[ "$exited" == "0" ]]; then
      tap_ok "US01-${node_label}-containers-${i}"
    else
      tap_fail "US01-${node_label}-containers-${i}" "${exited:-unknown} exited containers"
    fi
  done
done
# ═══════════════════════════════════════════════════════════════════════════
# US-02: Container Lifecycle
# ═══════════════════════════════════════════════════════════════════════════
# Per node and iteration: no exited containers, at least 20 running containers,
# and filebrowser running. On the first iteration filebrowser is stopped on
# purpose to verify that the health monitor restarts it.
# Remote probes end in `|| true` so a failing probe becomes a failing test
# instead of aborting the suite under `set -e -o pipefail`.
echo ""
echo "# --- US-02: Container Lifecycle ---"
for node in "$NODE_A" "$NODE_B"; do
  if [[ "$node" == "$NODE_A" ]]; then
    node_label="A(.228)"
  else
    node_label="B(.198)"
  fi
  for i in $(seq 1 "$ITERATIONS"); do
    # Check 1: All containers running (none exited)
    # An empty result means the probe failed; it must not count as "0 exited".
    exited=$(ssh_sudo "$node" "podman ps -a --format '{{.State}}' | grep -c -i exited" 2>/dev/null | tail -1 | tr -d '[:space:]') || true
    if [[ "$exited" == "0" ]]; then
      tap_ok "US02-${node_label}-all-running-${i}"
    else
      tap_fail "US02-${node_label}-all-running-${i}" "${exited:-unknown} exited containers"
    fi

    # Check 2: Container count matches expectations (>= 20)
    count=$(ssh_sudo "$node" "podman ps --format '{{.Names}}' | wc -l" 2>/dev/null | tail -1 | tr -d '[:space:]') || true
    if [[ "$count" =~ ^[0-9]+$ ]] && [[ "$count" -ge 20 ]]; then
      tap_ok "US02-${node_label}-container-count-${i} # count=${count}"
    else
      tap_fail "US02-${node_label}-container-count-${i}" "Only ${count:-0} containers running (need >=20)"
    fi

    # Check 3: Health monitor auto-restart (stop filebrowser, wait, verify it restarts)
    # Only run this on first iteration to avoid disruption
    if [[ "$i" -eq 1 ]]; then
      # Stop filebrowser
      ssh_sudo "$node" "podman stop filebrowser" 2>/dev/null || true
      echo "# Stopped filebrowser on ${node_label}, waiting for health monitor to restart..."
      # Wait up to 90s (18 polls x 5s) for the health monitor to restart it
      restarted=false
      for wait_i in $(seq 1 18); do
        sleep 5
        fb_state=$(ssh_sudo "$node" "podman inspect filebrowser --format '{{.State.Status}}'" 2>/dev/null | tail -1 | tr -d '[:space:]') || true
        if [[ "$fb_state" == "running" ]]; then
          restarted=true
          break
        fi
      done
      if [[ "$restarted" == "true" ]]; then
        tap_ok "US02-${node_label}-health-restart-${i} # filebrowser restarted in $((wait_i * 5))s"
      else
        tap_fail "US02-${node_label}-health-restart-${i}" "filebrowser not restarted after 90s"
        # Manually restart to not leave it broken
        ssh_sudo "$node" "podman start filebrowser" 2>/dev/null || true
      fi
    else
      # For subsequent iterations, just verify filebrowser is running
      fb_state=$(ssh_sudo "$node" "podman inspect filebrowser --format '{{.State.Status}}'" 2>/dev/null | tail -1 | tr -d '[:space:]') || true
      if [[ "$fb_state" == "running" ]]; then
        tap_ok "US02-${node_label}-filebrowser-running-${i}"
      else
        tap_fail "US02-${node_label}-filebrowser-running-${i}" "filebrowser state: ${fb_state:-unknown}"
      fi
    fi
  done
done
# ═══════════════════════════════════════════════════════════════════════════
# US-05: Tor Hidden Services
# ═══════════════════════════════════════════════════════════════════════════
# Reads each node's onion hostname, then checks on every iteration that each
# node can fetch the other's /health through its local Tor SOCKS proxy
# (127.0.0.1:9050). ONION_A / ONION_B are reused by later sections.
echo ""
echo "# --- US-05: Tor Hidden Services ---"
# Get onion addresses
# `|| true`: a missing hostname file or SSH failure must leave the variable
# empty (reported below as a failing test) rather than abort the suite.
# NOTE(review): the two nodes use different hidden-service paths — confirm this
# is intentional.
ONION_A=$(ssh_sudo "$NODE_A" "cat /var/lib/archipelago/tor/hidden_service_archipelago/hostname" 2>/dev/null | tail -1) || true
ONION_B=$(ssh_sudo "$NODE_B" "cat /var/lib/tor/hidden_service_archipelago/hostname" 2>/dev/null | tail -1) || true
echo "# Node A onion: ${ONION_A:-unknown}"
echo "# Node B onion: ${ONION_B:-unknown}"
for i in $(seq 1 "$ITERATIONS"); do
  # Test: .228 can reach .198 via Tor
  if [[ -z "$ONION_B" ]]; then
    tap_fail "US05-A→B-tor-${i}" "No onion address for B"
  else
    tor_result=$(ssh_cmd "$NODE_A" "curl --socks5-hostname 127.0.0.1:9050 -s --connect-timeout 30 http://${ONION_B}/health" 2>/dev/null || echo "FAIL")
    if [[ "$tor_result" == "OK" ]]; then
      tap_ok "US05-A→B-tor-${i}"
    else
      tap_fail "US05-A→B-tor-${i}" "Got: ${tor_result}"
    fi
  fi

  # Test: .198 can reach .228 via Tor
  if [[ -z "$ONION_A" ]]; then
    tap_fail "US05-B→A-tor-${i}" "No onion address for A"
  else
    tor_result=$(ssh_cmd "$NODE_B" "curl --socks5-hostname 127.0.0.1:9050 -s --connect-timeout 30 http://${ONION_A}/health" 2>/dev/null || echo "FAIL")
    if [[ "$tor_result" == "OK" ]]; then
      tap_ok "US05-B→A-tor-${i}"
    else
      tap_fail "US05-B→A-tor-${i}" "Got: ${tor_result}"
    fi
  fi
done
# ═══════════════════════════════════════════════════════════════════════════
# US-03: Federation Join (verify existing federation)
# ═══════════════════════════════════════════════════════════════════════════
# Per node and iteration, inspects the federation.list-nodes response: at least
# one peer, and for the FIRST listed peer a 'trusted' trust level, a DID
# starting with "did:", and a last_seen timestamp under 10 minutes old.
# Login and RPC calls end in `|| true` so an unreachable node yields failing
# tests instead of aborting the suite under `set -e`.
echo ""
echo "# --- US-03: Federation Join ---"
for node in "$NODE_A" "$NODE_B"; do
  if [[ "$node" == "$NODE_A" ]]; then
    node_label="A(.228)"
  else
    node_label="B(.198)"
  fi
  # Get session for RPC calls
  session_header=$(get_session "$node") || true
  session_val=$(echo "$session_header" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  csrf_val=$(echo "$session_header" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')
  for i in $(seq 1 "$ITERATIONS"); do
    # Call federation.list-nodes
    fed_result=$(rpc_call "$node" "federation.list-nodes" "$session_val" "$csrf_val") || true

    # Check 1: At least 1 peer present
    peer_count=$(echo "$fed_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print(len(d.get('result',{}).get('nodes',[])))" 2>/dev/null || echo "0")
    if [[ "$peer_count" -ge 1 ]]; then
      tap_ok "US03-${node_label}-peers-present-${i} # count=${peer_count}"
    else
      tap_fail "US03-${node_label}-peers-present-${i}" "No federation peers found"
    fi

    # Check 2: Trust level is 'trusted'
    trust=$(echo "$fed_result" | python3 -c "import sys,json; d=json.load(sys.stdin); nodes=d.get('result',{}).get('nodes',[]); print(nodes[0].get('trust_level','') if nodes else '')" 2>/dev/null || echo "")
    if [[ "$trust" == "trusted" ]]; then
      tap_ok "US03-${node_label}-trust-level-${i}"
    else
      tap_fail "US03-${node_label}-trust-level-${i}" "Trust level: ${trust:-unknown}"
    fi

    # Check 3: DID present
    did=$(echo "$fed_result" | python3 -c "import sys,json; d=json.load(sys.stdin); nodes=d.get('result',{}).get('nodes',[]); print(nodes[0].get('did','') if nodes else '')" 2>/dev/null || echo "")
    if [[ -n "$did" ]] && [[ "$did" == did:* ]]; then
      tap_ok "US03-${node_label}-did-present-${i}"
    else
      tap_fail "US03-${node_label}-did-present-${i}" "DID: ${did:-missing}"
    fi

    # Check 4: last_seen within 10 minutes
    # The Python below prints one of: ok, missing, never, stale:<age>, parse_error.
    last_seen=$(echo "$fed_result" | python3 -c "
import sys,json
from datetime import datetime, timezone, timedelta
d=json.load(sys.stdin)
nodes=d.get('result',{}).get('nodes',[])
if not nodes: print('missing'); sys.exit()
ls = nodes[0].get('last_seen','')
if not ls: print('never'); sys.exit()
try:
    dt = datetime.fromisoformat(ls.replace('Z','+00:00'))
    diff = datetime.now(timezone.utc) - dt
    print('ok' if diff < timedelta(minutes=10) else f'stale:{diff}')
except Exception:
    print('parse_error')
" 2>/dev/null || echo "error")
    if [[ "$last_seen" == "ok" ]]; then
      tap_ok "US03-${node_label}-last-seen-${i}"
    else
      tap_fail "US03-${node_label}-last-seen-${i}" "last_seen: ${last_seen}"
    fi
  done
done
# ═══════════════════════════════════════════════════════════════════════════
# US-04: Federation Sync
# ═══════════════════════════════════════════════════════════════════════════
# Per node and iteration, triggers federation.sync-state and checks that the
# response has results, at least one result has status 'ok', the first 'ok'
# result reports apps, and the first listed peer's last_seen is under 2
# minutes old afterwards.
# Login and RPC calls end in `|| true` so an unreachable node yields failing
# tests instead of aborting the suite under `set -e`.
echo ""
echo "# --- US-04: Federation Sync ---"
for node in "$NODE_A" "$NODE_B"; do
  if [[ "$node" == "$NODE_A" ]]; then
    node_label="A(.228)"
  else
    node_label="B(.198)"
  fi
  session_header=$(get_session "$node") || true
  session_val=$(echo "$session_header" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  csrf_val=$(echo "$session_header" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')
  for i in $(seq 1 "$ITERATIONS"); do
    # Trigger sync (rpc_call sends the same parameterless POST as an inline curl)
    sync_result=$(rpc_call "$node" "federation.sync-state" "$session_val" "$csrf_val") || true

    # Check 1: Sync returns results
    has_results=$(echo "$sync_result" | python3 -c "import sys,json; d=json.load(sys.stdin); r=d.get('result',{}); print('ok' if r and 'results' in r else 'no')" 2>/dev/null || echo "error")
    if [[ "$has_results" == "ok" ]]; then
      tap_ok "US04-${node_label}-sync-returns-${i}"
    else
      tap_fail "US04-${node_label}-sync-returns-${i}" "No sync results"
    fi

    # Check 2: At least one sync target succeeded
    sync_ok=$(echo "$sync_result" | python3 -c "import sys,json; d=json.load(sys.stdin); results=d.get('result',{}).get('results',[]); ok=[r for r in results if r.get('status')=='ok']; print(len(ok))" 2>/dev/null || echo "0")
    if [[ "$sync_ok" -ge 1 ]]; then
      tap_ok "US04-${node_label}-sync-success-${i} # ok=${sync_ok}"
    else
      tap_fail "US04-${node_label}-sync-success-${i}" "No successful syncs"
    fi

    # Check 3: Synced node has apps list
    # NOTE(review): 'apps' is assumed to be a count; if it is a list/dict its
    # length is used. Confirm against the RPC schema.
    apps_count=$(echo "$sync_result" | python3 -c "import sys,json; d=json.load(sys.stdin); results=d.get('result',{}).get('results',[]); ok=[r for r in results if r.get('status')=='ok']; a=ok[0].get('apps',0) if ok else 0; print(len(a) if isinstance(a,(list,dict)) else a)" 2>/dev/null || echo "0")
    # Guard the numeric test: a non-numeric value would be evaluated as an
    # arithmetic expression and can trip `set -u`.
    if [[ "$apps_count" =~ ^[0-9]+$ ]] && [[ "$apps_count" -gt 0 ]]; then
      tap_ok "US04-${node_label}-sync-apps-${i} # apps=${apps_count}"
    else
      tap_fail "US04-${node_label}-sync-apps-${i}" "Synced app count: ${apps_count}"
    fi

    # Check 4: last_seen updated after sync (re-check federation list)
    fed_after=$(rpc_call "$node" "federation.list-nodes" "$session_val" "$csrf_val") || true
    ls_fresh=$(echo "$fed_after" | python3 -c "
import sys,json
from datetime import datetime, timezone, timedelta
d=json.load(sys.stdin)
nodes=d.get('result',{}).get('nodes',[])
if not nodes: print('missing'); sys.exit()
ls = nodes[0].get('last_seen','')
if not ls: print('never'); sys.exit()
try:
    dt = datetime.fromisoformat(ls.replace('Z','+00:00'))
    diff = datetime.now(timezone.utc) - dt
    print('ok' if diff < timedelta(minutes=2) else f'stale:{diff}')
except Exception:
    print('parse_error')
" 2>/dev/null || echo "error")
    if [[ "$ls_fresh" == "ok" ]]; then
      tap_ok "US04-${node_label}-last-seen-fresh-${i}"
    else
      tap_fail "US04-${node_label}-last-seen-fresh-${i}" "last_seen after sync: ${ls_fresh}"
    fi
  done
done
# ═══════════════════════════════════════════════════════════════════════════
# US-07: File Sharing
# ═══════════════════════════════════════════════════════════════════════════
# Each iteration: add and list a test file on .228, browse .228's catalog from
# .198 via content.browse-peer, then the reverse (.198 adds, .228 browses).
# Relies on ONION_A / ONION_B set by the US-05 section.
echo ""
echo "# --- US-07: File Sharing ---"

# Section-local helper: POST a JSON-RPC payload and print the response body.
#   $1 host  $2 session cookie  $3 CSRF token  $4 JSON payload
#   $5+ extra curl options (e.g. --max-time 45)
# Always returns 0: a curl error or timeout produces an empty response (and a
# failing test) instead of aborting the suite under `set -e`.
us07_rpc() {
  local host="$1" session="$2" csrf="$3" payload="$4"
  shift 4
  curl -s "$@" -X POST \
    -H "Content-Type: application/json" \
    -H "Cookie: session=${session}; csrf_token=${csrf}" \
    -H "X-CSRF-Token: ${csrf}" \
    -d "${payload}" \
    "http://${host}:5678/rpc/v1" 2>/dev/null || true
}

# Create a test file on both nodes for sharing (use bash -c to keep entire command under sudo)
ssh_sudo "$NODE_A" "bash -c 'mkdir -p /var/lib/archipelago/content/files && echo test-content-from-228 > /var/lib/archipelago/content/files/test-share.txt && chown -R archipelago:archipelago /var/lib/archipelago/content'" 2>/dev/null || true
ssh_sudo "$NODE_B" "bash -c 'mkdir -p /var/lib/archipelago/content/files && echo test-content-from-198 > /var/lib/archipelago/content/files/test-share-b.txt && chown -R archipelago:archipelago /var/lib/archipelago/content'" 2>/dev/null || true

for i in $(seq 1 "$ITERATIONS"); do
  # --- .228 shares content, .198 browses ---
  # Get .228 auth
  session_header_a=$(get_session "$NODE_A") || true
  session_a=$(echo "$session_header_a" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  csrf_a=$(echo "$session_header_a" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')

  # Add content on .228
  add_result=$(us07_rpc "$NODE_A" "$session_a" "$csrf_a" \
    '{"method":"content.add","params":{"filename":"test-share.txt","mime_type":"text/plain","description":"Test share from 228"}}')
  has_item=$(echo "$add_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print('ok' if d.get('result',{}).get('item') else 'no')" 2>/dev/null || echo "error")
  if [[ "$has_item" == "ok" ]]; then
    tap_ok "US07-A-content-add-${i}"
  else
    tap_fail "US07-A-content-add-${i}" "content.add failed: ${add_result:0:80}"
  fi

  # List content on .228
  list_result=$(us07_rpc "$NODE_A" "$session_a" "$csrf_a" '{"method":"content.list-mine"}')
  item_count=$(echo "$list_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print(len(d.get('result',{}).get('items',[])))" 2>/dev/null || echo "0")
  if [[ "$item_count" -gt 0 ]]; then
    tap_ok "US07-A-content-listed-${i} # items=${item_count}"
  else
    tap_fail "US07-A-content-listed-${i}" "No items in catalog"
  fi

  # Browse .228's catalog from .198 over Tor
  session_header_b=$(get_session "$NODE_B") || true
  session_b=$(echo "$session_header_b" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  csrf_b=$(echo "$session_header_b" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')
  browse_result=$(us07_rpc "$NODE_B" "$session_b" "$csrf_b" \
    "{\"method\":\"content.browse-peer\",\"params\":{\"onion\":\"${ONION_A}\"}}" \
    --max-time 45)
  peer_items=$(echo "$browse_result" | python3 -c "import sys,json; d=json.load(sys.stdin); r=d.get('result',{}); items=r.get('items',[]); print(len(items))" 2>/dev/null || echo "0")
  if [[ "$peer_items" -gt 0 ]]; then
    tap_ok "US07-B-browse-A-${i} # items=${peer_items}"
  else
    tap_fail "US07-B-browse-A-${i}" "Could not browse .228 catalog: ${browse_result:0:80}"
  fi

  # --- Reverse: .198 shares, .228 browses ---
  add_result_b=$(us07_rpc "$NODE_B" "$session_b" "$csrf_b" \
    '{"method":"content.add","params":{"filename":"test-share-b.txt","mime_type":"text/plain","description":"Test share from 198"}}')
  has_item_b=$(echo "$add_result_b" | python3 -c "import sys,json; d=json.load(sys.stdin); print('ok' if d.get('result',{}).get('item') else 'no')" 2>/dev/null || echo "error")
  if [[ "$has_item_b" == "ok" ]]; then
    tap_ok "US07-B-content-add-${i}"
  else
    tap_fail "US07-B-content-add-${i}" "content.add failed on .198"
  fi

  # Browse .198's catalog from .228 over Tor
  browse_result_a=$(us07_rpc "$NODE_A" "$session_a" "$csrf_a" \
    "{\"method\":\"content.browse-peer\",\"params\":{\"onion\":\"${ONION_B}\"}}" \
    --max-time 45)
  peer_items_a=$(echo "$browse_result_a" | python3 -c "import sys,json; d=json.load(sys.stdin); r=d.get('result',{}); items=r.get('items',[]); print(len(items))" 2>/dev/null || echo "0")
  if [[ "$peer_items_a" -gt 0 ]]; then
    tap_ok "US07-A-browse-B-${i} # items=${peer_items_a}"
  else
    tap_fail "US07-A-browse-B-${i}" "Could not browse .198 catalog: ${browse_result_a:0:80}"
  fi
done
# Clean up test content entries (remove duplicates)
# Best effort: every step tolerates failure so cleanup can never abort the
# suite under `set -e -o pipefail`.
for node in "$NODE_A" "$NODE_B"; do
  session_header=$(get_session "$node") || true
  sv=$(echo "$session_header" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  cv=$(echo "$session_header" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')
  # Get all items and remove test ones
  items_json=$(curl -s -X POST \
    -H "Content-Type: application/json" \
    -H "Cookie: session=${sv}; csrf_token=${cv}" \
    -H "X-CSRF-Token: ${cv}" \
    -d '{"method":"content.list-mine"}' \
    "http://${node}:5678/rpc/v1" 2>/dev/null) || true
  # One id per line for every item whose filename contains 'test-share'.
  test_ids=$(echo "$items_json" | python3 -c "
import sys,json
d=json.load(sys.stdin)
items=d.get('result',{}).get('items',[])
test_items=[i['id'] for i in items if 'test-share' in i.get('filename','')]
for tid in test_items:
    print(tid)
" 2>/dev/null) || true
  while read -r tid; do
    # The here-string yields one empty line when there is nothing to remove.
    [[ -n "$tid" ]] || continue
    curl -s -X POST \
      -H "Content-Type: application/json" \
      -H "Cookie: session=${sv}; csrf_token=${cv}" \
      -H "X-CSRF-Token: ${cv}" \
      -d "{\"method\":\"content.remove\",\"params\":{\"id\":\"${tid}\"}}" \
      "http://${node}:5678/rpc/v1" >/dev/null 2>&1 || true
  done <<< "$test_ids"
done
# ═══════════════════════════════════════════════════════════════════════════
# US-08: DWN Sync
# ═══════════════════════════════════════════════════════════════════════════
printf '\n%s\n' "# --- US-08: DWN Sync ---"
# Protocol URI prefix for this run; the epoch-seconds suffix keeps runs apart.
# Each iteration appends "-<i>" to it.
TEST_PROTOCOL="https://archipelago.test/cross-node-$(date +%s)"
# Helper: trigger sync and wait for completion (polls dwn.status)
# Arguments:
#   $1  host
#   $2  session cookie value
#   $3  CSRF token
#   $4  maximum seconds to wait (default 120); polled every 5 seconds
# Outputs: the first sync_status that is not "syncing" ("unknown" when the
#   status response is missing or unparsable), or "timeout".
# Returns: 0 when a non-"syncing" status was seen, 1 on timeout.
# curl failures are tolerated (`|| true`) so that calling this as a plain
# command under `set -e` cannot abort the script on a transient network error.
trigger_sync_and_wait() {
  local host="$1" session="$2" csrf="$3" max_wait="${4:-120}"
  local elapsed=0
  local status_result sync_st
  # Trigger sync (returns immediately with "syncing")
  curl -s --max-time 10 -X POST \
    -H "Content-Type: application/json" \
    -H "Cookie: session=${session}; csrf_token=${csrf}" \
    -H "X-CSRF-Token: ${csrf}" \
    -d '{"method":"dwn.sync"}' \
    "http://${host}:5678/rpc/v1" >/dev/null 2>&1 || true
  # Poll until sync completes or times out
  while [[ $elapsed -lt $max_wait ]]; do
    sleep 5
    elapsed=$((elapsed + 5))
    status_result=$(curl -s --max-time 5 -X POST \
      -H "Content-Type: application/json" \
      -H "Cookie: session=${session}; csrf_token=${csrf}" \
      -H "X-CSRF-Token: ${csrf}" \
      -d '{"method":"dwn.status"}' \
      "http://${host}:5678/rpc/v1" 2>/dev/null) || true
    sync_st=$(echo "$status_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('result',{}).get('sync_status','unknown'))" 2>/dev/null || echo "unknown")
    if [[ "$sync_st" != "syncing" ]]; then
      echo "$sync_st"
      return 0
    fi
  done
  echo "timeout"
  return 1
}
# Section-local helper: POST a JSON-RPC payload and print the response body.
#   $1 host  $2 session cookie  $3 CSRF token  $4 JSON payload
# Always returns 0: a curl error produces an empty response (and a failing
# test) instead of aborting the suite under `set -e`.
us08_rpc() {
  local host="$1" session="$2" csrf="$3" payload="$4"
  curl -s -X POST \
    -H "Content-Type: application/json" \
    -H "Cookie: session=${session}; csrf_token=${csrf}" \
    -H "X-CSRF-Token: ${csrf}" \
    -d "${payload}" \
    "http://${host}:5678/rpc/v1" 2>/dev/null || true
}

# Each iteration registers a fresh protocol on .228, writes 3 messages there,
# syncs both nodes and expects the messages on .198; then writes one message on
# .198, syncs again and expects at least 4 messages on .228.
for i in $(seq 1 "$ITERATIONS"); do
  # Get auth for both nodes
  session_header_a=$(get_session "$NODE_A") || true
  session_a=$(echo "$session_header_a" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  csrf_a=$(echo "$session_header_a" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')
  session_header_b=$(get_session "$NODE_B") || true
  session_b=$(echo "$session_header_b" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  csrf_b=$(echo "$session_header_b" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')
  iter_protocol="${TEST_PROTOCOL}-${i}"

  # Check 1: Register protocol on .228
  reg_result=$(us08_rpc "$NODE_A" "$session_a" "$csrf_a" \
    "{\"method\":\"dwn.register-protocol\",\"params\":{\"protocol\":\"${iter_protocol}\",\"published\":true}}")
  reg_ok=$(echo "$reg_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print('ok' if d.get('result',{}).get('registered') else 'no')" 2>/dev/null || echo "error")
  if [[ "$reg_ok" == "ok" ]]; then
    tap_ok "US08-A-register-protocol-${i}"
  else
    tap_fail "US08-A-register-protocol-${i}" "register failed: ${reg_result:0:80}"
  fi

  # Check 2: Write 3 messages on .228
  write_ok=0
  for msg_i in 1 2 3; do
    w_result=$(us08_rpc "$NODE_A" "$session_a" "$csrf_a" \
      "{\"method\":\"dwn.write-message\",\"params\":{\"author\":\"did:key:test228\",\"protocol\":\"${iter_protocol}\",\"schema\":\"test/msg\",\"dataFormat\":\"application/json\",\"data\":{\"seq\":${msg_i},\"iter\":${i}}}}")
    written=$(echo "$w_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print('ok' if d.get('result',{}).get('written') else 'no')" 2>/dev/null || echo "error")
    if [[ "$written" == "ok" ]]; then
      write_ok=$((write_ok + 1))
    fi
  done
  if [[ "$write_ok" -eq 3 ]]; then
    tap_ok "US08-A-write-messages-${i} # wrote=3"
  else
    tap_fail "US08-A-write-messages-${i}" "Only ${write_ok}/3 messages written"
  fi

  # Check 3: Trigger DWN sync on .228 and wait for completion
  # `|| true`: the helper returns 1 on timeout; that must be reported below,
  # not abort the suite under `set -e`.
  sync_status=$(trigger_sync_and_wait "$NODE_A" "$session_a" "$csrf_a" 120) || true
  if [[ "$sync_status" == "synced" || "$sync_status" == "idle" ]]; then
    tap_ok "US08-A-sync-${i}"
  else
    tap_fail "US08-A-sync-${i}" "sync status: ${sync_status}"
  fi

  # Trigger sync on .198 to pull messages and wait
  trigger_sync_and_wait "$NODE_B" "$session_b" "$csrf_b" 120 >/dev/null 2>&1 || true

  # Check 4: Query messages on .198 — should have the 3 from .228
  query_result=$(us08_rpc "$NODE_B" "$session_b" "$csrf_b" \
    "{\"method\":\"dwn.query-messages\",\"params\":{\"protocol\":\"${iter_protocol}\"}}")
  msg_count=$(echo "$query_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('result',{}).get('count',0))" 2>/dev/null || echo "0")
  if [[ "$msg_count" -ge 3 ]]; then
    tap_ok "US08-B-received-messages-${i} # count=${msg_count}"
  else
    tap_fail "US08-B-received-messages-${i}" "Only ${msg_count}/3 messages synced to .198"
  fi

  # Check 5: Write on .198, sync, verify on .228 (reverse direction)
  us08_rpc "$NODE_B" "$session_b" "$csrf_b" \
    "{\"method\":\"dwn.write-message\",\"params\":{\"author\":\"did:key:test198\",\"protocol\":\"${iter_protocol}\",\"schema\":\"test/msg\",\"dataFormat\":\"application/json\",\"data\":{\"from\":\"198\",\"iter\":${i}}}}" \
    >/dev/null 2>&1
  # Sync .198 → .228
  trigger_sync_and_wait "$NODE_B" "$session_b" "$csrf_b" 120 >/dev/null 2>&1 || true
  # Pull on .228
  trigger_sync_and_wait "$NODE_A" "$session_a" "$csrf_a" 120 >/dev/null 2>&1 || true

  # Check 6: Query on .228 — should have 3 from .228 + synced from .198
  query_result_a=$(us08_rpc "$NODE_A" "$session_a" "$csrf_a" \
    "{\"method\":\"dwn.query-messages\",\"params\":{\"protocol\":\"${iter_protocol}\"}}")
  msg_count_a=$(echo "$query_result_a" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('result',{}).get('count',0))" 2>/dev/null || echo "0")
  if [[ "$msg_count_a" -ge 4 ]]; then
    tap_ok "US08-A-bidirectional-${i} # count=${msg_count_a}"
  else
    tap_fail "US08-A-bidirectional-${i}" "Expected >=4 messages on .228, got ${msg_count_a}"
  fi
done
# Clean up test protocols
# Best effort: removes the "<TEST_PROTOCOL>-<n>" protocol of every iteration on
# both nodes. Failures are ignored so cleanup cannot abort the suite under
# `set -e`.
for node in "$NODE_A" "$NODE_B"; do
  session_header=$(get_session "$node") || true
  sv=$(echo "$session_header" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  cv=$(echo "$session_header" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')
  for ci in $(seq 1 "$ITERATIONS"); do
    curl -s -X POST \
      -H "Content-Type: application/json" \
      -H "Cookie: session=${sv}; csrf_token=${cv}" \
      -H "X-CSRF-Token: ${cv}" \
      -d "{\"method\":\"dwn.remove-protocol\",\"params\":{\"protocol\":\"${TEST_PROTOCOL}-${ci}\"}}" \
      "http://${node}:5678/rpc/v1" >/dev/null 2>&1 || true
  done
done
# ═══════════════════════════════════════════════════════════════════════════
# US-09: NIP-07 Signing
# ═══════════════════════════════════════════════════════════════════════════
# Per node and iteration, fetches /app/mempool/ and checks that the page
# mentions "nostr-provider".
echo ""
echo "# --- US-09: NIP-07 Signing ---"
for node in "$NODE_A" "$NODE_B"; do
  if [[ "$node" == "$NODE_A" ]]; then
    node_label="A(.228)"
  else
    node_label="B(.198)"
  fi
  for i in $(seq 1 "$ITERATIONS"); do
    # Check: nostr-provider.js injected in app pages
    # `grep -c` prints "0" itself when nothing matches (and exits 1). Appending
    # `|| echo 0` inside the substitution would produce "0<newline>0" and an
    # arithmetic error in the test below, so the status is ignored outside.
    provider=$(curl -s --connect-timeout 5 "http://${node}/app/mempool/" 2>/dev/null | grep -c "nostr-provider") || true
    provider="${provider:-0}"
    if [[ "$provider" -gt 0 ]]; then
      tap_ok "US09-${node_label}-provider-${i}"
    else
      tap_fail "US09-${node_label}-provider-${i}" "nostr-provider.js not found in /app/mempool/"
    fi
  done
done
# ═══════════════════════════════════════════════════════════════════════════
# US-10: Backup/Restore
# ═══════════════════════════════════════════════════════════════════════════
# Per node and iteration: create an encrypted backup, find it in backup.list,
# verify it with the passphrase, then delete it. When creation fails the
# remaining three checks of that iteration are skipped.
echo ""
echo "# --- US-10: Backup/Restore ---"

# Section-local helper: POST a JSON-RPC payload and print the response body.
#   $1 host  $2 session cookie  $3 CSRF token  $4 curl --max-time seconds
#   $5 JSON payload
# Always returns 0: a curl error or timeout produces an empty response (and a
# failing test) instead of aborting the suite under `set -e`.
us10_rpc() {
  local host="$1" session="$2" csrf="$3" max_time="$4" payload="$5"
  curl -s --max-time "${max_time}" -X POST \
    -H "Content-Type: application/json" \
    -H "Cookie: session=${session}; csrf_token=${csrf}" \
    -H "X-CSRF-Token: ${csrf}" \
    -d "${payload}" \
    "http://${host}:5678/rpc/v1" 2>/dev/null || true
}

BACKUP_PASS="test-backup-passphrase-$(date +%s)"
for node in "$NODE_A" "$NODE_B"; do
  if [[ "$node" == "$NODE_A" ]]; then
    node_label="A(.228)"
  else
    node_label="B(.198)"
  fi
  session_header=$(get_session "$node") || true
  bk_session=$(echo "$session_header" | sed -n 's/.*session=\([^;]*\).*/\1/p')
  bk_csrf=$(echo "$session_header" | sed -n 's/.*csrf_token=\([^;]*\).*/\1/p')
  for i in $(seq 1 "$ITERATIONS"); do
    desc="test-backup-${node_label}-${i}"

    # Check 1: Create encrypted backup
    create_result=$(us10_rpc "$node" "$bk_session" "$bk_csrf" 30 \
      "{\"method\":\"backup.create\",\"params\":{\"passphrase\":\"${BACKUP_PASS}\",\"description\":\"${desc}\"}}")
    backup_id=$(echo "$create_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('result',{}).get('id',''))" 2>/dev/null || echo "")
    if [[ -n "$backup_id" && "$backup_id" != "None" ]]; then
      tap_ok "US10-${node_label}-create-${i} # id=${backup_id:0:8}"
    else
      tap_fail "US10-${node_label}-create-${i}" "create failed: ${create_result:0:100}"
      continue
    fi

    # Check 2: List backups — verify our backup appears
    list_result=$(us10_rpc "$node" "$bk_session" "$bk_csrf" 10 '{"method":"backup.list"}')
    # The id is passed as an argument (sys.argv[1]) instead of being spliced
    # into the Python source, so unusual characters cannot break the snippet.
    found=$(echo "$list_result" | python3 -c "import sys,json; d=json.load(sys.stdin); bks=d.get('result',{}).get('backups',[]); print('yes' if any(b.get('id')==sys.argv[1] for b in bks) else 'no')" "$backup_id" 2>/dev/null || echo "error")
    if [[ "$found" == "yes" ]]; then
      tap_ok "US10-${node_label}-list-${i}"
    else
      tap_fail "US10-${node_label}-list-${i}" "backup ${backup_id:0:8} not in list"
    fi

    # Check 3: Verify backup integrity
    verify_result=$(us10_rpc "$node" "$bk_session" "$bk_csrf" 30 \
      "{\"method\":\"backup.verify\",\"params\":{\"id\":\"${backup_id}\",\"passphrase\":\"${BACKUP_PASS}\"}}")
    valid=$(echo "$verify_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print('yes' if d.get('result',{}).get('valid') else 'no')" 2>/dev/null || echo "error")
    if [[ "$valid" == "yes" ]]; then
      tap_ok "US10-${node_label}-verify-${i}"
    else
      tap_fail "US10-${node_label}-verify-${i}" "verify failed: ${verify_result:0:100}"
    fi

    # Check 4: Delete backup
    delete_result=$(us10_rpc "$node" "$bk_session" "$bk_csrf" 10 \
      "{\"method\":\"backup.delete\",\"params\":{\"id\":\"${backup_id}\"}}")
    deleted=$(echo "$delete_result" | python3 -c "import sys,json; d=json.load(sys.stdin); print('yes' if d.get('result',{}).get('deleted') else 'no')" 2>/dev/null || echo "error")
    if [[ "$deleted" == "yes" ]]; then
      tap_ok "US10-${node_label}-delete-${i}"
    else
      tap_fail "US10-${node_label}-delete-${i}" "delete failed: ${delete_result:0:100}"
    fi
  done
done
# ═══════════════════════════════════════════════════════════════════════════
# US-15: Boot Recovery
# ═══════════════════════════════════════════════════════════════════════════
# Unless --skip-reboot was given, reboots each node REBOOT_ITERATIONS times and
# checks after each reboot that SSH returns, /health answers OK, the running
# container count is within 3 of the pre-reboot count, and no container is in
# the exited state.
# Remote probes end in `|| true` so a failing probe becomes a failing test
# instead of aborting the suite under `set -e -o pipefail`.
echo ""
echo "# --- US-15: Boot Recovery ---"
if [[ "$SKIP_REBOOT" == "false" ]]; then
  REBOOT_ITERATIONS=3
  for node in "$NODE_A" "$NODE_B"; do
    if [[ "$node" == "$NODE_A" ]]; then
      node_label="A(.228)"
    else
      node_label="B(.198)"
    fi
    for ri in $(seq 1 "$REBOOT_ITERATIONS"); do
      echo "# [$(date +%H:%M:%S)] Reboot test ${ri}/${REBOOT_ITERATIONS} on ${node_label}"
      # Record container count before reboot
      pre_count=$(ssh_sudo "$node" "podman ps --format '{{.Names}}' | wc -l" 2>/dev/null | tail -1 | tr -d '[:space:]') || true
      echo "# Pre-reboot containers: ${pre_count}"
      # Record the kernel boot id. It changes on every boot, which lets the SSH
      # poll below tell "back after reboot" apart from "has not gone down yet".
      pre_boot_id=$(ssh_cmd "$node" "cat /proc/sys/kernel/random/boot_id" 2>/dev/null) || true
      # Reboot the node
      ssh_sudo "$node" "reboot" 2>/dev/null || true

      # Wait for SSH to come back (poll every 10s, max 180s)
      echo "# Waiting for SSH..."
      ssh_back=false
      for poll in $(seq 1 18); do
        sleep 10
        cur_boot_id=$(ssh_cmd "$node" "cat /proc/sys/kernel/random/boot_id" 2>/dev/null) || true
        # If the pre-reboot id could not be read, any successful read counts.
        if [[ -n "$cur_boot_id" && "$cur_boot_id" != "$pre_boot_id" ]]; then
          ssh_back=true
          echo "# SSH back after $((poll * 10))s"
          break
        fi
      done
      if [[ "$ssh_back" != "true" ]]; then
        tap_fail "US15-${node_label}-ssh-back-${ri}" "SSH not available (or node did not reboot) after 180s"
        continue
      fi

      # Wait for backend health (poll every 5s, max 120s)
      # NOTE(review): this polls /health without the :5678 port that US-01
      # uses — confirm this is intentional.
      echo "# Waiting for backend health..."
      health_ok=false
      for poll in $(seq 1 24); do
        sleep 5
        if curl -s --max-time 5 "http://${node}/health" 2>/dev/null | grep -q OK; then
          health_ok=true
          echo "# Health OK after $((poll * 5))s"
          break
        fi
      done
      if [[ "$health_ok" == "true" ]]; then
        tap_ok "US15-${node_label}-health-${ri}"
      else
        tap_fail "US15-${node_label}-health-${ri}" "Backend not healthy after 120s"
        continue
      fi

      # Wait an additional 30s for containers to finish starting
      sleep 30

      # Verify containers recovered
      post_count=$(ssh_sudo "$node" "podman ps --format '{{.Names}}' | wc -l" 2>/dev/null | tail -1 | tr -d '[:space:]') || true
      # An empty result means the probe failed; it must not count as "0 exited".
      exited=$(ssh_sudo "$node" "podman ps -a --format '{{.State}}' | grep -c -i exited" 2>/dev/null | tail -1 | tr -d '[:space:]') || true
      echo "# Post-reboot containers: ${post_count} (was ${pre_count}), exited: ${exited:-unknown}"

      # Check: container count recovered (within 3 of pre-reboot)
      if [[ "$post_count" =~ ^[0-9]+$ ]] && [[ "$pre_count" =~ ^[0-9]+$ ]] && [[ "$post_count" -ge $((pre_count - 3)) ]]; then
        tap_ok "US15-${node_label}-containers-recovered-${ri} # ${post_count}/${pre_count}"
      else
        tap_fail "US15-${node_label}-containers-recovered-${ri}" "Only ${post_count:-0}/${pre_count:-?} containers"
      fi

      # Check: no containers exited
      if [[ "$exited" == "0" ]]; then
        tap_ok "US15-${node_label}-no-exited-${ri}"
      else
        tap_fail "US15-${node_label}-no-exited-${ri}" "${exited:-unknown} containers exited"
      fi
    done
  done
else
  echo "# SKIPPED (--skip-reboot flag set)"
fi
# ═══════════════════════════════════════════════════════════════════════════
# Summary
# ═══════════════════════════════════════════════════════════════════════════
# Print the TAP plan line (emitted last, once the total is known) and a short
# results footer, then exit 1 if any test failed and 0 otherwise.
TOTAL=$((PASS + FAIL))
cat <<EOF

1..${TOTAL}

# ═══════════════════════════════════════════════════════════════
# Results: ${PASS} passed, ${FAIL} failed, ${TOTAL} total
# Finished: $(date -u +%Y-%m-%dT%H:%M:%SZ)
# ═══════════════════════════════════════════════════════════════
EOF
# (FAIL > 0) evaluates to 1 when there were failures, else 0.
exit "$((FAIL > 0))"