#!/bin/bash
# node-profile.sh — CPU/memory/container profiling across all Archipelago nodes
#
# Usage:
#   ./scripts/node-profile.sh                          # All reachable nodes
#   ./scripts/node-profile.sh 192.168.1.228            # Single node
#   ./scripts/node-profile.sh --watch                  # Repeat every 30s
#   ./scripts/node-profile.sh --watch --interval 10    # Repeat every 10s
#
# Requires: SSH key at ~/.ssh/archipelago-deploy (or ARCHIPELAGO_SSH_KEY)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# NOTE(review): ssh_cmd and the DEFAULT_*/TAILSCALE_* variables used below are
# not defined in this file; presumably they come from these sourced files.
source "$SCRIPT_DIR/lib/common.sh"
source "$SCRIPT_DIR/deploy-config-defaults.sh"
[ -f "$SCRIPT_DIR/deploy-config.sh" ] && source "$SCRIPT_DIR/deploy-config.sh"

# Known nodes and their display labels; the two arrays are index-aligned.
ALL_NODES=(
  "$DEFAULT_PRIMARY"
  "$DEFAULT_SECONDARY"
  "$TAILSCALE_ARCH1"
  "$TAILSCALE_ARCH2"
  "$TAILSCALE_ARCH3"
)
NODE_LABELS=(
  "primary (.228)"
  "secondary (.198)"
  "tailscale-1"
  "tailscale-2"
  "tailscale-3"
)

WATCH_MODE=false
WATCH_INTERVAL=30
TARGET_NODES=()
TARGET_LABELS=()

# Temp dir of the probe round currently in flight (empty when none); removed
# by cleanup_tmpdir if the script exits mid-round (e.g. Ctrl+C in --watch).
PROFILE_TMPDIR=""

# Print the display label for a node address: the NODE_LABELS entry whose
# ALL_NODES counterpart equals $1, or the address itself when it is not a
# known node.
label_for_node() {
  local ip="$1"
  local idx
  for idx in "${!ALL_NODES[@]}"; do
    if [[ "${ALL_NODES[$idx]}" == "$ip" ]]; then
      printf '%s\n' "${NODE_LABELS[$idx]:-$ip}"
      return 0
    fi
  done
  printf '%s\n' "$ip"
}

# ── Parse args ─────────────────────────────────────────────────────────
while [[ $# -gt 0 ]]; do
  case "$1" in
    --watch)
      WATCH_MODE=true
      shift
      ;;
    --interval)
      # Without this check a missing value dies with "unbound variable".
      if [[ $# -lt 2 || -z "$2" ]]; then
        echo "Error: --interval requires a value (seconds)" >&2
        exit 1
      fi
      WATCH_INTERVAL="$2"
      shift 2
      ;;
    *)
      TARGET_NODES+=("$1")
      shift
      ;;
  esac
done

# If specific nodes given, use those; otherwise use all.
# Labels for explicitly given nodes are resolved by address, not by their
# position on the command line, so a single node keeps its own label.
if [ ${#TARGET_NODES[@]} -eq 0 ]; then
  TARGET_NODES=("${ALL_NODES[@]}")
  TARGET_LABELS=("${NODE_LABELS[@]}")
else
  for node in "${TARGET_NODES[@]}"; do
    TARGET_LABELS+=("$(label_for_node "$node")")
  done
fi

# ── Remote profiling command ───────────────────────────────────────────
# Executed on each node via ssh_cmd. Emits a line-oriented report:
#   HEADER|host|uptime|N cores|load a b c|temp
#   MEM|...   SWAP|...   DISK|...
#   PROCS_START ... PROCS_END             (top 10 processes by CPU)
#   CONTAINERS_START ... CONTAINERS_END   (name|status[|size] per container)
PROFILE_CMD='
hostname_val=$(hostname 2>/dev/null || echo "unknown")
uptime_val=$(uptime -p 2>/dev/null || uptime | sed "s/.*up/up/;s/,.*//")

# CPU info
cpu_cores=$(nproc 2>/dev/null || echo "?")
load_avg=$(cat /proc/loadavg 2>/dev/null | awk "{print \$1, \$2, \$3}")

# Memory
mem_info=$(free -h 2>/dev/null | awk "/^Mem:/{printf \"%s / %s (%s free)\", \$3, \$2, \$4}")
swap_info=$(free -h 2>/dev/null | awk "/^Swap:/{if(\$2 != \"0B\" && \$2 != \"0\") printf \"%s / %s\", \$3, \$2; else print \"none\"}")

# Disk
disk_info=$(df -h / 2>/dev/null | awk "NR==2{printf \"%s / %s (%s)\", \$3, \$2, \$5}")

# CPU temperature (if available)
temp="n/a"
if [ -f /sys/class/thermal/thermal_zone0/temp ]; then
  raw=$(cat /sys/class/thermal/thermal_zone0/temp)
  temp="$((raw / 1000))°C"
fi

echo "HEADER|${hostname_val}|${uptime_val}|${cpu_cores} cores|load ${load_avg}|${temp}"
echo "MEM|${mem_info}"
echo "SWAP|${swap_info}"
echo "DISK|${disk_info}"

# Top 10 processes by CPU
echo "PROCS_START"
ps aux --sort=-%cpu 2>/dev/null | head -11 | awk "NR>1{printf \"%-6s %-5s %-5s %s\n\", \$2, \$3, \$4, \$11}" 2>/dev/null
echo "PROCS_END"

# Container status
echo "CONTAINERS_START"
if command -v podman >/dev/null 2>&1; then
  podman ps -a --format "{{.Names}}|{{.Status}}|{{.Size}}" 2>/dev/null || \
  podman ps -a --format "{{.Names}}|{{.Status}}" 2>/dev/null || \
  echo "podman error"
elif command -v docker >/dev/null 2>&1; then
  docker ps -a --format "{{.Names}}|{{.Status}}" 2>/dev/null || echo "docker error"
else
  echo "no container runtime"
fi
echo "CONTAINERS_END"
'

# ── Formatting ─────────────────────────────────────────────────────────
BOLD="\033[1m"
DIM="\033[2m"
GREEN="\033[0;32m"
YELLOW="\033[0;33m"
RED="\033[0;31m"
CYAN="\033[0;36m"
RESET="\033[0m"
SEP="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Print the second |-separated field of the first report line whose first
# field is exactly $2. Prints nothing, and still succeeds, when no such line
# exists; a failing grep here would abort the script under errexit/pipefail.
report_field() {
  awk -F'|' -v tag="$2" '$1 == tag { print $2; exit }' <<< "$1"
}

# Print the report lines found between the "$2_START" and "$2_END" marker
# lines, markers excluded. Succeeds with no output for an empty section.
report_section() {
  awk -v start="${2}_START" -v stop="${2}_END" '
    $0 == stop  { inside = 0 }
    inside      { print }
    $0 == start { inside = 1 }
  ' <<< "$1"
}

# Render the report of one node.
#   $1  node address
#   $2  display label
#   $3  raw PROFILE_CMD output captured from the node
print_node_report() {
  local ip="$1"
  local label="$2"
  local output="$3"

  echo -e "\n${BOLD}${CYAN}${SEP}${RESET}"
  echo -e "${BOLD}${CYAN} ${label} ${DIM}(${ip})${RESET}"
  echo -e "${BOLD}${CYAN}${SEP}${RESET}"

  # Parse HEADER line ("|| true": no HEADER line must not trip errexit)
  local header host_name up_time cores load temp
  header=$(grep -m1 "^HEADER|" <<< "$output" || true)
  if [ -n "$header" ]; then
    IFS='|' read -r _ host_name up_time cores load temp <<< "$header"
    echo -e " ${BOLD}Host:${RESET} ${host_name} ${DIM}${up_time}${RESET}"
    echo -e " ${BOLD}CPU:${RESET} ${cores} ${load} ${temp}"
  fi

  # Memory / swap / disk; each line is optional
  local mem swap disk
  mem=$(report_field "$output" "MEM")
  if [ -n "$mem" ]; then
    echo -e " ${BOLD}Mem:${RESET} ${mem}"
  fi
  swap=$(report_field "$output" "SWAP")
  if [ -n "$swap" ]; then
    echo -e " ${BOLD}Swap:${RESET} ${swap}"
  fi
  disk=$(report_field "$output" "DISK")
  if [ -n "$disk" ]; then
    echo -e " ${BOLD}Disk:${RESET} ${disk}"
  fi

  # Top processes
  echo ""
  echo -e " ${BOLD}Top processes by CPU:${RESET}"
  # Build the column header with the same widths the remote printf uses.
  local columns
  printf -v columns '%-6s %-5s %-5s %s' "PID" "CPU%" "MEM%" "Command"
  echo -e " ${DIM}${columns}${RESET}"

  local procs line severity
  procs=$(report_section "$output" "PROCS")
  if [ -n "$procs" ]; then
    while IFS= read -r line; do
      # Compare the CPU column numerically rather than stripping the dot:
      # a value printed without a decimal point (e.g. "105") would otherwise
      # be read as 10.5%. Thresholds: >50% red, >10% yellow.
      severity=$(awk '{
        v = $2 + 0
        if (v > 50)      print "high"
        else if (v > 10) print "warn"
        else             print "ok"
      }' <<< "$line")
      case "$severity" in
        high) echo -e " ${RED}${line}${RESET}" ;;
        warn) echo -e " ${YELLOW}${line}${RESET}" ;;
        *)    echo -e " ${line}" ;;
      esac
    done <<< "$procs"
  else
    echo -e " ${DIM}(no process data)${RESET}"
  fi

  # Containers
  echo ""
  echo -e " ${BOLD}Containers:${RESET}"
  local containers name status icon
  containers=$(report_section "$output" "CONTAINERS")
  case "$containers" in
    "" | "no container runtime" | "podman error" | "docker error")
      # Sentinel messages (or nothing at all) are shown as a dim note,
      # not as a container entry.
      echo -e " ${DIM}${containers:-none}${RESET}"
      ;;
    *)
      # Third field (size, podman only) is not displayed.
      while IFS='|' read -r name status _; do
        if grep -qi "up" <<< "$status"; then
          icon="${GREEN}●${RESET}"
        else
          icon="${RED}○${RESET}"
        fi
        echo -e " ${icon} ${BOLD}${name}${RESET} ${DIM}${status}${RESET}"
      done <<< "$containers"
      ;;
  esac
}

# Remove the temp dir of an interrupted probe round, if any.
cleanup_tmpdir() {
  if [[ -n "$PROFILE_TMPDIR" ]]; then
    rm -rf -- "$PROFILE_TMPDIR"
    PROFILE_TMPDIR=""
  fi
}

# Install the cleanup only when no EXIT trap exists yet, so a trap that a
# sourced file may have installed is not replaced.
if [[ -z "$(trap -p EXIT)" ]]; then
  trap cleanup_tmpdir EXIT
fi

# ── Main profiling loop ───────────────────────────────────────────────
# Probe every target node in parallel, then print one report per node in
# target order, followed by a reachable/unreachable summary.
profile_all() {
  echo -e "\n${BOLD}Archipelago Node Profile${RESET} ${DIM}$(date '+%Y-%m-%d %H:%M:%S')${RESET}"

  local tmpdir
  tmpdir=$(mktemp -d)
  PROFILE_TMPDIR="$tmpdir"

  # Probe all nodes in parallel; each probe writes its result to $i.out
  local pids=()
  local i ip label pid
  for i in "${!TARGET_NODES[@]}"; do
    ip="${TARGET_NODES[$i]}"
    (
      if result=$(ssh_cmd "$ip" "$PROFILE_CMD" 2>/dev/null); then
        echo "$result" > "$tmpdir/$i.out"
      else
        echo "UNREACHABLE" > "$tmpdir/$i.out"
      fi
    ) &
    pids+=($!)
  done

  # Wait for all probes; a failed probe is reported below, not fatal here
  for pid in "${pids[@]}"; do
    wait "$pid" 2>/dev/null || true
  done

  # Print reports
  local reachable=0 unreachable=0
  local outfile result
  for i in "${!TARGET_NODES[@]}"; do
    ip="${TARGET_NODES[$i]}"
    label="${TARGET_LABELS[$i]:-$ip}"
    outfile="$tmpdir/$i.out"

    result="UNREACHABLE"
    if [ -f "$outfile" ]; then
      result=$(cat "$outfile")
    fi

    if [ "$result" != "UNREACHABLE" ]; then
      print_node_report "$ip" "$label" "$result"
      reachable=$((reachable + 1))
    else
      echo -e "\n${DIM}${SEP}${RESET}"
      echo -e "${RED} ${label} (${ip}) — unreachable${RESET}"
      echo -e "${DIM}${SEP}${RESET}"
      unreachable=$((unreachable + 1))
    fi
  done

  echo -e "\n${DIM}${reachable} reachable, ${unreachable} unreachable${RESET}\n"

  rm -rf "$tmpdir"
  PROFILE_TMPDIR=""
}

if $WATCH_MODE; then
  while true; do
    clear
    profile_all
    echo -e "${DIM}Refreshing every ${WATCH_INTERVAL}s — Ctrl+C to stop${RESET}"
    sleep "$WATCH_INTERVAL"
  done
else
  profile_all
fi