archy/scripts/reconcile-containers.sh
Dorian 92a429535a perf: reduce CPU — Chromium GPU flags, healthcheck 30s to 120s, app card fixed height
- Chromium kiosk: add --disable-gpu-compositing, --disable-gpu-rasterization,
  --disable-software-rasterizer, --renderer-process-limit=1
  drops GPU process from 64% to 12% CPU
- Container healthchecks: 30s to 120s interval in first-boot and reconcile
- AppCard: min-height on description so cards dont shift

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 02:42:44 +01:00

524 lines
18 KiB
Bash
Executable File

#!/bin/bash
#
# Archipelago Container Reconciler
# Ensures every container matches the canonical spec from container-specs.sh.
# Safe to run repeatedly (idempotent). Run on any node.
#
# Usage:
# sudo ./reconcile-containers.sh # Fix everything
# sudo ./reconcile-containers.sh --check-only # Audit only, no changes
# sudo ./reconcile-containers.sh --force # Override user-stopped
# sudo ./reconcile-containers.sh --tier=2 # Only reconcile tier 2
# sudo ./reconcile-containers.sh --container=lnd # Only reconcile lnd
#
set -o pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# ── Parse arguments ──────────────────────────────────────────────────
# Defaults; each one can be overridden by a command-line flag.
CHECK_ONLY=false
FORCE=false
FILTER_TIER=""
FILTER_CONTAINER=""

#######################################
# Translate command-line flags into the CHECK_ONLY / FORCE /
# FILTER_TIER / FILTER_CONTAINER globals.
# Arguments: the script's own arguments ("$@")
# Outputs:   usage text on -h/--help (stdout) or on an unknown flag (stderr)
# Exits:     0 after --help; 2 on an unknown argument, so that a
#            mistyped "--check-only" can never fall through to APPLY mode.
#######################################
_reconcile_parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --check-only) CHECK_ONLY=true ;;
      --force) FORCE=true ;;
      --tier=*) FILTER_TIER="${arg#*=}" ;;
      --container=*) FILTER_CONTAINER="${arg#*=}" ;;
      -h|--help)
        echo "Usage: $0 [--check-only] [--force] [--tier=N] [--container=NAME]"
        exit 0 ;;
      *)
        echo "Unknown argument: $arg" >&2
        echo "Usage: $0 [--check-only] [--force] [--tier=N] [--container=NAME]" >&2
        exit 2 ;;
    esac
  done
}
_reconcile_parse_args "$@"
# ── Colors ───────────────────────────────────────────────────────────
# ANSI escape sequences, stored unexpanded; `echo -e` in the logging
# helpers turns them into real escapes at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
# Reset all attributes.
NC='\033[0m'
# Shared formatter for the tagged status lines below.
# Arguments: $1 - colour escape, $2 - tag text, rest - message words
# Outputs:   " <colour>[TAG]<reset> message" on stdout
_rc_status_line() {
  local color="$1" tag="$2"
  shift 2
  echo -e " ${color}[${tag}]${NC} $*"
}

# One helper per outcome; each prints its arguments behind a coloured tag.
ok() {
  _rc_status_line "$GREEN" "OK" "$@"
}
fixed() {
  _rc_status_line "$CYAN" "FIXED" "$@"
}
skip() {
  _rc_status_line "$YELLOW" "SKIP" "$@"
}
fail() {
  _rc_status_line "$RED" "FAIL" "$@"
}
info() {
  _rc_status_line "$BLUE" "INFO" "$@"
}

# Section heading: a blank line followed by the text in bold.
header() {
  echo -e "\n${BOLD}$*${NC}"
}
# ── Source specs ─────────────────────────────────────────────────────
# Load the spec library that sits next to this script; nothing below can
# work without it, so abort if it cannot be sourced.
if ! source "$SCRIPT_DIR/container-specs.sh"; then
  echo "Cannot source container-specs.sh"
  exit 1
fi
# Not defined in this file — presumably provided by container-specs.sh.
# The banner below prints HOST_IP, DISK_GB, TOTAL_MEM_MB and LOW_MEM right
# after this call, so it is assumed to populate them (confirm there).
detect_environment
# ── Podman command ───────────────────────────────────────────────────
# Run as archipelago user — podman sees rootless containers directly.
# Use sudo only for chown/mkdir operations.
PODMAN="podman"
# ── Pre-flight ───────────────────────────────────────────────────────
header "╔══════════════════════════════════════════════════╗"
header "║ ARCHIPELAGO CONTAINER RECONCILER ║"
header "╚══════════════════════════════════════════════════╝"
echo ""
info "Host: $(hostname) ($HOST_IP)"
info "Disk: ${DISK_GB}GB | RAM: ${TOTAL_MEM_MB}MB | Low-mem: $LOW_MEM"
# Explicit if/else instead of `A && B || C`, which would also run C
# whenever B itself returned non-zero.
if $CHECK_ONLY; then
  info "Mode: CHECK ONLY (no changes)"
else
  info "Mode: APPLY FIXES"
fi
echo ""
# Ensure archy-net exists. In check-only mode the missing network is only
# reported; otherwise it is created and the real outcome is logged.
if ! $PODMAN network exists archy-net 2>/dev/null; then
  if $CHECK_ONLY; then
    info "archy-net missing (would create)"
  elif $PODMAN network create archy-net 2>/dev/null; then
    info "Created archy-net"
  else
    fail "Cannot create archy-net"
  fi
fi
# Load user-stopped list
# Raw contents of the user-stopped list; stays empty when the file is
# absent or unreadable. is_user_stopped searches this text for names.
USER_STOPPED_FILE="/var/lib/archipelago/user-stopped.json"
USER_STOPPED=""
[ ! -f "$USER_STOPPED_FILE" ] || USER_STOPPED=$(cat "$USER_STOPPED_FILE" 2>/dev/null)
#######################################
# Tell whether the user deliberately stopped a container.
# Globals:   FORCE (read), USER_STOPPED (read; raw text of the list file)
# Arguments: $1 - container name
# Returns:   0 if the double-quoted name occurs in USER_STOPPED,
#            1 otherwise or whenever FORCE is "true".
#######################################
is_user_stopped() {
  [ "$FORCE" = "true" ] && return 1
  # Literal substring match: the name must not be read as a regex, or a
  # name such as "a.b" would also match an entry like "axb".
  [[ "$USER_STOPPED" == *"\"$1\""* ]]
}
# ── Inspection helpers ───────────────────────────────────────────────
#######################################
# Check whether a container with exactly this name exists, in any state.
# Globals:   PODMAN (read)
# Arguments: $1 - container name
# Returns:   0 if `podman ps -a` lists the name, 1 otherwise
#######################################
container_exists() {
  [ -n "$1" ] || return 1
  local names
  # Capture first, then match: with `set -o pipefail`, piping straight into
  # `grep -q` can report failure when grep exits before podman finishes.
  names=$($PODMAN ps -a --format '{{.Names}}' 2>/dev/null)
  # -F: the name is a literal, not a regex; -x: whole-line match only.
  grep -Fxq -- "$1" <<< "$names"
}
#######################################
# Check whether a container with exactly this name is currently running.
# Globals:   PODMAN (read)
# Arguments: $1 - container name
# Returns:   0 if `podman ps` (running containers only) lists the name,
#            1 otherwise
#######################################
container_running() {
  [ -n "$1" ] || return 1
  local names
  # Capture first, then match: with `set -o pipefail`, piping straight into
  # `grep -q` can report failure when grep exits before podman finishes.
  names=$($PODMAN ps --format '{{.Names}}' 2>/dev/null)
  # -F: the name is a literal, not a regex; -x: whole-line match only.
  grep -Fxq -- "$1" <<< "$names"
}
# Print the image name recorded for a container (podman's .ImageName).
# Arguments: $1 - container name
# Outputs:   the image name; nothing if the inspect call fails
container_image() {
  local ctr="$1"
  $PODMAN inspect "$ctr" --format '{{.ImageName}}' 2>/dev/null
}
# Print the first network a container is attached to.
# The keys of .NetworkSettings.Networks are used because NetworkMode is
# unreliable (always shows 'bridge' in rootless).
# Arguments: $1 - container name
# Outputs:   first network name, or an empty line if none / inspect fails
container_network() {
  local attached
  attached=$($PODMAN inspect "$1" --format '{{range $k,$v := .NetworkSettings.Networks}}{{$k}} {{end}}' 2>/dev/null)
  # Names arrive space-separated; keep only the first field.
  awk '{print $1}' <<< "$attached"
}
# Print a container's configured memory limit (podman's .HostConfig.Memory).
# reconcile treats a value of 0 as "no limit".
# Arguments: $1 - container name
# Outputs:   the limit value; nothing if the inspect call fails
container_memory() {
  local ctr="$1"
  $PODMAN inspect "$ctr" --format '{{.HostConfig.Memory}}' 2>/dev/null
}
#######################################
# Check whether a local image matches the given reference.
# Globals:   PODMAN (read)
# Arguments: $1 - image reference, matched as a substring of
#                 "repository:tag" so short names still match
#                 registry-qualified ones
# Returns:   0 if some listed image contains the reference, 1 otherwise
#            (also 1 for an empty reference)
#######################################
image_exists() {
  [ -n "$1" ] || return 1
  local listing
  # Capture first so the check does not depend on pipefail/SIGPIPE timing.
  listing=$($PODMAN images --format '{{.Repository}}:{{.Tag}}' 2>/dev/null)
  # Quoted pattern = literal substring; dots in registry or image names
  # must not behave as regex wildcards.
  [[ "$listing" == *"$1"* ]]
}
# Convert memory string to bytes for comparison
#######################################
# Convert a podman-style memory string to a byte count for comparison.
# Handles the unit suffixes b, k, m and g (either case); any other value
# is echoed unchanged, including the empty string.
# Arguments: $1 - memory string, e.g. "512m", "2g", "64k", "1048576"
# Outputs:   the byte count (or the unchanged input) on stdout
#######################################
mem_to_bytes() {
  local m="$1"
  case "$m" in
    *g|*G) echo $(( ${m%[gG]} * 1073741824 )) ;;
    *m|*M) echo $(( ${m%[mM]} * 1048576 )) ;;
    *k|*K) echo $(( ${m%[kK]} * 1024 )) ;;
    *b|*B) echo "${m%[bB]}" ;;
    *) echo "$m" ;;
  esac
}
# ── Build podman run command from spec ───────────────────────────────
# Assemble the `podman run` command line for the currently loaded spec.
# Reads PODMAN and the SPEC_* globals; prints the command as one string on
# stdout (reconcile runs it through `eval`).
# List-valued specs (caps, tmpfs, ports, volumes, env) are split on
# whitespace, one flag per word.
build_run_cmd() {
  local line cap tmp_mount port vol envvar

  line="$PODMAN run -d --name $SPEC_NAME --restart $SPEC_RESTART"

  # Network: only "host" and "archy-net" get an explicit flag.
  case "$SPEC_NETWORK" in
    host) line+=" --network=host" ;;
    archy-net) line+=" --network archy-net" ;;
  esac

  # Memory limit
  if [ -n "$SPEC_MEMORY" ]; then
    line+=" --memory=$SPEC_MEMORY"
  fi

  # Capabilities: drop everything, then add back what the spec lists.
  line+=" --cap-drop ALL"
  for cap in $SPEC_CAPS; do
    line+=" --cap-add $cap"
  done

  # Security option
  if [ -n "$SPEC_SECURITY" ]; then
    line+=" --security-opt $SPEC_SECURITY"
  fi

  # Read-only root filesystem
  if [ "$SPEC_READONLY" = "true" ]; then
    line+=" --read-only"
  fi

  # Tmpfs mounts
  for tmp_mount in $SPEC_TMPFS; do
    line+=" --tmpfs $tmp_mount"
  done

  # Health check; the command is wrapped in double quotes for the later eval.
  if [ -n "$SPEC_HEALTH_CMD" ]; then
    line+=" --health-cmd=\"$SPEC_HEALTH_CMD\" --health-interval=120s --health-timeout=10s --health-retries=3"
  fi

  # Published ports
  for port in $SPEC_PORTS; do
    line+=" -p $port"
  done

  # Volumes
  for vol in $SPEC_VOLUMES; do
    line+=" -v $vol"
  done

  # Environment, each entry double-quoted for the later eval.
  for envvar in $SPEC_ENV; do
    line+=" -e \"$envvar\""
  done

  # Image, then custom args, then the entrypoint override.
  line+=" $SPEC_IMAGE"
  if [ -n "$SPEC_CUSTOM_ARGS" ]; then
    line+=" $SPEC_CUSTOM_ARGS"
  fi
  if [ -n "$SPEC_ENTRYPOINT" ]; then
    line+=" $SPEC_ENTRYPOINT"
  fi

  echo "$line"
}
# ── Counters ─────────────────────────────────────────────────────────
# Per-outcome tallies, updated by reconcile and printed in the final report.
COUNT_OK=0
COUNT_FIXED=0
COUNT_CREATED=0
COUNT_SKIPPED=0
COUNT_FAILED=0
# Space-separated names of the containers that failed.
FAILED_LIST=""
# ── Reconcile one container ──────────────────────────────────────────
#######################################
# Bring one container in line with its spec: start it, recreate it, or
# create it as needed, and record the outcome.
# Globals:   FILTER_TIER, CHECK_ONLY, PODMAN and the SPEC_* variables set
#            by load_spec (read); COUNT_OK, COUNT_FIXED, COUNT_CREATED,
#            COUNT_SKIPPED, COUNT_FAILED, FAILED_LIST (written)
# Arguments: $1 - container name, as known to load_spec
# Outputs:   one status line per decision via ok/fixed/skip/fail/info
# Note:      in check-only mode the would-be action is still tallied
#            under Fixed/Created.
#######################################
reconcile() {
  local name="$1"
  local dep v host_dir run_output

  if ! load_spec "$name"; then
    skip "$name — no spec defined"
    COUNT_SKIPPED=$((COUNT_SKIPPED + 1))
    return
  fi

  # Filter by tier (filtered-out containers are not counted at all)
  [ -n "$FILTER_TIER" ] && [ "$SPEC_TIER" != "$FILTER_TIER" ] && return

  # User-stopped: leave the container alone, but still repair ownership
  if is_user_stopped "$name"; then
    skip "$name — user-stopped"
    COUNT_SKIPPED=$((COUNT_SKIPPED + 1))
    fix_ownership "$name"
    return
  fi

  # Optional/local images: skip if image doesn't exist and container doesn't exist
  if [ "$SPEC_OPTIONAL" = "true" ] || [ "$SPEC_LOCAL_IMAGE" = "true" ]; then
    if ! image_exists "$SPEC_IMAGE" && ! container_exists "$name"; then
      skip "$name — image not available"
      COUNT_SKIPPED=$((COUNT_SKIPPED + 1))
      return
    fi
  fi

  # Check dependencies: every listed container must already be running
  for dep in $SPEC_DEPENDS; do
    if ! container_running "$dep"; then
      skip "$name — dependency $dep not running"
      COUNT_SKIPPED=$((COUNT_SKIPPED + 1))
      return
    fi
  done

  # Decide what to do: OK, START, RECREATE or CREATE
  local action="OK"
  local reasons=""
  if container_exists "$name"; then
    local cur_network cur_memory spec_memory_bytes
    cur_network=$(container_network "$name")
    cur_memory=$(container_memory "$name")
    spec_memory_bytes=$(mem_to_bytes "$SPEC_MEMORY")

    # Check network mismatch
    # For archy-net and host: exact match required
    # For bridge/default: accept any non-archy-net, non-host network
    if [ "$SPEC_NETWORK" = "archy-net" ]; then
      if [ "$cur_network" != "archy-net" ]; then
        action="RECREATE"
        reasons+="network($cur_network→archy-net) "
      fi
    elif [ "$SPEC_NETWORK" = "host" ]; then
      if [ "$cur_network" != "host" ]; then
        action="RECREATE"
        reasons+="network($cur_network→host) "
      fi
    else
      # Default/bridge: anything that isn't archy-net or host is fine
      if [ "$cur_network" = "archy-net" ] || [ "$cur_network" = "host" ]; then
        action="RECREATE"
        reasons+="network($cur_network→bridge) "
      fi
    fi

    # Check memory limit (0 = no limit). Only "no limit where the spec
    # wants one" triggers a recreate; a different non-zero limit does not.
    if [ "${cur_memory:-0}" = "0" ] && [ "${spec_memory_bytes:-0}" != "0" ]; then
      action="RECREATE"
      reasons+="memory(none→$SPEC_MEMORY) "
    fi

    # Check if running (only matters when no recreate is already pending)
    if ! container_running "$name" && [ "$action" = "OK" ]; then
      action="START"
      reasons+="not-running "
    fi
  else
    action="CREATE"
    reasons+="missing "
  fi

  # Fix ownership regardless
  fix_ownership "$name"

  case "$action" in
    OK)
      ok "$name"
      COUNT_OK=$((COUNT_OK + 1))
      ;;
    START)
      if $CHECK_ONLY; then
        info "$name — would start ($reasons)"
      else
        if $PODMAN start "$name" >/dev/null 2>&1; then
          fixed "$name — started ($reasons)"
        else
          fail "$name — start failed"
          COUNT_FAILED=$((COUNT_FAILED + 1))
          FAILED_LIST+=" $name"
          return
        fi
      fi
      COUNT_FIXED=$((COUNT_FIXED + 1))
      ;;
    RECREATE)
      if $CHECK_ONLY; then
        info "$name — would recreate ($reasons)"
      else
        info "$name — recreating ($reasons)"
        $PODMAN stop "$name" >/dev/null 2>&1
        $PODMAN rm "$name" >/dev/null 2>&1
        # Run the command exactly once and keep its output. Re-running it
        # just to fetch the error text would attempt a second creation and
        # could report a different error than the one that really occurred.
        if run_output=$(eval "$(build_run_cmd)" 2>&1); then
          fixed "$name — recreated ($reasons)"
        else
          # Last line of the captured output is the most specific message.
          fail "$name — recreate failed: ${run_output##*$'\n'}"
          COUNT_FAILED=$((COUNT_FAILED + 1))
          FAILED_LIST+=" $name"
          return
        fi
      fi
      COUNT_FIXED=$((COUNT_FIXED + 1))
      ;;
    CREATE)
      if $CHECK_ONLY; then
        info "$name — would create ($reasons)"
      else
        # Pre-create host directories for bind mounts. A source without a
        # slash is a named volume, not a path, so nothing is created for it.
        for v in $SPEC_VOLUMES; do
          host_dir="${v%%:*}"
          case "$host_dir" in
            */*) sudo mkdir -p "$host_dir" 2>/dev/null ;;
          esac
        done
        if run_output=$(eval "$(build_run_cmd)" 2>&1); then
          fixed "$name — created"
        else
          fail "$name — create failed: ${run_output##*$'\n'}"
          COUNT_FAILED=$((COUNT_FAILED + 1))
          FAILED_LIST+=" $name"
          return
        fi
      fi
      COUNT_CREATED=$((COUNT_CREATED + 1))
      ;;
  esac
}
# ── Fix ownership ────────────────────────────────────────────────────
#######################################
# Make sure a container's data directory is owned by the uid in its spec.
# Globals:   SPEC_DATA_DIR, SPEC_DATA_UID ("uid:gid"), CHECK_ONLY (read)
# Arguments: $1 - container name (used in messages only)
# Outputs:   an info line when ownership differs or was fixed, a fail
#            line when chown did not succeed
# Note:      only the owning uid is compared; the group is not checked.
#######################################
fix_ownership() {
  local name="$1"
  [ -z "$SPEC_DATA_DIR" ] && return 0
  [ ! -d "$SPEC_DATA_DIR" ] && return 0
  # Without a target owner there is nothing to compare or chown to.
  [ -z "$SPEC_DATA_UID" ] && return 0
  # This owner is deliberately left alone — presumably the default rootless
  # mapping that needs no correction; confirm against container-specs.sh.
  [ "$SPEC_DATA_UID" = "100000:100000" ] && return 0

  local expected_uid="${SPEC_DATA_UID%%:*}"
  local current_uid
  current_uid=$(stat -c '%u' "$SPEC_DATA_DIR" 2>/dev/null)
  [ "$current_uid" = "$expected_uid" ] && return 0

  if $CHECK_ONLY; then
    info "$name — ownership: $current_uid → $SPEC_DATA_UID"
  elif sudo chown -R "$SPEC_DATA_UID" "$SPEC_DATA_DIR" 2>/dev/null; then
    info "$name — fixed ownership → $SPEC_DATA_UID"
  else
    # Report the failure instead of claiming the ownership was fixed.
    fail "$name — could not change ownership to $SPEC_DATA_UID"
  fi
}
# ── Ensure secrets exist ─────────────────────────────────────────────
#######################################
# Generate any missing service secrets under /var/lib/archipelago/secrets.
# Existing secret files are never overwritten.
# Globals:   CHECK_ONLY (read)
# Outputs:   info lines for generated (or would-be generated) secrets,
#            fail lines when a secret could not be produced
# Note:      in check-only mode nothing is created or chmod-ed.
#######################################
ensure_secrets() {
  local SECRETS_DIR="/var/lib/archipelago/secrets"
  local svc path secret fpass

  if ! $CHECK_ONLY; then
    sudo mkdir -p "$SECRETS_DIR" 2>/dev/null
    sudo chmod 700 "$SECRETS_DIR" 2>/dev/null
  fi

  for svc in bitcoin-rpc-password mempool-db-password btcpay-db-password mysql-root-db-password; do
    path="$SECRETS_DIR/$svc"
    # The directory is created via sudo with mode 700, so an unprivileged
    # test may not see the file; ask through sudo as well before
    # concluding it is missing, otherwise a secret could be overwritten.
    if [ -f "$path" ] || sudo test -f "$path" 2>/dev/null; then
      continue
    fi
    if $CHECK_ONLY; then
      info "Would generate secret: $svc"
      continue
    fi
    # Generate first, write second: a failing openssl must not leave an
    # empty secret file behind.
    if secret=$(openssl rand -hex 16) && [ -n "$secret" ]; then
      printf '%s\n' "$secret" | sudo tee "$path" >/dev/null
      sudo chmod 600 "$path"
      info "Generated secret: $svc"
    else
      fail "Could not generate secret: $svc"
    fi
  done

  path="$SECRETS_DIR/fedimint-gateway-password"
  if ! { [ -f "$path" ] || sudo test -f "$path" 2>/dev/null; }; then
    if $CHECK_ONLY; then
      info "Would generate fedimint gateway secret"
    elif fpass=$(openssl rand -base64 16) && [ -n "$fpass" ]; then
      printf '%s\n' "$fpass" | sudo tee "$path" >/dev/null
      sudo chmod 600 "$path"
      if command -v htpasswd >/dev/null 2>&1; then
        # Store the bcrypt hash (cost 10) with the leading ':' and the
        # newlines stripped.
        # NOTE(review): the password is passed on htpasswd's command line.
        htpasswd -bnBC 10 "" "$fpass" | tr -d ':\n' | sudo tee "$SECRETS_DIR/fedimint-gateway-hash" >/dev/null
        sudo chmod 600 "$SECRETS_DIR/fedimint-gateway-hash"
      fi
      info "Generated fedimint gateway secret"
    else
      fail "Could not generate fedimint gateway secret"
    fi
  fi

  # Reload after generation
  detect_environment
}
# ── Ensure bitcoin.conf ─────────────────────────────────────────────
#######################################
# Make sure bitcoin.conf exists and carries an rpcauth line, strip options
# that would clash with the container's command-line arguments, and set
# ownership of the bitcoin data directory.
# Globals:   CHECK_ONLY, BITCOIN_RPC_USER, BITCOIN_RPC_PASS (read)
# Outputs:   info/fail lines describing what was (or would be) done
# Note:      in check-only mode the filesystem is not touched at all.
#######################################
ensure_bitcoin_conf() {
  local BITCOIN_CONF="/var/lib/archipelago/bitcoin/bitcoin.conf"
  local needs_conf=false
  local salt hash rpcauth

  if [ ! -f "$BITCOIN_CONF" ] || ! grep -q "^rpcauth=" "$BITCOIN_CONF" 2>/dev/null; then
    needs_conf=true
  fi

  # Audit mode: report only — no mkdir, sed -i or chown.
  if $CHECK_ONLY; then
    if $needs_conf && [ -n "$BITCOIN_RPC_PASS" ]; then
      info "Would generate bitcoin.conf"
    fi
    return 0
  fi

  sudo mkdir -p /var/lib/archipelago/bitcoin 2>/dev/null

  if $needs_conf && [ -n "$BITCOIN_RPC_PASS" ]; then
    salt=$(openssl rand -hex 16)
    # HMAC-SHA256 of the password keyed with the salt. printf instead of
    # `echo -n` so a password that looks like an echo option is hashed as-is.
    hash=$(printf '%s' "$BITCOIN_RPC_PASS" | openssl dgst -sha256 -hmac "$salt" -hex 2>/dev/null | awk '{print $NF}')
    if [ -n "$salt" ] && [ -n "$hash" ]; then
      rpcauth="${BITCOIN_RPC_USER}:${salt}\$${hash}"
      # Only rpcauth + printtoconsole here — all other options are in SPEC_CUSTOM_ARGS
      # to avoid duplicate bind conflicts
      if printf 'rpcauth=%s\nprinttoconsole=1\n' "$rpcauth" | sudo tee "$BITCOIN_CONF" >/dev/null; then
        info "Generated bitcoin.conf"
      else
        fail "Could not write bitcoin.conf"
      fi
    else
      # Never write an rpcauth line with an empty salt or hash.
      fail "Could not compute rpcauth for bitcoin.conf"
    fi
  fi

  # Strip duplicate server/rpc/listen lines from existing conf to avoid conflicts with custom args
  if [ -f "$BITCOIN_CONF" ]; then
    sudo sed -i '/^server=/d; /^rpcbind=/d; /^rpcallowip=/d; /^rpcport=/d; /^listen=/d' "$BITCOIN_CONF" 2>/dev/null
  fi
  sudo chown -R 100101:100101 /var/lib/archipelago/bitcoin 2>/dev/null
}
# ── Ensure lnd.conf ─────────────────────────────────────────────────
#######################################
# Write a default lnd.conf when none exists and the Bitcoin RPC password
# is known. An existing lnd.conf is never modified.
# Globals:   CHECK_ONLY, BITCOIN_RPC_USER, BITCOIN_RPC_PASS (read)
# Outputs:   info/fail lines describing what was (or would be) done
# Note:      in check-only mode nothing is created.
# NOTE(review): the file contains the RPC password and is written with
#            the default permissions of `sudo tee` — confirm intended.
#######################################
ensure_lnd_conf() {
  local LND_CONF="/var/lib/archipelago/lnd/lnd.conf"

  # Audit mode: report only — the data directory is not created either.
  if $CHECK_ONLY; then
    if [ ! -f "$LND_CONF" ] && [ -n "$BITCOIN_RPC_PASS" ]; then
      info "Would generate lnd.conf"
    fi
    return 0
  fi

  sudo mkdir -p /var/lib/archipelago/lnd 2>/dev/null
  [ -f "$LND_CONF" ] && return 0
  [ -n "$BITCOIN_RPC_PASS" ] || return 0

  # One config line per printf argument.
  if printf '%s\n' \
    "[Application Options]" \
    "listen=0.0.0.0:9735" \
    "rpclisten=0.0.0.0:10009" \
    "restlisten=0.0.0.0:8080" \
    "debuglevel=info" \
    "noseedbackup=true" \
    "[Bitcoin]" \
    "bitcoin.mainnet=true" \
    "bitcoin.node=bitcoind" \
    "[Bitcoind]" \
    "bitcoind.rpchost=bitcoin-knots:8332" \
    "bitcoind.rpcuser=$BITCOIN_RPC_USER" \
    "bitcoind.rpcpass=$BITCOIN_RPC_PASS" \
    "bitcoind.rpcpolling=true" \
    "bitcoind.estimatemode=ECONOMICAL" \
    "[autopilot]" \
    "autopilot.active=false" \
    | sudo tee "$LND_CONF" >/dev/null; then
    info "Generated lnd.conf"
  else
    fail "Could not write lnd.conf"
  fi
}
# ── Ensure BTCPay databases ─────────────────────────────────────────
# Create the "nbxplorer" database inside the archy-btcpay-db container if
# that container is running and the database is not there yet.
# Best effort: always returns 0, even if the CREATE fails.
ensure_btcpay_db() {
  local db_container="archy-btcpay-db"

  container_running "$db_container" || return 0

  # A "1" in the query output means the database already exists.
  if $PODMAN exec "$db_container" psql -U postgres -tc \
    "SELECT 1 FROM pg_database WHERE datname='nbxplorer'" 2>/dev/null | grep -q 1; then
    return 0
  fi

  $PODMAN exec "$db_container" psql -U postgres -c \
    "CREATE DATABASE nbxplorer;" 2>/dev/null || true
}
# ══════════════════════════════════════════════════════════════════════
# MAIN
# ══════════════════════════════════════════════════════════════════════
START_TIME=$(date +%s)

# Phase 0: secrets and config files must exist before any container starts.
header "Phase 0: Prerequisites"
ensure_secrets
ensure_bitcoin_conf
ensure_lnd_conf

# Display names, indexed by tier number.
TIER_NAMES=("Databases" "Core Infrastructure" "Services" "Applications" "Frontend UIs")

# Walk the tiers in ascending order.
for tier in 0 1 2 3 4; do
  # Honour --tier=N
  if [ -n "$FILTER_TIER" ] && [ "$FILTER_TIER" != "$tier" ]; then
    continue
  fi

  header "Tier $tier: ${TIER_NAMES[$tier]}"

  for name in "${ALL_CONTAINER_SPECS[@]}"; do
    # Honour --container=NAME
    if [ -n "$FILTER_CONTAINER" ] && [ "$name" != "$FILTER_CONTAINER" ]; then
      continue
    fi
    # Only reconcile containers whose spec loads and belongs to this tier.
    load_spec "$name" || continue
    [ "$SPEC_TIER" = "$tier" ] || continue
    reconcile "$name"
  done

  # After databases, ensure BTCPay DB schemas exist
  if [ "$tier" = "0" ]; then
    ensure_btcpay_db
  fi

  # Brief pause between tiers (not after the last one, not in check-only mode)
  if [ "$tier" -lt 4 ] && ! $CHECK_ONLY; then
    sleep 2
  fi
done
# ── Summary ──────────────────────────────────────────────────────────
# Wall-clock seconds since START_TIME, and the number of containers that
# produced any counted outcome.
ELAPSED=$(( $(date +%s) - START_TIME ))
TOTAL=$((COUNT_OK + COUNT_FIXED + COUNT_CREATED + COUNT_SKIPPED + COUNT_FAILED))

echo ""
header "╔══════════════════════════════════════════════════╗"
header "║ RECONCILIATION REPORT ║"
header "╚══════════════════════════════════════════════════╝"
echo ""
echo -e " Total: ${BOLD}$TOTAL${NC}"
echo -e " OK: ${GREEN}$COUNT_OK${NC}"
echo -e " Fixed: ${CYAN}$COUNT_FIXED${NC}"
echo -e " Created: ${CYAN}$COUNT_CREATED${NC}"
echo -e " Skipped: ${YELLOW}$COUNT_SKIPPED${NC}"
echo -e " Failed: ${RED}$COUNT_FAILED${NC}"
# Names of the failed containers, shown only when there are any.
if [ -n "$FAILED_LIST" ]; then
  echo -e " Failed: ${RED}$FAILED_LIST${NC}"
fi
echo -e " Duration: ${ELAPSED}s"
echo ""

# Exit status 1 if any container failed, 0 otherwise.
if [ "$COUNT_FAILED" -gt 0 ]; then
  exit 1
fi
exit 0