From aa65780392a70178c999cdfad0f55df57ee34604 Mon Sep 17 00:00:00 2001 From: Dorian Date: Thu, 2 Apr 2026 11:10:25 +0100 Subject: [PATCH] chore: remove stale Claude/Cursor configs from repo Remove old agents, hooks, memory notes, plans, skills, rules, and settings that accumulated in .claude/ and .cursor/. These are not used by the build and were bloating the repo. Active memory is in the project-level .claude/projects/ directory (not tracked in git). Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/agents/code-reviewer.md | 47 - .claude/agents/deploy-specialist.md | 42 - .claude/agents/iframe-specialist.md | 677 --------------- .claude/hooks/block-risky-bash.sh | 76 -- .claude/hooks/post-deploy-check.sh | 43 - .claude/hooks/protect-files.sh | 82 -- .claude/memory/MEMORY.md | 43 - .claude/memory/claude-proxy-setup.md | 9 - .claude/memory/deploy-automation.md | 18 - .claude/memory/feedback_app_display_modes.md | 15 - .../feedback_apps_always_direct_port.md | 35 - .claude/memory/feedback_asset_workflow.md | 16 - .claude/memory/feedback_deploy_patterns.md | 19 - .claude/memory/feedback_fullscreen_modals.md | 11 - .claude/memory/feedback_gamepad_unfinished.md | 12 - .../memory/feedback_indeedhub_nginx_ips.md | 17 - .claude/memory/feedback_local_dev.md | 15 - .claude/memory/feedback_logo_ascii.md | 19 - .../memory/feedback_searxng_no_cap_drop.md | 15 - .../memory/iso-build-session-2026-03-10.md | 84 -- .claude/memory/pending-features.md | 26 - .claude/memory/project-plan.md | 292 ------- .claude/memory/project_app_registry.md | 19 - .claude/memory/project_bitcoin_rpc_auth.md | 21 - .claude/memory/project_cicd_setup.md | 20 - .../memory/project_container_orchestration.md | 20 - .claude/memory/project_demo_deploy.md | 62 -- .../project_deploy_session_2026_03_22.md | 98 --- .claude/memory/project_environments.md | 21 - .claude/memory/project_gamepad_nav.md | 22 - .claude/memory/project_indeedhub_arch3_fix.md | 33 - .claude/memory/project_iso_size_reduction.md | 29 - 
.claude/memory/project_mesh_198_issue.md | 20 - .../project_repo_cleanup_and_dev_env.md | 44 - .claude/memory/project_session_20260328.md | 42 - .claude/memory/reference_tailscale_nodes.md | 25 - .claude/memory/second-server.md | 23 - .claude/memory/tailscale_servers.md | 20 - .claude/memory/third-server.md | 12 - .claude/memory/unbundled-iso.md | 30 - .claude/memory/web-only-apps.md | 34 - .claude/plans/luminous-snacking-snowflake.md | 138 --- .claude/plans/memoized-plotting-sifakis.md | 145 ---- .claude/plans/mutable-roaming-pancake.md | 357 -------- .claude/plans/plan.md | 803 ------------------ .claude/plans/polished-napping-squid.md | 108 --- .claude/plans/prancy-scribbling-pnueli.md | 80 -- .claude/plans/reflective-meandering-castle.md | 145 ---- .claude/plans/rosy-floating-lightning.md | 174 ---- .claude/plans/sequential-jingling-moth.md | 244 ------ .claude/plans/shiny-bouncing-raven.md | 103 --- .claude/plans/silly-wondering-flamingo.md | 243 ------ .claude/plans/smooth-roaming-wadler.md | 241 ------ .claude/plans/synchronous-greeting-rose.md | 173 ---- .claude/plans/tailscale-migration.md | 119 --- .claude/plans/toasty-inventing-cascade.md | 89 -- .claude/plans/twinkly-baking-ladybug.md | 205 ----- .claude/rules/backend.md | 14 - .claude/rules/containers.md | 50 -- .claude/rules/frontend.md | 16 - .claude/settings.json | 35 - .claude/skills/add-app/SKILL.md | 49 -- .claude/skills/add-web-app/SKILL.md | 125 --- .claude/skills/bitcoin-conventions/SKILL.md | 113 --- .claude/skills/build-iso/SKILL.md | 121 --- .../skills/build-iso/references/branding.md | 80 -- .claude/skills/check-server/SKILL.md | 14 - .claude/skills/deploy-both/SKILL.md | 23 - .claude/skills/deploy/SKILL.md | 24 - .claude/skills/design-pixel-retro/SKILL.md | 107 --- .claude/skills/diagnose/SKILL.md | 21 - .claude/skills/frontend-dev/SKILL.md | 20 - .claude/skills/gamepad-nav/SKILL.md | 114 --- .claude/skills/harden/SKILL.md | 49 -- .claude/skills/iso-branding/SKILL.md | 146 ---- 
.claude/skills/iso-debug/SKILL.md | 175 ---- .../references/boot-chain-reference.md | 383 --------- .claude/skills/lint/SKILL.md | 52 -- .claude/skills/mesh/SKILL.md | 155 ---- .claude/skills/podman-doctor/SKILL.md | 275 ------ .../references/common-failures.md | 102 --- .../podman-doctor/references/port-map.md | 71 -- .../podman-doctor/references/uid-mapping.md | 93 -- .claude/skills/podman-fix/SKILL.md | 338 -------- .claude/skills/podman-uptime/SKILL.md | 410 --------- .claude/skills/podman/SKILL.md | 89 -- .../podman/references/common-failures.md | 102 --- .claude/skills/podman/references/port-map.md | 71 -- .../skills/podman/references/uid-mapping.md | 93 -- .claude/skills/polish-backend/SKILL.md | 156 ---- .claude/skills/polish-deploy/SKILL.md | 181 ---- .claude/skills/polish-errors/SKILL.md | 87 -- .claude/skills/polish-forms/SKILL.md | 125 --- .claude/skills/polish-loading/SKILL.md | 88 -- .claude/skills/polish-security/SKILL.md | 162 ---- .claude/skills/polish-websocket/SKILL.md | 172 ---- .claude/skills/polish/SKILL.md | 109 --- .claude/skills/polish/references/backend.md | 27 - .claude/skills/polish/references/deploy.md | 26 - .claude/skills/polish/references/errors.md | 23 - .claude/skills/polish/references/forms.md | 30 - .claude/skills/polish/references/loading.md | 26 - .claude/skills/polish/references/security.md | 22 - .claude/skills/polish/references/websocket.md | 25 - .claude/skills/refactor/SKILL.md | 41 - .claude/skills/server-logs/SKILL.md | 19 - .claude/skills/sweep/SKILL.md | 110 --- .claude/skills/sync-configs/SKILL.md | 24 - .claude/skills/test/SKILL.md | 59 -- .claude/skills/ux-review/SKILL.md | 90 -- .cursor/rules/APP-UI-QUICK-REF.md | 98 --- .cursor/rules/APP-UI-STANDARDS.md | 588 ------------- .cursor/rules/Architecture.mdc | 188 ---- .cursor/rules/Development-Workflow.md | 248 ------ .cursor/rules/Development-Workflow.mdc | 271 ------ .cursor/rules/UI-STANDARDS.md | 355 -------- .cursor/rules/coding-rules.mdc | 751 ---------------- 
117 files changed, 13206 deletions(-) delete mode 100644 .claude/agents/code-reviewer.md delete mode 100644 .claude/agents/deploy-specialist.md delete mode 100644 .claude/agents/iframe-specialist.md delete mode 100755 .claude/hooks/block-risky-bash.sh delete mode 100755 .claude/hooks/post-deploy-check.sh delete mode 100755 .claude/hooks/protect-files.sh delete mode 100644 .claude/memory/MEMORY.md delete mode 100644 .claude/memory/claude-proxy-setup.md delete mode 100644 .claude/memory/deploy-automation.md delete mode 100644 .claude/memory/feedback_app_display_modes.md delete mode 100644 .claude/memory/feedback_apps_always_direct_port.md delete mode 100644 .claude/memory/feedback_asset_workflow.md delete mode 100644 .claude/memory/feedback_deploy_patterns.md delete mode 100644 .claude/memory/feedback_fullscreen_modals.md delete mode 100644 .claude/memory/feedback_gamepad_unfinished.md delete mode 100644 .claude/memory/feedback_indeedhub_nginx_ips.md delete mode 100644 .claude/memory/feedback_local_dev.md delete mode 100644 .claude/memory/feedback_logo_ascii.md delete mode 100644 .claude/memory/feedback_searxng_no_cap_drop.md delete mode 100644 .claude/memory/iso-build-session-2026-03-10.md delete mode 100644 .claude/memory/pending-features.md delete mode 100644 .claude/memory/project-plan.md delete mode 100644 .claude/memory/project_app_registry.md delete mode 100644 .claude/memory/project_bitcoin_rpc_auth.md delete mode 100644 .claude/memory/project_cicd_setup.md delete mode 100644 .claude/memory/project_container_orchestration.md delete mode 100644 .claude/memory/project_demo_deploy.md delete mode 100644 .claude/memory/project_deploy_session_2026_03_22.md delete mode 100644 .claude/memory/project_environments.md delete mode 100644 .claude/memory/project_gamepad_nav.md delete mode 100644 .claude/memory/project_indeedhub_arch3_fix.md delete mode 100644 .claude/memory/project_iso_size_reduction.md delete mode 100644 .claude/memory/project_mesh_198_issue.md delete 
mode 100644 .claude/memory/project_repo_cleanup_and_dev_env.md delete mode 100644 .claude/memory/project_session_20260328.md delete mode 100644 .claude/memory/reference_tailscale_nodes.md delete mode 100644 .claude/memory/second-server.md delete mode 100644 .claude/memory/tailscale_servers.md delete mode 100644 .claude/memory/third-server.md delete mode 100644 .claude/memory/unbundled-iso.md delete mode 100644 .claude/memory/web-only-apps.md delete mode 100644 .claude/plans/luminous-snacking-snowflake.md delete mode 100644 .claude/plans/memoized-plotting-sifakis.md delete mode 100644 .claude/plans/mutable-roaming-pancake.md delete mode 100644 .claude/plans/plan.md delete mode 100644 .claude/plans/polished-napping-squid.md delete mode 100644 .claude/plans/prancy-scribbling-pnueli.md delete mode 100644 .claude/plans/reflective-meandering-castle.md delete mode 100644 .claude/plans/rosy-floating-lightning.md delete mode 100644 .claude/plans/sequential-jingling-moth.md delete mode 100644 .claude/plans/shiny-bouncing-raven.md delete mode 100644 .claude/plans/silly-wondering-flamingo.md delete mode 100644 .claude/plans/smooth-roaming-wadler.md delete mode 100644 .claude/plans/synchronous-greeting-rose.md delete mode 100644 .claude/plans/tailscale-migration.md delete mode 100644 .claude/plans/toasty-inventing-cascade.md delete mode 100644 .claude/plans/twinkly-baking-ladybug.md delete mode 100644 .claude/rules/backend.md delete mode 100644 .claude/rules/containers.md delete mode 100644 .claude/rules/frontend.md delete mode 100644 .claude/settings.json delete mode 100644 .claude/skills/add-app/SKILL.md delete mode 100644 .claude/skills/add-web-app/SKILL.md delete mode 100644 .claude/skills/bitcoin-conventions/SKILL.md delete mode 100644 .claude/skills/build-iso/SKILL.md delete mode 100644 .claude/skills/build-iso/references/branding.md delete mode 100644 .claude/skills/check-server/SKILL.md delete mode 100644 .claude/skills/deploy-both/SKILL.md delete mode 100644 
.claude/skills/deploy/SKILL.md delete mode 100644 .claude/skills/design-pixel-retro/SKILL.md delete mode 100644 .claude/skills/diagnose/SKILL.md delete mode 100644 .claude/skills/frontend-dev/SKILL.md delete mode 100644 .claude/skills/gamepad-nav/SKILL.md delete mode 100644 .claude/skills/harden/SKILL.md delete mode 100644 .claude/skills/iso-branding/SKILL.md delete mode 100644 .claude/skills/iso-debug/SKILL.md delete mode 100644 .claude/skills/iso-debug/references/boot-chain-reference.md delete mode 100644 .claude/skills/lint/SKILL.md delete mode 100644 .claude/skills/mesh/SKILL.md delete mode 100644 .claude/skills/podman-doctor/SKILL.md delete mode 100644 .claude/skills/podman-doctor/references/common-failures.md delete mode 100644 .claude/skills/podman-doctor/references/port-map.md delete mode 100644 .claude/skills/podman-doctor/references/uid-mapping.md delete mode 100644 .claude/skills/podman-fix/SKILL.md delete mode 100644 .claude/skills/podman-uptime/SKILL.md delete mode 100644 .claude/skills/podman/SKILL.md delete mode 100644 .claude/skills/podman/references/common-failures.md delete mode 100644 .claude/skills/podman/references/port-map.md delete mode 100644 .claude/skills/podman/references/uid-mapping.md delete mode 100644 .claude/skills/polish-backend/SKILL.md delete mode 100644 .claude/skills/polish-deploy/SKILL.md delete mode 100644 .claude/skills/polish-errors/SKILL.md delete mode 100644 .claude/skills/polish-forms/SKILL.md delete mode 100644 .claude/skills/polish-loading/SKILL.md delete mode 100644 .claude/skills/polish-security/SKILL.md delete mode 100644 .claude/skills/polish-websocket/SKILL.md delete mode 100644 .claude/skills/polish/SKILL.md delete mode 100644 .claude/skills/polish/references/backend.md delete mode 100644 .claude/skills/polish/references/deploy.md delete mode 100644 .claude/skills/polish/references/errors.md delete mode 100644 .claude/skills/polish/references/forms.md delete mode 100644 .claude/skills/polish/references/loading.md 
delete mode 100644 .claude/skills/polish/references/security.md delete mode 100644 .claude/skills/polish/references/websocket.md delete mode 100644 .claude/skills/refactor/SKILL.md delete mode 100644 .claude/skills/server-logs/SKILL.md delete mode 100644 .claude/skills/sweep/SKILL.md delete mode 100644 .claude/skills/sync-configs/SKILL.md delete mode 100644 .claude/skills/test/SKILL.md delete mode 100644 .claude/skills/ux-review/SKILL.md delete mode 100644 .cursor/rules/APP-UI-QUICK-REF.md delete mode 100644 .cursor/rules/APP-UI-STANDARDS.md delete mode 100644 .cursor/rules/Architecture.mdc delete mode 100644 .cursor/rules/Development-Workflow.md delete mode 100644 .cursor/rules/Development-Workflow.mdc delete mode 100644 .cursor/rules/UI-STANDARDS.md delete mode 100644 .cursor/rules/coding-rules.mdc diff --git a/.claude/agents/code-reviewer.md b/.claude/agents/code-reviewer.md deleted file mode 100644 index 026cb8ef..00000000 --- a/.claude/agents/code-reviewer.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -name: code-reviewer -description: Reviews Archipelago code changes for quality — frontend patterns, Rust safety, container security, crypto rules, and project conventions. -tools: Read, Grep, Glob -model: sonnet ---- - -You are an Archipelago code reviewer. Check changes against project standards. - -## Frontend (neode-ui/) -- `'; - sub_filter_once on; - sub_filter_types text/html; - - # Required: disable upstream compression - proxy_set_header Accept-Encoding ""; -} -``` - -**Use cases:** -- Injecting a postMessage bridge (e.g., NIP-07 Nostr provider) -- Adding resize reporting scripts -- Injecting theme CSS -- Adding custom error handlers - -**Safety rules:** -- Only inject into `text/html` responses -- Inject before `` or after `` — never in the middle of content -- The injected script should check `if (window === window.top) return` to only activate inside iframes -- Use `sub_filter_once on` to prevent double-injection - ---- - -## 11. 
Performance Considerations - -### iframe Resource Impact - -Each iframe creates: -- Separate browsing context (DOM, CSS engine, JS runtime) -- 10-50MB memory per iframe depending on app complexity -- Own JavaScript execution on main thread - -### Mitigation - -- Only load visible iframes (`loading="lazy"` or Intersection Observer) -- Destroy iframes when hidden (remove from DOM, not just `display:none`) -- Use `about:blank` for pre-created iframe elements, set real src when needed -- Limit concurrent iframes to 3-5 for acceptable performance -- Consider `credentialless` for public content (lighter weight) - -### Caching - -- iframes follow standard HTTP caching (Cache-Control, ETag) -- Setting `src` to the same URL does NOT trigger reload -- To force reload: append query param (`?t=${Date.now()}`) or call `iframe.contentWindow.location.reload()` (same-origin only) - ---- - -## 12. Debugging Checklist - -When an app doesn't work in an iframe, check in this order: - -1. **Check response headers:** - ```bash - curl -sI http://localhost:{PORT} | grep -iE 'x-frame|content-security|cross-origin' - ``` - -2. **Check if Nginx is stripping headers:** - ```bash - curl -sI http://{node-ip}/app/{id}/ | grep -iE 'x-frame|content-security' - ``` - -3. **Check browser console** for: - - "Refused to display in a frame" → XFO or frame-ancestors blocking - - "Mixed Content" → HTTP iframe on HTTPS page - - "WebSocket connection failed" → Missing WebSocket proxy config - - "net::ERR_BLOCKED_BY_RESPONSE" → COEP/CORP/COOP headers blocking - -4. **Check if app has JavaScript frame-busting:** - - Open the app directly, view source, search for `window.top`, `window.parent`, `frameElement` - -5. **Check if cookies/auth work:** - - Open DevTools → Application → Cookies in the iframe context - - Look for blocked cookies (yellow warning triangle) - -6. 
**Check base path issues:** - - DevTools → Network tab → look for 404s on CSS/JS/API requests - - If assets load from `/` instead of `/app/{id}/`, the app needs base path config - -7. **Check WebSocket connections:** - - DevTools → Network → WS tab → check if WebSocket connections upgrade successfully - ---- - -## 13. Archipelago-Specific Patterns - -### Port-to-Proxy Mapping - -The `appLauncher.ts` store maintains `PORT_TO_PROXY` mapping: direct ports → `/app/{name}/` paths. When running on HTTPS, direct HTTP port URLs are rewritten to same-origin proxy paths via `toEmbeddableUrl()`. - -### mustOpenInNewTab Detection - -Apps that cannot work in iframes are listed in `IFRAME_BLOCKED_HOSTS` (external sites) and port-based checks (local apps with unstrippable restrictions). These automatically open in a new browser tab. - -### Nostr Provider Injection - -All proxied apps receive `/nostr-provider.js` via `sub_filter` injection. This provides `window.nostr` (NIP-07) inside iframes, allowing apps to request signing, key access, and encryption from the parent portal without exposing secret keys. - -### Identity Protocol - -Identity-aware apps (IndeedHub) receive user identity via `archipelago:identity` postMessage after an identity picker modal. Identity includes DID, pubkey, npub, and a signed challenge for verification. - -### Payment Protocol - -Apps can request Bitcoin payments via `archipelago:payment-request` postMessage. The parent validates, shows a confirmation modal, executes the payment (ecash/LN/on-chain based on amount), and responds with a receipt. - -### iframe Load Fallback - -If an iframe fails to load within 15 seconds or loads empty content, a fallback UI is shown with a "Can't display in frame" message and an "Open in new tab" button. - ---- - -## Decision Framework - -When adding a new app to Archipelago: - -``` -1. Does the app set X-Frame-Options or CSP frame-ancestors? - ├── No → iframe via /app/{id}/ proxy, done - └── Yes → - 2. 
Can you strip headers at Nginx? - ├── Yes, and app works → iframe via /app/{id}/ proxy - └── App still broken after stripping → - 3. Does the app have JavaScript frame-busting? - ├── Yes → Open in new tab (add to mustOpenInNewTab) - └── No → - 4. Is it a base path issue? - ├── Yes → Configure app's native base path or use sub_filter - └── No → - 5. Is it a WebSocket issue? - ├── Yes → Add WebSocket proxy config - └── No → - 6. Is it a cookie/auth issue? - ├── Yes → Same-origin proxy should fix it - └── No → Debug with browser DevTools, check console errors -``` diff --git a/.claude/hooks/block-risky-bash.sh b/.claude/hooks/block-risky-bash.sh deleted file mode 100755 index 0a68d6b9..00000000 --- a/.claude/hooks/block-risky-bash.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env bash -# PreToolUse Bash guard: block dangerous shell commands. -# Denies: rm -rf, git reset --hard, git push -f, git clean -fd, chmod -R 777, -# fork bombs, block device overwrites, mkfs, building Rust on macOS for Linux. 
-set -euo pipefail - -INPUT=$(cat) -CMD=$(python3 -c " -import json, sys -try: - data = json.loads(sys.stdin.read()) - print(data.get('tool_input', {}).get('command', '')) -except: pass -" <<< "$INPUT") -BASE="${CLAUDE_PROJECT_DIR:-}" -[[ -z "$BASE" ]] && BASE=$(python3 -c " -import json, sys -try: - data = json.loads(sys.stdin.read()) - print(data.get('cwd', '')) -except: pass -" <<< "$INPUT") -[[ -z "$BASE" ]] && BASE="$(pwd)" - -# Normalize: collapse whitespace, strip leading/trailing -CMD_NORM=$(echo "$CMD" | tr -s '[:space:]' ' ' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') - -deny() { - local reason="$1" - python3 -c " -import json -print(json.dumps({ - 'hookSpecificOutput': { - 'hookEventName': 'PreToolUse', - 'permissionDecision': 'deny', - 'permissionDecisionReason': '$reason' - } -})) -" - exit 0 -} - -# Dangerous patterns -case "$CMD_NORM" in - *"rm -rf"*|*"rm -fr"*|*"rm -f -r"*|*"rm -r -f"*) deny "Destructive rm -rf blocked by security hook" ;; - *"git reset --hard"*) deny "git reset --hard would lose uncommitted work" ;; - *"git push --force"*|*"git push -f"*|*"git push -f "*) deny "git push --force would rewrite history" ;; - *"git clean -fd"*|*"git clean -f -d"*) deny "git clean -fd deletes untracked files" ;; - *"chmod -R 777"*|*"chmod -R 0777"*) deny "chmod -R 777 is a security risk" ;; - *":(){ :"*"};:"*) deny "Fork bomb pattern blocked" ;; - *"> /dev/sd"*|*">/dev/sd"*) deny "Block device overwrite blocked" ;; - *"mkfs "*|*"mkfs."*) deny "Disk format command blocked" ;; -esac - -# Block building Rust locally on macOS (should always build on dev server) -if [[ "$(uname)" == "Darwin" ]]; then - if echo "$CMD_NORM" | grep -qE '^\s*cargo\s+build'; then - # Allow if it's clearly an SSH command (building on remote) - if ! 
echo "$CMD_NORM" | grep -qE 'ssh|sshpass'; then - deny "NEVER build Rust on macOS — use ./scripts/deploy-to-target.sh --live or build on dev server via SSH" - fi - fi -fi - -# Check for path traversal escaping project root -if [[ -n "$BASE" ]] && [[ -d "$BASE" ]]; then - if echo "$CMD_NORM" | grep -qE '\.\./|/\.\.'; then - if echo "$CMD_NORM" | grep -qE '(rm|mv|cp|cat|chmod|chown)\s+.*\.\.'; then - if echo "$CMD_NORM" | grep -qE '\brm\b.*\.\.'; then - deny "Path traversal with rm blocked" - fi - fi - fi -fi - -exit 0 diff --git a/.claude/hooks/post-deploy-check.sh b/.claude/hooks/post-deploy-check.sh deleted file mode 100755 index ea49b0ac..00000000 --- a/.claude/hooks/post-deploy-check.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash -# PostToolUse Bash hook: detect deploy commands and remind to test. -# Triggers after deploy-to-target.sh runs. -set -euo pipefail - -INPUT=$(cat) - -CMD=$(python3 -c " -import json, sys -try: - data = json.loads(sys.stdin.read()) - print(data.get('tool_input', {}).get('command', '')) -except: pass -" <<< "$INPUT") - -# Only trigger on deploy commands or git push -if ! echo "$CMD" | grep -qE 'deploy-to-target|git\s+push'; then - exit 0 -fi - -TIMESTAMP=$(date '+%Y-%m-%d %H:%M') - -python3 -c " -import json - -message = '''Deploy detected at $TIMESTAMP. - -Post-deploy checklist: -1. Test the web UI at http://192.168.1.228 -2. Verify modified apps load correctly -3. Check backend logs: sudo journalctl -u archipelago -n 20 -4. Check nginx: sudo tail -f /var/log/nginx/error.log -5. If building ISO, sync system configs to image-recipe/configs/ -6. 
Update CHANGELOG.md if this is a notable change''' - -output = { - 'hookSpecificOutput': { - 'hookEventName': 'PostToolUse', - 'deployReminder': message - } -} -print(json.dumps(output)) -" diff --git a/.claude/hooks/protect-files.sh b/.claude/hooks/protect-files.sh deleted file mode 100755 index 3ff5453a..00000000 --- a/.claude/hooks/protect-files.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env bash -# PreToolUse Edit|Write guard: block edits outside project and to protected paths. -# Denies: paths outside project, .git/, .env*, lockfiles, node_modules/, deploy-config.sh -set -euo pipefail - -INPUT=$(cat) -FILE_PATH=$(python3 -c " -import json, sys -try: - data = json.loads(sys.stdin.read()) - print(data.get('tool_input', {}).get('file_path', '')) -except: pass -" <<< "$INPUT") -BASE="${CLAUDE_PROJECT_DIR:-}" -[[ -z "$BASE" ]] && BASE=$(python3 -c " -import json, sys -try: - data = json.loads(sys.stdin.read()) - print(data.get('cwd', '')) -except: pass -" <<< "$INPUT") -[[ -z "$BASE" ]] && BASE="$(pwd)" - -# Resolve to absolute path -if [[ -z "$FILE_PATH" ]]; then - exit 0 -fi -ABS_BASE=$(cd "$BASE" 2>/dev/null && pwd) || true -[[ -z "$ABS_BASE" ]] && ABS_BASE=$(python3 -c "import os,sys; print(os.path.abspath(os.path.normpath(sys.argv[1])))" "$BASE" 2>/dev/null) || true -[[ -z "$ABS_BASE" ]] && ABS_BASE="$BASE" -[[ "$ABS_BASE" != */ ]] && ABS_BASE="${ABS_BASE}/" -if [[ "$FILE_PATH" != /* ]]; then - ABS_PATH="$ABS_BASE${FILE_PATH#./}" -else - ABS_PATH="$FILE_PATH" -fi -ABS_PATH=$(python3 -c "import os,sys; print(os.path.abspath(os.path.normpath(sys.argv[1])))" "$ABS_PATH" 2>/dev/null) || true -[[ -z "$ABS_PATH" ]] && ABS_PATH="$ABS_BASE${FILE_PATH#./}" - -deny() { - local reason="$1" - echo "Blocked: $ABS_PATH — $reason" >&2 - python3 -c " -import json -print(json.dumps({ - 'hookSpecificOutput': { - 'hookEventName': 'PreToolUse', - 'permissionDecision': 'deny', - 'permissionDecisionReason': '$reason' - } -})) -" - exit 0 -} - -# Protected patterns 
-PROTECTED_PATTERNS=( - ".git/" - ".env" - ".env.local" - "node_modules/" - "package-lock.json" - "scripts/deploy-config.sh" -) - -for pattern in "${PROTECTED_PATTERNS[@]}"; do - if [[ "$ABS_PATH" == *"$pattern"* ]] || [[ "$ABS_PATH" == *"/$pattern" ]]; then - deny "Edit blocked: path matches protected pattern ($pattern)" - fi -done - -# .env.*.local -if [[ "$ABS_PATH" =~ \.env\..*\.local$ ]]; then - deny "Edit blocked: .env.*.local files contain secrets" -fi - -# Ensure path is under project root -if [[ "$ABS_PATH" != "$ABS_BASE"* ]] && [[ "$ABS_PATH" != "$BASE"* ]]; then - deny "Edit blocked: path is outside project directory" -fi - -exit 0 diff --git a/.claude/memory/MEMORY.md b/.claude/memory/MEMORY.md deleted file mode 100644 index 0584a700..00000000 --- a/.claude/memory/MEMORY.md +++ /dev/null @@ -1,43 +0,0 @@ -# Archipelago Project Memory Index - -## Setup & Architecture -- [claude-proxy-setup.md](claude-proxy-setup.md) — Claude proxy OAuth setup details -- [deploy-automation.md](deploy-automation.md) — Deploy script automation TODOs (API key, AIUI nginx, swap) - -## Servers & Deploy -- [project_environments.md](project_environments.md) — Four environments: dev mode, dev server/prod, demo -- [tailscale_servers.md](tailscale_servers.md) — Tailscale server details (archipelago-2, archipelago-3) -- [reference_tailscale_nodes.md](reference_tailscale_nodes.md) — All node IPs and SSH commands -- [second-server.md](second-server.md) — Second dev server (archipelago-2 via Tailscale) -- [third-server.md](third-server.md) — Third dev server (archipelago-3 via Tailscale) - -## Features & Plans -- [pending-features.md](pending-features.md) — Feature requests: kiosk mode, sideloading, Nostr login, etc. 
-- [project-plan.md](project-plan.md) — Overall project plan status -- [web-only-apps.md](web-only-apps.md) — Web-only apps (L484 category) and iframe compatibility - -## User Feedback -- [feedback_app_display_modes.md](feedback_app_display_modes.md) — App browser: 3 display modes with persistent setting -- [feedback_fullscreen_modals.md](feedback_fullscreen_modals.md) — Fullscreen modal preferences -- [feedback_local_dev.md](feedback_local_dev.md) — Local dev: use `cd neode-ui && ./start-dev.sh` -- [feedback_apps_always_direct_port.md](feedback_apps_always_direct_port.md) — Apps MUST open at direct port, NEVER proxy paths -- [feedback_indeedhub_nginx_ips.md](feedback_indeedhub_nginx_ips.md) — IndeedHub nginx must use hardcoded container IPs -- [feedback_searxng_no_cap_drop.md](feedback_searxng_no_cap_drop.md) — SearXNG: no cap-drop ALL - -## ISO Build -- [iso-build-session-2026-03-10.md](iso-build-session-2026-03-10.md) — ISO build session notes -- [unbundled-iso.md](unbundled-iso.md) — Unbundled ISO approach notes - -## Infrastructure -- [project_bitcoin_rpc_auth.md](project_bitcoin_rpc_auth.md) — Bitcoin rpcauth, system Tor, reboot survival, container resilience - -## Deploy & Container Fixes -- [project_deploy_session_2026_03_22.md](project_deploy_session_2026_03_22.md) — Fleet deploy fixes: credential mismatches, restart storms, rootless port 80, deploy script hardening - -## Gamepad Navigation -- [project_gamepad_nav.md](project_gamepad_nav.md) — Controller nav system, key files, patterns, Chromium gotchas - -## Completed Work -- [project_mesh_198_issue.md](project_mesh_198_issue.md) — Mesh .198: 3 bugs fixed and deployed -- [project_indeedhub_arch3_fix.md](project_indeedhub_arch3_fix.md) — IndeedHub Arch 3: corrupted combined tarball fixed -- [project_demo_deploy.md](project_demo_deploy.md) — Demo prod deployment via Portainer diff --git a/.claude/memory/claude-proxy-setup.md b/.claude/memory/claude-proxy-setup.md deleted file mode 100644 index 
b79565c4..00000000 --- a/.claude/memory/claude-proxy-setup.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: claude-proxy-oauth-primary-only -description: Only the primary server (192.168.1.228) uses OAuth proxy for Claude. Second server uses standard API key. -type: feedback ---- - -- Only the primary dev server (192.168.1.228) uses the Claude Max OAuth proxy for AIUI -- The second dev server and any other deployments should use a standard Anthropic API key (`ANTHROPIC_API_KEY` env var) -- Do not replicate the OAuth proxy setup to other servers diff --git a/.claude/memory/deploy-automation.md b/.claude/memory/deploy-automation.md deleted file mode 100644 index 7444e928..00000000 --- a/.claude/memory/deploy-automation.md +++ /dev/null @@ -1,18 +0,0 @@ ---- -name: deploy-automation-todos -description: Items to incorporate into the deploy script for automated setup — Claude API key, AIUI nginx config, swap -type: project ---- - -## Deploy Script Automation TODOs - -These items should be added to `scripts/deploy-to-target.sh` so new server deploys are self-contained: - -1. **Claude API key setup**: After deploying the proxy, auto-configure `/opt/archipelago/claude-proxy/.env.local` with the `ANTHROPIC_API_KEY`. On non-primary servers, use the standard API key (not OAuth). The key is: `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` - -2. **AIUI nginx location block**: Ensure nginx config includes a proper `/aiui/` location block so static JS/CSS files are served with correct MIME types. Without this, AIUI fails to load modules. - -3. **Swap space**: Deploy script should check for swap and create 4GB if missing (`fallocate -l 4G /swapfile && mkswap && swapon + fstab entry`). - -4. **Primary server (192.168.1.228)**: 4GB swap configured on 2026-03-11. -5. **Second server (archipelago-2)**: 4GB swap configured on 2026-03-11. 
diff --git a/.claude/memory/feedback_app_display_modes.md b/.claude/memory/feedback_app_display_modes.md deleted file mode 100644 index 320ee986..00000000 --- a/.claude/memory/feedback_app_display_modes.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -name: App display modes -description: App session browser should support 3 display modes - right panel, full overlay, and fullscreen - with a persistent setting -type: feedback ---- - -App session views (the built-in browser for launching apps) should support three display modes, controlled by a setting dropdown in the header bar: - -1. **Display in right panel** — app loads inside the dashboard's right content area (sidebar visible) -2. **Display over whole app** — app overlays the entire viewport including sidebar (like old AppLauncherOverlay with `fixed inset-0 z-[2400]`) -3. **Open fullscreen** — uses browser Fullscreen API for true fullscreen - -**Why:** The user likes the right-panel approach (screenshot showed it working well) but also wants the option to go full overlay or fullscreen. The setting should persist (localStorage) and apply to all apps globally. - -**How to apply:** Store the preference in localStorage. The header bar should have a dropdown/toggle with icons for the three modes. Default to "right panel" mode. diff --git a/.claude/memory/feedback_apps_always_direct_port.md b/.claude/memory/feedback_apps_always_direct_port.md deleted file mode 100644 index c58ecfdc..00000000 --- a/.claude/memory/feedback_apps_always_direct_port.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: Apps MUST open at direct port — NEVER proxy paths -description: CRITICAL — All apps in iframes must open at their direct port (http(s)://{host}:{port}), NEVER through /app/{id}/ proxy paths. This is the #1 cause of broken app loading across all nodes. 
-type: feedback ---- - -## CRITICAL RULE: Apps load at DIRECT PORT, never proxy paths - -All Archipelago apps that open in iframes MUST use the direct port URL: -``` -{protocol}://{hostname}:{port} -``` - -**NEVER** use path-based proxy URLs like `/app/indeedhub/` or `/app/mempool/` for iframe loading. Path proxies break apps because: -1. The main nginx SPA catch-all serves the Archipelago dashboard instead of the app -2. sub_filter URL rewrites break client-side routing in Vue/React apps -3. Different nodes have different nginx configs — path proxies are unreliable - -**Why:** This was broken THREE TIMES in one session (2026-03-17). Every time the iframe URL used a proxy path instead of the direct port, the app showed the Archipelago dashboard or a blank page. .228 and .198 work correctly because they use HTTP which naturally hits the direct port. Tailscale nodes use HTTPS which was falling through to the proxy path. - -**How to apply:** -- In `AppSession.vue`, apps like IndeedHub must ALWAYS construct `{protocol}://{hostname}:{port}` — even on HTTPS -- The `HTTPS_PROXY_PATHS` mapping should NOT include apps that have X-Frame-Options removed (like IndeedHub) -- When adding new apps: use PORT_APPS for the port mapping, do NOT add to HTTPS_PROXY_PATHS unless absolutely necessary -- The deploy script removes X-Frame-Options from IndeedHub's internal nginx, enabling direct port iframe access - -**Also critical for IndeedHub specifically:** -- IndeedHub nginx MUST use hardcoded container IPs (not DNS names) — see feedback_indeedhub_nginx_ips.md -- nostr-provider.js must be injected via sub_filter in the IndeedHub internal nginx -- SearXNG must NOT use --cap-drop ALL — see feedback_searxng_no_cap_drop.md - -**When recreating containers:** -- NEVER recreate containers without reapplying ALL patches (X-Frame-Options removal, nostr-provider injection, IP hardcoding) -- After any container IP change (restart, recreation), update the hardcoded IPs in IndeedHub's nginx config 
-- Deploy the SAME frontend build to ALL nodes — version mismatch causes different behavior diff --git a/.claude/memory/feedback_asset_workflow.md b/.claude/memory/feedback_asset_workflow.md deleted file mode 100644 index 4dc21386..00000000 --- a/.claude/memory/feedback_asset_workflow.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -name: Asset workflow - designer makes images -description: User is a designer — never generate PNG/JPEG/SVG assets, only provide specs. TUI/text animations are Claude's job. -type: feedback ---- - -Never generate PNG, JPEG, or SVG image assets. The user is a designer and will always create these manually. - -**Claude's job:** TUI text, animations, shell scripts, code -**User's job:** PNG, JPEG, SVG, any visual/graphic assets - -When images are needed, provide clear specs (dimensions, format, constraints, where they go) and let the user create them. - -**Why:** User is a professional designer. Auto-generated pixel art looks generic compared to their actual brand artwork. - -**How to apply:** When boot splash, logos, icons, or any visual assets are needed, output a spec sheet with dimensions/format/constraints. Never run image generation scripts as part of the build. diff --git a/.claude/memory/feedback_deploy_patterns.md b/.claude/memory/feedback_deploy_patterns.md deleted file mode 100644 index be0095e8..00000000 --- a/.claude/memory/feedback_deploy_patterns.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -name: Deploy container patterns -description: Hard-won deploy patterns — rootless port 80, credential sync, health checks, image export -type: feedback ---- - -Container deploy patterns learned from fleet-wide deploy sessions. - -**Rootless port 80:** Containers binding port 80 MUST use `--user 0:0`. `NET_BIND_SERVICE` cap doesn't work in rootless Podman. - -**Why:** Discovered across multiple containers (FileBrowser, Nextcloud, Vaultwarden, Jellyfin) that `--cap-add NET_BIND_SERVICE` is silently ignored in rootless mode. Only `--user 0:0` works. 
- -**Credential sync:** MariaDB/Postgres only read env vars on FIRST init. If deploy generates new random passwords in `secrets/` but the DB data dir already exists, the DB keeps the OLD password. Fix: either wipe data dir + reinit, or `ALTER USER` to sync. - -**Image export:** Always export custom images as INDIVIDUAL tarballs (`podman save -o name.tar`). Combined tarballs corrupt image IDs. - -**Health checks:** Every container should have `--health-cmd`. Currently 25+ containers have them. - -**How to apply:** Check these patterns in any deploy script changes or new container additions. diff --git a/.claude/memory/feedback_fullscreen_modals.md b/.claude/memory/feedback_fullscreen_modals.md deleted file mode 100644 index 50c84e99..00000000 --- a/.claude/memory/feedback_fullscreen_modals.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -name: Full-screen modals -description: App session modals and overlays must cover the full viewport, not just the right panel area of the dashboard -type: feedback ---- - -Modals and app session overlays must be **full screen** — covering the entire viewport including the sidebar/nav. Do NOT constrain them to just the right content panel of the dashboard layout. - -**Why:** The user has corrected this multiple times. Modals that only cover the right panel look wrong and don't provide an immersive app experience. - -**How to apply:** When creating overlays, modals, or app session views, use `position: fixed; inset: 0; z-index: 2400+` to cover the entire screen. The existing AppLauncherOverlay already does this correctly with `class="fixed inset-0 z-[2400]"` — follow that pattern. On mobile it should be truly fullscreen (no padding/margins). On desktop, the glass panel with margins (md:p-10, md:rounded-2xl) is fine. 
diff --git a/.claude/memory/feedback_gamepad_unfinished.md b/.claude/memory/feedback_gamepad_unfinished.md deleted file mode 100644 index 1a8e415e..00000000 --- a/.claude/memory/feedback_gamepad_unfinished.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -name: Gamepad navigation unfinished -description: Gamepad/controller nav rewrite (aada1975) shipped but has issues — needs further work -type: feedback ---- - -Gamepad navigation rewrite was committed (aada1975) and included in CI ISO builds, but user reports it's not working correctly. Issues: -- Can't exit input fields with up/down arrow keys when other elements are available -- Navigation behavior not right (unspecified details — need to investigate) - -**Why:** The rewrite was a major change to `useControllerNav.ts` and focus management. Shipped in beta but needs polish. -**How to apply:** When touching gamepad/controller nav, treat as unfinished work. Test arrow key behavior on inputs, focus trap logic, and spatial navigation thoroughly. diff --git a/.claude/memory/feedback_indeedhub_nginx_ips.md b/.claude/memory/feedback_indeedhub_nginx_ips.md deleted file mode 100644 index 4e1c48e3..00000000 --- a/.claude/memory/feedback_indeedhub_nginx_ips.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -name: IndeedHub nginx must use hardcoded container IPs -description: IndeedHub's internal nginx config MUST use hardcoded container IPs, not DNS names — podman DNS resolver 127.0.0.11 is unreliable and causes 502 errors -type: feedback ---- - -IndeedHub's internal nginx config MUST use hardcoded container IPs (e.g., `http://10.89.2.11:4000`) for API, MinIO, and relay upstreams — NEVER DNS names like `http://api:4000` with `resolver 127.0.0.11`. - -**Why:** Podman's embedded DNS resolver (127.0.0.11) is unreliable inside containers. Using DNS names with `resolver 127.0.0.11 valid=30s ipv6=off;` causes nginx to return 502 Bad Gateway errors. This broke IndeedHub on all Tailscale nodes — frontend loaded but API calls failed. 
- -**How to apply:** After creating/recreating IndeedHub containers, the deploy script must: -1. Look up container IPs: `podman inspect indeedhub-build_api_1 --format "{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}"` -2. Patch the nginx config inside the container to replace DNS names with IPs -3. Also patch `$host` → `$http_host` in proxy_set_header Host directives -4. Reload nginx inside the container - -This is now handled in `deploy-to-target.sh` and `fix-indeedhub-containers.sh`. diff --git a/.claude/memory/feedback_local_dev.md b/.claude/memory/feedback_local_dev.md deleted file mode 100644 index 8f4e4bf7..00000000 --- a/.claude/memory/feedback_local_dev.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -name: Local Frontend Dev Workflow -description: How to start the local frontend dev environment — use start-dev.sh from neode-ui/, NOT npm start from root -type: feedback ---- - -Run local frontend dev from `neode-ui/` directory: `./start-dev.sh` (NOT `npm start` from project root — there's no root package.json). - -**Why:** The project root has no package.json. Running `npm start` there fails with ENOENT. The frontend dev script lives in `neode-ui/start-dev.sh`. - -**How to apply:** -- `cd neode-ui && ./start-dev.sh` — clears ports, starts Docker apps, runs `npm run dev:mock` (mock backend on :5959, Vite on :8100) -- Stop with `./stop-dev.sh` or Ctrl+C -- Login password in dev mode: `password123` -- When telling the user how to test locally, always reference `cd neode-ui && ./start-dev.sh` diff --git a/.claude/memory/feedback_logo_ascii.md b/.claude/memory/feedback_logo_ascii.md deleted file mode 100644 index 262e8c80..00000000 --- a/.claude/memory/feedback_logo_ascii.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -name: Archipelago ASCII logo — never change -description: The block-letter ASCII art logo for Archipelago is locked in. Use this exact design everywhere. -type: feedback ---- - -The Archipelago ASCII block-letter logo is finalized. Never change it. 
- -``` -█▀█ █▀▄ █▀▀ █ █ █ █▀█ █▀▀ █ █▀█ █▀▀ █▀█ -█▀█ █▀▄ █ █▀█ █ █▀▀ ██▀ █ █▀█ █ █ █ █ -▀ ▀ ▀ ▀ ▀▀▀ ▀ ▀ ▀ ▀ ▀▀▀ ▀▀▀ ▀ ▀ ▀▀▀ ▀▀▀ -``` - -Uses ▀ ▄ █ block characters. 45 chars wide, fits in any 52+ col box. -Render in Bitcoin orange (`\033[38;5;208m`) by default. - -**Why:** User explicitly approved this logo and said "save that never change." -**How to apply:** Use this for all TUI contexts — install screens, MOTD, menu banners, boot displays. Replace the old spaced-out `a r c h i p e l a g o` text with this wherever a banner is needed. diff --git a/.claude/memory/feedback_searxng_no_cap_drop.md b/.claude/memory/feedback_searxng_no_cap_drop.md deleted file mode 100644 index bbf2ef30..00000000 --- a/.claude/memory/feedback_searxng_no_cap_drop.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -name: SearXNG must NOT use --cap-drop ALL -description: SearXNG container needs write access to /etc/searxng/ for settings.yml — cap-drop ALL causes Permission denied and exit 127 -type: feedback ---- - -Do NOT use `--cap-drop ALL` or `--security-opt no-new-privileges:true` when creating the SearXNG container. SearXNG needs to create `/etc/searxng/settings.yml` on first run. - -**Why:** SearXNG's entrypoint creates a settings file from a template. With `--cap-drop ALL`, it gets "Permission denied: can't create '/etc/searxng/settings.yml'" and exits with code 127. The .228 reference server runs SearXNG with default capabilities (only drops CAP_AUDIT_WRITE, CAP_MKNOD, CAP_NET_RAW). - -**How to apply:** When creating SearXNG containers, use: -```bash -sudo podman run -d --name searxng --restart unless-stopped -p 8888:8080 docker.io/searxng/searxng:latest -``` -No `--cap-drop ALL`, no `--security-opt no-new-privileges:true`. 
diff --git a/.claude/memory/iso-build-session-2026-03-10.md b/.claude/memory/iso-build-session-2026-03-10.md deleted file mode 100644 index e7755ef2..00000000 --- a/.claude/memory/iso-build-session-2026-03-10.md +++ /dev/null @@ -1,84 +0,0 @@ -# ISO Build Session — 2026-03-10 - -## Status: Changes ready, NOT yet deployed or built - -All changes are local. Servers were unreachable at end of session (network issue, not crash). -Need to: deploy to .228 → build new ISO → copy to File Browser Builds folder. - -## Changes Made (Local, Uncommitted) - -### 1. ISO Login Fix (`image-recipe/build-auto-installer-iso.sh`) -- **Problem**: `chpasswd` fails silently in chroot (PAM not available), leaving password locked -- **Fix**: Direct `/etc/shadow` manipulation with `sed` using SHA-512 hash from `openssl passwd -6` -- Pre-computed hash as fallback if openssl unavailable -- Verification check + chpasswd fallback -- Also added `root:archipelago` password in Dockerfile -- **Credentials**: `archipelago` / `archipelago` (TTY/SSH), `password123` (Web UI) - -### 2. Onboarding "Server Starting Up" UX (4 Vue files) -- **Problem**: On fresh install, backend takes 2-5 min to start. Onboarding shows scary error messages. -- **OnboardingDid.vue**: Replaced 3-attempt retry with persistent auto-retry every 4s. Shows "Server starting up" with elapsed timer (e.g. `1:23`) to the right. Keeps trying until backend responds. -- **OnboardingIdentity.vue**: Detects 502/503, shows orange "Server is still starting up" instead of red error. -- **OnboardingBackup.vue**: Same friendly server-starting message. -- **OnboardingVerify.vue**: Same friendly server-starting message. - -### 3. 
First-Boot Container Fixes (`scripts/first-boot-containers.sh`) -- **Problem**: Race conditions — services start before dependencies are ready -- Added `wait_for_container()` function with configurable timeout and logging -- **Bitcoin Knots**: Added RPC health check wait (up to 60s) before LND/NBXplorer/mempool start -- **BTCPay PostgreSQL**: Replaced `sleep 3` with `pg_isready` health check (up to 30s) -- **Mempool MariaDB**: Replaced `sleep 3` with connection check (up to 30s) -- **File Browser**: Removed `--read-only` and `--cap-drop ALL` (was preventing database creation). Added separate `/database` volume mount. - -### 4. Build Skill Updated (`.claude/skills/build-iso/SKILL.md`) -- Added "Post-build: Publish to File Browser" step -- ISO gets copied to `/var/lib/archipelago/filebrowser/Builds/` after every build - -## Fresh Install Issues Found on .198 -- Login was broken (fixed in #1) -- Onboarding showed 502 errors at every step (fixed in #2) -- Containers not launching: Bitcoin Knots, BTCPay, File Browser, Grafana, LND (fixed in #3) -- File Browser specifically: `--read-only` prevented database creation (fixed in #3) -- Could not fully diagnose .198 — went offline before SSH diagnostic completed - -## Deploy Steps When Servers Are Back -```bash -# 1. Deploy to live server -./scripts/deploy-to-target.sh --live - -# 2. Sync build script -rsync -avz -e "ssh -i ~/.ssh/archipelago-deploy" \ - image-recipe/build-auto-installer-iso.sh \ - archipelago@192.168.1.228:~/archy/image-recipe/ - -# 3. Sync first-boot script -rsync -avz -e "ssh -i ~/.ssh/archipelago-deploy" \ - scripts/first-boot-containers.sh \ - archipelago@192.168.1.228:~/archy/scripts/ - -# 4. Build ISO on server -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 \ - 'cd ~/archy/image-recipe && sudo ./build-auto-installer-iso.sh' - -# 5. 
Copy to File Browser -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 \ - 'sudo mkdir -p /var/lib/archipelago/filebrowser/Builds && \ - sudo cp ~/archy/image-recipe/results/archipelago-installer-x86_64.iso \ - /var/lib/archipelago/filebrowser/Builds/' - -# 6. Download to Mac -scp -i ~/.ssh/archipelago-deploy \ - archipelago@192.168.1.228:~/archy/image-recipe/results/archipelago-installer-x86_64.iso \ - ~/Downloads/ -``` - -## Files Modified (git diff summary) -- `image-recipe/build-auto-installer-iso.sh` — password fix + Dockerfile root password -- `scripts/first-boot-containers.sh` — health checks + filebrowser fix -- `scripts/deploy-to-target.sh` — Tor permission fixes (from earlier) -- `neode-ui/src/views/OnboardingDid.vue` — auto-retry with timer -- `neode-ui/src/views/OnboardingIdentity.vue` — server-starting detection -- `neode-ui/src/views/OnboardingBackup.vue` — server-starting detection -- `neode-ui/src/views/OnboardingVerify.vue` — server-starting detection -- `.claude/skills/build-iso/SKILL.md` — added File Browser publish step -- Frontend already built: `web/dist/neode-ui/` is up to date diff --git a/.claude/memory/pending-features.md b/.claude/memory/pending-features.md deleted file mode 100644 index 3f7a74da..00000000 --- a/.claude/memory/pending-features.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: pending-ui-features -description: Feature requests — completed and pending items for the next deployment cycle -type: project ---- - -## Completed (2026-03-11) - -1. **IndieHub in iframe** — Restored. Removed forced new-tab check in `mustOpenInNewTab()`. -2. **App uninstall fix** — Backend now logs errors and returns structured response instead of silently swallowing. -3. **Login music stops after auth** — Added `stopAllAudio()` + router afterEach guard. -4. **Container scanner dev_mode gate removed** — Scanner runs always now. -5. **BotFights app** — Added as web-only app with SVG icon. Opens in new tab (X-Frame-Options blocks iframe). -6. 
**L484 web apps** — Added 6 web-only apps: NWNN, 484 Kitchen, Call the Operator, Arch Presentation, Syntropy Institute, T-0. L484 category in marketplace. -7. **Kiosk mode** — `/kiosk` route added, `setup-kiosk.sh` installs systemd service, systemd units in image-recipe/configs/. No full-screen iframe overlay — uses standard appLauncher. -8. **AIUI first-install fix** — nginx `try_files` changed to `=404`, Chat.vue probes AIUI availability before loading iframe. -9. **Web-only apps in My Apps** — Injected synthetic PackageDataEntry objects in Apps.vue. Web-only apps sorted first (alphabetically before container apps). No uninstall/start/stop buttons. Launch uses appLauncher with correct URLs. - -## Pending - -1. **Nostr NIP-07 login for containers** — Sign into container apps using onboarding Nostr keys. Not started. -2. **App sideloading** — Settings page to load apps via Docker/OCI image URL. Not started. -3. **Encrypted Nostr peer handshake (NIP-04/NIP-44)** — Exchange Tor onion addresses via encrypted DMs instead of public relay events. Not started. Currently onion addresses are published in plaintext on relays. -4. **Third server deploy** — archipelago-3.tail2b6225.ts.net needs SSH key setup and first deploy. -5. **Kiosk auto-start on servers** — setup-kiosk.sh exists but needs to be run on each server that has a display attached. Not confirmed running on .228. -6. **Deploy to .198** — Secondary server not yet deployed with latest changes. 
diff --git a/.claude/memory/project-plan.md b/.claude/memory/project-plan.md deleted file mode 100644 index b25ed994..00000000 --- a/.claude/memory/project-plan.md +++ /dev/null @@ -1,292 +0,0 @@ -# Archipelago 3-Year Project Plan - -**Version**: 1.0 -**Period**: March 2026 -- March 2029 -**Goal**: Production-ready Bitcoin Node OS with zero issues for end users -**Visual constraint**: NEVER change animations, user experience, or visuals -- only neater layouts where highlighted - -## Current Status: Year 1, Q1, Sprint 1 (Starting) - ---- - -## Year 1: Foundation & Core Functionality (March 2026 -- February 2027) - -### Q1 2026 (March -- May): Fix Broken UI, Testing Infrastructure, Networking - -#### Sprint 1: Test Infrastructure (Week 1-2) -- [ ] Install Vitest and configure frontend test runner -- [ ] Create first frontend unit tests: RPC client (8+ test cases) -- [ ] Create frontend unit tests: app store (6+ test cases) -- [ ] Create frontend unit tests: container store (5+ test cases) -- [ ] Create frontend unit tests: router guards (6+ test cases) -- [ ] Create backend integration test scaffolding -- [ ] Create backend unit tests: auth module (6+ test cases) -- [ ] Create backend unit tests: identity module (5+ test cases) -- [ ] Add CI-compatible test runner script (scripts/run-tests.sh) - -#### Sprint 2: Fix Broken UI (Week 3-4) -- [ ] Fix Settings.vue: replace .path-option-card with .glass-card -- [ ] Fix Web5.vue top bar: verify glass sub-card consistency with Server.vue -- [ ] Remove duplicate network diagnostics from Settings.vue -- [ ] Server.vue: wire real RPC data to Local Network card -- [ ] Server.vue: wire real RPC data to Web3 card (show "Coming Soon") - -#### Sprint 3: Backend Robustness (Week 5-6) -- [ ] Add system monitoring RPC endpoints (system.stats, system.processes, system.temperature) -- [ ] Add system monitoring to frontend Dashboard (CPU/RAM/Disk gauges) -- [ ] Add WiFi/Ethernet configuration RPC endpoints -- [ ] Add WiFi/Ethernet UI to 
Server.vue -- [ ] Implement CSRF protection on RPC layer -- [ ] Fix CORS policy: restrict to same-origin -- [ ] Add Nginx security headers - -#### Sprint 4: Quality Baseline (Week 7-8) -- [ ] Run full sweep and record baseline in docs/quality-baseline.md -- [ ] Fix all silent catch blocks -- [ ] Remove all console.log in production paths -- [ ] Eliminate any-type usage in frontend -- [ ] Health-gated deploy: add pre-deploy health check -- [ ] Run canary deploy to secondary server - -### Q2 2026 (June -- August): DWN, Backup/Restore, Kiosk Mode, StartOS Independence - -#### Sprint 5: DWN Protocol Implementation (Week 1-3) -- [ ] Implement DWN message store (dwn_store.rs) -- [ ] Implement DWN HTTP API (POST /dwn) -- [ ] Implement DWN peer sync protocol -- [ ] Add DWN management UI (DwnManager.vue) -- [ ] Add DWN RPC endpoints for protocol management - -#### Sprint 6: Full Backup/Restore System (Week 4-5) -- [ ] Extend backup module for full system backup -- [ ] Add backup/restore RPC endpoints -- [ ] Add backup/restore UI to Settings -- [ ] Add backup to USB drive support - -#### Sprint 7: Kiosk Mode Hardening (Week 6-7) -- [ ] Add kiosk mode crash recovery -- [ ] Add kiosk failsafe route (/recovery) -- [ ] Add kiosk-specific keyboard shortcuts -- [ ] Create kiosk systemd service - -#### Sprint 8: StartOS Independence (Week 8-10) -- [ ] Audit StartOS code usage → docs/startos-dependency-audit.md -- [ ] Migrate essential StartOS utilities to archipelago -- [ ] Remove core/startos from workspace -- [ ] Run full regression test after removal - -### Q3 2026 (September -- November): App Integration, Auto-Updates, ARM64 - -#### Sprint 9: App Integration Testing (Week 1-3) -- [ ] Create app integration test suite (scripts/test-all-apps.sh) -- [ ] Fix all app integration failures -- [ ] Test dependency chains -- [ ] Test fresh install end-to-end - -#### Sprint 10: Auto-Update System (Week 4-6) -- [ ] Implement update download and apply -- [ ] Add update notification to 
frontend -- [ ] Implement automatic update scheduling -- [ ] Create release manifest infrastructure - -#### Sprint 11: ARM64 Support (Week 7-9) -- [ ] Set up ARM64 cross-compilation -- [ ] Test ARM64 container images -- [ ] Build ARM64 ISO -- [ ] Test ARM64 on Raspberry Pi 5 - -#### Sprint 12: Quality Hardening (Week 10-12) -- [ ] Achieve 50% frontend test coverage -- [ ] Achieve 50% backend test coverage -- [ ] Run overnight chaos test -- [ ] Run full quality sweep vs baseline - -### Q4 2026 (December -- February 2027): Security, Performance, Beta - -#### Sprint 13: Security Hardening (Week 1-3) -- [ ] Implement session expiry and rotation -- [ ] Harden container security profiles -- [ ] Add secrets rotation mechanism -- [ ] Sanitize FileBrowser path traversal -- [ ] Remove FileBrowser token from URLs -- [ ] Run automated security scan - -#### Sprint 14: Performance Optimization (Week 4-6) -- [ ] Profile and optimize backend startup (<3s) -- [ ] Optimize frontend bundle size (<500KB gzipped) -- [ ] Add WebSocket connection pooling and heartbeat -- [ ] Optimize container image pull performance - -#### Sprint 15: Beta Release Prep (Week 7-10) -- [ ] Create comprehensive user documentation -- [ ] Create beta testing checklist -- [ ] Build and test beta ISO -- [ ] Publish v0.5.0-beta release -- [ ] Run 72-hour stability test - ---- - -## Year 2: Feature Completeness & Reliability (March 2027 -- February 2028) - -### Q1 2027 (March -- May): W3C DIDs, JSON-LD VCs, Hardware Wallet - -#### Sprint 16: W3C-Compliant DIDs (Week 1-3) -- [ ] Implement W3C DID Document format -- [ ] Implement DID Document verification -- [ ] Update DID display in Web5.vue -- [ ] Add DID resolution across peers - -#### Sprint 17: JSON-LD Verifiable Credentials (Week 4-6) -- [ ] Implement JSON-LD credential format -- [ ] Add credential presentation protocol -- [ ] Add credential management UI - -#### Sprint 18: Hardware Wallet Integration (Week 7-10) -- [ ] Research and document hardware wallet 
integration -- [ ] Implement PSBT signing flow in LND RPC -- [ ] Add hardware wallet UI flow -- [ ] Add USB hardware wallet detection - -### Q2 2027 (June -- August): Multi-Node, VPN, Community Marketplace - -#### Sprint 19: Multi-Node Orchestration (Week 1-4) -- [ ] Design multi-node architecture -- [ ] Implement node federation protocol -- [ ] Add multi-node dashboard -- [ ] Implement federated app deployment - -#### Sprint 20: VPN and Mesh Networking (Week 5-8) -- [ ] Add Tailscale/WireGuard VPN integration -- [ ] Add VPN status to Server.vue -- [ ] Implement mesh networking discovery -- [ ] Add DNS-over-HTTPS configuration - -#### Sprint 21: Community App Marketplace (Week 9-12) -- [ ] Design decentralized marketplace protocol -- [ ] Implement marketplace manifest discovery -- [ ] Implement app manifest publishing -- [ ] Add community marketplace tab to frontend - -### Q3 2027 (September -- November): Documentation, Reliability, Pre-Release - -#### Sprint 22: Comprehensive Documentation (Week 1-3) -- [ ] Write developer documentation -- [ ] Write API documentation -- [ ] Write app developer SDK documentation -- [ ] Create Architecture Decision Records - -#### Sprint 23: Reliability Engineering (Week 4-8) -- [ ] Implement graceful shutdown -- [ ] Add crash recovery -- [ ] Implement disk space management -- [ ] Add container health monitoring and auto-recovery -- [ ] Run 1-week continuous uptime test - -#### Sprint 24: Pre-Release Quality (Week 9-12) -- [ ] Achieve 70% frontend test coverage -- [ ] Achieve 70% backend test coverage -- [ ] Run full regression screenshot comparison -- [ ] Publish v0.8.0-rc1 release candidate - -### Q4 2027 (December -- February 2028): Polish, Community, v0.9.0 - -#### Sprint 25: User Experience Polish (Week 1-4) -- [ ] Run complete UX audit -- [ ] Fix all UX audit findings -- [ ] Polish error handling across entire frontend -- [ ] Polish all forms - -#### Sprint 26: Community Infrastructure (Week 5-8) -- [ ] Set up update server 
infrastructure -- [ ] Create community contribution guidelines -- [ ] Set up issue tracker and roadmap -- [ ] Publish v0.9.0 release - ---- - -## Year 3: Production Polish & Scale (March 2028 -- March 2029) - -### Q1 2028 (March -- May): Monitoring, Remote Management, Accessibility - -#### Sprint 27: Advanced Monitoring (Week 1-4) -- [ ] Implement real-time metrics collection -- [ ] Add monitoring dashboard page -- [ ] Implement alerting system -- [ ] Add historical data export - -#### Sprint 28: Remote Management (Week 5-8) -- [ ] Implement Tailscale-based remote access -- [ ] Add mobile-optimized remote management -- [ ] Implement remote notification system - -#### Sprint 29: Accessibility and i18n (Week 9-12) -- [ ] Add ARIA labels and roles -- [ ] Add keyboard navigation testing -- [ ] Set up i18n infrastructure - -### Q2 2028 (June -- August): Pen Testing, Final QA - -#### Sprint 30: Security Penetration Testing (Week 1-4) -- [ ] Run automated penetration test suite -- [ ] Manual security review of all RPC endpoints -- [ ] Harden Podman container isolation -- [ ] Add rate limiting to all sensitive endpoints - -#### Sprint 31: End-to-End QA (Week 5-8) -- [ ] Create golden path test suite -- [ ] Run regression test across all hardware -- [ ] Achieve 80% test coverage -- [ ] Run 30-day soak test - -#### Sprint 32: Documentation and Community (Week 9-12) -- [ ] Write troubleshooting guide -- [ ] Create walkthrough documentation -- [ ] Finalize all ADRs -- [ ] Publish v0.95.0-rc2 - -### Q3 2028 (September -- November): v1.0 Release - -#### Sprint 33: Final Polish (Week 1-4) -- [ ] Final UX audit -- [ ] Final security audit -- [ ] Final sweep -- [ ] Performance benchmark and optimize - -#### Sprint 34: Release Engineering (Week 5-8) -- [ ] Create release automation -- [ ] Set up download/update infrastructure -- [ ] Write v1.0 release notes -- [ ] Build v1.0.0 release ISOs - -#### Sprint 35: Launch (Week 9-12) -- [ ] Tag and publish v1.0.0 -- [ ] Run 7-day 
post-release monitoring -- [ ] Create v1.1 roadmap - -### Q4 2028 (December -- February 2029): Maintenance - -#### Sprint 36-39: Ongoing -- [ ] Monthly dependency update cycle -- [ ] Monthly security scan -- [ ] Quarterly quality sweep -- [ ] Community app reviews -- [ ] Plan v2.0 features - ---- - -## Milestone Summary - -| Date | Milestone | Key Deliverables | -|------|-----------|-----------------| -| May 2026 | Q1 Complete | Tests, UI fixes, security, quality baseline | -| Aug 2026 | Q2 Complete | DWN, backup/restore, kiosk, StartOS independence | -| Nov 2026 | Q3 Complete | App testing, auto-updates, ARM64 | -| Feb 2027 | **v0.5.0-beta** | First public beta | -| Nov 2027 | **v0.8.0-rc1** | Release candidate | -| Feb 2028 | **v0.9.0** | Pre-release | -| Nov 2028 | **v1.0.0** | Production release | - -## Execution Method -- Execute via `/overnight` skill — each session picks up next uncompleted tasks -- Full detailed acceptance criteria in the original plan conversation -- Track progress by checking off items in this file as [x] diff --git a/.claude/memory/project_app_registry.md b/.claude/memory/project_app_registry.md deleted file mode 100644 index cea2b218..00000000 --- a/.claude/memory/project_app_registry.md +++ /dev/null @@ -1,19 +0,0 @@ ---- -name: App Registry Setup -description: Archipelago app container registry at 80.71.235.15:3000 (Gitea) — marketplace images mirrored there -type: project ---- - -Archipelago app registry running on Gitea at `80.71.235.15:3000`, org `archipelago`. - -**Why:** Self-hosted container registry so Archipelago nodes pull app images from our infrastructure instead of Docker Hub/ghcr.io. Critical for unbundled ISO installs where apps are downloaded on-demand. 
- -**How to apply:** -- Registry URL: `80.71.235.15:3000/archipelago/{image}:{tag}` -- HTTP only (insecure) — nodes need `registries.conf` with `insecure = true` -- ISO build bakes the insecure registry config into `/home/archipelago/.config/containers/registries.conf` -- Marketplace data in `neode-ui/src/views/marketplace/marketplaceData.ts` uses `REGISTRY` constant -- 34 images pushed from .228 on 2026-03-26 -- NOT pushed yet: Thunderhub, Penpot (not on .228) -- Gitea instance deployed via Portainer on `80.71.235.15:9443` -- Login: podman login 80.71.235.15:3000 (credentials set up on .228) diff --git a/.claude/memory/project_bitcoin_rpc_auth.md b/.claude/memory/project_bitcoin_rpc_auth.md deleted file mode 100644 index 17d716f4..00000000 --- a/.claude/memory/project_bitcoin_rpc_auth.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: Bitcoin RPC rpcauth architecture -description: Bitcoin uses rpcauth (salted hash in config, password in secrets file), system Tor for containers, reboot survival -type: project ---- - -Bitcoin RPC uses `rpcauth` — salted HMAC-SHA256 hash in bitcoin.conf, plaintext password in `/var/lib/archipelago/secrets/bitcoin-rpc-password`. Credentials are STABLE across reboots, restarts, deploys. - -**Why:** Cookie auth rotates on every Bitcoin restart, breaking all dependent containers with env-var-only credentials. The `rpcauth` approach keeps the password stable while never exposing plaintext in config files or CLI args.
- -**How to apply:** -- Bitcoin: reads rpcauth from bitcoin.conf (no CLI credential flags, config generated by first-boot or deploy) -- LND: `bitcoind.rpcuser/rpcpass` in lnd.conf (NOT rpccookie — LND v0.18.4 doesn't support it) -- All containers: read password from secrets file at creation time, passed via env vars -- Rust backend `bitcoin_rpc.rs`: reads from secrets file, cached with OnceCell -- bitcoin-ui: mounts `/var/lib/archipelago/secrets:/secrets:ro`, start.sh reads password and injects nginx auth header -- System Tor: `SocksPort 0.0.0.0:9050` + SocksPolicy, containers use `host.containers.internal:9050` -- `podman-restart.service` enabled for container auto-start after reboot -- Tor hidden service hostnames copied to `/var/lib/archipelago/tor-hostnames/` for readable access -- .198 ElectrumX points at .228's full Bitcoin node (pruned node can't run ElectrumX locally) -- Health monitor interval: 60 seconds — UI may briefly show "crashed" during restarts diff --git a/.claude/memory/project_cicd_setup.md b/.claude/memory/project_cicd_setup.md deleted file mode 100644 index 094a4744..00000000 --- a/.claude/memory/project_cicd_setup.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: CI/CD Setup -description: Gitea Actions CI/CD — runner on .228, workflow builds unbundled ISO on push to main -type: project ---- - -CI/CD pipeline using Gitea Actions on git.tx1138.com. - -**Why:** Automatic ISO builds on every push to main. ISOs copied to FileBrowser /Builds/ for download. 
- -**How to apply:** -- Gitea repo: `git.tx1138.com/lfg2025/archy` -- Runner: .228 registered as `archipelago-builder` with label `ubuntu-latest:host` -- Runner service: `gitea-runner.service` (systemd, runs as archipelago user) -- Runner config: `~/.runner` on .228 -- Workflow: `.gitea/workflows/build-iso.yml` — unbundled ISO only -- Uses `https://git.tx1138.com/actions/checkout@v4` (NOT github.com actions) -- Builds: backend (cargo), frontend (npm), then ISO with `UNBUNDLED=1` -- Output: copied to `/var/lib/archipelago/filebrowser/Builds/` -- act_runner v0.2.11 installed at `/usr/local/bin/act_runner` diff --git a/.claude/memory/project_container_orchestration.md b/.claude/memory/project_container_orchestration.md deleted file mode 100644 index 64b6dd29..00000000 --- a/.claude/memory/project_container_orchestration.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Container Orchestration Hardening -description: Container orchestration overhaul — stop grace periods, pull retry, persistent restart tracking, scheduled remediation, failsafe install, boot reconciliation -type: project ---- - -Container orchestration hardening implemented on dev-iso branch (2026-03-28). - -**Why:** Gitea issue requesting true orchestration. Containers were unreliable — 10s stop timeout risked Bitcoin Core UTXO corruption, image pulls failed silently, restart counters reset on process restart enabling infinite loops, doctor/reconcile scripts only ran manually. - -**What was done (7 changes):** -1. Per-container stop grace periods (600s bitcoin, 330s lnd, 300s electrs, 120s databases, 60s btcpay, 30s default) + systemd TimeoutStopSec=660 -2. Image pull retry with exponential backoff (3 attempts: 5s/15s/45s) + post-pull verification + stacks.rs error propagation instead of silent swallow -3. Resolved container/health_monitor.rs TODO (documented as orchestrator-level responsibility) -4. Persistent restart tracking to restart-tracker.json (survives process restarts, seeded on startup) -5. 
Scheduled systemd timers: container-doctor every 30min, reconcile-containers every 6h -6. Failsafe install: post-pull image verify, rollback on start failure, 30s post-start health check with crash diagnosis -7. Boot reconciliation: runs reconcile-containers.sh after crash recovery completes - -**How to apply:** These changes affect beta reliability. The other programmer is working on custom base ISO on the same branch — coordinate on build-auto-installer-iso.sh changes. diff --git a/.claude/memory/project_demo_deploy.md b/.claude/memory/project_demo_deploy.md deleted file mode 100644 index d81a83e2..00000000 --- a/.claude/memory/project_demo_deploy.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -name: Demo Deploy Status -description: Status and details of the demo prod server deployment via Portainer Stacks from Gitea repos -type: project ---- - -## Demo Prod Deployment — In Progress (2026-03-17) - -### Two Separate Portainer Stacks - -**1. IndeedHub** — DEPLOYED SUCCESSFULLY on :7755 -- Repo: `https://git.tx1138.com/lfg2025/indee-demo` -- Compose: `docker-compose.yml` (root) -- Env vars loaded from `.env.portainer` — update DOMAIN, FRONTEND_URL, S3_PUBLIC_BUCKET_URL -- APP_PORT defaulted to 7755 (changed from 7777 to avoid conflicts) -- Healthcheck fix: pg_isready uses `${POSTGRES_USER}` env var (was hardcoded) -- Full 7-service stack: app, api, postgres, redis, minio, minio-init, relay, ffmpeg-worker -- Nostr auth is built-in (NIP-98) — users sign in with browser extension (Alby, nos2x) - -**2. 
Archipelago** — DEPLOYING (last attempt pending) -- Repo: `https://git.tx1138.com/lfg2025/archy-demo` -- Compose: `docker-compose.demo.yml` -- Env vars: `ANTHROPIC_API_KEY` for Claude chat -- Port: 4848 -- Pre-built frontend in `web-dist/` (built locally on Mac, no server-side build) -- Backend: `neode-ui/Dockerfile.backend` (Node mock backend on :5959) -- Web: `neode-ui/Dockerfile.web` (nginx serving pre-built static files) - -### Issues Resolved So Far -- IndeedHub postgres healthcheck hardcoded username → fixed to use env var -- Port 7777 conflict → changed to 7755 -- Archy repo too large (8GB) for Portainer clone → created lightweight `archy-demo` repo -- Frontend build failing on server → switched to pre-built static files (no npm/vite on server) -- `.dockerignore` blocking `neode-ui/dist` → moved to `web-dist/` at repo root -- Docker build cache stale → moved dist outside neode-ui to avoid gitignore conflicts - -### Current Blocker -- Last deploy attempt: Docker build cache may still be referencing old paths -- If still failing: need to prune Docker build cache on server (`docker builder prune`) - -### Frontend Changes Made -- `Apps.vue` and `AppDetails.vue`: IndeedHub removed from WEB_ONLY_APP_URLS (linter change) -- IndeedHub will be accessed as a real container or via direct URL to :7755 - -### Repo Structure (archy-demo) -``` -archy-demo/ -├── docker-compose.demo.yml -├── .dockerignore -├── web-dist/ ← pre-built Vue frontend (from local Mac build) -├── demo/aiui/ ← pre-built AIUI chat app -└── neode-ui/ ← source + mock backend + docker configs - ├── Dockerfile.web ← nginx + copy web-dist (no build) - ├── Dockerfile.backend ← Node mock backend - ├── docker/nginx-demo.conf - ├── docker/docker-entrypoint.sh - ├── mock-backend.js - └── src/... -``` - -**Why:** Demo for showcasing Archipelago + IndeedHub together. Needs to be functional with nostr signing. -**How to apply:** When resuming, check if Portainer deploy succeeded. 
If not, may need to SSH to prune Docker cache or debug further. diff --git a/.claude/memory/project_deploy_session_2026_03_22.md b/.claude/memory/project_deploy_session_2026_03_22.md deleted file mode 100644 index b8f0e53d..00000000 --- a/.claude/memory/project_deploy_session_2026_03_22.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -name: Deploy session 2026-03-22 findings -description: Comprehensive deploy/build fixes made overnight — container issues, image tags, script improvements, remaining work -type: project ---- - -## Session Summary (2026-03-22 overnight) - -Massive deploy infrastructure overhaul across all 5 nodes (.228, .198, Arch 1/2/3). - -### Fixed in deploy-tailscale.sh -- **Image tags**: Bitcoin Knots `28.1` (not `v28.1`), BTCPay `1.13.7` (not `1.14.5`), SearXNG `2026.3.20-6c7e9c197` -- **Removed Immich** (3 containers) and **Penpot** (5 containers) from deploy + build -- **Fedimint**: `FM_REL_NOTES_ACK=0_4_xyz` env var (NOT `FM_SKIP_REL_NOTES_ACK` or `FM_REQ_RELEASE_NOTES_ACK_V0_4`) -- **Fedimint-gateway**: `--password` instead of `--bcrypt-password-hash` (v0.5.1 CLI change) -- **FileBrowser**: added `--cap-add NET_BIND_SERVICE` for port 80 binding -- **SearXNG**: added `/var/lib/archipelago/searxng:/etc/searxng` volume mount + caps -- **Postgres**: pinned to `postgres:15` (data initialized with 15, incompatible with 16) -- **Migration**: one-time flag file `/var/lib/archipelago/.rootless-migrated` -- **Recreate-if-broken pattern**: containers that exist but are stopped get deleted and recreated -- **Arch 2 hostname**: fixed from hardcoded hostname to `$TAILSCALE_ARCH2` -- **Custom UI images**: graceful skip if not available, source extracted to repo (`docker/bitcoin-ui/`, `docker/electrs-ui/`) -- **AIUI tar xattr**: silenced with `--no-xattrs` (only in deploy-tailscale.sh, NOT deploy-to-target.sh yet) -- **Nginx MIME warning**: removed `text/html` from `sub_filter_types` - -### Added -- `--fleet` flag in deploy-to-target.sh: deploys .228 → .198 → Arch 
1/2/3 -- `--both` lock fix: releases lock before recursive `--live` call -- Container verification step (Step 26b): restarts exited containers, fixes permissions, checks Tor -- IndeedHub backend stack rebuilt on .228 (7 containers) -- IndeedHub nginx patched with direct IPs (podman DNS doesn't work with nginx resolver) - -### Frontend changes -- Replaced Immich with FileBrowser on Setup homescreen (`goals.ts`, `EasyHome.vue`) -- `MEMPOOL_API_IMAGE` renamed to `MEMPOOL_BACKEND_IMAGE` in image-versions.sh -- Nextcloud downgraded from 30 to 29 (one major version upgrade at a time) - -### Session 2 fixes (same day) - -**Critical pattern found: Container credential mismatches** -- Deploy generates random passwords stored in `secrets/`. MariaDB/Postgres only use env vars on FIRST init — subsequent restarts ignore them. Container recreation with new passwords → auth failures → crash loops. -- 50,000+ cumulative container restarts across fleet from this single root cause. - -**Fixes applied to all nodes:** -1. LND: `lnd.conf` rpcpass synced from `secrets/bitcoin-rpc-password` (was hardcoded `archipelago123`) -2. MariaDB mempool: data dirs wiped + reinitialized (password mismatch unrecoverable) -3. BTCPay Postgres: `ALTER USER` to sync password with secrets -4. FileBrowser: `--user 0:0` instead of `--cap-add NET_BIND_SERVICE` (rootless port 80 fix) -5. Nextcloud: same `--user 0:0` fix -6. Tailscale container on .228: removed (2,685 restarts — unauthenticated, host already has TS) - -**Deploy script fixes:** -- `deploy-tailscale.sh`: LND config always synced before start, `eval "$DB_PASSWORDS"` → safe individual reads, MariaDB password sync step, filebrowser `--user 0:0` -- `deploy-to-target.sh`: LND stale config check now compares passwords (not just cookie/localhost), filebrowser `--user 0:0` - -**Rootless port 80 rule**: Containers binding port 80 MUST use `--user 0:0`. `NET_BIND_SERVICE` cap doesn't work in rootless (UID 0 → host 100000, unprivileged). 
- -### Session 3 fixes (2026-03-22 to 2026-03-24) - -**Additional container fixes applied live:** -- PhotoPrism: recreated with proper `/photoprism/storage`, `/photoprism/originals`, `/photoprism/import` volume mounts (all 3 nodes) -- Vaultwarden/Jellyfin: recreated with `--user 0:0` + health checks (Arch 1/2) -- Nextcloud: downgraded image to v29 (data initialized with v28, can't skip to v30) -- Fedimint: upgraded v0.5.1 → v0.10.0 on all Tailscale nodes -- Fedimint-gateway: bcrypt hash passed via file mount (shell escaping workaround) -- SearXNG: recreated with proper caps on Arch 2 -- Arch 3 right-sized: stopped immich (3), jellyfin, vaultwarden, nbxplorer (7.3GB RAM) - -**Deploy script improvements (6 commits pushed):** -1. `d37165ca` — Credential sync, health checks, rootless port binding -2. `f5714a5b` — Fleet deploy falls back to Tailscale when LAN unreachable, `--all` alias -3. `028248df` — Suppress tar xattr spam in AIUI deploy (`--no-xattrs`) -4. `f5802f9e` — Fix LND config SSH escaping, Tailscale fallback for BUILD_SOURCE -5. `06d85e1d` — Fix health check escaping for SSH heredoc (`--health-cmd 'cmd'` not `"cmd"`) -6. `a7920de8` — Correct health check endpoints (fedimint→8175, nextcloud→`/`, filebrowser→`/`) - -**Health checks added to deploy-tailscale.sh:** -- 25 containers now have `--health-cmd` in deploy-tailscale.sh (was zero) -- Key corrections: fedimint checks port 8175 (UI) not 8174 (websocket), nextcloud/filebrowser check `/` not custom endpoints - -**Fleet status at end of session:** - -| Node | Status | Notes | -|------|--------|-------| -| .228 | 36/36, 0 unhealthy, load 1.0 | Fully stable | -| Arch 1 | 25/25, 0 unhealthy, load 0.5 | Fully stable | -| Arch 2 | 25/25, 0 unhealthy, load 0.2 | Fully stable | -| Arch 3 | 24/28, 0 unhealthy, load 7.7 | Right-sized for 7.3GB RAM, Bitcoin IBD at 97.8% | -| .198 | Bitcoin chain data empty (4KB) | Needs full IBD — will take days. Not pruned. 
| - -### Remaining for next session -- **.198**: Bitcoin doing full IBD from scratch (chain data was lost/empty). No prune flag set. Will take days. -- **Arch 3**: Bitcoin IBD was at 97.8% — check if complete, then start LND/nbxplorer -- **Tor config Python syntax errors** in deploy-to-target.sh step 33 (cosmetic, falls back to system Tor) -- **deploy-to-target.sh** still missing health checks (only deploy-tailscale.sh has them) -- **first-boot-containers.sh** needs same rootless fixes (filebrowser `--user 0:0`, credential sync) -- **Fedimint guardian setup** not done on any node — all in "Setup UI" mode -- User needs to `git pull && ./scripts/deploy-to-target.sh --all` to deploy latest fixes to Tailscale nodes diff --git a/.claude/memory/project_environments.md b/.claude/memory/project_environments.md deleted file mode 100644 index 58134979..00000000 --- a/.claude/memory/project_environments.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: Four Environments -description: Clear distinction between dev mode (local mock), dev server (228), demo (Portainer), and prod (same as dev server) -type: project ---- - -Four distinct environments — use correct terminology: - -| Name | What | Where | Backend | Deploy | -|------|------|-------|---------|--------| -| **Dev mode** | Local macOS, mock backend | `localhost:8100` | `mock-backend.js` on `:5959` | `npm run dev:mock` | -| **Dev server / Prod** | Primary build/test/live server | `192.168.1.228` (+ fleet) | Real Rust backend + Podman | `deploy-to-target.sh --live` | -| **Demo** | Public demo instance | Remote server | Mock Node.js via Docker | Portainer Stacks / `docker-compose.demo.yml` | - -- Dev server and prod are the SAME machine (192.168.1.228) — "prod" just means "the live deployment" -- Demo is completely separate — user deploys via Portainer UI, Claude has no SSH access -- Dev mode is local-only, no containers needed, fastest iteration - -**Why:** User corrected ambiguous usage of "dev servers (prod)" — these are the 
same thing, not two separate environments. - -**How to apply:** Always say "dev mode" for local mock, "dev server" or "prod" for 228, "demo" for the Portainer instance. Never conflate them. diff --git a/.claude/memory/project_gamepad_nav.md b/.claude/memory/project_gamepad_nav.md deleted file mode 100644 index b0e35f40..00000000 --- a/.claude/memory/project_gamepad_nav.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -name: Gamepad Navigation System -description: Controller/gamepad navigation architecture, key decisions, known issues, and the nav map doc location -type: project ---- - -Gamepad/controller navigation is a core feature of Archipelago — the UI runs on a kiosk with Xbox-style controller input. - -**Why:** Archipelago runs on dedicated hardware with a TV/monitor + gamepad. Every page must be fully navigable without a mouse. - -**How to apply:** When modifying any page's interactive elements, check that `data-controller-container` and `tabindex` are set correctly. Read `neode-ui/docs/GAMEPAD-NAV-MAP.md` for the full per-page navigation spec and implementation notes. 
- -## Key files -- `neode-ui/src/composables/useControllerNav.ts` — all navigation logic -- `neode-ui/docs/GAMEPAD-NAV-MAP.md` — full nav spec with per-page tables, implementation notes, and Chromium gotchas - -## Critical patterns -- Cards on grid pages: `glass-card transition-all hover:-translate-y-1` + `data-controller-container tabindex="0"` -- Settings page is a MIXED page (containers + standalone buttons) — nav searches both together -- ToggleSwitch has `tabindex="-1"` + `data-controller-ignore` so gamepad skips it -- Focus glow uses blurred box-shadow, NOT `0 0 0 Npx` spread (Chromium compositor bug with translateZ(0)) -- `outline: none !important` on all containers to kill browser default focus rings diff --git a/.claude/memory/project_indeedhub_arch3_fix.md b/.claude/memory/project_indeedhub_arch3_fix.md deleted file mode 100644 index a949969f..00000000 --- a/.claude/memory/project_indeedhub_arch3_fix.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -name: IndeedHub Arch 3 Fix — 2026-03-17 -description: Fixed IndeedHub on Arch 3 (100.124.105.113) — corrupted image tarball was root cause, all 7 containers now running -type: project ---- - -## Status: FIXED and working (verified 2026-03-17) - -IndeedHub on Arch 3 (`100.124.105.113`) is fully operational — all 7 containers running, frontend on :7777, API healthy, NIP-07 nostr-provider injected. - -## Root Cause - -The `/tmp/indeedhub-all-images.tar` on Arch 3 was corrupted — `podman save` with multiple images collapsed ALL 7 images to the same image ID (the frontend nginx image `7222645f0b38`). So redis, minio, API, ffmpeg-worker, postgres, and relay were all running the frontend nginx binary. - -**Why:** `podman save` with multiple images sharing layers can produce broken tarballs where all images get the same config/ID. - -## What Was Done - -1. Removed all broken containers and images -2. Pulled fresh standard images from Docker Hub (postgres:16-alpine, redis:7-alpine, minio:latest, nostr-rs-relay:latest) -3. 
Exported each custom image as **individual tarballs** from .228 (NOT combined): - - `indeedhub-frontend.tar` (149MB, ID: `7222645f0b38`) - - `indeedhub-api.tar` (403MB, ID: `2ae2665fc6c7`) - - `indeedhub-ffmpeg.tar` (525MB, ID: `cb05b5cf8c25`) -4. Transferred via Mac (`.228` → Mac → Arch 3 over Tailscale) -5. Loaded images individually, created all 7 containers manually (bypassed the deploy script's broken `podman load` step) -6. Copied nostr-provider.js + nginx config with sub_filter from .228 container into Arch 3 container via `podman cp` - -## Remaining Issue — Deploy Script - -The deploy script at `/tmp/deploy-indeedhub.sh` on Arch 3 still references the broken `/tmp/indeedhub-all-images.tar`. If it's run again it will re-corrupt the images. The individual tarballs (`/tmp/indeedhub-frontend.tar`, `/tmp/indeedhub-api.tar`, `/tmp/indeedhub-ffmpeg.tar`) are on Arch 3 and should be used instead. - -**How to apply:** Next time deploying IndeedHub to any node, always export images individually, never as a combined tarball. Consider updating the deploy script to load individual tarballs. diff --git a/.claude/memory/project_iso_size_reduction.md b/.claude/memory/project_iso_size_reduction.md deleted file mode 100644 index 6111c48d..00000000 --- a/.claude/memory/project_iso_size_reduction.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -name: ISO Size Reduction Plan -description: Plan to reduce ISO from 3.9GB — prioritized phases for post-beta -type: project ---- - -Current ISO: ~3.9GB (unbundled). Target: <1.5GB. - -**Why:** Debian Live base (~800MB) + rootfs with kiosk/Podman/firmware (~2.1GB) + squashfs overhead. 
- -**Phase 1 — Quick wins (post-beta, ~500MB-1GB savings):** -- Strip unused firmware blobs (WiFi chipsets, GPU) -- Remove build-only packages from rootfs (not needed at runtime) -- `--no-install-recommends` in all apt installs -- Strip debug symbols from binaries -- Remove man pages, docs, locale data (`localepurge`) - -**Phase 2 — Minimal base (~1-1.5GB savings):** -- Replace Debian Live ISO with custom `debootstrap --variant=minbase` live image -- Make kiosk (X11 + Chromium ~400MB) optional / separate overlay -- Alpine-based rootfs alternative - -**Phase 3 — Long term (<1GB target):** -- Custom kernel with only needed modules -- A/B read-only root partition (no live boot infrastructure) -- Network installer variant (tiny ISO, needs internet) -- Reproducible builds with exact dep trees - -**How to apply:** Each phase is independent. Phase 1 is safe to do anytime. Phase 2 requires testing the boot chain. Phase 3 is architectural. diff --git a/.claude/memory/project_mesh_198_issue.md b/.claude/memory/project_mesh_198_issue.md deleted file mode 100644 index c5a3799c..00000000 --- a/.claude/memory/project_mesh_198_issue.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Mesh .198 fix — COMPLETED -description: Fixed mesh radio on .198 — duplicate init, no reconnect on write fail, wrong device path. All deployed. -type: project ---- - -## Status: COMPLETED (2026-03-17) - -Three bugs were found and fixed: - -1. **Duplicate mesh init in `server.rs`** — removed duplicate block -2. **Serial write failures don't trigger reconnection** — added `consecutive_write_failures` counter, bail after 3 -3. **Device path on .198** — set `/var/lib/archipelago/mesh-config.json` to `/dev/ttyUSB1` - -All changes deployed to both .228 and .198. 
- -### Files Changed -- `core/archipelago/src/server.rs` — removed duplicate mesh/transport init block -- `core/archipelago/src/mesh/listener.rs` — added write failure tracking + reconnection -- `neode-ui/src/stores/mesh.ts` — fixed TS union type for `typed_payload` diff --git a/.claude/memory/project_repo_cleanup_and_dev_env.md b/.claude/memory/project_repo_cleanup_and_dev_env.md deleted file mode 100644 index 4cfc1ab7..00000000 --- a/.claude/memory/project_repo_cleanup_and_dev_env.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -name: v1.3.0 Session Status (March 20) -description: Tor management system, bug fixes, federation name sync — cloud files working both ways -type: project ---- - -## Deployed to .228 + .198 - -### What's Live -- Full Tor hidden service management (systemd path unit pattern — tor-helper.sh) -- Container doctor: system Tor preferred, archy-tor container removed -- Federation name sync: server rename pushes to peers -- Cloud files working both ways over Tor -- Arch channel local echo for sent messages -- Web5 Message button → Mesh redirect -- Node names in federation/peers -- PeerFiles header shows name + DID (not onion) -- Connected Nodes flex height -- Server name persistence (root-owned file fixed) -- Tor services UI: add from installed apps, delete, restart, auth/protocol badges -- Layout: Network Interfaces + Tor Services stack on normal screens - -### Architecture: Tor Management -- Backend writes staged torrc + action file to /var/lib/archipelago/tor-config/ -- systemd path unit (archipelago-tor-helper.path) triggers root-level service -- tor-helper.sh processes actions: write-torrc-and-restart, restart, delete-service, sync-hostnames -- NoNewPrivileges=yes safe — no sudo from backend -- Container doctor ensures system Tor stays running after deploys -- Web apps: port 80 on .onion → local app port; Protocol services: direct port - -### Onion Addresses (current) -- .228 archipelago: r33p5uzk2vxhdte4a5pfqgeax44a7b2lx57q32dxmx5llzyfz42lwnyd.onion 
-- .198 archipelago: mxn62m4odavwctlpsq2ozvhy3ibjpenlzemumwtkev7wviikttxvjhyd.onion - -### Still TODO -1. **Tor channel chat** — messages via Archipelago channel need testing/polish -2. **ISO build** — update build-auto-installer-iso.sh with tor-helper, systemd units, container doctor changes -3. **Better error messaging** — when nodes are down, addresses changed, all situations -4. **File access permissions** — public (no auth), federated (full access), peer-set (specific files) -5. **Auth on Tor app access** — login before accessing app via .onion (post-beta candidate) -6. **.198 health check** — deploy health check times out on .198 (backend works, likely timing) - -**Why:** Session continuity for v1.3.0 beta stabilization effort. -**How to apply:** Read at start of next session. Work on TODO items in order. diff --git a/.claude/memory/project_session_20260328.md b/.claude/memory/project_session_20260328.md deleted file mode 100644 index 3ff749a3..00000000 --- a/.claude/memory/project_session_20260328.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -name: ISO Session 2026-03-28 Handoff -description: Session handoff — branding overhaul, ISOLINUX config updated, terminal banners redesigned, UEFI still broken -type: project ---- - -## Session State (2026-03-28 ~latest) - -### Branding Overhaul (this session) - -**ISOLINUX boot menu:** -- Config updated: menu centered (HSHIFT 28, WIDTH 26), title "Bitcoin Node OS" -- Selection: white on dark, hotkeys in Bitcoin orange (#fb923c) -- Tab message: "Press TAB to edit | https://archipelago.sh" -- MENU RESOLUTION kept at 1024x768 (uses GRUB background.png) -- Three options: Install Archipelago, Install (verbose), Boot from local disk - -**Terminal banners — unified design across all screens:** -- Name: "A R C H I P E L A G O" (uppercase, spaced, bold white) -- Separator: orange line -- Subtitle: dim text (varies by context) -- Colors: basic ANSI (works on bare-metal console, not 256-color) -- Width: fits 80-col terminals (no 
overflow/clipping) -- Build script auto-install.sh: centered + adaptive-width boxes -- Standalone scripts: fixed 52-char boxes - -**Files changed:** -- build-auto-installer-iso.sh: ISOLINUX config, colors (256 to basic ANSI), case, header + completion -- build/debian-iso/custom/etc/profile.d/z99-archipelago.sh: full rewrite -- build/debian-iso/custom/archipelago/auto-start.sh: full rewrite -- archipelago-scripts/archipelago-menu.sh: full rewrite -- build/debian-iso/custom/isolinux/stdmenu.cfg, menu.cfg, live.cfg: updated -- branding/generate-isolinux-splash.py: new file (640x480 splash generator, optional) - -### Outstanding Issues -- UEFI boot broken — drops to grub> prompt, only Legacy BIOS works -- ISOLINUX resolution kept at 1024x768, may clip on some hardware -- Install + onboarding logs confirmed present on .198 (5 log files) -- Need to review actual log content from .198 - -### Target Machine -- Dell on .198, Legacy BIOS, password: archipelago diff --git a/.claude/memory/reference_tailscale_nodes.md b/.claude/memory/reference_tailscale_nodes.md deleted file mode 100644 index cf5b9084..00000000 --- a/.claude/memory/reference_tailscale_nodes.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -name: Node inventory and SSH access -description: Complete list of all Archipelago nodes — LAN and Tailscale IPs, SSH commands, build capabilities, deploy methods -type: reference ---- - -## LAN Nodes -| Name | IP | SSH | Notes | -|------|-----|-----|-------| -| Primary (.228) | 192.168.1.228 | `ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228` | Full build env, CI runner, OAuth proxy | -| Secondary (.198) | 192.168.1.198 | `ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.198` | Full build env | - -## Tailscale Nodes -| Name | Tailscale IP | Hostname | SSH | Build? 
| -|------|-------------|----------|-----|--------| -| Arch 1 | 100.82.97.63 | — | `ssh -i ~/.ssh/archipelago-deploy archipelago@100.82.97.63` | Unknown | -| Arch 2 | 100.122.84.60 | archipelago-2.tail2b6225.ts.net | `ssh -i ~/.ssh/archipelago-deploy archipelago@archipelago-2.tail2b6225.ts.net` | Yes (Node, Rust, Podman) | -| Arch 3 | 100.124.105.113 | archipelago-3.tail2b6225.ts.net | `ssh -i ~/.ssh/archipelago-deploy archipelago@100.124.105.113` | No (Podman only, copy pre-built artifacts) | -| Arch Atob | 100.113.33.31 | — | `ssh -i ~/.ssh/archipelago-deploy archipelago@100.113.33.31` | Unknown | - -## Deploy Methods -- **LAN nodes (.228, .198):** `./scripts/deploy-to-target.sh --both` -- **Arch 2:** `ARCHIPELAGO_TARGET="archipelago@archipelago-2.tail2b6225.ts.net" ./scripts/deploy-to-target.sh --live` -- **Arch 3:** SCP pre-built binary + frontend tarball (no build tools). Do NOT relay through .228 — SSH directly from Mac. -- **All nodes:** Use `~/.ssh/archipelago-deploy` key diff --git a/.claude/memory/second-server.md b/.claude/memory/second-server.md deleted file mode 100644 index 626062e0..00000000 --- a/.claude/memory/second-server.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: second-dev-server -description: Second dev server accessible via Tailscale at archipelago-2.tail2b6225.ts.net, Ryzen 7 7840U, 14GB RAM -type: project ---- - -- Hostname: archipelago-2.tail2b6225.ts.net (Tailscale) -- SSH: `ssh -i ~/.ssh/archipelago-deploy archipelago@archipelago-2.tail2b6225.ts.net` -- Password: ThunderDome6574839201! 
-- CPU: AMD Ryzen 7 7840U (faster than primary i3-8100T) -- RAM: 14GB -- Disk: 916GB NVMe -- OS: Debian 12 (Bookworm) x86_64 -- Has: Podman 4.3.1, Node.js v20.20.1, Rust 1.94.0, Nginx 1.22.1 -- Swap: 4GB configured -- Deploy: `ARCHIPELAGO_TARGET="archipelago@archipelago-2.tail2b6225.ts.net" ./scripts/deploy-to-target.sh --live` -- Does NOT use OAuth proxy — uses standard ANTHROPIC_API_KEY for Claude/AIUI -- First-boot containers created on 2026-03-11 (Bitcoin Knots, LND, Fedimint, PhotoPrism, Ollama, etc.) - -## Pending Fixes for Next Deploy -- **AIUI MIME type error**: Nginx needs a `/aiui/` location block serving correct MIME types for JS files. Currently JS files get wrong content-type causing module load failures. -- **Self-signed cert warnings**: Expected on fresh deploy, not a bug. -- **Container connection errors in AIUI console**: Expected until all containers finish starting and syncing. diff --git a/.claude/memory/tailscale_servers.md b/.claude/memory/tailscale_servers.md deleted file mode 100644 index 82639dc0..00000000 --- a/.claude/memory/tailscale_servers.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: Tailscale Servers -description: Archipelago Tailscale servers (archipelago-2, archipelago-3) — hostnames, SSH access, and deploy notes -type: reference ---- - -## Tailscale Servers - -- **archipelago-2**: `archipelago@archipelago-2.tail2b6225.ts.net` - - SSH key auth works (`~/.ssh/archipelago-deploy`) - - Has Node.js, npm, Cargo/Rust, Podman — can do full builds - - Deploy: `ARCHIPELAGO_TARGET="archipelago@archipelago-2.tail2b6225.ts.net" ./scripts/deploy-to-target.sh --live` - -- **archipelago-3**: `archipelago@archipelago-3.tail2b6225.ts.net` (IP: 100.124.105.113) - - SSH key auth works (key added 2026-03-12) - - Has Podman only — NO Node.js, NO Rust/Cargo - - Cannot build on-server; must copy pre-built binary + frontend tarball - - Deploy method: SCP binary from archipelago-2 or local, upload frontend tarball, extract to `/opt/archipelago/web-ui/` - 
-**How to apply:** For archipelago-2, use the standard deploy script with `ARCHIPELAGO_TARGET`. For archipelago-3, copy pre-built artifacts (binary + frontend tarball) since it lacks build tools. diff --git a/.claude/memory/third-server.md b/.claude/memory/third-server.md deleted file mode 100644 index 6b7f3b07..00000000 --- a/.claude/memory/third-server.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -name: third-dev-server -description: Third dev server accessible via Tailscale at archipelago-3.tail2b6225.ts.net, password ThisIsWeb54321@ -type: project ---- - -- Hostname: archipelago-3.tail2b6225.ts.net (Tailscale) -- SSH: `sshpass -p 'ThisIsWeb54321@' ssh -o StrictHostKeyChecking=no archipelago@archipelago-3.tail2b6225.ts.net` -- Password: ThisIsWeb54321@ -- Deploy: `ARCHIPELAGO_TARGET="archipelago@archipelago-3.tail2b6225.ts.net" ./scripts/deploy-to-target.sh --live` -- SSH key NOT yet installed — need to copy `~/.ssh/archipelago-deploy.pub` manually -- Added 2026-03-11 diff --git a/.claude/memory/unbundled-iso.md b/.claude/memory/unbundled-iso.md deleted file mode 100644 index 3fc612ee..00000000 --- a/.claude/memory/unbundled-iso.md +++ /dev/null @@ -1,30 +0,0 @@ -# Unbundled ISO Build (In Progress) - -## Status: NOT YET BUILT -- Server was unreachable (SSH timeout) when we tried to build — user rebooting -- Changes are in working tree only, NOT YET COMMITTED - -## What Was Done -- Created `image-recipe/build-unbundled-iso.sh` — thin wrapper that sets `UNBUNDLED=1` and delegates to main script -- Modified `image-recipe/build-auto-installer-iso.sh` to support `UNBUNDLED=1` env var - -## Changes to build-auto-installer-iso.sh -1. Added `UNBUNDLED="${UNBUNDLED:-0}"` config variable -2. Step 3b: Skips container image capture from server AND registry pull (~20 tars) -3. Skips `first-boot-containers.sh` bundling (no images to create containers from) -4. Skips docker UI source bundling (bitcoin-ui, lnd-ui, electrs-ui) -5. 
Different ISO filename: `archipelago-installer-unbundled-x86_64.iso` -6. Updated installer completion message (tells user to install from Marketplace) -7. Updated build summary output - -## What Still Works in Unbundled -- Full rootfs (Debian 12 + Podman + nginx + SSH) -- Backend binary + web UI captured from server -- Tor setup on first boot -- Image loader service (harmlessly handles empty dir) -- `package.install` already does `podman pull` — Marketplace works out of the box - -## Next Steps -1. Rsync updated scripts to dev server (192.168.1.228) -2. Run: `sudo ./build-unbundled-iso.sh` -3. Result appears in: `image-recipe/results/archipelago-installer-unbundled-x86_64.iso` diff --git a/.claude/memory/web-only-apps.md b/.claude/memory/web-only-apps.md deleted file mode 100644 index 8b2ac365..00000000 --- a/.claude/memory/web-only-apps.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -name: web-only-apps -description: Web-only apps (no container) — L484 category, BotFights, IndieHub. Iframe compatibility, nginx proxying, My Apps injection. -type: project ---- - -## Web-Only Apps (added 2026-03-11) - -These apps are external websites embedded via iframe — no Docker container. They show as "installed" in both the marketplace and My Apps. - -### L484 Category -- **NWNN** (nwnn.l484.com) — News aggregator. No X-Frame-Options. Works in iframe directly. -- **484 Kitchen** (484.kitchen) — K484 platform. X-Frame-Options: SAMEORIGIN. Proxied via `/ext/484-kitchen/`. -- **Call the Operator** (cta.tx1138.com) — Decentralization portal. No X-Frame-Options. Works in iframe directly. -- **Arch Presentation** (present.l484.com) — Archipelago presentation. X-Frame-Options: SAMEORIGIN. Proxied via `/ext/arch-presentation/`. -- **Syntropy Institute** (syntropy.institute) — Medicine Reimagined. No X-Frame-Options. Works in iframe directly. -- **T-0** (teeminuszero.net) — Decentralization documentary. No X-Frame-Options. Works in iframe directly. 
- -### Other Web-Only Apps -- **BotFights** (botfights.net) — X-Frame-Options: SAMEORIGIN + CSP + COEP/COOP/CORP. Proxied via `/ext/botfights/`. Nginx strips all blocking headers. -- **IndeeHub** (archipelago.indeehub.studio) — No X-Frame-Options. Works in iframe directly. - -### Nginx External Proxies -Sites with X-Frame-Options get reverse-proxied through nginx at `/ext/{app-id}/`: -- `proxy_hide_header X-Frame-Options` strips upstream header -- `add_header X-Content-Type-Options "nosniff" always` prevents server-level X-Frame-Options inheritance -- BotFights also strips `Cross-Origin-Embedder-Policy`, `Cross-Origin-Opener-Policy`, `Cross-Origin-Resource-Policy` -- Proxy locations in both HTTP and HTTPS server blocks of nginx-archipelago.conf - -### Frontend Implementation -- **appLauncher.ts**: `EXTERNAL_PROXY` map rewrites external URLs to proxy paths in `toEmbeddableUrl()` -- **Apps.vue**: `WEB_ONLY_APPS` constant with synthetic `PackageDataEntry` objects. Sorted first alphabetically. No uninstall/start/stop buttons. -- **Marketplace.vue**: `dockerImage: ''` + `webUrl` in `getCuratedAppList()`. L484 category. -- **Icons**: `neode-ui/public/assets/img/app-icons/{app-id}.png` (or .svg) diff --git a/.claude/plans/luminous-snacking-snowflake.md b/.claude/plans/luminous-snacking-snowflake.md deleted file mode 100644 index addab609..00000000 --- a/.claude/plans/luminous-snacking-snowflake.md +++ /dev/null @@ -1,138 +0,0 @@ -# Phase 3 & 4: Encrypted Mesh Messaging + Off-Grid Bitcoin Operations - -## Context - -Phase 1 built the mesh radio layer (Meshcore protocol, serial driver, basic chat). Phase 2 added transport abstraction (Mesh>LAN>Tor routing, CBOR delta sync, Reed-Solomon chunking). Current encryption is static X25519 shared secret per peer — no forward secrecy, no message type discrimination, no store-and-forward. 
- -Phase 3 adds Signal-style Double Ratchet for forward secrecy, typed messages (ALERT, INVOICE, COORDINATE, PSBT_HASH), and store-and-forward relay. Phase 4 adds off-grid Bitcoin operations: block header relay, transaction relay, Lightning invoice relay, and emergency alert system with dead man's switch. - -## Dependencies to Add - -```toml -hkdf = "0.12" # KDF for Double Ratchet chains -lightning-invoice = "0.34" # BOLT11 parsing (LDK standard, MIT) -``` - -Custom Double Ratchet from existing crypto (ed25519-dalek, curve25519-dalek, chacha20poly1305, sha2, hmac) — no DR crate needed. - -## Architecture - -``` -mesh/ -├── x3dh.rs — X3DH key agreement (prekey bundles, 3-way ECDH) -├── ratchet.rs — Double Ratchet state machine (forward secrecy) -├── session.rs — Per-peer session manager (ratchet state persistence) -├── prekey.rs — Prekey store (signed + one-time prekeys, rotation) -├── message_types.rs — Typed message envelope (TEXT/ALERT/INVOICE/COORDINATE/PSBT_HASH) -├── outbox.rs — Store-and-forward queue (24h TTL, relay hops) -├── bitcoin_relay.rs — TX relay, Lightning relay, block header announce -├── alerts.rs — Emergency alerts, dead man's switch -└── (existing files extended: crypto.rs, listener.rs, types.rs, mod.rs) -``` - -## Implementation Steps - -### Week 1: X3DH + HKDF Foundation - -**New**: `mesh/x3dh.rs`, `mesh/prekey.rs` -**Modify**: `Cargo.toml` (+hkdf), `mesh/crypto.rs`, `mesh/mod.rs` - -- `PrekeyBundle`: identity_key + signed_prekey + one_time_prekeys (CBOR, ~200B) -- `PrekeyStore`: disk persistence at `{data_dir}/prekeys/`, rotation, consumption -- X3DH: 3-way ECDH → HKDF-SHA256 → root key for Double Ratchet -- ARCHY:3 identity broadcast with embedded prekey bundle - -### Week 2: Double Ratchet Protocol - -**New**: `mesh/ratchet.rs` (~500 LOC), `mesh/session.rs` (~300 LOC) - -`RatchetState`: DH ratchet keypair, root key, send/recv chain keys, counters, skipped keys (max 100). HKDF-SHA256 chains + ChaCha20-Poly1305 per-message. 
- -Wire format: 40B header (DH pub + counters) + 12 nonce + ciphertext + 16 tag = 68B overhead. Single frame: 64B plaintext. Chunked: ~2.4KB. - -`SessionManager`: HashMap, lazy load from `{data_dir}/ratchet/{did_hash}.json`. Backward compat: falls back to static shared secret for ARCHY:2 peers. - -### Week 3: Typed Messages + Store-and-Forward - -**New**: `mesh/message_types.rs`, `mesh/outbox.rs` -**Modify**: `mesh/types.rs`, `mesh/listener.rs` - -CBOR envelope: `[0x02] [{ t: u8, v: bytes, ts: u32, sig?: bytes }]` - -Types: TEXT(0), ALERT(1), INVOICE(2), PSBT_HASH(3), COORDINATE(4), PREKEY_BUNDLE(5), SESSION_INIT(6) - -GPS as `Coordinate { lat_microdeg: i32, lng_microdeg: i32 }` — integer only, no float. - -`MeshOutbox`: VecDeque, 24h TTL, max 3 relay hops, disk persistence. Checked every 10s tick. - -### Week 4: RPC Endpoints + Session Bootstrap - -**Modify**: `api/rpc/mesh.rs`, `api/rpc/mod.rs`, `mesh/listener.rs` - -New RPC: `mesh.send-invoice`, `mesh.send-coordinate`, `mesh.send-alert`, `mesh.outbox`, `mesh.session-status`, `mesh.rotate-prekeys` - -Prekey distribution via ARCHY:3 broadcasts. Session init via X3DH on first message to new peer. - -### Week 5: Off-Grid Bitcoin (Phase 4) - -**New**: `mesh/bitcoin_relay.rs`, `mesh/block_headers.rs` -**Modify**: `Cargo.toml` (+lightning-invoice), `api/rpc/mesh.rs` - -Block header relay: Internet node broadcasts `BlockHeaderAnnouncement` (height, hash, Ed25519 sig) on new block. Mesh-only peers display "SPV sync via mesh". - -TX relay: Mesh-only node sends raw tx hex → internet peer calls `sendrawtransaction` → returns txid. - -Lightning relay: Create invoice → send bolt11 → peer pays → proof-of-payment returned. - -### Week 6: Emergency Alerts + Dead Man's Switch - -**New**: `mesh/alerts.rs` - -`DeadManSwitch`: Background task, configurable interval (default 6h), broadcasts signed ALERT with GPS to emergency contacts when triggered. Auto-check-in on any authenticated RPC. 
- -RPC: `mesh.alert-configure`, `mesh.alert-checkin`, `mesh.alert-test`, `mesh.alert-status` - -### Week 7: Frontend - -**Modify**: `stores/mesh.ts`, `views/Mesh.vue`, `mock-backend.js` - -Message rendering by type: invoice (orange card + Pay button), alert (red card), coordinate (blue card + OSM link), psbt_hash (gray card + Review). - -Session indicator: shield icon (green=ratchet, yellow=static, gray=none). - -Block height in off-grid banner. Alert config panel. Dead man switch toggle. - -### Week 8: Integration Test + Deploy - -E2E on .228 (internet) + .198 (mesh-only): X3DH handshake, 50-message ratchet, invoice relay, TX relay, block headers, dead man switch. Deploy to both servers. - -## New Files (8) - -1. `core/archipelago/src/mesh/x3dh.rs` -2. `core/archipelago/src/mesh/prekey.rs` -3. `core/archipelago/src/mesh/ratchet.rs` -4. `core/archipelago/src/mesh/session.rs` -5. `core/archipelago/src/mesh/message_types.rs` -6. `core/archipelago/src/mesh/outbox.rs` -7. `core/archipelago/src/mesh/bitcoin_relay.rs` -8. `core/archipelago/src/mesh/alerts.rs` - -## Modified Files (8) - -1. `core/archipelago/Cargo.toml` — +hkdf, +lightning-invoice -2. `core/archipelago/src/mesh/crypto.rs` — +hkdf_sha256, +ephemeral keygen -3. `core/archipelago/src/mesh/types.rs` — +message_type, +typed payloads -4. `core/archipelago/src/mesh/listener.rs` — typed dispatch, session bootstrap, relay -5. `core/archipelago/src/mesh/mod.rs` — new submodules, new MeshService methods -6. `core/archipelago/src/api/rpc/mesh.rs` — ~12 new RPC endpoints -7. `core/archipelago/src/api/rpc/mod.rs` — register new routes -8. 
`neode-ui/src/views/Mesh.vue` — typed rendering, alert UI, session badges - -## Verification - -```bash -cargo test --all-features -- mesh::ratchet mesh::x3dh mesh::session -cargo clippy --all-targets --all-features -cd neode-ui && npm run type-check -./scripts/deploy-to-target.sh --both -``` diff --git a/.claude/plans/memoized-plotting-sifakis.md b/.claude/plans/memoized-plotting-sifakis.md deleted file mode 100644 index c0c0c1e0..00000000 --- a/.claude/plans/memoized-plotting-sifakis.md +++ /dev/null @@ -1,145 +0,0 @@ -# Architecture Review — Fix Remaining Issues - -## Context - -The architecture review (`docs/architecture-review.html`) identified 4 P0, 6 P1, and 6 medium-priority issues across the codebase. After research, **all 4 P0s and 4 of 6 P1s are already fixed**. This plan addresses the remaining open items that improve reliability and security during the beta freeze. - -**What's already fixed:** P0-1 (health RPC), P0-2 (health checks), P0-3 (backup rollback), P0-4 (nginx protections), P1-B (rate limiter cleanup), P1-C (systemd limits), P1-E (WS reconnect), P1-F (Vue error handler), Issue 11 (session async I/O). - -**What we're fixing now (4 items):** - ---- - -## Item 1: Add 10s timeout to 6 bare `client.connect()` calls — DONE - -**Why:** A down Nostr relay hangs the async task indefinitely, blocking identity publishing, node discovery, and marketplace operations. Direct uptime impact. 
- -### Files & locations - -| File | Line | Function | -|------|------|----------| -| `core/archipelago/src/identity_manager.rs` | 409 | `publish_profile()` | -| `core/archipelago/src/nostr_discovery.rs` | 113 | `publish_node_revocation()` | -| `core/archipelago/src/nostr_discovery.rs` | 200 | `verify_revocation()` | -| `core/archipelago/src/nostr_discovery.rs` | 264 | `discover_archipelago_nodes()` | -| `core/archipelago/src/marketplace.rs` | 298 | `discover()` | -| `core/archipelago/src/marketplace.rs` | 406 | `publish()` | - -### Pattern (from `nostr_handshake.rs:126`) - -Replace each `client.connect().await;` with: -```rust -if tokio::time::timeout(Duration::from_secs(10), client.connect()).await.is_err() { - tracing::warn!("Nostr relay connection timed out after 10s, continuing anyway"); -} -``` - -Ensure `use std::time::Duration;` is imported in each file. `tracing::warn!` is already available in all three files. - -### Risk: LOW — Mechanical pattern replication, no logic changes. - ---- - -## Item 2: Pin all crypto dependency versions exactly — DONE - -**Why:** Floating versions (`"2.1"` instead of `"2.2.0"`) allow `cargo update` to silently change crypto libraries. Supply chain risk + project rules violation. - -### Versions (verified from Cargo.lock) - -**`core/archipelago/Cargo.toml`:** - -| Line | Current | Pin to | -|------|---------|--------| -| 44 | `sha2 = "0.10"` | `"0.10.9"` | -| 45 | `hmac = "0.12"` | `"0.12.1"` | -| 50 | `ed25519-dalek = { version = "2.1", ... }` | `version = "2.2.0"` | -| 51 | `curve25519-dalek = "4"` | `"4.1.3"` | -| 52 | `rand = "0.8"` | `"0.8.5"` | -| 69 | `argon2 = "0.5"` | `"0.5.3"` | -| 70 | `chacha20poly1305 = "0.10"` | `"0.10.1"` | -| 81 | `zeroize = { version = "1.7", ... 
}` | `version = "1.8.2"` | -| 92 | `hkdf = "0.12"` | `"0.12.4"` | - -**`core/security/Cargo.toml`:** - -| Line | Current | Pin to | -|------|---------|--------| -| 16 | `aes-gcm = "0.10"` | `"0.10.3"` | -| 17 | `rand = "0.8"` | `"0.8.5"` | -| 19 | `zeroize = { version = "1", ... }` | `version = "1.8.2"` | - -**Note:** `core/models/Cargo.toml` has `ed25519-dalek = "2.0.0"` but this crate is NOT in the workspace — it's dead code. Skip it. - -### Risk: LOW — Pins to versions already resolved in Cargo.lock. No actual dependency changes. - ---- - -## Item 3: Pin all floating container image tags — DONE - -**Why:** Floating tags (`:1`, `:7`, `:alpine`, `:main`) mean two installs a week apart get different software. Supply chain risk and a support nightmare. - -### File: `scripts/image-versions.sh` - -| Line | Variable | Current Tag | Action | -|------|----------|-------------|--------| -| 16 | `MARIADB_IMAGE` | `:11.4` | SSH -> get exact patch version | -| 21 | `POSTGRES_IMAGE` | `:15` | SSH -> get exact patch version | -| 22 | `BTCPAY_POSTGRES_IMAGE` | `:15` | SSH -> get exact patch version | -| 25 | `HOMEASSISTANT_IMAGE` | `:2024.12` | SSH -> get exact patch version | -| 27 | `UPTIME_KUMA_IMAGE` | `:1` | SSH -> get exact patch version | -| 32 | `NEXTCLOUD_IMAGE` | `:29` | SSH -> get exact patch version | -| 34 | `ONLYOFFICE_IMAGE` | `:8.2` | SSH -> get exact patch version | -| 35 | `FILEBROWSER_IMAGE` | `:v2` | SSH -> get exact patch version | -| 36 | `NPM_IMAGE` | `:2` | SSH -> get exact patch version | -| 49 | `REDIS_IMAGE` | `:7` | SSH -> get exact patch version | -| 52 | `VALKEY_IMAGE` | `:8` | SSH -> get exact patch version | -| 60 | `INDEEDHUB_POSTGRES_IMAGE` | `:16-alpine` | SSH -> get exact patch version | -| 61 | `INDEEDHUB_REDIS_IMAGE` | `:7-alpine` | SSH -> get exact patch version | -| 64 | `DWN_SERVER_IMAGE` | `:main` | SSH -> get image digest, pin by SHA or tag | -| 68 | `NGINX_ALPINE_IMAGE` | `:alpine` | SSH -> get exact version | - -### Pre-work required 
-Run on 192.168.1.228: `podman images --format '{{.Repository}}:{{.Tag}}'` to get exact versions currently deployed. Pin to THOSE — don't upgrade. - -### Risk: MEDIUM — Must match what's actually running. Wrong pin = containers fail on next creation. - ---- - -## Item 4: Add CI pipeline for Rust + frontend checks — DONE - -**Why:** No tests or linting run in CI. Regressions from Items 1-3 (and all future beta fixes) go undetected until they hit the server. - -### File to create: `.github/workflows/ci.yml` - -Two parallel jobs: -1. **`rust`** (ubuntu-latest): `cargo fmt --check` -> `cargo clippy -D warnings` -> `cargo test` -2. **`frontend`** (ubuntu-latest): `npm ci` -> `npm run type-check` -> `npm test` - -Trigger: push to `main` + all PRs. Reference existing `build-macos.yml` for action versions (checkout@v4, setup-node@v4 with Node 18). - -### Risk: LOW — Additive only, new file, doesn't affect existing workflows. - ---- - -## Execution Order - -1. **Item 1** (Nostr timeouts) — lowest risk, immediate reliability gain -2. **Item 2** (crypto pins) — batch with Item 1 for single deploy -3. **Item 3** (container image pins) — requires SSH query first -4. **Item 4** (CI) — validates everything, no deploy needed - -Items 1+2 deploy together. Item 3 deploys separately (script only). Item 4 is push-only. 
- -## Verification - -- Items 1+2: `cargo clippy --all-targets --all-features` on dev server (zero warnings), then deploy + test identity/discovery/marketplace features -- Item 3: `source scripts/image-versions.sh` + verify all vars have exact patch versions -- Item 4: Push to branch, verify both CI jobs pass green on GitHub Actions - -## Deferred (post-beta) - -- Issue 6: Generate TS types from Rust (ts-rs) — new dependency -- Issue 7: Consolidate container metadata to single source — structural refactor -- Issue 8: Split deploy/ISO scripts into modules — already planned in script comments -- Issue 9: Single app manifest driving all 6+ locations — architectural change -- Issue 12: useAsyncState composable — touches 14+ views, risky during freeze diff --git a/.claude/plans/mutable-roaming-pancake.md b/.claude/plans/mutable-roaming-pancake.md deleted file mode 100644 index 2f2600d8..00000000 --- a/.claude/plans/mutable-roaming-pancake.md +++ /dev/null @@ -1,357 +0,0 @@ -# Gold Standard Claude Code Configuration — Archipelago - -## Context - -The last optimization (2026-03-28) cut CLAUDE.md from 130→101 lines and skills from 33→11. That was the right first pass. This plan is the second pass: fixing structural issues the first cleanup didn't address — hook duplication, memory chaos, a leaked API key, missing path scoping, context budget waste, and underutilized agent/permission systems. The goal is a configuration so tight that re-running this audit would produce zero suggestions. - -**Research base**: Every file in `.claude/` (project + global), all 26 project memories, all 8 auto-memories, all 11 skills, all 5 rules, all 11 hooks, both settings files, the iframe-specialist agent, the full project structure (core/, neode-ui/, scripts/, image-recipe/, apps/, .gitea/), latest Claude Code docs (CLAUDE.md best practices, hooks v2.1.85+, skills frontmatter, agents, memory, permissions, MCP, context management, agent teams), and the 2026-03-28 cleanup feedback. 
- -**Governing principle** (carried from cleanup): *Every line must prevent a specific mistake Claude would otherwise make. If Claude does it right without the instruction, it's noise.* - ---- - -## Phase 0: CRITICAL — Remove Leaked Secret - -**File**: `.claude/memory/deploy-automation.md` (line 11) -Contains a plaintext Anthropic API key: `sk-ant-api03-...` - -**Action**: Remove the key immediately. Replace with: `"ANTHROPIC_API_KEY from secrets store (never stored in memory files)"` - -This is the only blocking item. Everything else is optimization. - ---- - -## Phase 1: CLAUDE.md — Trim to ~75 Lines - -**File**: `/Users/dorian/Projects/archy/CLAUDE.md` -**Current**: 101 lines | **Target**: ~75 lines | **Saves**: ~500 tokens/session - -### What to cut (reference data that doesn't prevent mistakes) - -| Section | Lines | Action | Reason | -|---------|-------|--------|--------| -| Infrastructure table | 21-30 | Move to auto-memory | Reference data, not a rule. Already in memory files | -| ISO debug commands | 79-84 | Move to `iso-debug` skill reference | Diagnostic commands, not rules | -| Kiosk toggle info | 85-86 | Move to auto-memory or delete | Reference, not a rule | -| "Backend binds 127.0.0.1" | 63 | Move to new backend rule | Claude can read the code | -| "Timeouts on all external operations" | 65 | Move to new backend rule | Already in `rules/api.md` | - -### What to add - -```markdown -## Compact Instructions -When compacting, preserve: list of modified files, test results, deploy target state, current branch. -``` - -This costs 2 lines but saves entire sessions from losing critical context. 
- -### Resulting structure (~75 lines) - -``` -Lines 1-2: Project description + stack -Lines 3-6: Beta freeze notice -Lines 7-12: Quick reference (dev, build, deploy commands) -Lines 13-18: Architecture diagram (compact) -Lines 19-20: Data paths -Lines 21-26: Critical Rules (5 rules) -Lines 27-33: App Integration Checklist -Lines 34-36: Git conventions -Lines 37-39: Compact instructions -``` - -Infrastructure table moves to auto-memory where it's still loaded at session start. - ---- - -## Phase 2: Hook Deduplication — Eliminate Double Execution - -### Problem - -Every `Bash` call runs **both** global `pretooluse-bash.sh` AND project `block-risky-bash.sh`. Every `Edit|Write` call runs **both** global `pretooluse-files.sh` AND project `protect-files.sh`. They overlap on ~80% of patterns (rm -rf, git reset --hard, .git/ edits, .env files, etc.). - -**Cost**: 2 extra Python processes per tool call, checking the same patterns twice. - -### Solution: Project hooks become project-specific only - -**File**: `.claude/hooks/block-risky-bash.sh` -**Action**: Strip all patterns already covered by global hook. Keep ONLY: -- Cargo build on macOS (Archy-specific: "build on dev server via SSH") -- Path traversal with rm (more aggressive check than global) - -~15 lines instead of ~80. - -**File**: `.claude/hooks/protect-files.sh` -**Action**: Strip all patterns already covered by global hook. Keep ONLY: -- `scripts/deploy-config.sh` (Archy-specific credential file) -- Path-outside-project check (project-specific boundary) - -~20 lines instead of ~75. - -**Global hooks stay unchanged** — they're the universal baseline. - -### Result -- Before: 4 Python processes per Bash call (2 global + 2 project parsing same JSON) -- After: 2 Python processes per Bash call (1 global comprehensive + 1 tiny project-specific) - ---- - -## Phase 3: Memory System — Consolidate and Clean - -### Problem - -Two separate memory systems with overlapping content: -1. 
**Auto-memory** (`~/.claude/projects/-Users-dorian-Projects-archy/memory/`) — 8 files, auto-loaded -2. **Project memory** (`.claude/memory/`) — 26 files, NOT auto-loaded - -Claude sees auto-memory every session. Project memory only loads if Claude manually reads it. - -### Solution: Curate auto-memory, keep project memory as archive - -**Auto-memory MEMORY.md** — restructure to ~25 lines with the most critical feedback: - -```markdown -# Archipelago Project Memory - -## Critical Feedback (prevent recurring mistakes) -- [Direct Port Rule](feedback_apps_always_direct_port.md) — Apps MUST use direct port, NEVER proxy paths -- [External URLs](feedback_external_urls_iframe.md) — Open https:// directly, never /ext/ -- [Deploy All Nodes](feedback_indeedhub_deploy_all_servers.md) — Deploy to ALL nodes -- [No Tor Publishing](feedback_no_tor_relay_publishing.md) — Never publish .onion to relays -- [UFW Forward](feedback_podman_ufw_forward.md) — DEFAULT_FORWARD_POLICY=ACCEPT -- [Deploy Patterns](feedback_deploy_patterns.md) — Rootless port 80, cred sync, image export -- [Asset Workflow](feedback_asset_workflow.md) — Never generate images, user is designer -- [ASCII Logo](feedback_logo_ascii.md) — Block-letter logo locked, never change -- [Claude Cleanup](feedback_claude_cleanup.md) — Instruction optimization principles - -## Infrastructure -- [CI/CD & Registry](reference_cicd_registry.md) — git.tx1138.com, act_runner, insecure registry -- [Multi-Node Deploy](reference_multi_node_deploy.md) — 5 nodes, SSH keys, deploy methods -- [Infrastructure Quick Ref](reference_infrastructure.md) — IPs, passwords, SSH keys (moved from CLAUDE.md) - -## Project State -- [ISO Testing](project_iso_testing_plan.md) — Hardware matrix, boot compatibility -- [ISO Custom Base](project_iso_size_reduction.md) — Debootstrap ISO, remaining issues - -## Archive -Detailed project memory in .claude/memory/MEMORY.md (26 files, not auto-loaded). 
-``` - -**New auto-memory files to create** (migrated from project memory): -- `feedback_apps_always_direct_port.md` — Broken THREE TIMES, highest-value feedback -- `feedback_deploy_patterns.md` — Hard-won container patterns -- `feedback_asset_workflow.md` — Prevents wasted effort generating images -- `feedback_logo_ascii.md` — Prevents changing locked-in branding -- `reference_infrastructure.md` — Infrastructure table from CLAUDE.md (IPs, SSH, passwords) - -**Project memory (.claude/memory/)**: -- Add comment at top of MEMORY.md: `` -- Fix `deploy-automation.md` (Phase 0 — remove API key) -- Update `unbundled-iso.md` (still says "NOT YET BUILT") - ---- - -## Phase 4: Permissions — Auto-Approve Safe Commands - -**File**: `.claude/settings.local.json` - -**Current**: Only `ssh:*` and `gh api:*` allowed. - -**Updated** — add read-only and build/test commands: - -```json -{ - "permissions": { - "allow": [ - "Bash(ssh:*)", - "Bash(gh api:*)", - "Bash(cd neode-ui*)", - "Bash(npm run *)", - "Bash(npm test*)", - "Bash(npm start*)", - "Bash(npx vue-tsc*)", - "Bash(npx vitest*)", - "Bash(git log*)", - "Bash(git diff*)", - "Bash(git status*)", - "Bash(git branch*)", - "Bash(git show*)", - "Bash(git stash*)", - "Bash(cargo check*)", - "Bash(cargo clippy*)", - "Bash(cargo test*)", - "Bash(journalctl*)", - "Bash(systemctl status*)", - "Bash(ls *)", - "Bash(wc *)", - "Bash(file *)", - "Bash(xxd *)", - "Bash(df *)", - "Bash(du *)" - ] - } -} -``` - -**NOT auto-approved** (still require confirmation): -- `git push/commit` — Affects remote/creates state -- `cargo build` — Blocked by hook on macOS anyway -- `npm install` — Modifies dependencies -- `./scripts/deploy-*` — Deploys to servers -- `rm`, `mv`, `cp` — Potentially destructive - ---- - -## Phase 5: Merge iso-branding into build-iso - -**Problem**: `iso-branding` is a pure design reference, only relevant during ISO builds. Its description consumes skill budget. - -**Action**: -1. 
Move `.claude/skills/iso-branding/SKILL.md` content → `.claude/skills/build-iso/references/branding.md` -2. Update `build-iso/SKILL.md` to reference the branding file -3. Delete `.claude/skills/iso-branding/` directory - -**Skill count**: 11 → 10 - ---- - -## Phase 6: Add Backend Rule File - -**Problem**: No path-scoped rule for Rust backend. 3 backend rules sit in CLAUDE.md (loaded every session even for frontend-only work). - -**New file**: `.claude/rules/backend.md` - -```markdown ---- -globs: - - "core/**/*.rs" - - "core/**/Cargo.toml" ---- - -# Backend Rules (Archipelago — Rust) - -- Backend binds `127.0.0.1` only — nginx handles external access -- Validate all input before path construction — reject `..`, `/`, null bytes -- Timeouts on all external operations (10s default, 30s heavy) -- Use `anyhow::Result` for error propagation, not `.unwrap()` in handlers -- Log with `tracing`, never `println!` or `eprintln!` in production paths -- Container commands through `PodmanClient` (core/container/), never raw Command::new("podman") -``` - -Delete the Backend section from CLAUDE.md (moved here). - ---- - -## Phase 7: Tighten prompt-injection-detect.sh - -**Problem**: `context_manipulation` pattern matches `IMPORTANT:`, `CRITICAL:`, `` — normal in code/docs. Creates false positive warnings. - -**Action**: Tighten the `context_manipulation` regex to require injection-specific signatures: - -```bash -# OLD (too broad): -"IMPORTANT:|CRITICAL:|SYSTEM:|ADMIN:|||" - -# NEW (specific): -"(?:^|\s)(?:SYSTEM|ADMIN):\s*(?:you are|ignore|forget|override|new instructions)|<(?:system|instructions)>.*(?:ignore|override|forget)" -``` - ---- - -## Phase 8: Add 2 Focused Agents - -**Current**: 1 agent (iframe-specialist, 678 lines) - -**Add**: - -### `.claude/agents/deploy-specialist.md` -```yaml ---- -name: deploy-specialist -description: Deploys to all 5 Archipelago nodes. Knows SSH access, build capabilities, post-deploy verification. 
-tools: Bash, Read, Grep, Glob -model: sonnet ---- -``` -Body: Node inventory, deploy workflow, IndeedHub multi-node rules, post-deploy checklist. - -### `.claude/agents/code-reviewer.md` -```yaml ---- -name: code-reviewer -description: Reviews code against Archipelago standards — frontend patterns, Rust safety, container security, crypto rules. -tools: Read, Grep, Glob -model: sonnet ---- -``` -Body: Frontend rules, backend rules, container rules, security checklist. - -**Agent count**: 1 → 3 - ---- - -## Phase 9: Skill Frontmatter Audit - -**Problem**: Action skills that have side effects should have `disable-model-invocation: true` to prevent Claude from auto-invoking them. - -| Skill | Has `disable-model-invocation: true`? | Needs it? | -|-------|--------------------------------------|-----------| -| add-app | Yes | Yes (side effects) | -| add-web-app | Verify | Yes | -| build-iso | Verify | Yes (builds ISO) | -| iso-debug | Verify | Yes (runs diagnostics) | -| podman | Verify | Yes (modifies containers) | -| polish | Verify | Yes (modifies code) | -| sweep | Verify | Yes (runs checks, may fix) | -| mesh | No | No (reference knowledge) | -| design-pixel-retro | No | No (reference knowledge) | -| gamepad-nav | No | No (reference knowledge) | - -Action: Verify and add `disable-model-invocation: true` to all 7 action skills. - ---- - -## Summary - -| Phase | Impact | Files Changed | Benefit | -|-------|--------|---------------|---------| -| 0. Remove API key | CRITICAL | 1 | Security | -| 1. Trim CLAUDE.md | HIGH | 1 | ~500 tokens/session saved | -| 2. Dedup hooks | HIGH | 2 | ~200ms faster per tool call | -| 3. Memory consolidate | HIGH | ~8 | Cleaner context, no stale data | -| 4. Permissions | MEDIUM | 1 | ~3s saved per safe command | -| 5. Merge iso-branding | LOW | 3 | 1 less skill description | -| 6. Backend rule | MEDIUM | 2 | Path-scoped, not always-loaded | -| 7. Injection hook | LOW | 1 | Fewer false positives | -| 8. 
New agents | MEDIUM | 2 new | Better delegation | -| 9. Skill frontmatter | LOW | ~5 | Prevents unintended auto-invoke | - -**Net changes**: CLAUDE.md 101→~75 lines, skills 11→10, agents 1→3, rules 5→6, hooks 60% smaller - ---- - -## What This Plan Does NOT Change (and why each was evaluated) - -- **Global CLAUDE.md** (36 lines) — Already optimized, passes the "would removing cause mistakes?" test -- **Global hooks** (8 scripts) — Universal baseline, well-tuned, no project overlap -- **Global rules** (api, crypto, bitcoin) — Correct glob scoping, concise content -- **Global settings.json** — Plugins, effort level, hook config all justified -- **iframe-specialist agent** — Deep reference, correctly scoped, rarely loaded -- **Skills mesh/gamepad-nav/design-pixel-retro** — Tiny description cost (~120 chars each), valuable on-demand -- **MCP servers** — Not needed (self-hosted infra, no external API integrations) -- **Agent teams** — Experimental, single-developer project doesn't benefit -- **Project .claude/memory/ (26 files)** — Kept as archive with annotation - ---- - -## Verification Checklist - -After implementation: -- [ ] `grep -r "sk-ant" .claude/` returns zero results -- [ ] New session auto-loads MEMORY.md with all critical feedback -- [ ] `git status` auto-approves without permission prompt -- [ ] `/sweep` skill loads and executes correctly -- [ ] Project hooks run fast (no duplicate pattern checks) -- [ ] `cd neode-ui && npx vue-tsc -b --noEmit` passes -- [ ] Spawning deploy-specialist agent works -- [ ] CLAUDE.md is ≤80 lines -- [ ] `/context` shows reasonable token budget diff --git a/.claude/plans/plan.md b/.claude/plans/plan.md deleted file mode 100644 index b0429b08..00000000 --- a/.claude/plans/plan.md +++ /dev/null @@ -1,803 +0,0 @@ -# Archipelago: Production Excellence Plan - -**Duration**: 12 months (48 weeks) -**Goal**: Code so good no developer could question any decision. Apple-level reliability. Every failure visible and recoverable. 
Every operation bounded. Every line justified. -**Audited**: 2026-03-20 — 122 Rust files, 38 Vue views, 180+ frontend files, 80+ shell scripts - -## CONSTRAINTS - -- **DEPLOY ONLY TO .198** — Never .228. All verification on .198. -- **BETA FREEZE** — Behavior-preserving only. No new features/UI/endpoints. -- **Tests before every refactor** — Capture current behavior first. Tests must pass unchanged after. -- **Atomic commits** — One logical change per commit. Every step compiles + passes tests. - -```bash -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.198 -``` - ---- - -## COMPLETE ISSUE REGISTRY - -### Backend Rust — 122 files audited - -| ID | Issue | File(s) | Severity | -|----|-------|---------|----------| -| R1 | Health RPC endpoint has no handler — returns "Unknown method" | `api/rpc/mod.rs` | P0 | -| R2 | Nostr client.connect() hangs indefinitely (4 calls, no timeout) | `nostr_handshake.rs:124,161,262,282` | P0 | -| R3 | Backup restore extracts directly to live dir — no atomic rollback | `backup/full.rs:122-149` | P0 | -| R4 | Rate limiter cleanup() never spawned — HashMap grows forever | `session.rs:566-579` | P1 | -| R5 | Login rate limiter same issue — entries never evicted | `session.rs:452-472` | P1 | -| R6 | Blocking std::fs in async — session.rs (6 calls) | `session.rs:77,128,370,413,423,425` | P1 | -| R7 | Blocking std::fs in async — docker_packages.rs | `docker_packages.rs:561,573` | P1 | -| R8 | Blocking std::fs in async — port_allocator.rs | `port_allocator.rs:59,73,77` | P1 | -| R9 | Blocking std::fs in async — peers.rs, node_message.rs | `peers.rs:30`, `node_message.rs:65` | P1 | -| R10 | Blocking std::fs in async — identity.rs, identity_manager.rs | `identity.rs:50`, `identity_manager.rs:164` | P1 | -| R11 | Blocking std::fs in async — nostr_discovery.rs | `nostr_discovery.rs:55` | P1 | -| R12 | Sync TCP I/O in async context — electrs_status.rs | `electrs_status.rs:5,40,78,81` | P1 | -| R13 | .expect() in main.rs startup | 
`main.rs:124,159` | P2 | -| R14 | .parse().unwrap() in session.rs rate limiting | `session.rs:665,676,688` | P1 | -| R15 | 7 .unwrap()/.expect() in mesh/protocol.rs | `protocol.rs:582,592,614,649,679,713,728` | P1 | -| R16 | .expect() in identity.rs crypto | `identity.rs:114,119` | P2 | -| R17 | .unwrap() in helpers/lib.rs (5 calls) | `helpers/lib.rs:167,172,180,233,253` | P2 | -| R18 | .unwrap() in helpers/rsync.rs (5 calls) | `rsync.rs:196,199,202,210,220` | P2 | -| R19 | .unwrap() in js-engine/lib.rs | `js-engine/lib.rs:130,249` | P2 | -| R20 | 14 #[allow(dead_code)] suppressions in mesh/mod.rs | `mesh/mod.rs:7-25` | P2 | -| R21 | Dead code in lnd.rs, data_manager.rs, dev_orchestrator.rs | Multiple | P2 | -| R22 | Bitcoin RPC URL hardcoded in 4+ files | `bitcoin.rs:89`, `mesh/mod.rs:624,649,663`, `listener.rs:1509+` | P2 | -| R23 | DWN health URL hardcoded | `dwn_sync.rs:76` | P2 | -| R24 | Update manifest URL hardcoded | `update.rs:11` | P3 | -| R25 | DNS-over-HTTPS URLs hardcoded (4 providers) | `network/dns.rs:98,102,106,110` | P3 | -| R26 | DWN protocol URIs hardcoded in server.rs | `server.rs:453-456` | P3 | -| R27 | Missing timeouts on mesh Bitcoin RPC calls | `mesh/mod.rs:624,649,663` | P1 | -| R28 | Missing timeouts on LND proxy calls (68 .send() calls) | `api/rpc/lnd.rs` | P2 | -| R29 | Missing timeout on DWN health check | `dwn_sync.rs:76` | P2 | -| R30 | TODO: track last-seen timestamp | `handshake.rs:77` | P3 | -| R31 | TODO: lnd.lookupinvoice RPC endpoint | `marketplace.rs:183` | P3 | -| R32 | TODO: trigger auto-restart or alert | `container/health_monitor.rs:140` | P3 | -| R33 | TODO: configure Podman to use AppArmor profile | `security/container_policies.rs:68` | P3 | -| R34 | Tor rotation deletes old .onion immediately — no transition | `api/rpc/tor.rs:184-240` | P1 | -| R35 | package.rs god file — 1,795 lines | `api/rpc/package.rs` | P2 | -| R36 | mesh/listener.rs god file — 1,799 lines | `mesh/listener.rs` | P2 | -| R37 | rpc/mod.rs god file — 
1,092 lines | `api/rpc/mod.rs` | P2 | -| R38 | lnd.rs god file — 1,068 lines | `api/rpc/lnd.rs` | P2 | -| R39 | monitoring/mod.rs — 993 lines | `monitoring/mod.rs` | P3 | -| R40 | api/handler.rs — 911 lines | `api/handler.rs` | P3 | -| R41 | 30+ functions exceed 50 lines across codebase | Multiple | P3 | - -### Frontend — 180+ files audited - -| ID | Issue | File(s) | Severity | -|----|-------|---------|----------| -| F1 | WebSocket subscription registered multiple times — race condition | `stores/app.ts:88-134` | P0 | -| F2 | Unprotected concurrent mesh state mutations | `stores/mesh.ts:249-268,294-324` | P0 | -| F3 | No global Vue error handler — white screen on error | `main.ts` | P0 | -| F4 | Stale data after WebSocket reconnect — no full refresh | `stores/app.ts:88-163` | P1 | -| F5 | Message polling timer never stopped after logout | `composables/useMessageToast.ts:60` | P1 | -| F6 | AppLauncher NIP-07 message listener leak on close | `stores/appLauncher.ts:295-301` | P1 | -| F7 | Audio player listeners stack — never cleaned up | `composables/useAudioPlayer.ts:1-91` | P1 | -| F8 | WebSocket reconnection race — parallel connect() attempts | `api/websocket.ts:212-238` | P2 | -| F9 | WebSocket parse error silently caught — stale UI forever | `api/websocket.ts:164-172` | P2 | -| F10 | WebSocket stale connection detection too aggressive (5min) | `api/websocket.ts:284-299` | P2 | -| F11 | RPC client backoff + timeout = 40s max wait | `api/rpc-client.ts:31-117` | P2 | -| F12 | No code splitting — monolithic bundle | `vite.config.ts` | P2 | -| F13 | v-html on QR code without DOMPurify | `views/Settings.vue:441` | P2 | -| F14 | Goals store O(n) alias lookup on every computed | `stores/goals.ts:16-20,38-89` | P2 | -| F15 | localStorage save without try/catch (5+ instances) | `stores/goals.ts:34-36` + others | P2 | -| F16 | FileBrowser auth token duality — memory + cookie | `api/filebrowser-client.ts:39,50-68` | P2 | -| F17 | CSRF token cookie parsing brittle — regex 
only | `api/rpc-client.ts:18-21` | P2 | -| F18 | aiPermissions.ts Set uses unsafe type assertion | `stores/aiPermissions.ts:91-103` | P3 | -| F19 | Untracked setTimeout in AppSession — fires after unmount | `views/AppSession.vue:507` | P3 | -| F20 | Dashboard navigation missing aria-current="page" | `views/Dashboard.vue` | P3 | -| F21 | Search performance — string re-lowercasing every keystroke | `views/Apps.vue:510-537` | P3 | -| F22 | 30+ backdrop-filter blur elements — GPU overload on mobile | `style.css` | P3 | -| F23 | Loosely typed `Record` payloads on sensitive DID operations — no branded types | `types/api.ts` + `rpc-client.ts` | P3 | -| F24 | checkInterval timer leak on connect race | `api/websocket.ts:82-96` | P3 | -| F25 | Web5.vue god component — 3,940 lines | `views/Web5.vue` | P2 | -| F26 | Mesh.vue — 2,106 lines | `views/Mesh.vue` | P2 | -| F27 | Dashboard.vue — 1,819 lines | `views/Dashboard.vue` | P2 | -| F28 | Settings.vue — 1,792 lines | `views/Settings.vue` | P2 | -| F29 | Marketplace.vue — 1,293 lines | `views/Marketplace.vue` | P3 | -| F30 | Server.vue — 1,132 lines | `views/Server.vue` | P3 | -| F31 | Home.vue — 1,059 lines | `views/Home.vue` | P3 | -| F32 | AppDetails.vue — 1,036 lines | `views/AppDetails.vue` | P3 | -| F33 | useAppStore god store — 324 lines, 16 methods, 8+ responsibilities | `stores/app.ts` | P2 | - -### Shell Scripts — 80+ files audited - -| ID | Issue | File(s) | Severity | -|----|-------|---------|----------| -| S1 | 60+ instances of `sudo podman` — should be rootless | `fix-indeedhub(28)`, `deploy-bitcoin(11)`, `deploy-tailscale(2+)` | P0 | -| S2 | Zero container health checks in first-boot (30 containers) | `first-boot-containers.sh` | P0 | -| S3 | 50+ `:latest` image tags across all scripts | `first-boot(15)`, `deploy(11)`, `tailscale(18)`, `iso(7)` | P1 | -| S4 | No `set -e` in first-boot — silent container failures | `first-boot-containers.sh:1-9` | P1 | -| S5 | `eval "$DB_PASSWORDS"` — code injection risk | `deploy-to-target.sh:940` | P1 | -| S6 | No deploy 
locking — concurrent deploys corrupt state | `deploy-to-target.sh` | P1 | -| S7 | No deploy rollback — failed deploy leaves broken system | `deploy-to-target.sh` | P1 | -| S8 | sshpass usage in trust-archipelago-cert.sh | `trust-archipelago-cert.sh:23-26` | P1 | -| S9 | MariaDB password in command line — visible in ps | `first-boot-containers.sh:285` | P1 | -| S10 | 80+ instances of `2>/dev/null \|\| true` masking errors | `deploy-to-target.sh` | P2 | -| S11 | No trap cleanup for temp files | Multiple scripts | P2 | -| S12 | Unquoted variables (word splitting risk) | Multiple scripts | P2 | -| S13 | Hardcoded IPs in 6+ scripts | `deploy-to-target.sh:26`, `deploy-tailscale.sh:26`, etc. | P2 | -| S14 | No input validation on deploy targets | `deploy-tailscale.sh` | P2 | -| S15 | Missing memory limits on some containers in deploy | `deploy-to-target.sh:842-880` | P2 | -| S16 | ISO build not reproducible — dynamic image capture + :latest | `build-auto-installer-iso.sh:500-594` | P2 | -| S17 | No disk space pre-flight in deploy | `deploy-to-target.sh` | P2 | -| S18 | deploy-to-target.sh — 1,728 lines monolith | `deploy-to-target.sh` | P3 | -| S19 | build-auto-installer-iso.sh — 1,850 lines monolith | `build-auto-installer-iso.sh` | P3 | -| S20 | first-boot-containers.sh — 855 lines monolith | `first-boot-containers.sh` | P3 | -| S21 | No shared script library — duplicated functions | `scripts/` | P3 | - -### Infrastructure - -| ID | Issue | File(s) | Severity | -|----|-------|---------|----------| -| I1 | Nginx: /archipelago/, /content, /dwn missing timeout+rate-limit+body-size | `nginx-archipelago.conf:116-180` | P0 | -| I2 | Systemd: no MemoryMax, LimitNOFILE, TasksMax | `archipelago.service` | P1 | -| I3 | Tor rotation kills old address immediately — federation downtime | `api/rpc/tor.rs:184-240` | P1 | - ---- - -## MONTH 1: CRASH PREVENTION (Weeks 1–4) - -> Fix every issue that can crash the system, hang indefinitely, or lose data. 
- -### Week 1: P0 Backend — Things That Hang or Lose Data - -**R1 — Health endpoint handler** -- File: `core/archipelago/src/api/rpc/mod.rs` -- Add handler for `"health"` method that checks: crash recovery complete, Podman socket responsive, session store loaded -- Tests: health returns JSON status, degraded when Podman unreachable, degraded during recovery -- Verify: `curl http://192.168.1.198/rpc/v1 -d '{"method":"health"}'` returns real status - -**R2 — Nostr connect timeout** -- File: `core/archipelago/src/nostr_handshake.rs` lines 124, 161, 262, 282 -- Wrap all 4 `client.connect().await` in `tokio::time::timeout(Duration::from_secs(10), ...)` -- Tests: connect timeout returns Err after 10s, successful connect within timeout works - -**R3 — Backup restore atomic rollback** -- File: `core/archipelago/src/backup/full.rs` lines 122-149 -- Rewrite: decrypt → extract to staging dir → validate required files → atomic rename → rollback on failure -- Tests: valid backup restores, corrupt backup fails without touching live data, partial extraction rolls back, disk space check fails early - -**I1 — Nginx unauthenticated endpoint protection** -- File: `image-recipe/configs/nginx-archipelago.conf` lines 116-180 -- Add to `/archipelago/`, `/content`, `/dwn`: - - `limit_req zone=peer burst=20 nodelay;` - - `client_max_body_size 10m;` - - `proxy_connect_timeout 30s; proxy_read_timeout 60s; proxy_send_timeout 30s;` -- Tests: >10MB payload → 413, slow client → timeout, burst 30 → 429 after 20 - -### Week 2: P0 Frontend + Scripts — Things That Break UI or Containers - -**F1 — WebSocket subscription race condition** -- File: `neode-ui/src/stores/app.ts` lines 88-134 -- Fix: Return unsubscribe function from `wsClient.subscribe()`, call it before re-subscribing. Use a subscription ID to prevent duplicates. 
-- Tests: rapid connectWebSocket() calls produce only one active subscription - -**F2 — Mesh concurrent state mutations** -- File: `neode-ui/src/stores/mesh.ts` lines 249-324 -- Fix: Add `isSending` ref as mutex. Queue concurrent sends. `fetchMessages()` called once after all sends complete. -- Tests: 3 concurrent sendMessage() calls → all succeed, messages list consistent - -**F3 — Global error handler** -- File: `neode-ui/src/main.ts` -- Add `app.config.errorHandler` that shows toast + logs structured error -- Tests: thrown error in component shows toast, nested errors don't crash handler - -**S1 — Eliminate all `sudo podman`** -- Files: `fix-indeedhub-containers.sh` (28), `deploy-bitcoin-knots.sh` (11), `deploy-tailscale.sh` (2+), `uptime-monitor.sh` (1), `setup-aiui-server.sh` -- Replace every `sudo podman` with `podman` (runs as archipelago user) -- Tests: grep for `sudo podman` across all scripts returns zero matches - -**S2 — Container health checks for all 30 containers** -- File: `scripts/first-boot-containers.sh` -- Add `--health-cmd`, `--health-interval=30s`, `--health-timeout=5s`, `--health-retries=3` to every `$DOCKER run` -- Health commands per type: - - Bitcoin: `bitcoin-cli -rpcuser=... getblockchaininfo || exit 1` - - HTTP apps: `curl -sf http://localhost:{port}/ || exit 1` - - LND: `curl -sf --insecure https://localhost:8080/v1/getinfo || exit 1` - - Databases: `mariadb -u root -p... 
-e "SELECT 1" || exit 1` -- Tests: script grep confirms every `$DOCKER run` has `--health-cmd` - -### Week 3: P1 Backend — Blocking I/O and Memory Leaks - -**R4+R5 — Rate limiter cleanup** -- File: `core/archipelago/src/session.rs` -- Spawn background tasks for both `EndpointRateLimiter::cleanup()` and `LoginRateLimiter` cleanup, every 5 min -- Tests: after cleanup, stale entries removed; active entries preserved - -**R6 — session.rs blocking I/O (6 calls)** -- Replace `std::fs::read_to_string` → `tokio::fs::read_to_string` at lines 77, 370, 413 -- Replace `std::fs::write` → `tokio::fs::write` at lines 128, 425 -- Replace `std::fs::create_dir_all` → `tokio::fs::create_dir_all` at line 423 -- Tests: session load/save/persist still works correctly - -**R7 — docker_packages.rs blocking I/O** -- Replace `std::fs::read_to_string` → `tokio::fs::read_to_string` at lines 561, 573 -- Tests: app metadata loading works - -**R8 — port_allocator.rs blocking I/O** -- Replace all 3 std::fs calls → tokio::fs at lines 59, 73, 77 -- Tests: port allocation/persistence works - -**R9+R10+R11 — Remaining blocking I/O** -- `peers.rs:30`, `node_message.rs:65`, `identity.rs:50`, `identity_manager.rs:164`, `nostr_discovery.rs:55` -- Convert all to tokio::fs -- Tests: each module's file operations still work - -**R12 — electrs_status.rs sync TCP I/O** -- Convert synchronous TCP client to async (tokio::net::TcpStream) -- Tests: ElectrumX status query works, timeout on connection failure - -### Week 4: P1 Frontend — Memory Leaks and Stale State - -**F4 — WebSocket reconnect full state refresh** -- File: `neode-ui/src/stores/app.ts` -- After reconnect, call `rpcClient.call({method: 'server.get-state'})` to get fresh state before accepting patches -- Tests: after simulated disconnect+reconnect, state matches server - -**F5 — Message polling timer cleanup** -- File: `neode-ui/src/composables/useMessageToast.ts` -- Tie polling lifecycle to auth state: stop on logout, start on login. 
Export cleanup function. -- Tests: polling stops when auth false, restarts when auth true, no timer after unmount - -**F6 — AppLauncher message listener leak** -- File: `neode-ui/src/stores/appLauncher.ts` -- Ensure listener is removed when app closes (even if not via close button — e.g., route navigation) -- Tests: navigate away from app → listener removed, new app opens clean - -**F7 — Audio player listener stacking** -- File: `neode-ui/src/composables/useAudioPlayer.ts` -- Create Audio element once, register listeners once. Track initialization flag. -- Tests: calling play() 10 times → still only 6 listeners total (not 60) - -**S3 — Pin all container images (remove :latest)** -- Files: `first-boot-containers.sh` (15), `deploy-to-target.sh` (11), `deploy-tailscale.sh` (18), `build-auto-installer-iso.sh` (7) -- Replace every `:latest` with specific version tag -- Create `image-versions.env` sourced by all scripts — single source of truth -- Tests: `grep -r ':latest' scripts/ image-recipe/` returns zero matches (excluding comments) - ---- - -## MONTH 2: OPERATIONAL SAFETY (Weeks 5–8) - -> Fix everything that makes deploys dangerous, scripts unreliable, or operations opaque. - -### Week 5: Deploy Script Hardening - -**S4 — first-boot error handling** -- Add per-section error checking: if Bitcoin fails, skip dependent containers (LND, Mempool, BTCPay) -- Add `wait_for_container` return value checking -- Tests: first-boot with broken Bitcoin image → Bitcoin deps skipped, independent apps still start - -**S5 — Replace eval with safe construct** -- File: `deploy-to-target.sh:940` -- Replace `eval "$DB_PASSWORDS"` with explicit variable assignment from SSH output -- Tests: passwords parsed correctly without eval - -**S6 — Deploy locking** -- File: `deploy-to-target.sh` -- Add remote `flock` on `/var/lock/archipelago-deploy.lock`. Second deploy fails immediately with message. Stale lock (>30 min) broken automatically. 
-- Tests: two parallel deploys → second fails, stale lock → broken and deploy proceeds - -**S7 — Deploy rollback** -- File: `deploy-to-target.sh` -- Before overwriting binary: `cp archipelago archipelago.bak` -- Before overwriting frontend: `cp -r web-ui web-ui.bak` -- If health check fails post-restart: restore from .bak, restart again -- Tests: intentionally broken binary → deploy detects, rolls back, system healthy - -**S8 — Eliminate sshpass** -- File: `trust-archipelago-cert.sh` -- Rewrite to use SSH key only: `ssh -i ~/.ssh/archipelago-deploy` -- Tests: script works with key auth, fails gracefully without key - -### Week 6: Script Quality - -**S9 — MariaDB password not on command line** -- File: `first-boot-containers.sh:285` -- Use `$DOCKER exec -i ... mariadb -uroot < /dev/stdin <<< "SET PASSWORD..."` -- Tests: `ps aux` during execution doesn't show password - -**S10 — Replace silent error masking** -- File: `deploy-to-target.sh` (80+ instances) -- Pattern: replace `2>/dev/null || echo ""` with `|| { log_warn "..."; echo ""; }` -- At minimum, log what failed before masking -- Tests: failed health check produces log entry - -**S11 — Trap cleanup for temp files** -- All scripts that create /tmp files: add `trap "rm -rf /tmp/deploy-$$" EXIT` at start -- Files: deploy-to-target.sh, deploy-tailscale.sh, build-auto-installer-iso.sh -- Tests: script interrupted mid-execution → temp files cleaned up - -**S12 — Quote all variables** -- Audit and fix unquoted `$VARIABLE` in command arguments across all scripts -- Tests: shellcheck passes on all modified scripts - -**S13 — Extract hardcoded IPs to config** -- Create `scripts/deploy-config-defaults.sh` with all node IPs as named variables -- Source from all scripts instead of hardcoding -- Tests: changing IP in config → all scripts use new IP - -### Week 7: Infrastructure Hardening - -**I2 — Systemd resource limits** -- File: `image-recipe/configs/archipelago.service` -- Add: `MemoryMax=4G`, `LimitNOFILE=65535`, 
`TasksMax=2048` -- Tests: `systemctl show archipelago` confirms limits applied, service starts normally - -**I3 — Tor rotation transition period** -- File: `core/archipelago/src/api/rpc/tor.rs` -- Keep old hidden service running for 24h after rotation. Both addresses active. Notify peers of new address. Schedule old deletion. -- Tests: after rotation old address still resolves, peers receive notification, old removed after transition - -**S14 — Input validation on deploy targets** -- Add regex validation for hostnames/IPs before SSH -- Tests: invalid hostname → clear error, valid hostname → proceeds - -**S15 — Memory limits on all deploy containers** -- File: `deploy-to-target.sh` lines 842-880 -- Add `--memory=$(mem_limit ...)` to all UI container builds -- Tests: every container in deploy has `--memory` flag - -**S17 — Disk space pre-flight** -- File: `deploy-to-target.sh` -- Check target disk <85% before deploying. Abort with clear message if full. -- Tests: deploy to 90% full disk → aborted, deploy to 50% full → succeeds - -### Week 8: Remaining P1 Backend - -**R14 — Fix .parse().unwrap() in session rate limiting** -- File: `session.rs:665,676,688` -- Replace `.parse().unwrap()` with `.parse().context("...")?` -- Tests: invalid IP handling works gracefully - -**R15 — Fix 7 unwrap/expect in mesh/protocol.rs** -- File: `mesh/protocol.rs:582,592,614,649,679,713,728` -- Replace all with `?` operator + proper error types -- Tests: protocol parsing with malformed data returns error, not panic - -**R27 — Add timeouts to mesh Bitcoin RPC calls** -- File: `mesh/mod.rs:624,649,663` -- Add `tokio::time::timeout(Duration::from_secs(10), ...)` to all Bitcoin RPC calls -- Tests: RPC timeout returns error after 10s - -**R34 — Tor rotation transition** -- (Covered by I3 above) - ---- - -## MONTH 3: PRODUCTION POLISH (Weeks 9–12) - -> Fix every remaining P2 issue — unwraps, hardcoded values, frontend quality, resilience. 
- -### Week 9: Remaining Backend Unwraps + Dead Code - -**R13 — main.rs .expect() → .context()** -- Replace 2 `.expect()` calls with `.context("...")?` and proper startup error handling - -**R16 — identity.rs .expect() → safe handling** -- Replace 2 `.expect()` in crypto operations with result propagation - -**R17+R18 — helpers unwraps** -- Fix 10 `.unwrap()` calls in `helpers/lib.rs` and `helpers/rsync.rs` -- Replace with `?` operator or `.context()` - -**R19 — js-engine unwraps** -- Fix 2 `.unwrap()` in `js-engine/lib.rs:130,249` - -**R20+R21 — Dead code elimination** -- Remove all 14 `#[allow(dead_code)]` in `mesh/mod.rs`. Either use the fields or delete them. -- Same for `lnd.rs`, `data_manager.rs`, `dev_orchestrator.rs` -- Tests: `cargo clippy` zero warnings, `cargo test` passes - -### Week 10: Hardcoded Values → Constants - -**R22 — Bitcoin RPC URL constant** -- Create `const BITCOIN_RPC_URL: &str = "http://127.0.0.1:8332/";` in a shared constants module -- Use across `bitcoin.rs`, `mesh/mod.rs`, `mesh/listener.rs` -- Tests: all Bitcoin RPC calls still work - -**R23 — DWN health URL constant** -**R24 — Update manifest URL constant** -**R25 — DNS-over-HTTPS URLs → constants array** -**R26 — DWN protocol URIs → constants** -- Centralize all hardcoded URLs/URIs into `core/archipelago/src/constants.rs` -- Tests: all modules reference constants, no hardcoded strings remain - -**R28 — LND proxy timeouts** -- Audit all 68 `.send()` calls in `api/rpc/lnd.rs`. Ensure each has explicit timeout. -- Tests: LND proxy call with unresponsive LND → timeout error, not hang - -**R29 — DWN health check timeout** -- Add timeout to `dwn_sync.rs:76` health check - -**R30-R33 — Resolve all TODOs** -- Either implement the TODO or remove the dead code path. Per project rules: no TODO/FIXME in commits. - -### Week 11: Frontend P2 Fixes - -**F8 — WebSocket reconnection race** -- Add `isReconnecting` flag. Skip if already reconnecting. 
-- Tests: rapid close events → only one reconnect attempt - -**F9 — WebSocket parse error handling** -- Count consecutive parse errors. After 3, force reconnect. -- Tests: 3 malformed messages → reconnect triggered; single bad message → logged only - -**F10 — Stale connection detection tuning** -- Require mutual pong response within 30s. Don't close valid connections that are simply quiet. -- Tests: quiet but healthy connection → stays open; no pong for 30s → reconnects - -**F11 — RPC client backoff reduction** -- Reduce default timeout from 30s to 15s. Add jitter to backoff. Cap total retry time at 20s. -- Tests: server outage → user sees error within 20s, not 40s - -**F12 — Code splitting** -- Lazy-load all routes: `() => import('./views/Web5.vue')` -- Add manual chunks in vite.config.ts for vendor/api -- Tests: build produces multiple chunks, initial bundle < 200KB gzipped - -**F13 — DOMPurify on QR v-html** -- Add DOMPurify.sanitize() to QR SVG before v-html rendering -- Tests: XSS payload in QR content → sanitized - -### Week 12: Frontend P2 Continued + Performance - -**F14 — Goals computed memoization** -- Replace O(n) alias lookup with Map. Add deep equality check. -- Tests: goalStatuses computed runs in <1ms with 100 apps - -**F15 — localStorage error handling** -- Wrap all localStorage.setItem in try/catch. Show toast on quota exceeded. -- Tests: full localStorage → toast shown, app continues - -**F16 — FileBrowser auth consolidation** -- Use cookie-only auth. Remove in-memory token. -- Tests: login persists across page reload, logout clears cookie - -**F17 — CSRF token parsing robustness** -- Add header fallback for CSRF token. Handle edge cases. -- Tests: missing cookie → falls back to header, both missing → error - -**F22 — CSS backdrop-filter mobile performance** -- Add media query: reduce blur to 8px on mobile. Remove backdrop-filter from non-visible elements. 
-- Tests: mobile Lighthouse performance score > 80 - ---- - -## MONTH 4-5: BACKEND ARCHITECTURE (Weeks 13–20) - -> Split every Rust god file. Target: no file > 500 lines. - -### Week 13–14: Split package.rs (1,795 lines) - -``` -api/rpc/package/ -├── mod.rs — Re-exports (~50 lines) -├── config.rs — get_app_config(), get_app_capabilities(), needs_archy_net() -├── lifecycle.rs — install, start, stop, restart, uninstall -├── validation.rs — Input validation, dependency checking, image validation -└── progress.rs — Progress streaming, install status tracking -``` - -Pre-split tests: test every `get_app_config()` variant, validation path, lifecycle transition -Post-split: all RPC calls return identical responses, `cargo test` passes - -### Week 15–16: Split mesh/listener.rs (1,799 lines) - -``` -mesh/listener/ -├── mod.rs — Re-exports + spawn_mesh_listener() -├── session.rs — run_mesh_session() loop -├── frames.rs — handle_frame() dispatcher -├── identity.rs — handle_identity_received(), handle_typed_message() -├── sync.rs — sync_queued_messages(), store_typed_message() -└── bitcoin.rs — Bitcoin relay operations, RPC calls -``` - -### Week 17–18: Split rpc/mod.rs (1,092 lines) + lnd.rs (1,068 lines) - -**rpc/mod.rs** → `dispatcher.rs` (method routing), `middleware.rs` (CSRF/session/rate-limit), `response.rs` (response building) - -**lnd.rs** → `lnd/wallet.rs`, `lnd/channels.rs`, `lnd/info.rs`, `lnd/payments.rs` - -### Week 19–20: Split monitoring (993), handler (911), mesh (865) - -Split each into sub-modules. Target: no file > 500 lines. -All pre-split tests, all post-split verification. - ---- - -## MONTH 6-8: FRONTEND ARCHITECTURE (Weeks 21–32) - -> Split every Vue god component. Target: no component > 500 lines. 
- -### Week 21–22: Split Web5.vue (3,940 lines → 8 sub-views) - -``` -views/web5/ -├── Web5.vue — Router shell (~150 lines) -├── Web5Identity.vue — DID management -├── Web5Wallet.vue — Wallet operations -├── Web5Nostr.vue — Nostr relays/profiles -├── Web5Credentials.vue — Verifiable Credentials -├── Web5Peers.vue — P2P federation nodes -├── Web5Storage.vue — DWN storage/explorer -├── Web5Goals.vue — Goals/voting -└── Web5Marketplace.vue — Decentralized marketplace -``` - -Add nested routes. Component tests for each section. All sections render identically. - -### Week 23–24: Split Mesh.vue (2,106) + Dashboard.vue (1,819) - -**Mesh.vue** → `MeshRadio.vue`, `MeshChat.vue`, `MeshNetwork.vue`, `MeshFederation.vue` -**Dashboard.vue** → `DashboardHome.vue`, `DashboardApps.vue`, `DashboardSystem.vue` - -### Week 25–26: Split Settings.vue (1,792) + Server.vue (1,132) - -**Settings.vue** → `SettingsAccount.vue`, `SettingsSystem.vue`, `SettingsNetwork.vue`, `SettingsAppearance.vue` -**Server.vue** → `ServerOverview.vue`, `ServerContainers.vue`, `ServerLogs.vue` - -### Week 27–28: Split Marketplace.vue (1,293) + AppDetails.vue (1,036) + Home.vue (1,059) - -Each into 3-4 focused sub-components. - -### Week 29–30: Decompose useAppStore (324 lines, 16 methods) - -``` -stores/ -├── app.ts — Thin re-export for backward compat (~50 lines) -├── auth.ts — Login, logout, session, password, TOTP -├── server.ts — Server info, system stats, reboot/shutdown -├── realtime.ts — WebSocket connection, subscriptions, heartbeat -└── packages.ts — Package install/uninstall, marketplace data -``` - -Tests: every existing import of `useAppStore` still works. State transitions identical. 
- -### Week 31–32: Remaining frontend P3 issues - -**F18** — aiPermissions runtime validation -**F19** — Track AppSession timeout -**F20** — Dashboard aria-current -**F21** — Debounce search + memoize -**F23** — Branded types for DID operations -**F24** — Fix checkInterval leak - ---- - -## MONTH 9-10: SCRIPT ARCHITECTURE + ISO (Weeks 33–40) - -> Split every monolithic script. Target: no script > 400 lines. - -### Week 33–34: Create shared script library - -``` -scripts/lib/ -├── common.sh — Colors, logging, error handling, SSH helpers -├── health.sh — Health check polling, container status -├── deploy-utils.sh — Rsync, file sync, backup/restore -├── container.sh — Podman helpers, image management, mem_limit() -└── network.sh — IP validation, port checking -``` - -Tests: each library function tested in `scripts/tests/` - -### Week 35–36: Split deploy-to-target.sh (1,728 lines) - -``` -scripts/ -├── deploy-to-target.sh — Orchestrator + arg parsing (~300 lines) -├── deploy/ -│ ├── frontend.sh — Build + sync frontend -│ ├── backend.sh — Build + sync binary -│ ├── configs.sh — Sync nginx, systemd, scripts -│ ├── containers.sh — Container creation/update -│ ├── verify.sh — Post-deploy health checks -│ └── rollback.sh — Rollback on failure -``` - -### Week 37–38: Split ISO build (1,850 lines) + first-boot (855 lines) - -**build-auto-installer-iso.sh** → `build/capture-images.sh`, `build/create-rootfs.sh`, `build/install-packages.sh`, `build/bundle-configs.sh`, `build/package-iso.sh` - -**first-boot-containers.sh** → `first-boot/databases.sh`, `first-boot/bitcoin.sh`, `first-boot/lightning.sh`, `first-boot/apps.sh`, `first-boot/networking.sh` - -### Week 39–40: ISO Reproducibility + Integration Tests - -**S16 — Make ISO builds reproducible** -- Create `image-versions.env` with pinned digests for every container image -- ISO build sources this file, never pulls `:latest` -- Build manifest records exactly what shipped -- Tests: two consecutive ISO builds produce identical 
image sets - -**E2E smoke test script** -```bash -# scripts/smoke-test.sh — Run against .198 -# 1. curl /health → OK -# 2. Login → get session -# 3. Get server info → valid JSON -# 4. List containers → all healthy -# 5. Check every /app/* proxy → responds -# 6. Check Tor hidden service → resolves -# 7. Check WebSocket upgrade → 101 -# Exit 0 only if all pass -``` - ---- - -## MONTH 11: INTEGRATION TESTS (Weeks 41–44) - -> Comprehensive test suites that prove everything works. - -### Week 41–42: Backend Integration Tests - -``` -core/archipelago/tests/ -├── test_auth_flow.rs — Login → session → CSRF → auth request → logout -├── test_container_lifecycle.rs — Install → start → health → stop → uninstall -├── test_federation.rs — Generate invite → join → sync → verify -├── test_rpc_validation.rs — Every endpoint with invalid input → proper error -├── test_session_persist.rs — Create session → restart → session survives -├── test_rate_limiting.rs — Flood → 429 → wait → allowed -├── test_backup_restore.rs — Create → verify → restore → validate -├── test_health_endpoint.rs — Healthy → degraded → recovery -``` - -Target: 25+ backend integration tests passing - -### Week 43–44: Frontend Integration Tests - -``` -neode-ui/src/__tests__/integration/ -├── auth-flow.spec.ts — Login → dashboard → timeout → redirect -├── app-lifecycle.spec.ts — Marketplace → install → progress → launch → uninstall -├── websocket.spec.ts — Connect → update → disconnect → reconnect → state consistent -├── settings-flow.spec.ts — Change password → re-login → 2FA setup → verify -├── spotlight.spec.ts — Open → search → navigate → close -├── mesh-chat.spec.ts — Connect → send → receive → disconnect -├── error-handling.spec.ts — Network error → toast → retry → success -├── code-splitting.spec.ts — Route navigation → chunks loaded lazily -``` - -Target: 20+ frontend integration tests passing - ---- - -## MONTH 12: TYPE SYNC + CI/CD PLAN (Weeks 45–48) - -### Week 45–46: Rust↔TypeScript Type Sync - 
-**Approach**: `ts-rs` crate to auto-generate TypeScript types from Rust structs - -1. Add `ts-rs` to `core/models/Cargo.toml` -2. Add `#[derive(TS)]` to all API request/response types -3. Build script generates `neode-ui/src/types/generated.ts` -4. Replace manual types in `types/api.ts` with imports from generated file -5. Verification: regenerate → diff → must be zero (types committed) - -Tests: frontend type-check passes with generated types, manual api.ts reduced to non-API types - -### Week 47–48: CI/CD Planning (Document Only — Execute Later) - -> This section is the PLAN for CI/CD. Do not execute during this phase. Document everything needed so it can be implemented in a future sprint. - -**CI Pipeline Design** (`.github/workflows/ci.yml`): - -```yaml -# Triggers: push to main, all PRs -# Jobs: -# rust-checks (Linux runner): -# - cargo clippy --all-targets --all-features (zero warnings gate) -# - cargo fmt --all -- --check (formatting gate) -# - cargo test --all-features (all tests gate) -# -# frontend-checks (Node 20): -# - npm run type-check (TypeScript strictness gate) -# - npm run lint (ESLint gate) -# - npm test (Vitest suite gate) -# -# integration (Linux runner, optional): -# - scripts/smoke-test.sh against staging -# -# Merge policy: all checks must pass before merge -# Branch protection: require PR, require checks, no force push to main -``` - -**Release Pipeline Design** (`.github/workflows/release.yml`): -```yaml -# Triggers: tag push (v*) -# Jobs: -# build-linux-binary: -# - Cross-compile Rust for x86_64 + ARM64 -# build-frontend: -# - npm run build -# build-iso: -# - SSH to build server, run ISO build -# - Upload ISO as release asset -# smoke-test: -# - Boot ISO in QEMU -# - Run smoke-test.sh -# - Gate release on pass -``` - -**Pre-requisites to implement**: -- [ ] GitHub Actions runner with Rust toolchain + cross-compilation -- [ ] Node.js 20 runner for frontend -- [ ] SSH key for build server accessible from CI -- [ ] Branch protection rules 
configured -- [ ] Image digest manifest for reproducible ISO builds -- [ ] QEMU-based ISO verification script - -**Estimated implementation time**: 2 weeks when ready to execute - ---- - -## VERIFICATION PROTOCOL (Every Week) - -1. `cargo clippy --all-targets --all-features` — zero warnings -2. `cargo fmt --all` -3. `cargo test --all-features` — all pass -4. `cd neode-ui && npm run type-check` — zero errors -5. `cd neode-ui && npm test` — all pass -6. `./scripts/deploy-to-target.sh --target 192.168.1.198` — **ONLY .198** -7. `curl http://192.168.1.198/health` — returns OK with service status -8. Navigate all affected views in browser — identical behavior -9. Atomic commit: `refactor: …` or `fix: …` (prefix followed by a short summary of the change) - ---- - -## EXIT CRITERIA (Month 12 Complete) - -### Reliability (Zero Tolerance) -- [ ] Health endpoint returns real service status -- [ ] All async operations have bounded timeouts -- [ ] Zero blocking I/O in async context (no std::fs in async functions) -- [ ] Zero .unwrap()/.expect() in production code -- [ ] All rate limiters have cleanup tasks -- [ ] Backup restore uses staging + atomic swap + rollback -- [ ] All 30 containers have health checks + memory limits -- [ ] All container images pinned to specific versions -- [ ] Nginx unauthenticated endpoints protected (timeout + rate limit + body size) -- [ ] Systemd service has resource limits -- [ ] Tor rotation preserves old address during transition -- [ ] Deploy has locking + disk check + rollback -- [ ] Zero `sudo podman` in any script -- [ ] Zero `:latest` image tags anywhere -- [ ] Zero silent error masking without logging - -### Frontend (Zero Tolerance) -- [ ] Global error handler catches and displays all errors -- [ ] WebSocket: single subscription, reconnect refreshes state, bounded retries -- [ ] All timers/listeners cleaned up on unmount -- [ ] Code splitting: initial bundle < 200KB gzipped -- [ ] v-html always uses DOMPurify -- [ ] All localStorage operations wrapped in try/catch - -### Architecture 
(Target: File Size Limits) -- [ ] No Rust file > 500 lines (excluding generated code) -- [ ] No Vue component > 500 lines -- [ ] No shell script > 400 lines -- [ ] No Pinia store has more than 1 responsibility -- [ ] All hardcoded URLs/ports extracted to constants -- [ ] Shared script library eliminates duplication -- [ ] TypeScript types auto-generated from Rust structs - -### Testing -- [ ] 25+ backend integration tests passing -- [ ] 20+ frontend integration tests passing -- [ ] E2E smoke test script passes on .198 -- [ ] ISO builds are reproducible (pinned digests) - -### CI/CD (Planned, Not Executed) -- [ ] CI pipeline design documented -- [ ] Release pipeline design documented -- [ ] Pre-requisites list complete -- [ ] Ready for 2-week implementation sprint - -### Zero Behavior Changes -Every feature works identically. Every existing test passes. Every user flow unchanged. diff --git a/.claude/plans/polished-napping-squid.md b/.claude/plans/polished-napping-squid.md deleted file mode 100644 index 85d1a13d..00000000 --- a/.claude/plans/polished-napping-squid.md +++ /dev/null @@ -1,108 +0,0 @@ -# Meshcore Mesh Networking — Phase 1 Implementation Plan - -## Context - -Adding mesh networking to Archipelago using Heltec V3 devices running Meshcore firmware (Companion USB). Two nodes (.228 and .198) will exchange encrypted identity and text messages over LoRa radio with no internet required. The existing `mesh.rs` wraps the Meshtastic CLI — this replaces it with a native Meshcore serial protocol driver. 
- -## Architecture - -Convert `mesh.rs` into `mesh/` module directory: - -``` -core/archipelago/src/mesh/ -├── mod.rs — Public API, MeshService, config (migrated from mesh.rs) -├── types.rs — MeshPeer, MeshMessage, MeshStatus, DeviceType -├── protocol.rs — Meshcore binary frame protocol (encode/decode/commands) -├── serial.rs — MeshcoreDevice: async serial driver (serial2-tokio) -├── crypto.rs — X25519 ECDH + ChaCha20-Poly1305 per-message encryption -└── listener.rs — Background tokio task: serial reader + message dispatcher -``` - -Frontend: -``` -neode-ui/src/stores/mesh.ts — Pinia store -neode-ui/src/views/Mesh.vue — Mesh status, peers, messaging UI -``` - -## Dependency - -Add to `core/archipelago/Cargo.toml`: -```toml -serial2-tokio = "0.1" -``` - -All crypto deps already present (chacha20poly1305, ed25519-dalek, curve25519-dalek). - -## Meshcore Protocol Summary - -- **Frame format**: `>` + 2-byte LE length + data (outbound), `<` + 2-byte LE length + data (inbound) -- **Baud**: 115200, 8N1 -- **Max message**: 160 bytes -- **Init sequence**: CMD_DEVICE_QUERY (0x16) -> CMD_APP_START (0x01) -> CMD_SET_DEVICE_TIME (0x06) -- **Key commands**: SEND_TXT_MSG (0x02), SEND_CHANNEL_TXT_MSG (0x03), GET_CONTACTS (0x04), SYNC_NEXT_MESSAGE (0x0A), SEND_SELF_ADVERT (0x07) -- **Push events** (async, >=0x80): NEW_CONTACT (0x8A), ACK (0x82), MESSAGES_WAITING (0x83) - -## Encryption Design - -Reuses existing identity.rs X25519 key agreement: -1. Nodes broadcast identity on mesh channel: `ARCHY:1:{did}:{ed25519_pubkey}:{x25519_pubkey}` -2. Receiving node derives shared secret: X25519(our_secret, their_x25519_pub) -3. All DMs encrypted: ChaCha20-Poly1305 with random 12-byte nonce -4. 
Wire format: [nonce 12B] + [ciphertext] + [tag 16B] — fits in 160B limit for ~130B plaintext - -## RPC Endpoints - -| Method | Action | -|--------|--------| -| `mesh.status` | Device + mesh status (updated) | -| `mesh.peers` | **NEW** — list discovered mesh peers | -| `mesh.messages` | **NEW** — get message history (last 100) | -| `mesh.send` | **NEW** — send encrypted message to peer | -| `mesh.broadcast` | Broadcast identity (updated for Meshcore) | -| `mesh.configure` | Update config (updated) | - -## Implementation Steps - -1. **Create mesh/ module, migrate existing code** — types.rs + mod.rs from mesh.rs -2. **protocol.rs** — Binary frame encode/decode, command builders, response parsers + unit tests -3. **crypto.rs** — X25519 ECDH + ChaCha20-Poly1305 encrypt/decrypt + unit tests -4. **serial.rs** — MeshcoreDevice with open/init/send/recv + device auto-detection -5. **listener.rs** — Background task: serial reader, peer cache, message store, reconnect -6. **mod.rs MeshService** — Wraps listener + config, start/stop lifecycle -7. **Update RPC handlers** — New endpoints, wire MeshService into RpcHandler -8. **Update RPC dispatch** — Add routes in mod.rs ~line 622 -9. **Frontend store + view** — mesh.ts Pinia store, Mesh.vue with glass-card UI, router + nav -10. 
**Deploy + test** — Deploy to .228 and .198, plug in Heltec V3s, test end-to-end - -## Key Files to Modify - -- `core/archipelago/src/mesh.rs` -> delete, replace with `mesh/` directory -- `core/archipelago/src/api/rpc/mesh.rs` — update handlers -- `core/archipelago/src/api/rpc/mod.rs` — add routes (~line 622) -- `core/archipelago/Cargo.toml` — add serial2-tokio -- `neode-ui/src/router/index.ts` — add /dashboard/mesh route -- `neode-ui/src/views/Dashboard.vue` — add Mesh nav item - -## Reusable Existing Code - -- `identity.rs` lines 140-152: Ed25519 -> X25519 conversion (CompressedEdwardsY -> Montgomery) -- `identity.rs` `pubkey_bytes_from_did_key()`: extract raw pubkey from DID string -- `node_message.rs` pattern: IncomingMessage store with max 100 circular buffer -- `mesh.rs` `MeshConfig` + `load_config`/`save_config`: migrate directly into mod.rs -- `mesh.rs` `detect_meshtastic_devices()`: keep as fallback, add Meshcore probe-based detection - -## Prerequisites - -- Flash both Heltec V3 with Meshcore **Companion USB** role -- Add `archipelago` user to `dialout` group: `usermod -aG dialout archipelago` -- Connect Heltec V3 to USB on .228 and .198 - -## Verification - -1. `cargo clippy --all-targets` passes with zero warnings -2. Unit tests pass: protocol encode/decode, crypto encrypt/decrypt roundtrip -3. Device detected on /dev/ttyUSB0 or /dev/ttyACM0 -4. Init handshake completes (visible in tracing logs) -5. Identity broadcast from .228, received on .198 -6. Encrypted DM sent .228 -> .198, decrypted and visible in UI -7. 
Mesh.vue shows device status, peer list, message history diff --git a/.claude/plans/prancy-scribbling-pnueli.md b/.claude/plans/prancy-scribbling-pnueli.md deleted file mode 100644 index 402d266b..00000000 --- a/.claude/plans/prancy-scribbling-pnueli.md +++ /dev/null @@ -1,80 +0,0 @@ -# Plan: Demo Seeding, Dev Environment Fix, and Developer Onboarding - -## Context -After the repo cleanup (docs/scripts archived to `~/Projects/archy-archive/`), several dev scripts reference deleted files. Additionally, the demo needs better seeding for Portainer showcase, ThunderHub + Fedimint need to be visible, and a new developer needs docs to onboard. - -## Changes - -### 1. Fix broken dev scripts - -**`neode-ui/start-dev.sh`** — Remove lines 72-110 (Docker Desktop check + `start-docker-apps.sh` call). Replace with a one-liner noting mock backend handles simulation. - -**`neode-ui/stop-dev.sh`** — Remove lines 66-74 (Docker container stop block calling `stop-docker-apps.sh`). - -**`neode-ui/package.json`** — Remove the `prebuild` script (line 22) that references archived `../../loop-start.mp3`. File already exists at `public/assets/audio/`. - -**`scripts/dev-start.sh`** — Fix option 2 (Full Stack) lines 67-84 that reference `start-docker-apps.sh`. Guard with a skip message instead of failing. - -### 2. Add ThunderHub (Lightning management UI) - -**Files**: mock-backend.js, Marketplace.vue, appLauncher.ts, new icon SVG - -- Port: **3010** (3000 taken by Grafana) -- Docker image: `apotdevin/thunderhub:v0.13.31` -- Add to `portMappings`, `marketplaceMetadata`, `staticDevApps`, `marketplace.get()` in mock-backend.js -- Add to `getCuratedAppList()` in Marketplace.vue (after LND entry) -- Add to `recommended` tier in `getAppTier()` -- Add `'3010': 'thunderhub'` to PORT_TO_APP_ID in appLauncher.ts -- Create `neode-ui/public/assets/img/app-icons/thunderhub.svg` (Bitcoin-orange lightning bolt icon) - -### 3. 
Improve Fedimint in demo - -**mock-backend.js**: -- Add `fedimint` to `staticDevApps` (pre-installed, running, port 8175) -- Update `marketplace.get()` version from `0.4.3` → `0.10.0` -- Fix `portMappings.fedimint` from 8174 → 8175 (Guardian UI port) - -### 4. Add realistic notifications - -**mock-backend.js** — Replace empty `node.notifications` with 5 realistic entries: Bitcoin sync, LND channel opened, disk warning, system update, Fedimint guardian connected. - -### 5. Rewrite README for developer onboarding - -**`neode-ui/README.md`** — Full rewrite: -- Quick start (npm install, npm start, localhost:8100, password123) -- Architecture overview -- Dev modes (setup/onboarding/existing/boot) -- Mock backend capabilities (8 static apps, 30+ marketplace, WebSocket, FileBrowser API, Claude proxy) -- Demo deployment (docker-compose.demo.yml, Portainer, ANTHROPIC_API_KEY) -- Design system (glassmorphism classes, tokens) -- Build commands -- Remove Angular references and outdated sections - -**`neode-ui/DEV-SCRIPTS.md`** — Update "Available Test Apps" section to list the 8 actual static apps, remove Docker apps references. - -### 6. Verify Docker demo build - -Confirm `docker-compose.demo.yml` paths still valid after cleanup: -- `demo/aiui/` exists (for Dockerfile.web COPY) -- `neode-ui/docker/nginx-demo.conf` exists -- `neode-ui/docker/docker-entrypoint.sh` exists - -## Files to modify -1. `neode-ui/start-dev.sh` -2. `neode-ui/stop-dev.sh` -3. `neode-ui/package.json` -4. `scripts/dev-start.sh` -5. `neode-ui/mock-backend.js` -6. `neode-ui/src/views/Marketplace.vue` -7. `neode-ui/src/stores/appLauncher.ts` -8. `neode-ui/public/assets/img/app-icons/thunderhub.svg` (new) -9. `neode-ui/README.md` -10. `neode-ui/DEV-SCRIPTS.md` - -## Verification -1. `cd neode-ui && npm start` — should start cleanly, no errors about missing scripts -2. Visit localhost:8100 → login → Dashboard shows 8 apps (bitcoin, lnd, electrs, mempool, lorabell, filebrowser, thunderhub, fedimint) -3. 
Marketplace shows ThunderHub in Bitcoin category -4. Notifications bell shows 3 unread -5. `npm stop` — clean shutdown, no errors -6. `docker compose -f docker-compose.demo.yml build` — builds successfully diff --git a/.claude/plans/reflective-meandering-castle.md b/.claude/plans/reflective-meandering-castle.md deleted file mode 100644 index 0790115e..00000000 --- a/.claude/plans/reflective-meandering-castle.md +++ /dev/null @@ -1,145 +0,0 @@ -# Expand AIUI Node Capabilities - -## Context -AIUI currently sees basic app status and file names but can't read files, check Bitcoin/LND details, or view app logs. Expanding these 4 capabilities makes AIUI a truly useful node assistant. - ---- - -## 1. File Reading (frontend-only) [DONE] - -### `neode-ui/src/api/filebrowser-client.ts` -Add `readFileAsText(path, maxBytes = 102400)` method: -- Fetch from existing `/app/filebrowser/api/raw{path}?auth={token}` endpoint -- Limit response to 100KB (truncate with note) -- Only allow text-like extensions: `.txt`, `.md`, `.json`, `.csv`, `.log`, `.conf`, `.yaml`, `.yml`, `.toml`, `.xml`, `.html`, `.css`, `.js`, `.ts`, `.py`, `.sh` -- Return `{ content: string, truncated: boolean, size: number }` - -### `neode-ui/src/types/aiui-protocol.ts` -Add `'read-file'` and `'tail-logs'` to `AIActionType` union. - -### `neode-ui/src/services/contextBroker.ts` -Add `read-file` action handler: -- Check `files` permission is enabled -- Validate path param exists, validate extension -- Call `fileBrowserClient.readFileAsText(path)` -- Return content in action response - -### `AIUI/packages/app/src/composables/useArchy.ts` -- Add `readFile(path: string)` helper that calls `archyBridge.requestAction('read-file', { path })` -- Update `buildArchyContext()` files section: mention "You can read file contents by requesting the read-file action with a file path." - ---- - -## 2. 
App Log Viewing (frontend-only) [DONE] - -### `neode-ui/src/services/contextBroker.ts` -Add `tail-logs` action handler: -- Check `apps` permission is enabled -- Params: `{ appId: string, lines?: string }` (default 50, max 200) -- Call existing `rpcClient.call({ method: 'container-logs', params: { app_id, lines } })` -- Return log lines in action response - -### `AIUI/packages/app/src/composables/useArchy.ts` -- Add `tailLogs(appId: string, lines?: number)` helper -- Update `buildArchyContext()` apps section: "You can view recent app logs by requesting the tail-logs action with an appId." - ---- - -## 3. Bitcoin Deep Data (backend + frontend) [DONE] - -### `core/archipelago/src/api/rpc/mod.rs` -Add routing: `"bitcoin.getinfo" => self.handle_bitcoin_getinfo().await` - -### New: `core/archipelago/src/api/rpc/bitcoin.rs` -Add `handle_bitcoin_getinfo()`: -- Use `reqwest` to POST to `http://127.0.0.1:8332` with Basic Auth `archipelago:archipelago123` -- Call `getblockchaininfo` JSON-RPC method -- Call `getmempoolinfo` JSON-RPC method -- Return sanitized JSON: -```json -{ - "block_height": 800000, - "sync_progress": 0.9999, - "chain": "main", - "difficulty": 72006146, - "mempool_size": 45000000, - "mempool_tx_count": 12500, - "verification_progress": 0.9999 -} -``` -- Handle connection errors gracefully (Bitcoin Core might be syncing or down) - -### `neode-ui/src/services/contextBroker.ts` -Enrich `bitcoin` category sanitizer: -- Call `rpcClient.call({ method: 'bitcoin.getinfo' })` -- Merge with existing container status data -- Return block height, sync %, chain, mempool stats - -### `AIUI/packages/app/src/composables/useArchy.ts` -- Add `bitcoinInfo` ref with block height, sync %, etc. -- Update `buildArchyContext()`: "**Bitcoin:** Block 800,000 (99.99% synced), mainnet, mempool: 12,500 txs" - ---- - -## 4. 
LND Deep Data (backend + frontend) [DONE] - -### `core/archipelago/src/api/rpc/mod.rs` -Add routing: `"lnd.getinfo" => self.handle_lnd_getinfo().await` - -### New: `core/archipelago/src/api/rpc/lnd.rs` -Add `handle_lnd_getinfo()`: -- Read admin macaroon from `/var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/admin.macaroon` -- Use `reqwest` to GET `https://127.0.0.1:8080/v1/getinfo` with `Grpc-Metadata-macaroon` header (hex-encoded) -- GET `https://127.0.0.1:8080/v1/balance/channels` for channel balance -- GET `https://127.0.0.1:8080/v1/balance/blockchain` for on-chain balance -- Accept self-signed cert (`reqwest::Client::builder().danger_accept_invalid_certs(true)`) -- Return sanitized JSON: -```json -{ - "alias": "my-node", - "num_active_channels": 5, - "num_peers": 8, - "synced_to_chain": true, - "block_height": 800000, - "balance_sats": 1500000, - "channel_balance_sats": 3000000, - "pending_open_balance": 0 -} -``` -- **Never expose**: private keys, seed, macaroon, node pubkey (optional — could include for identification) -- Handle errors: LND might be locked, syncing, or not installed - -### `neode-ui/src/services/contextBroker.ts` -Enrich `wallet` category: -- Call `rpcClient.call({ method: 'lnd.getinfo' })` -- Return alias, channels, balances, sync status - -### `AIUI/packages/app/src/composables/useArchy.ts` -- Add `lndInfo` ref -- Update `buildArchyContext()`: "**Lightning:** 5 channels, 3M sats in channels, 1.5M on-chain, synced" - ---- - -## File Summary - -| File | Change | -|------|--------| -| `neode-ui/src/api/filebrowser-client.ts` | Add `readFileAsText()` | -| `neode-ui/src/types/aiui-protocol.ts` | Add `read-file`, `tail-logs` action types | -| `neode-ui/src/services/contextBroker.ts` | Add 2 action handlers + enrich bitcoin/wallet categories | -| `neode-ui/src/stores/aiPermissions.ts` | Update category descriptions | -| `core/archipelago/src/api/rpc/mod.rs` | Add 2 route entries | -| `core/archipelago/src/api/rpc/bitcoin.rs` | New: Bitcoin Core 
RPC proxy | -| `core/archipelago/src/api/rpc/lnd.rs` | New: LND REST proxy | -| `AIUI/packages/app/src/composables/useArchy.ts` | Add helpers + enrich buildArchyContext() | - -## Verification -1. `cd neode-ui && npm run build` — frontend builds -2. `./scripts/deploy-to-target.sh --live` — deploys + builds Rust backend on server -3. Test in AIUI chat: - - "What files do I have?" → sees file list - - "Read my config.txt" → gets file content - - "How's my Bitcoin node?" → block height, sync %, mempool - - "What's my Lightning balance?" → channel count, sats balance - - "Why is Mempool not working?" → views recent logs - - "Show me the last 50 lines of Bitcoin logs" → log output diff --git a/.claude/plans/rosy-floating-lightning.md b/.claude/plans/rosy-floating-lightning.md deleted file mode 100644 index 93d50e34..00000000 --- a/.claude/plans/rosy-floating-lightning.md +++ /dev/null @@ -1,174 +0,0 @@ -# Plan: Optimize Claude Code Instructions for Maximum Coding Performance - -## Context - -### The Problem -Research across Anthropic's official docs, engineering blog, GitHub issues, and academic papers converges on one finding: **instruction overload degrades Claude's coding performance**. The more tokens consumed by rules/instructions, the less attention and context remain for actual code generation. 
- -Key evidence: -- Anthropic official docs: *"Bloated CLAUDE.md files cause Claude to ignore your actual instructions!"* -- Boris Cherny (Claude Code creator) uses ~100 lines / ~2,500 tokens for his CLAUDE.md -- Research (Jaroslawicz et al., 2025): instruction compliance decreases linearly as count increases; frontier models plateau at ~150-200 instructions; Claude Code's system prompt already uses ~50 -- "Lost in the Middle" (Stanford, 2024): LLMs exhibit U-shaped attention — middle content gets least attention -- Anthropic engineering blog: *"Find the smallest possible set of high-signal tokens that maximize the likelihood of some desired outcome"* -- Aggressive language (BANNED, NEVER, CRITICAL, Non-Negotiable) overtriggers on Claude 4.5/4.6 — Anthropic explicitly recommends dialing it back -- Multiple GitHub issues (15443, 28158, 16073, 34197) document systematic instruction ignoring with large CLAUDE.md files - -### Current State (Archy Project) - -**Always-loaded instruction payload:** -| Source | Lines | Chars | Est. Tokens | -|--------|-------|-------|-------------| -| Global CLAUDE.md | 97 | 5,624 | ~1,400 | -| Project CLAUDE.md | 130 | 5,270 | ~1,300 | -| 5 rules files | 119 | 5,123 | ~1,280 | -| MEMORY.md index | 16 | 1,099 | ~275 | -| 33 skill descriptions (system) | ~300 | ~13,200 | ~3,300 | -| **Total always-loaded** | **~662** | **~30,316** | **~7,555** | - -Plus ~10 memory files (~290 lines, ~19K chars) loaded on relevance, and 33 skills totaling ~122K chars loaded on demand. - -### Key Problems Identified - -1. **Global CLAUDE.md is ~60% things Claude already knows** — "Comment WHY not WHAT," "Functions under 50 lines," "Zero compiler warnings" are standard practices Claude follows without being told -2. **Anti-Hallucination section (28 lines) restates built-in behavior** — package verification is in Claude's training -3. **Redundancy across files** — security rules appear in global CLAUDE.md + crypto.md + api.md + project CLAUDE.md (4x) -4. 
**Aggressive language throughout** — "BANNED," "Non-Negotiable," "MANDATORY," "NEVER" — Anthropic says this causes overtriggering on current models -5. **Project CLAUDE.md duplicates rules files** — Frontend section repeats frontend.md, Security section repeats crypto.md + api.md -6. **Philosophy section is ~30 lines that don't affect code generation** — Claude won't suggest altcoins or proprietary deps regardless - -### What We Preserve (per user request) -- All deploy commands, build commands, SSH access, CI/CD info -- All infrastructure keys/addresses/IPs -- Security and quality architecture rules that prevent real mistakes -- All memory files and feedback (operational learnings) -- All skills (they already use progressive disclosure correctly) - ---- - -## The Plan - -### Principle: Every line must prevent a specific mistake Claude would otherwise make - -If Claude would do the right thing without the instruction -> delete it. -If Claude does the wrong thing even with the instruction -> make it a hook. -If it only matters for specific files -> scope it with globs in rules/. 
- -### Step 1: Rewrite Global CLAUDE.md (~97 -> ~35 lines) - -**Remove (Claude already knows these):** -- "Comment WHY not WHAT" — standard practice -- "Functions under 50 lines, single responsibility" — standard practice -- "Zero compiler warnings, zero linter errors" — standard practice -- "Remove dead code entirely" — standard practice -- "Deploy and verify changes" — project-specific, belongs in project CLAUDE.md -- Entire "Core Principles" enumeration (5 items) — the one-line philosophy header covers it -- "Encryption first" details — covered by crypto.md rules file -- Most of "Anti-Hallucination" section (28 lines) — Claude already verifies packages; keep only "cross-reference existing deps" which is non-obvious -- "Code Sourcing: What to avoid" items 3-4 — too specific, rarely triggered - -**Keep (prevents real mistakes):** -- Bitcoin-only stance (1 line) — prevents suggesting altcoin libs -- Open source preference (1 line) -- Code sourcing core rules (no vibe-code repos, no vendoring without approval) -- Dependency selection order (rustls > openssl, etc.) — non-obvious preferences -- Security standards not in rules files (never commit secrets, pin versions) -- Project ecosystem listing — useful cross-project context -- Atomic commit format - -**Rewrite style:** Calm, direct. No MANDATORY, no bold on every line. - -### Step 2: Rewrite Project CLAUDE.md (~130 -> ~75 lines) - -**Remove (duplicated in scoped rules files):** -- Frontend section (lines 70-77) — exact duplicate of .claude/rules/frontend.md -- Security section (lines 87-94) — duplicates crypto.md + api.md + containers.md -- "See .claude/rules/ for detailed..." pointer — Claude loads them automatically - -**Remove (Claude already knows):** -- "No unwrap()/expect() — use ? with .context()" — standard Rust practice -- "tracing for logging, never println!" 
— standard practice -- "tokio runtime" — obvious from the codebase - -**Keep and tighten (all non-obvious, prevents real mistakes):** -- Overview + Stack (essential context) -- Beta freeze status (active project constraint) -- Quick Reference commands (frequently used, non-guessable) -- Infrastructure table (IPs, keys, remotes — user explicitly wants these) -- Architecture diagram (essential mental model) -- Critical Rules (5 items — all non-obvious) -- Backend: only non-obvious rules (bind 127.0.0.1, path validation, timeouts) -- ISO Build commands (operational knowledge) -- App Integration Checklist (prevents real mistakes) -- Git conventions (one line) - -### Step 3: Tone Adjustment (all files) - -Per Anthropic's explicit guidance for Claude 4.5/4.6: - -| Before | After | -|--------|-------| -| `.gradient-button` is BANNED | Use `.glass-button` for all buttons, not `.gradient-button` | -| Non-Negotiable | _(remove header, rules speak for themselves)_ | -| MANDATORY checks | _(remove, rules are clear)_ | -| NEVER use floating point | Sats are always integers (`u64`/`BigInt`), not floats | -| NEVER build Rust on macOS | Do not build Rust on macOS — deploy script handles cross-compilation | - -This is not cosmetic — Anthropic docs state aggressive language causes overtriggering. - -### Step 4: Tighten Rules Files - -- **frontend.md** — Tone adjustment only (already 8 good rules, glob-scoped) -- **containers.md** — Reorder critical rules to top, tone adjustment. Keep UID table and systemd requirements (genuine lookup references) -- **api.md, bitcoin.md, crypto.md** — Tone adjustment only (already concise and glob-scoped) - -### Step 5: Clean Up Memory Index - -- Fix duplicate Session 2026-03-28 entry in MEMORY.md -- Add missing entries for untracked files (feedback_asset_workflow.md, project_iso_size_reduction.md, etc.) -- All memory file content preserved as-is - -### Step 6: No Changes To - -- **Skills** — Load on demand (correct architecture). 
33 skill descriptions at ~100 tokens each is the design intent. -- **Hooks** — Already well-structured. -- **Settings** — Good as-is. -- **Rules file glob scoping** — Already correct. - ---- - -## Expected Impact - -| Metric | Before | After | Reduction | -|--------|--------|-------|-----------| -| Global CLAUDE.md | 97 lines / 5,624 chars | ~35 lines / ~2,100 chars | 64% | -| Project CLAUDE.md | 130 lines / 5,270 chars | ~75 lines / ~3,200 chars | 42% | -| Rules files | 119 lines / 5,123 chars | ~115 lines / ~5,000 chars | 3% | -| **Total always-loaded** | **346 lines / 16,017 chars** | **~225 lines / ~10,300 chars** | **35%** | - -Key outcomes: -- Every remaining line prevents a specific, real mistake -- No redundancy between files -- Calm, direct tone matched to current model behavior -- Critical rules at top/bottom of files (exploits primacy/recency attention bias) -- ~1,400 tokens freed for actual code context per session - -## Files to Modify - -1. `/Users/dorian/.claude/CLAUDE.md` — Rewrite (97 -> ~35 lines) -2. `/Users/dorian/Projects/archy/CLAUDE.md` — Rewrite (130 -> ~75 lines) -3. `/Users/dorian/Projects/archy/.claude/rules/frontend.md` — Tone adjustment (BANNED -> positive) -4. `/Users/dorian/Projects/archy/.claude/rules/containers.md` — Reorder + tone -5. `/Users/dorian/.claude/rules/bitcoin.md` — Tone adjustment -6. `/Users/dorian/.claude/rules/crypto.md` — Tone adjustment -7. `/Users/dorian/.claude/projects/-Users-dorian-Projects-archy/memory/MEMORY.md` — Fix index - -## Verification - -1. Start a new Claude Code session on archy -2. Check infrastructure IPs, SSH keys, deploy commands are all accessible -3. Ask Claude to write a Vue component — should follow glass-button, script setup, style.css -4. Ask Claude to write Rust backend code — should use ?, bind 127.0.0.1 -5. Ask Claude about deploying — should know deploy-to-target.sh, .228, .198 -6. Ask Claude to add a container — should follow rootless Podman, UID mapping -7. 
Observe: faster responses, less hedging, more focused output diff --git a/.claude/plans/sequential-jingling-moth.md b/.claude/plans/sequential-jingling-moth.md deleted file mode 100644 index 571f03ca..00000000 --- a/.claude/plans/sequential-jingling-moth.md +++ /dev/null @@ -1,244 +0,0 @@ -# Manage — Claude Code Configuration Dashboard - -## Context - -You have 77 skills, 15 hooks, 17 memory files, 19 plans, and settings across 5 projects + global scope. All stored as flat files (markdown with YAML frontmatter, JSON, bash scripts) under `~/.claude/` and `{project}/.claude/`. Currently the only way to manage these is manually editing files. This project creates a visual web dashboard for browsing, creating, editing, and organizing all of it. - -**Project location**: `/Users/dorian/Projects/Manage` -**Stack**: Vue 3 + Vite + TypeScript + Tailwind + Pinia (frontend) + Express + tsx (backend) -**Design**: Glassmorphism dark theme (matching Archipelago aesthetic) - ---- - -## Architecture - -``` -Browser (localhost:5173) Express Server (localhost:3141) -+-----------------------+ +----------------------------+ -| Vue 3 SPA | fetch | /api/projects | -| +-- Dashboard | ------> | /api/skills (CRUD) | -| +-- Skills | | /api/hooks (CRUD) | -| +-- Hooks | SSE | /api/memory (CRUD) | -| +-- Memory | <------ | /api/plans (CRUD) | -| +-- Plans | | /api/settings (R/W) | -| +-- Settings | | /api/claude-md (R/W) | -| +-- CLAUDE.md | | /api/search | -+-----------------------+ | /api/events (SSE) | - +-------------+--------------+ - | chokidar - +-------------v--------------+ - | ~/.claude/ | - | ~/Projects/*/.claude/ | - +----------------------------+ -``` - -Single command start: `npm start` runs both server + Vite via concurrently. 
- ---- - -## Phase 1: Foundation — Project Setup + Dashboard - -### 1.1 Scaffold project -- `npm create vite@latest` with Vue + TypeScript -- Install deps: `express`, `cors`, `gray-matter`, `chokidar`, `concurrently`, `tsx`, `@vueuse/core`, `vue-router`, `pinia`, `fuse.js` -- Configure `vite.config.ts` with `@` alias and `/api` proxy to `:3141` -- Configure Tailwind with glassmorphism tokens from archy - -### 1.2 Design system (`src/style.css`) -- Port glassmorphism classes from `neode-ui/src/style.css`: `.glass-card`, `.glass-button`, `.path-option-card`, `.info-card`, `.scope-badge` -- New classes: `.skill-card`, `.hook-node`, `.memory-tree-item`, `.plan-progress-bar`, `.editor-panel` -- Background: `#0a0a0a`, accent: `#fb923c` - -### 1.3 Backend: Project discovery -- **`server/index.ts`** — Express on :3141 with CORS + JSON body parser -- **`server/lib/discovery.ts`** — Scan `~/Projects/` for dirs with `.claude/`, decode `~/.claude/projects/` encoded paths, count skills/hooks/memory/plans per project -- **`GET /api/projects`** — Return project list with counts - -### 1.4 Frontend: App shell + Dashboard -- **`AppShell.vue`** — Sidebar (project switcher + nav links) + router-view content area -- **`Sidebar.vue`** — "Global" at top, then project list; active project highlighted; click to switch scope -- **`Dashboard.vue`** — Stats row (total skills/hooks/memory/plans) + project cards grid -- **`ProjectCard.vue`** — Glass card showing project name, path, skill/hook/memory counts, click to select -- **`stores/projects.ts`** — Pinia store: `projects[]`, `activeProject`, `fetchProjects()`, `setActiveProject()` - -**Verify**: `npm start` opens browser, sidebar shows 5 projects + global, dashboard shows stats. 
- ---- - -## Phase 2: Skills Manager - -### 2.1 Backend -- **`server/lib/skill-parser.ts`** — Parse SKILL.md YAML frontmatter via `gray-matter`, handle both `skills/{name}/SKILL.md` (dir-based) and `skills/{name}.md` (flat) formats -- **`server/lib/fs-utils.ts`** — Safe read/write/delete/mkdir helpers with atomic writes -- **`server/routes/skills.ts`** — Full CRUD + `POST /api/skills/move` for scope transfers - -### 2.2 Frontend -- **`Skills.vue`** — Top bar: scope filter, grid/list toggle, category dropdown, search. Grid of SkillCards. FAB for "New Skill" -- **`SkillCard.vue`** — Name, description (truncated), scope badge, category color stripe, allowed-tools pills. Click opens editor. -- **`SkillEditor.vue`** — Slide-in panel: frontmatter form (name, description, category, tags, allowed-tools, disable-model-invocation toggle) + Monaco editor for markdown body + live preview -- **`InheritanceMap.vue`** — Two-column view: global skills left, project skills right, connecting lines for name-matched overrides -- **Drag-and-drop**: Drag SkillCard between global/project columns to move/copy. Uses `vue-draggable-plus`. - -**Verify**: Browse all 77 skills, create/edit/delete, drag between scopes, see inheritance. - ---- - -## Phase 3: Hooks Manager - -### 3.1 Backend -- **`server/lib/hook-parser.ts`** — Parse `settings.json` hook entries + read referenced `.sh` files. Detect orphaned scripts. -- **`server/routes/hooks.ts`** — CRUD + `PUT /toggle` for enable/disable. Creates .sh + updates settings.json atomically. 
- -### 3.2 Frontend -- **`Hooks.vue`** — Grouped by event type (PreToolUse, PostToolUse, UserPromptSubmit, Stop, SessionEnd) -- **`HookPipeline.vue`** — Visual flow per hook: `[Event Badge] -> [Matcher Pill] -> [Script Name] -> [Action]` with CSS-drawn connecting arrows -- **`HookCard.vue`** — Event type badge (color-coded), matcher, script filename, enabled/disabled toggle switch -- **`HookEditor.vue`** — Monaco editor for `.sh` script + form for event type and matcher pattern -- Orphaned scripts in "Unlinked Scripts" section with "Link" button - -**Verify**: See all 15 hooks in pipeline view, toggle enable/disable, edit scripts, create new hook. - ---- - -## Phase 4: Memory Browser - -### 4.1 Backend -- **`server/lib/memory-parser.ts`** — Parse from both locations: `{project}/.claude/memory/` (git-tracked) and `~/.claude/projects/{encoded}/memory/` (private). Parse YAML frontmatter. -- **`server/routes/memory.ts`** — CRUD + auto-sync MEMORY.md index on create/delete - -### 4.2 Frontend -- **`Memory.vue`** — Split layout: tree panel (left 300px) + content panel (right) -- **`MemoryTree.vue`** — Collapsible tree: Project -> Scope -> Type -> Files. Type badges: user (blue), feedback (orange), project (green), reference (purple) -- **`MemoryEditor.vue`** — Frontmatter form (name, description, type dropdown) + Monaco editor + markdown preview toggle -- Search input at top filters across titles and content - -**Verify**: Browse all 17 memory files in tree, types color-coded, edit with preview, create new, MEMORY.md auto-updates. - ---- - -## Phase 5: Plans Tracker - -### 5.1 Backend -- **`server/lib/plan-parser.ts`** — Extract title from `#`, phases from `##`, tasks from `- [ ]`/`- [x]` with line numbers. Calculate completion percentages. 
-- **`server/routes/plans.ts`** — CRUD + `PUT /task` for toggling single checkbox by line number - -### 5.2 Frontend -- **`PlanCard.vue`** — Title, overall progress bar, phase count, "12/47 tasks" text -- **`PlanDetail.vue`** — Expanded: title, summary, phases as sections with TaskCheckboxes -- **`PhaseBar.vue`** — Segmented bar: green (done) / amber (in-progress) / gray (pending) -- **`TaskCheckbox.vue`** — Click toggles checkbox, instant API call to update file -- "Edit Raw" switches to Monaco. "New Plan" uses overnight template. - -**Verify**: See all 19 plans with progress bars, toggle checkboxes that persist, create new plan. - ---- - -## Phase 6: Settings + CLAUDE.md Editor - -### 6.1 Settings -- **`Settings.vue`** — Scope tabs (Global / Project). Sections: - - Permissions: toggle switches for allowed tools - - Hooks: visual tree of event -> matcher -> command with add/remove - - Plugins: installed plugin cards with enable/disable - - Effort Level: dropdown - - Raw JSON: toggle to edit settings.json directly in Monaco - -### 6.2 CLAUDE.md -- **`ClaudeMd.vue`** — Scope tabs. Monaco editor with markdown syntax. Live preview panel. Unsaved changes indicator. Save button. - -**Verify**: Edit settings, toggle permissions, edit CLAUDE.md with preview, confirm files updated. - ---- - -## Phase 7: Polish — File Watching, Search, Animations - -### 7.1 Live file watching -- **`server/lib/file-watcher.ts`** — chokidar watches all `.claude/` dirs. Debounce 300ms. Push SSE events. 
-- **`useFileWatcher.ts`** composable — EventSource connection, triggers store refresh on changes - -### 7.2 Global search -- **`GET /api/search?q=bitcoin`** — Full-text across skills, memory, plans, CLAUDE.md -- **`TopBar.vue`** — Cmd+K search input with dropdown results - -### 7.3 Drag-and-drop refinement -- `vue-draggable-plus` for skills between scopes and plan task reordering - -### 7.4 Final polish -- Loading skeletons, empty states, confirm dialogs on deletes -- Keyboard shortcuts: Cmd+K (search), Cmd+S (save), Escape (close panels) -- View transitions (fade + slide) - -**Verify**: External file edits trigger UI refresh. Cmd+K searches everything. Drag skills between scopes. - ---- - -## Project Structure - -``` -Manage/ -+-- package.json -+-- tsconfig.json -+-- vite.config.ts -+-- tailwind.config.ts -+-- index.html -+-- .gitignore -+-- server/ -| +-- index.ts -| +-- tsconfig.json -| +-- routes/ -| | +-- projects.ts, skills.ts, hooks.ts, memory.ts -| | +-- plans.ts, settings.ts, claude-md.ts, search.ts -| +-- lib/ -| | +-- discovery.ts, skill-parser.ts, hook-parser.ts -| | +-- memory-parser.ts, plan-parser.ts, settings-parser.ts -| | +-- file-watcher.ts, fs-utils.ts -| +-- types/ -| +-- index.ts -+-- src/ -| +-- main.ts, App.vue, style.css -| +-- api/client.ts -| +-- router/index.ts -| +-- stores/ (projects, skills, hooks, memory, plans, settings, search) -| +-- types/ (skill, hook, memory, plan, project, settings) -| +-- composables/ (useFileWatcher, useMarkdownPreview, useMonaco) -| +-- views/ (Dashboard, Skills, Hooks, Memory, Plans, Settings, ClaudeMd) -| +-- components/ -| +-- layout/ (AppShell, Sidebar, TopBar) -| +-- shared/ (GlassCard, GlassButton, ScopeBadge, MonacoEditor, etc.) 
-| +-- dashboard/ (ProjectCard, QuickStats) -| +-- skills/ (SkillCard, SkillEditor, SkillList, InheritanceMap) -| +-- hooks/ (HookPipeline, HookCard, HookEditor) -| +-- memory/ (MemoryTree, MemoryCard, MemoryEditor) -| +-- plans/ (PlanCard, PlanDetail, PhaseBar, TaskCheckbox) -| +-- settings/ (PermissionToggle, HookConfig, PluginCard) -+-- public/ - +-- favicon.svg -``` - ---- - -## Key Libraries - -| Library | Purpose | -|---------|---------| -| `express` + `cors` | Backend HTTP server | -| `tsx` | Run TypeScript server without build step | -| `concurrently` | Run server + Vite in one command | -| `gray-matter` | Parse YAML frontmatter from markdown | -| `chokidar` | Watch filesystem for live updates | -| `monaco-editor` + `@monaco-editor/loader` | Code editor (md, bash, json, yaml) | -| `marked` + `highlight.js` | Markdown rendering with syntax highlighting | -| `vue-draggable-plus` | Drag-and-drop for skills and plan tasks | -| `fuse.js` | Client-side fuzzy search | -| `@vueuse/core` | Vue utilities (useEventSource, useDebounceFn) | - ---- - -## Key Decisions - -- **Express over Bun**: More predictable on macOS, better middleware ecosystem -- **SSE over WebSocket**: File watching is server->client only. SSE auto-reconnects, simpler. 
-- **Monaco over CodeMirror**: VS Code-like editing for all 4 file types -- **Atomic settings.json writes**: Read-modify-write with temp file + rename -- **MEMORY.md auto-sync**: Create/delete memory files auto-updates the index -- **Both skill formats**: Parser handles dir-based and flat-file skills diff --git a/.claude/plans/shiny-bouncing-raven.md b/.claude/plans/shiny-bouncing-raven.md deleted file mode 100644 index ae48761b..00000000 --- a/.claude/plans/shiny-bouncing-raven.md +++ /dev/null @@ -1,103 +0,0 @@ -# Plan: Fix Iframe Apps, Detail Pages, Kiosk, Identity Pairing, NIP-07 - -## Context - -Three web-only apps (BotFights, 484 Kitchen, Arch Presentation) show black screens in iframe despite nginx reverse proxies being set up. The kiosk on .228 isn't running. Web-only apps need proper detail pages. The user wants Nostr identity formally paired with DID and NIP-07 browser integration for frictionless login to embedded apps. - ---- - -## Task 1: Fix iframe black screen (HIGH) - -**Root cause**: Proxied HTML contains root-relative paths (`href="/css/main.css"`). Browser resolves these against the origin root, not `/ext/botfights/`, so all assets 404. - -**Fix**: Add `sub_filter` to nginx proxy blocks to rewrite root-relative paths. - -**File**: `image-recipe/configs/nginx-archipelago.conf` (6 location blocks — 3 HTTP, 3 HTTPS) - -Key additions per block: -```nginx -proxy_set_header Accept-Encoding ""; # Disable gzip so sub_filter works -sub_filter_once off; -sub_filter_types text/html text/css application/javascript; -sub_filter 'href="/' 'href="/ext/{app}/'; -sub_filter 'src="/' 'src="/ext/{app}/'; -sub_filter 'action="/' 'action="/ext/{app}/'; -sub_filter "href='/" "href='/ext/{app}/"; -sub_filter "src='/" "src='/ext/{app}/"; -``` - -Deploy + nginx reload. Verify in browser DevTools (Network tab — no 404s on assets). - ---- - -## Task 2: Detail pages for web-only apps (MEDIUM) - -**Problem**: Clicking a web-only app card navigates to `/dashboard/apps/{id}`. 
AppDetails.vue can't resolve it because web-only apps aren't in `store.packages` or `dummyApps`. - -**Fix**: -1. Add 7 web-only apps to `dummyApps` in AppDetails.vue (botfights, nwnn, 484-kitchen, call-the-operator, arch-presentation, syntropy-institute, t-zero) — same pattern as IndeeHub -2. Add URL mappings in AppDetails.vue `appUrls` for all 7 (if not already present) -3. Hide uninstall/start/stop buttons for web-only apps in AppDetails.vue - -**Files**: `neode-ui/src/views/AppDetails.vue` - ---- - -## Task 3: Kiosk on .228 (MEDIUM) - -**Problem**: Code exists but was never installed on server. No X11/Chromium packages. - -**Steps** (SSH to .228, no code changes): -1. `sudo apt-get install -y xorg chromium unclutter xinit` -2. `cd ~/archy && sudo ./scripts/setup-kiosk.sh archipelago` -3. `sudo systemctl enable --now archipelago-kiosk.service` -4. Verify on monitor - ---- - -## Task 4: Pair Nostr identity with DID (LOW) - -**Current state**: Ed25519 (DID) and secp256k1 (Nostr) are separate key pairs, both generated at startup. Not formally linked. - -**Fix**: Include the Nostr secp256k1 pubkey in the DID Document as an additional verification method: -- Modify `did_document_from_pubkey_hex()` in `identity.rs` to accept optional Nostr pubkey -- Add `EcdsaSecp256k1VerificationKey2019` entry to `verificationMethod` array -- Pass Nostr pubkey from server startup context - -**Files**: `core/archipelago/src/identity.rs`, `core/archipelago/src/server.rs` - ---- - -## Task 5: NIP-07 Nostr login via iframe injection (EXPLORATORY) - -**Goal**: Web apps in iframe (like IndeeHub) can call `window.nostr.getPublicKey()` and `window.nostr.signEvent()` for frictionless Nostr login. - -**Approach**: Inject a `window.nostr` shim into proxied pages via `sub_filter`, communicating with the parent Archipelago frame via `postMessage`. - -**Steps**: -1. Create `neode-ui/public/nostr-provider.js` — implements `window.nostr` interface, uses `postMessage` to parent -2. 
Add `sub_filter '</head>' '<script src="/nostr-provider.js"></script></head>';` to nginx ext proxy blocks -3. Add `postMessage` listener in AppLauncherOverlay that handles `nostr-getPublicKey` and `nostr-signEvent` by calling backend RPC -4. Backend already has `identity.nostr-sign` and `node.nostr-pubkey` RPC endpoints - -**Security**: Validate postMessage origin, prompt user before signing, never expose secret key to frontend. - -**Files**: new `neode-ui/public/nostr-provider.js`, `image-recipe/configs/nginx-archipelago.conf`, AppLauncherOverlay component, `neode-ui/src/stores/appLauncher.ts` - ---- - -## Execution Order - -1. Task 1 — fix iframe black screen (deploy nginx) -2. Task 2 — detail pages (deploy frontend) -3. Task 3 — kiosk on .228 (SSH ops) -4. Task 4 — DID+Nostr pairing (deploy backend) -5. Task 5 — NIP-07 injection (deploy full) - -## Verification - -- Task 1: Open BotFights/484 Kitchen/Arch Presentation in iframe — page renders with styles and interactivity -- Task 2: Click web-only app card → detail page shows with title, description, launch button, no container buttons -- Task 3: .228 monitor shows kiosk app grid -- Task 4: `node.did` RPC returns DID Document with Nostr pubkey in verificationMethod -- Task 5: Open IndeeHub in iframe, browser console `window.nostr.getPublicKey()` returns hex pubkey diff --git a/.claude/plans/silly-wondering-flamingo.md b/.claude/plans/silly-wondering-flamingo.md deleted file mode 100644 index c6e6903f..00000000 --- a/.claude/plans/silly-wondering-flamingo.md +++ /dev/null @@ -1,243 +0,0 @@ -# ISO Overhaul: Custom Minimal Base + Branding + Size Optimization - -## Context - -The Archipelago ISO is ~3.9GB — too large. The root cause is a ~800MB Debian Live ISO used as the boot base, plus a ~2.1GB rootfs with no `--no-install-recommends`. We're replacing the Debian Live dependency entirely with a custom debootstrap-built installer, adding full Archipelago branding to the boot chain, and stripping the rootfs. Target: sub-2GB ISO. 
- -All work on `dev-iso` branch with its own CI workflow. Main branch stays untouched. - ---- - -## Phase 0: Branch + CI Setup - -**Create `dev-iso` branch and separate CI workflow.** - -1. Branch from current `main` -2. Create `.gitea/workflows/build-iso-dev.yml`: - - Trigger: `push: branches: [dev-iso]` + `workflow_dispatch` - - Same structure as `build-iso.yml` (131 lines) but: - - Remove "Cache Debian Live ISO" step (no longer needed) - - Add `debootstrap`, `squashfs-tools`, `isolinux`, `syslinux-common`, `mtools`, `grub-efi-amd64-bin`, `grub-pc-bin` to tool dependencies - - Output naming: `archipelago-dev-unbundled-{date}.iso` - - Keep: backend build, frontend build, type check, tests, build report -3. Push and verify CI triggers on .228 runner - -**Files:** -- New: `.gitea/workflows/build-iso-dev.yml` - ---- - -## Phase 1: Rootfs Size Optimizations - -**Shrink rootfs.tar from ~2.1GB to ~1.5GB. Only touches the Dockerfile heredoc in Step 1 (lines 210-335).** - -### 1.1 Add `--no-install-recommends` -- Line 229: `apt-get install -y` → `apt-get install -y --no-install-recommends` -- Line 269: Same for Tailscale install -- Explicitly add packages that may be needed as recommends: `fonts-liberation`, `xfonts-base` (for Chromium kiosk) -- **Saves: ~150-300MB** - -### 1.2 Remove `firmware-misc-nonfree` -- Line 257: Remove `firmware-misc-nonfree` from package list -- Keep: `firmware-realtek`, `firmware-iwlwifi`, `intel-microcode`, `amd64-microcode` -- **Saves: ~50-80MB** - -### 1.3 Strip docs/man/locales -- Add after line 264 (after apt-get clean): - ```dockerfile - RUN find /usr/share/doc -depth -type f ! -name copyright -delete 2>/dev/null; \ - find /usr/share/doc -empty -delete 2>/dev/null; \ - rm -rf /usr/share/man /usr/share/info /usr/share/lintian /usr/share/linda; \ - find /usr/share/locale -maxdepth 1 -mindepth 1 ! -name 'en_US' ! 
-name 'locale.alias' -exec rm -rf {} + - ``` -- **Saves: ~50-80MB** - -### 1.4 Remove `wget` and `htop` -- Lines 244, 246: Remove `wget` (curl covers it) and `htop` (luxury tool) -- Keep `git` (used by self-update system) -- **Saves: ~5MB** (minor but removes unnecessary surface) - -### Verification -- Build ISO, compare rootfs.tar size -- Boot in QEMU, verify: kiosk renders, SSH works, nginx serves UI, podman runs - -**Files modified:** -- `image-recipe/build-auto-installer-iso.sh` (Step 1 Dockerfile heredoc, lines 210-335) - ---- - -## Phase 2: Replace Debian Live with Custom Debootstrap Base - -**The big one. Replaces Steps 2, 5, and parts of 4 and 6.** - -### 2.1 New Step 2: Build Minimal Installer Environment - -Replace lines 420-502 entirely. Run debootstrap inside a container to produce: -- `vmlinuz` — kernel (reused from linux-image-amd64) -- `initrd.img` — custom initramfs with ISO-mount hook -- `filesystem.squashfs` — minimal Debian root (~120-180MB) - -The installer squashfs contains only what's needed to run the auto-install script: -- `debootstrap --variant=minbase --include=systemd,systemd-sysv,udev,bash,coreutils,mount,util-linux,cryptsetup,parted,dosfstools,e2fsprogs,kmod,procps,iproute2,ca-certificates,gdisk` -- Auto-login on tty1 via getty override -- systemd service that auto-starts the installer (replaces profile.d hack) - -**Key: Custom initramfs hook** (`local-bottom/archipelago-mount`) that: -1. Scans `/dev/sr0`, `/dev/sd*` for a partition containing `archipelago/auto-install.sh` -2. Mounts it read-only at `/run/archiso` -3. This replaces Debian Live's `boot=live components` mechanism - -### 2.2 New Step 5: Assemble ISO Directory - -Replace lines 2236-2448 entirely. Much simpler — no squashfs overlay mechanism, no tools extraction (tools are in the squashfs), no profile.d manipulation. 
- -New Step 5 just assembles the directory structure: -``` -$INSTALLER_ISO/ - live/ - vmlinuz - initrd.img - filesystem.squashfs - boot/grub/ - grub.cfg - themes/archipelago/ (Phase 3) - efi.img (built with grub-mkimage) - isolinux/ - isolinux.bin - ldlinux.c32 - isolinux.cfg - EFI/BOOT/ - BOOTX64.EFI (built with grub-mkimage) - archipelago/ - auto-install.sh - rootfs.tar - bin/archipelago - web-ui/ - scripts/ - container-images/ (if bundled) -``` - -Generate EFI boot image with `grub-mkimage` and ISOLINUX files from the `isolinux` package. No more extracting MBR from Debian Live. - -### 2.3 Updated Step 6: ISO Creation - -Replace lines 2461-2511 (MBR extraction + EFI image search). Use: -- MBR: `/usr/lib/ISOLINUX/isohdpfx.bin` (from `isolinux` package) -- EFI: `boot/grub/efi.img` (built in Step 5) -- xorriso command stays the same structure - -### 2.4 Update Boot Media Paths in Step 4 (auto-install.sh) - -Lines 1154-1155: Add `/run/archiso` as first search path: -```bash -for dev in /run/archiso /cdrom /media/cdrom /run/live/medium /lib/live/mount/medium; do -``` - -Also update lines 2326, 2377 (no longer needed — replaced by systemd service in installer squashfs). - -### 2.5 Remove Debian Live cleanup from auto-install.sh - -The installed system's auto-install script currently removes `live-boot`, `live-boot-initramfs-tools`, `live-config` (around line 1872). With the custom base, these packages won't exist in the rootfs, so this cleanup becomes a harmless no-op — but should be cleaned up for clarity. 
- -### Verification -- Build ISO, verify size < 2GB -- Boot in QEMU (UEFI mode): verify GRUB menu → installer → full install → reboot -- Boot in QEMU (BIOS mode): verify ISOLINUX → installer → full install → reboot -- After install: SSH, web UI, kiosk, container loading all work -- Test `test-iso-qemu.sh` (may need minor path updates) - -**Files modified:** -- `image-recipe/build-auto-installer-iso.sh` (Steps 2, 4, 5, 6 — major rewrite) - ---- - -## Phase 3: Archipelago Boot Branding - -**Custom GRUB theme, installer banner, installed system GRUB.** - -### 3.1 Create GRUB Theme - -New directory: `image-recipe/branding/grub-theme/` -- `theme.txt` — dark background (#0a0a0a), white text, Bitcoin orange (#f7931a) highlight -- `background.png` — 1920x1080 dark with subtle Archipelago logo watermark -- Font files (`.pf2`) — generated with `grub-mkfont` from DejaVu Sans during build - -GRUB menu entries: -- "Install Archipelago" (default, quiet boot) -- "Install Archipelago (verbose)" (no `quiet`, for debugging) -- "Boot from local disk" (chainloader) - -### 3.2 Create ISOLINUX Theme - -New file: `image-recipe/branding/isolinux.cfg` -- Matching dark theme for legacy BIOS boot -- Same menu entries as GRUB - -### 3.3 Branded Installer Banner - -The systemd service's start script displays: -``` - ARCHIPELAGO BITCOIN NODE OS - Automatic Installer v0.1.0 - - Press Enter to start installation... -``` - -### 3.4 Install GRUB Theme to Target System - -In Step 4 (auto-install.sh), before `update-grub` (around line 1888): -- Copy GRUB theme from ISO to `/mnt/target/boot/grub/themes/archipelago/` -- Add `GRUB_THEME="/boot/grub/themes/archipelago/theme.txt"` to `/mnt/target/etc/default/grub` -- The installed system boots with Archipelago branding, not Debian default - -### 3.5 Create Background Image - -Render from existing SVG favicon (`neode-ui/public/assets/icon/favico-black-v2.svg`) to PNG at appropriate sizes. Dark background with subtle centered logo. 
- -### Verification -- Boot ISO: GRUB shows Archipelago theme (dark + orange) -- No Debian branding visible anywhere -- After install: target system GRUB also shows Archipelago theme - -**Files:** -- New: `image-recipe/branding/grub-theme/theme.txt` -- New: `image-recipe/branding/grub-theme/background.png` -- New: `image-recipe/branding/isolinux.cfg` -- Modified: `image-recipe/build-auto-installer-iso.sh` (Steps 5, 4) - ---- - -## Risk Areas - -| Risk | Severity | Mitigation | -|------|----------|------------| -| Custom initramfs fails to find USB media | High | Test multiple USB controller types in QEMU; add verbose fallback boot option | -| Missing packages in minbase break install | Medium | Trace auto-install.sh dependencies; test full install flow | -| GRUB EFI image missing modules | High | Include all common modules in grub-mkimage; test UEFI + BIOS | -| Kiosk breaks without recommends | Medium | Explicitly add Chromium/X11 font deps; test kiosk before merge | -| initramfs overlayfs mount fails | High | Follow well-established patterns from Arch/Ubuntu live ISOs | - ---- - -## Implementation Order - -1. **Phase 0** — branch + CI (~1 hour) -2. **Phase 1** — rootfs size opts (~2 hours, push + verify) -3. **Phase 2** — custom base (~8-10 hours, iterative QEMU testing) -4. **Phase 3** — branding (~3 hours) - -Phases are sequential — each builds on the previous. Push after each phase, verify CI passes. 
- ---- - -## Key Files - -| File | Role | -|------|------| -| `image-recipe/build-auto-installer-iso.sh` | Main build script — most changes here | -| `.gitea/workflows/build-iso-dev.yml` | New CI workflow for dev-iso branch | -| `image-recipe/branding/grub-theme/*` | New GRUB theme assets | -| `image-recipe/branding/isolinux.cfg` | New ISOLINUX config | -| `image-recipe/test-iso-qemu.sh` | QEMU test script (minor updates) | -| `.gitea/workflows/build-iso.yml` | Reference for new CI workflow | -| `scripts/image-versions.sh` | Unchanged — container image versions | diff --git a/.claude/plans/smooth-roaming-wadler.md b/.claude/plans/smooth-roaming-wadler.md deleted file mode 100644 index 1f52da3e..00000000 --- a/.claude/plans/smooth-roaming-wadler.md +++ /dev/null @@ -1,241 +0,0 @@ -# Container Orchestration Dev Testing Infrastructure - -## Context - -Container orchestration has been unreliable for months. Every fix requires a full deploy to .228 (5+ minutes), manual SSH debugging, and prayer. No way to test orchestration logic locally or catch regressions before deploy. We need three layers of testing so orchestration is bulletproof before it ever touches a server. - -## Three Layers - -### Layer C: Mock Podman in Rust Unit Tests (runs on macOS, instant) - -Tests the orchestration LOGIC without any containers. Runs in `cargo test`, takes seconds. - -**What it tests:** Retry backoff timing, restart tracker persistence, tier ordering, stop grace periods, failsafe install flow, health monitor state machine, crash recovery. 
- -**Implementation:** - -Create `core/archipelago/src/container/mock_podman.rs` — a fake podman command executor: - -```rust -pub struct MockPodman { - containers: Arc>>, - fail_pull: Arc, // simulate registry down - fail_start: Arc, // simulate container crash on start - pull_delay_ms: Arc, // simulate slow pull -} - -struct MockContainer { - name: String, - image: String, - state: ContainerState, // Created/Running/Exited/Stopped - exit_code: i32, - created_at: DateTime, -} -``` - -Key trait to add in `runtime.rs`: -```rust -#[async_trait] -pub trait CommandExecutor: Send + Sync { - async fn execute(&self, program: &str, args: &[&str]) -> Result; -} -``` - -Production uses `RealExecutor` (calls `tokio::process::Command`). Tests use `MockPodman`. - -**Test file:** `core/archipelago/tests/orchestration_tests.rs` - -Tests to write: -1. `test_stop_grace_periods` — bitcoin gets 600s, lnd 330s, unknown gets 30s -2. `test_pull_retry_backoff` — fail twice, succeed third, verify 5s/15s delays -3. `test_pull_all_attempts_fail` — fail 3x, verify error returned -4. `test_restart_tracker_persistence` — save to disk, reload, verify counters survive -5. `test_restart_tracker_stability_reset` — after 1h, counters clear -6. `test_failsafe_install_rollback` — container exits immediately, verify cleanup -7. `test_failsafe_install_image_missing` — pull succeeds but image not found, verify error -8. `test_health_monitor_tier_ordering` — databases restart before apps -9. `test_health_monitor_skips_user_stopped` — user-stopped containers not restarted -10. `test_health_monitor_max_attempts` — stops after 3 failures -11. `test_crash_recovery_loads_snapshot` — PID file + snapshot → containers restarted -12. 
`test_crash_recovery_skips_user_stopped` — user-stopped not recovered - -**Files to modify:** -- `core/archipelago/src/container/mod.rs` — add `pub mod mock_podman;` -- `core/archipelago/src/container/mock_podman.rs` — NEW mock implementation -- `core/archipelago/tests/orchestration_tests.rs` — NEW test file -- `core/archipelago/src/health_monitor.rs` — extract logic into testable functions (pure functions that take data, not functions that call podman) -- `core/archipelago/src/api/rpc/package/runtime.rs` — make `stop_timeout_secs` public for testing - -**Key refactors to make code testable:** -- Extract `stop_timeout_secs()` → `pub fn` so tests can call it directly -- Extract health monitor `check_and_restart()` into a function that takes container list + tracker + user_stopped, returns actions to take (restart X, notify Y, skip Z) — pure logic, no IO -- Extract `RestartTracker` + `RestartHistory` into own file for independent testing -- Make `pull_image_with_progress` retry logic independent of progress streaming - ---- - -### Layer A: SSH Dev Loop in dev-start.sh (real containers on .228) - -New option 9 in `dev-start.sh`: "Container orchestration dev (live on .228)" - -**What it does:** -1. Rsync code to .228 (2 seconds) -2. Build backend on .228 (incremental: 5-15 seconds) -3. Restart archipelago service -4. Run orchestration smoke tests via RPC -5. Show container status + health monitor logs -6. Loop: edit locally → press Enter → rsync+rebuild+test - -**What it tests:** Real podman, real containers, real networking. The actual install/start/stop/restart/health cycle. 
- -**Implementation:** - -Add option 9 to `scripts/dev-start.sh`: -```bash -9) - echo "Container Orchestration Dev (live testing on .228)" - exec "$SCRIPT_DIR/dev-container-test.sh" - ;; -``` - -Create `scripts/dev-container-test.sh` (~150 lines): -```bash -#!/bin/bash -# Fast edit-build-test loop for container orchestration on .228 -# -# Usage: ./scripts/dev-container-test.sh [--once] -# -# Syncs code, builds, restarts, runs orchestration smoke tests. -# Press Enter to re-run, Ctrl+C to stop. - -SSH="ssh -o StrictHostKeyChecking=no -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228" - -sync_and_build() { - rsync (same excludes as deploy script) - ssh: cargo build --release -p archipelago (incremental) - ssh: sudo systemctl restart archipelago - ssh: wait for health endpoint (15s timeout) -} - -run_smoke_tests() { - # Test 1: Container list works - curl -s /rpc/v1 -d '{"method":"container.list"}' - - # Test 2: Install filebrowser (small, fast, no deps) - curl -s /rpc/v1 -d '{"method":"package.install","params":{"id":"filebrowser","dockerImage":"..."}}' - # Wait for running state - - # Test 3: Stop with grace period - curl -s /rpc/v1 -d '{"method":"package.stop","params":{"id":"filebrowser"}}' - # Verify stopped - - # Test 4: Start - curl -s /rpc/v1 -d '{"method":"package.start","params":{"id":"filebrowser"}}' - # Verify running - - # Test 5: Health check - curl -s /rpc/v1 -d '{"method":"container.health"}' - - # Test 6: Check restart-tracker.json exists - ssh: cat /var/lib/archipelago/restart-tracker.json - - # Test 7: Check health monitor logs for errors - ssh: journalctl -u archipelago --since "2 min ago" | grep -i "error\|panic\|fail" - - # Test 8: Uninstall - curl -s /rpc/v1 -d '{"method":"package.uninstall","params":{"id":"filebrowser"}}' -} - -# Main loop -while true; do - sync_and_build - run_smoke_tests - echo "Press Enter to re-run, Ctrl+C to stop" - read -done -``` - -**Files:** -- `scripts/dev-start.sh` — add option 9 -- 
`scripts/dev-container-test.sh` — NEW - ---- - -### Layer B: CI Integration Tests (runs on .228 via Gitea Actions) - -Extend the existing CI to run container orchestration tests on every push to dev-iso. - -**What it tests:** Full lifecycle on real hardware after every code change. Catches regressions automatically. - -**Implementation:** - -Create `.gitea/workflows/container-tests.yml`: -```yaml -name: Container Orchestration Tests -on: - push: - branches: [dev-iso, main] - paths: - - 'core/**' - - 'scripts/container-*.sh' - - 'scripts/reconcile-*.sh' - -jobs: - unit-tests: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Rust unit tests (orchestration) - run: cargo test -p archipelago -- orchestration --no-fail-fast - - integration-tests: - runs-on: ubuntu-latest - needs: unit-tests - steps: - - uses: actions/checkout@v4 - - name: Deploy to test node - run: | - # Rsync + build on .228 - # Run orchestration smoke tests - bash scripts/run-container-tests.sh -``` - -Create `scripts/run-container-tests.sh` (~200 lines): -Reuses the smoke test logic from dev-container-test.sh but structured for CI: -- JSON output for CI parsing -- Exit codes for pass/fail -- Timeout handling (5 min max) -- Cleanup after test (remove test containers) -- Tests: install, start, stop, restart, uninstall, health check, restart tracker, reconciliation - -**Files:** -- `.gitea/workflows/container-tests.yml` — NEW -- `scripts/run-container-tests.sh` — NEW - ---- - -## Execution Order - -1. **Layer C first** (mock tests) — Get the logic tested, runs locally, fast feedback -2. **Layer A second** (dev loop) — Test against real containers with fast iteration -3. 
**Layer B last** (CI) — Automate regression catching - -## Files Summary - -| File | Action | Layer | -|------|--------|-------| -| `core/archipelago/src/container/mock_podman.rs` | NEW | C | -| `core/archipelago/src/container/mod.rs` | MODIFY | C | -| `core/archipelago/tests/orchestration_tests.rs` | NEW | C | -| `core/archipelago/src/health_monitor.rs` | REFACTOR (extract pure logic) | C | -| `core/archipelago/src/api/rpc/package/runtime.rs` | MODIFY (pub fn) | C | -| `scripts/dev-start.sh` | MODIFY (add option 9) | A | -| `scripts/dev-container-test.sh` | NEW | A | -| `.gitea/workflows/container-tests.yml` | NEW | B | -| `scripts/run-container-tests.sh` | NEW | B | - -## Verification - -- Layer C: `cargo test -p archipelago -- orchestration` — all pass on macOS -- Layer A: `./scripts/dev-start.sh` → option 9 → green smoke tests on .228 -- Layer B: Push to dev-iso → CI green on container-tests workflow diff --git a/.claude/plans/synchronous-greeting-rose.md b/.claude/plans/synchronous-greeting-rose.md deleted file mode 100644 index a4cf0f7c..00000000 --- a/.claude/plans/synchronous-greeting-rose.md +++ /dev/null @@ -1,173 +0,0 @@ -# Mesh Phase 4 Completion + Phase 5 Implementation - -## Context - -Mesh Phases 1-3 are complete: serial driver, transport layer (Mesh>LAN>Tor), Double Ratchet encryption, typed messages, store-and-forward, chat UI. Phase 4 is 40% done — data structures, builders, and tests exist (`bitcoin_relay.rs`, `alerts.rs`, `message_types.rs`) but nothing is wired into the listener, MeshService, or RPC layer. Phase 5 (steganographic modes, adaptive routing, multi-hardware) is not started. 
- -## Phase 4: Wire Up Off-Grid Bitcoin Operations (Weeks 8-11) - -### Week 8: Typed Message Dispatch in Listener - -**The critical foundation — everything else depends on this.** - -**`mesh/listener.rs`:** -- Add `MeshCommand::SendRaw { dest_pubkey_prefix: [u8; 6], payload: Vec<u8> }` and `BroadcastChannel { channel: u8, payload: Vec<u8> }` variants -- In `handle_frame()`: after extracting message bytes, check for `0x02` TypedEnvelope prefix -- New `handle_typed_message()` dispatches by type: - - `BlockHeader` → validate Ed25519 sig, store in `BlockHeaderCache`, emit event - - `TxRelay` → spawn task: Bitcoin RPC `sendrawtransaction`, send `TxRelayResponse` back - - `TxRelayResponse` → complete pending in `RelayTracker`, store as MeshMessage - - `LightningRelay` → spawn task: LND REST `payinvoice`, send response back - - `LightningRelayResponse` → complete pending, store - - `Alert` → verify sig, store, emit `MeshEvent::AlertReceived` -- Handle `SendRaw` and `BroadcastChannel` in `tokio::select!` command dispatch - -**`mesh/types.rs`:** New `MeshEvent` variants: `BlockHeaderReceived`, `AlertReceived`, `TxRelayCompleted`, `LightningRelayCompleted` - -**Key design:** Spawn separate tokio tasks for Bitcoin/LND HTTP calls (don't block serial read loop). Response sent back via `cmd_tx` channel. 
- -### Week 9: MeshService Integration + Dead Man's Switch Task - -**`mesh/mod.rs`:** -- Add fields: `block_header_cache: Arc<BlockHeaderCache>`, `relay_tracker: Arc<RelayTracker>`, `dead_man_switch: Arc<DeadManSwitch>`, `signing_key: ed25519_dalek::SigningKey` -- Init in `new()`, pass cache + tracker into listener via `MeshState` -- Accessor methods for RPC layer - -**Dead Man background task** (spawned in `start()`): -- Check every 60s: if triggered → build signed alert → broadcast on channel 0 + direct to emergency contacts -- Persist `last_check_in_time` as unix timestamp on disk (survives restarts) - -### Week 10: RPC Endpoints - -**`api/rpc/mesh.rs`** — New handlers: - -| Endpoint | Params | Description | -|----------|--------|-------------| -| `mesh.relay-tx` | `{ tx_hex }` | Queue TX for relay via internet peer | -| `mesh.block-headers` | `{ count? }` | Return cached block headers | -| `mesh.relay-lightning` | `{ bolt11, amount_sats }` | Queue LN invoice for payment | -| `mesh.deadman-status` | — | Query switch state | -| `mesh.deadman-configure` | `{ enabled, interval_secs, lat, lng, contacts, custom_message }` | Configure | -| `mesh.deadman-checkin` | — | Heartbeat reset | - -**Fix `mesh.send-invoice`:** Replace placeholder bolt11 with real LND `POST /v1/invoices` call. - -**`api/rpc/mod.rs`:** Register all new routes (~line 643). - -### Week 11: Block Header Announcer + Frontend - -**Backend:** Optional background task: poll Bitcoin Core `getblockchaininfo` every 30s → on new block → signed announcement → broadcast channel 0. Config: `announce_block_headers: bool`. - -**Frontend `stores/mesh.ts`:** New methods for all Phase 4 RPC calls. 
- -**Frontend `views/Mesh.vue`:** -- "Off-Grid Bitcoin" panel: block height, headers, TX relay form, LN relay form -- "Dead Man's Switch" panel: enable/disable, interval, GPS, contacts, countdown, check-in -- Uses `.path-option-card`, `.glass-button`, `.info-card` - -## Phase 5: Mesh Network Intelligence (Weeks 12-15) - -### Week 12: Steganographic Modes - -**New: `mesh/steganography.rs`** - -- `SteganographyMode` enum: `Normal`, `WeatherStation`, `SensorNetwork` -- **Weather Station:** Map payload bytes → plausible weather readings (temp, humidity, pressure, wind). Marker `0xAA` replaces `0x02`. -- **Sensor Network:** Industrial sensor format (voltage, current, vibration) -- `to_wire_steganographic(mode)` / `from_wire_steganographic(data)` on TypedEnvelope -- Listener detects `0xAA` → decode stego → normal dispatch -- Config: `steganography_mode` in `MeshConfig` -- Budget: ~80 bytes real data per 160-byte LoRa frame with stego overhead - -### Week 13: Adaptive Routing & Signal Intelligence - -**New: `mesh/routing.rs`** - -- `LinkQuality` per peer: RSSI/SNR rolling 1h history, packet loss, hop count -- `RoutingTable`: link quality per peer + best route per destination DID -- Score: `(rssi+120)*0.4 + (snr+20)*0.3 + (1-loss)*100*0.3` -- Best relay selection for TX/LN relay (highest quality peer with internet) -- Multi-hop forwarding: if dest DID != ours and hops < 3, forward to best next-hop -- Extract RSSI from v3 frames (bytes 1-2, currently unused) -- RPC: `mesh.routing-table` - -### Week 14: LoRa Radio Parameter Control - -**`mesh/protocol.rs`:** Builders for `SET_RADIO_PARAMS` (0x0B), `SET_TX_POWER` (0x0C), `SET_TUNING_PARAMS` (0x15). Parse `RESP_STATS` (0x18). - -**RPC:** `mesh.set-radio-params`, `mesh.set-tx-power`, `mesh.get-radio-stats` - -**Auto-adaptive SF:** If link quality drops → increase spreading factor (longer range, slower). Config toggle. - -**Frontend:** Radio tuning panel with SF/TX power sliders, stats, auto-adaptive toggle. 
- -### Week 15: Multi-Hardware + Topology UI - -**New: `mesh/device_trait.rs`** - -```rust -#[async_trait] -pub trait MeshDevice: Send + Sync { - async fn open(path: &str) -> Result where Self: Sized; - async fn initialize(&mut self) -> Result; - async fn send_text(&mut self, dest: &[u8; 6], msg: &[u8]) -> Result<()>; - async fn try_recv_frame(&mut self) -> Result>; - // ... -} -``` - -- Implement for `MeshcoreDevice`, stub Meshtastic/WiFi/BLE -- `listener.rs` uses `Box` -- **Topology UI:** SVG graph (this node center, peers as satellites), edge thickness = quality, color = green/yellow/red, tooltips with RSSI/SNR/hops -- Stego mode selector, block relay status panel - -## Key Challenges - -1. **TX hex > 160 bytes:** Use Reed-Solomon chunking (already in `transport/chunking.rs`) -2. **Async in listener:** Spawn tasks for Bitcoin/LND calls, don't block serial loop -3. **Dead man false triggers:** Persist check-in time as unix timestamp on disk -4. **Stego overhead:** ~80 bytes real data per 160-byte frame - -## Files Modified - -**Phase 4:** -- `core/archipelago/src/mesh/listener.rs` — typed dispatch, new MeshCommand variants -- `core/archipelago/src/mesh/mod.rs` — new fields, init, background tasks -- `core/archipelago/src/mesh/types.rs` — new MeshEvent variants -- `core/archipelago/src/api/rpc/mesh.rs` — 6+ new endpoints, fix send-invoice -- `core/archipelago/src/api/rpc/mod.rs` — register routes -- `neode-ui/src/stores/mesh.ts` — new store methods -- `neode-ui/src/views/Mesh.vue` — off-grid + dead man panels - -**Phase 5 new files:** -- `core/archipelago/src/mesh/steganography.rs` -- `core/archipelago/src/mesh/routing.rs` -- `core/archipelago/src/mesh/device_trait.rs` - -## Existing Code to Reuse - -- `bitcoin_relay.rs`: `BlockHeaderCache`, `RelayTracker`, all `build_*` functions -- `alerts.rs`: `DeadManSwitch`, `AlertConfig`, `load_config`/`save_config` -- `message_types.rs`: All payload types, `TypedEnvelope`, `encode_payload`/`decode_payload` -- 
`api/rpc/lnd.rs:128-141`: `lnd_client()` pattern for LND REST calls -- `api/rpc/bitcoin.rs:74-107`: `bitcoin_rpc_call()` for Bitcoin Core RPC -- `transport/chunking.rs`: Reed-Solomon FEC for payloads > 160 bytes - -## Verification - -```bash -# Unit tests on server -ssh archipelago@192.168.1.228 'cd ~/archy/core && source ~/.cargo/env && cargo test --all-features -- mesh' - -# Type check frontend -cd neode-ui && npm run type-check - -# Deploy to both -./scripts/deploy-to-target.sh --both - -# E2E tests: -# 1. .228 (internet) relays TX from .198 (mesh-only) -# 2. .228 announces block headers, .198 receives them -# 3. Dead man's switch triggers after interval, broadcasts alert -# 4. Steganographic packet looks like weather data on wire -``` diff --git a/.claude/plans/tailscale-migration.md b/.claude/plans/tailscale-migration.md deleted file mode 100644 index 8d9e87c9..00000000 --- a/.claude/plans/tailscale-migration.md +++ /dev/null @@ -1,119 +0,0 @@ -# Plan: Seamless Tailscale Migration for Alpha Testers - -## Context - -Tailscale nodes (Arch 1/2/3) are alpha tester machines. They need full deployment — binary, frontend, infrastructure, and containers — with zero friction. Currently `deploy-tailscale.sh` only deploys binary + frontend (85 lines), missing ALL infrastructure that `deploy-to-target.sh --live` provides (rootless prereqs, UID mapping, containers, nginx, Tor, HTTPS, dev mode, UFW, etc.). - -These nodes may also have old **rootful** containers that need migrating to rootless. - -## Approach - -**Don't refactor the 1615-line deploy-to-target.sh** — too risky during beta freeze. Instead: - -1. **Rewrite `deploy-tailscale.sh`** as a full-deploy script with split-mode SSH resilience -2. **Add `--tailscale` flag** to `deploy-to-target.sh` as a convenience wrapper -3. **Add rootful→rootless migration** as an automatic pre-step -4. **Fix `first-boot-containers.sh`** for rootless (separate concern, for ISO builds) - -## Changes - -### 1. 
Rewrite `scripts/deploy-tailscale.sh` (~400 lines) - -Currently 85 lines doing only binary+frontend. Rewrite to be a full deploy for any node, using split-mode SSH (each step = separate short SSH session) for Tailscale stability. - -**Steps the new script will run (each as its own SSH session):** - -1. SSH connectivity check -2. Install prerequisites (rsync, node, npm) if missing -3. Rsync code to target -4. **Rootful→rootless migration** (detect `sudo podman ps -a`, stop & remove old rootful containers) -5. Build frontend (nohup + poll, or skip if copy-only node) -6. Build backend (nohup + poll, or skip if copy-only node) -7. Create rollback backup -8. Deploy binary (build locally or copy from .228) -9. Deploy frontend (build locally or copy from .228) -10. Deploy AIUI -11. Sync nginx config + HTTPS snippets -12. Sync systemd service -13. **Setup rootless prereqs** (sysctl, linger, podman.socket) -14. **Create data dirs + UID mapping** (full chown table from deploy-to-target.sh:670-689) -15. **Dev mode** (ARCHIPELAGO_DEV_MODE=true for HTTP cookies over Tailscale) -16. Deploy nostr-provider.js -17. Deploy Claude API proxy (if ANTHROPIC_API_KEY available) -18. Setup NTP + swap -19. Restart services -20. **Setup HTTPS** (with node's own IP in SAN) -21. **Read Bitcoin RPC credentials** from server secrets -22. **Create all containers** (Bitcoin, Mempool, BTCPay, ElectrumX, LND, Fedimint, Immich, HA, Grafana, Jellyfin, Vaultwarden, SearXNG, FileBrowser) -23. **Setup Tor** hidden services -24. **Fix UFW** forward policy -25. **Fix IndeedHub** NIP-07 (if running) -26. **Transfer custom images** for copy-only nodes (individual tarballs, never combined) -27. Run container doctor -28. Write deploy manifest -29. Post-deploy health check - -**Copy-only mode**: When target can't build (Arch 1/3), script detects no `cargo`/`npm` on target and copies pre-built artifacts from .228 via SSH pipe. 
- -**Key sections to port from deploy-to-target.sh:** -- Lines 646-689 — rootless prereqs + UID mapping -- Lines 629-641 — dev mode -- Lines 839-1474 — all container creation -- Lines 1143-1234 — Tor setup -- Lines 1477-1485 — UFW fix -- Lines 1487-1545 — IndeedHub NIP-07 - -### 2. Add `--tailscale` flag to `deploy-to-target.sh` (~30 lines) - -Wrapper that calls `deploy-tailscale.sh` for each node sequentially. Also add `--tailscale-node=arch1|arch2|arch3` for single-node targeting. - -### 3. Rootful→rootless migration (in deploy-tailscale.sh step 4) - -Auto-detect and handle: -``` -ssh TARGET 'ROOTFUL=$(sudo podman ps -a 2>/dev/null | wc -l); if [ $ROOTFUL -gt 1 ]; then sudo podman stop --all; sudo podman rm --all; fi' -``` -Data safe — `/var/lib/archipelago/` never deleted, only ownership fixed by UID mapping step. - -### 4. Fix `scripts/first-boot-containers.sh` (5 targeted edits) - -- **Line 15**: Change root check → archipelago user check (UID 1000) -- **Line 140**: Change `10.88.0.0/16` → `0.0.0.0/0` (match deploy-to-target.sh) -- **After line 111**: Add rootless prereqs (sysctl, linger, podman.socket) -- **After line 113**: Add full UID mapping block -- **Pin `:latest` tags**: photoprism, ollama, searxng, nginx-proxy-manager, penpot - -### 5. Update `scripts/setup-https-dev.sh` - -Dynamic SAN — detect node's own IPs (including Tailscale interface) instead of hardcoding .228/.198. 
- -## Files Modified - -| File | Change | ~Lines | -|------|--------|--------| -| `scripts/deploy-tailscale.sh` | Full rewrite — complete deploy with split-mode SSH | ~400 | -| `scripts/deploy-to-target.sh` | Add `--tailscale` / `--tailscale-node` flags | ~30 | -| `scripts/first-boot-containers.sh` | Fix for rootless (subnet, UID mapping, prereqs) | ~40 | -| `scripts/setup-https-dev.sh` | Dynamic SAN with Tailscale IPs | ~15 | -| `docs/BETA-PROGRESS.md` | Update TASK-11 status | ~5 | - -## Auth State Preservation - -All user state in `/var/lib/archipelago/` is **never touched** by deploys: -- `sessions.json`, `user.json`, `identities/`, `secrets/`, `federation/` - -## Verification - -1. Deploy to Arch 2 first (has build tools, safest test) -2. Then Arch 1/3 (copy-only mode) -3. For each node: `podman ps` shows containers, `curl /health` returns 200, UI loads, login works -4. Run container doctor — 0 fixes needed - -## Order - -1. Rewrite `deploy-tailscale.sh` (main deliverable) -2. Add `--tailscale` flags to `deploy-to-target.sh` -3. Fix `first-boot-containers.sh` -4. Update `setup-https-dev.sh` -5. Test: Arch 2 → Arch 1 → Arch 3 -6. Update BETA-PROGRESS.md diff --git a/.claude/plans/toasty-inventing-cascade.md b/.claude/plans/toasty-inventing-cascade.md deleted file mode 100644 index c4d813ad..00000000 --- a/.claude/plans/toasty-inventing-cascade.md +++ /dev/null @@ -1,89 +0,0 @@ -# Plan: ISO Polish — Fix Everything for Beta Release - -## Context -Fresh ISO install on .198 revealed 11 issues ranging from critical (app installs, Tor broken) to UX (GRUB scaling, boot splash, kiosk reliability). Goal: next ISO build produces a flawless out-of-box experience. - -## Issues & Fixes (priority order) - -### 1. 
CRITICAL: Tor services.json not written (escaping bug) -**Symptom:** `setup-tor.sh: line 12: $ARCHY_TOR_DIR/services.json: No such file or directory` -**Root cause:** In `build-auto-installer-iso.sh`, the setup-tor heredoc escapes `$ARCHY_TOR_DIR` as `\$ARCHY_TOR_DIR`, producing a literal `$` in the output script. The variable never expands at runtime. -**Fix:** In the heredoc that generates setup-tor.sh (~line 1200), use unescaped `$ARCHY_TOR_DIR` so it expands at runtime. The heredoc itself uses `< PBKDF2-HMAC-SHA512 (2048 rounds, empty passphrase) - -> Master Seed (64 bytes) - | - +-- HKDF-SHA256(seed, info="archipelago/node/ed25519/v1") -> Node Ed25519 key -> did:key - +-- HKDF-SHA256(seed, info="archipelago/nostr-node/secp256k1/v1") -> Node Nostr key - +-- HKDF-SHA256(seed, info="archipelago/identity/{i}/ed25519/v1") -> Identity i Ed25519 -> did:key - +-- BIP-32 m/44'/1237'/0'/0/{i} -> Identity i Nostr key (NIP-06) - +-- BIP-32 m/84'/0'/0' -> Bitcoin Core wallet (native segwit) - +-- HKDF-SHA256(seed, info="archipelago/lnd/entropy/v1") -> 16 bytes -> LND aezeed entropy -``` - ---- - -## Phase 1: Seed Module (foundation) - -### New crates in `core/archipelago/Cargo.toml` -```toml -bip39 = "=2.1.0" -bitcoin = { version = "=0.32.5", features = ["rand-std"] } -``` - -### New file: `core/archipelago/src/seed.rs` - -**`MasterSeed` struct** — wraps `Zeroizing<[u8; 64]>`, implements `ZeroizeOnDrop` - -Functions: -- `MasterSeed::generate() -> (Mnemonic, MasterSeed)` — 256-bit entropy, 24 words -- `MasterSeed::from_mnemonic(mnemonic) -> MasterSeed` — for restore -- `MasterSeed::from_mnemonic_words(words: &str) -> Result<(Mnemonic, MasterSeed)>` — parse + validate -- `derive_node_ed25519(&MasterSeed) -> SigningKey` — HKDF with info `archipelago/node/ed25519/v1` -- `derive_identity_ed25519(&MasterSeed, index: u32) -> SigningKey` — HKDF with info `archipelago/identity/{index}/ed25519/v1` -- `derive_nostr_identity_key(&MasterSeed, index: u32) -> nostr_sdk::Keys` — BIP-32 
`m/44'/1237'/0'/0/{index}` -- `derive_node_nostr_key(&MasterSeed) -> nostr_sdk::Keys` — HKDF with info `archipelago/nostr-node/secp256k1/v1` -- `derive_bitcoin_xprv(&MasterSeed) -> Xpriv` — BIP-32 `m/84'/0'/0'` -- `derive_lnd_entropy(&MasterSeed) -> [u8; 16]` — HKDF with info `archipelago/lnd/entropy/v1` -- `save_seed_encrypted(data_dir, mnemonic, passphrase)` — Argon2+ChaCha20 to `master_seed.enc` -- `load_seed_encrypted(data_dir, passphrase) -> Mnemonic` -- `seed_exists(data_dir) -> bool` -- `save_identity_index(data_dir, next_index: u32)` / `load_identity_index(data_dir) -> u32` - -Security: Never log seed/mnemonic. All seed types implement `ZeroizeOnDrop`. File permissions 0o600. - -Existing building blocks to reuse: -- `mesh/crypto.rs:hkdf_sha256()` / `hkdf_sha256_32()` — already implemented -- `backup/identity.rs` encryption pattern — Argon2+ChaCha20 (reuse for `save_seed_encrypted`) -- `ed25519-dalek`, `sha2`, `hmac`, `hkdf`, `zeroize` — all in Cargo.toml already - ---- - -## Phase 2: Onboarding UI - -### New Vue views: - -**`OnboardingSeedGenerate.vue`** — calls `seed.generate`, displays 24 words in grid, "I wrote these down" checkbox - -**`OnboardingSeedVerify.vue`** — picks 4 random word positions, user types them back, calls `seed.verify`, shows DID + npub on success - -**`OnboardingSeedRestore.vue`** — 24 input fields with BIP-39 wordlist autocomplete, calls `seed.restore` - -### New onboarding flow: -``` -Intro -> Options (Fresh / Restore) -> [branch] - -FRESH: SeedGenerate -> SeedVerify -> Identity (name/purpose) -> Done -RESTORE: SeedRestore -> Done -``` - -### Router changes (`neode-ui/src/router/index.ts`): -- Add routes: `onboarding/seed`, `onboarding/seed-verify`, `onboarding/seed-restore` -- Remove: `onboarding/did`, `onboarding/backup`, `onboarding/verify` -- Enable Restore path in `OnboardingOptions.vue` - -### RPC client (`neode-ui/src/api/rpc-client.ts`): -- `generateSeed()`, `verifySeed()`, `restoreSeed()`, `saveSeedEncrypted()`, 
`seedStatus()` - ---- - -## Phase 3: Backend Integration - -### `identity.rs` — add `NodeIdentity::from_seed(identity_dir, &MasterSeed)` -- Derives Ed25519 node key via `seed::derive_node_ed25519()` -- Writes to `node_key` / `node_key.pub` (same format as today) -- Existing `load_or_create()` unchanged (loads from disk, works for both seed-derived and legacy keys) - -### `identity_manager.rs` — seed-aware `create()` -- When seed available: derive Ed25519 from `derive_identity_ed25519(seed, index)`, Nostr from `derive_nostr_identity_key(seed, index)` -- Increment and persist `identity_index` -- Add `derivation_index: Option` to `IdentityFile` (serde default, backward-compatible) -- When no seed (legacy): fall back to current random generation - -### `server.rs` — startup flow: -``` -seed exists + node_key exists -> Normal seed-backed operation -no seed + node_key exists -> Legacy node, show migration prompt -no seed + no node_key -> Fresh install, await onboarding -seed exists + no node_key -> Re-derive from seed (recovery) -``` -- Add `seed_backed: bool` to `ServerInfo` - -### New RPC endpoints in `api/rpc/seed.rs`: -- `seed.generate` — generates mnemonic, derives & writes node keys, returns words (onboarding only, unauth) -- `seed.verify` — validates user re-entered correct words (onboarding only) -- `seed.restore` — accepts 24 words, derives all keys, writes to disk (onboarding only, unauth) -- `seed.save-encrypted` — encrypts mnemonic to `master_seed.enc` (optional convenience) -- `seed.status` — returns `{ has_seed, is_legacy, identity_count, next_index }` -- `seed.derive-lnd-entropy` — password-protected, returns 16 bytes for LND wallet init -- `seed.derive-bitcoin-xprv` — password-protected, returns xprv for Bitcoin Core import - -In-memory mnemonic between `seed.generate` and `seed.verify`: held in `Mutex>>` with 10-minute auto-clear timeout. 
- ---- - -## Phase 4: Bitcoin/LND Integration - -### LND wallet from seed: -- `lnd.init-wallet-from-seed` handler — derives 16-byte entropy, calls LND REST `POST /v1/initwallet` with `seed_entropy` -- Triggered during LND first-install flow - -### Bitcoin Core wallet from seed: -- `bitcoin.init-wallet-from-seed` handler — derives BIP-84 xprv, calls `createwallet` + `importdescriptors` via Bitcoin Core RPC -- Triggered during Bitcoin Core first-install flow - -Both endpoints require password re-verification. - ---- - -## Phase 5: Migration & Polish - -### Legacy node migration: -- Detect legacy nodes (node_key exists, no master_seed.enc) -- Settings page shows prompt: "Set up seed phrase to protect future identities" -- Existing keys preserved — only NEW identities use seed derivation -- Optional full migration (`seed.migrate-legacy`) can be added later - -### Cleanup: -- Remove old `OnboardingDid.vue`, `OnboardingBackup.vue`, `OnboardingVerify.vue` -- Update Settings backup section to show seed status -- Update ADR-008 to reflect implementation matches description - ---- - -## File Layout After Implementation - -``` -{data_dir}/identity/ - node_key # 32 bytes Ed25519 secret (derived from seed or legacy) - node_key.pub # 32 bytes Ed25519 public - master_seed.enc # NEW: encrypted mnemonic (optional convenience backup) - identity_index # NEW: next derivation index (plain text integer) -{data_dir}/identities/ - {uuid}.json # Same format + optional derivation_index field -``` - ---- - -## Critical Files to Modify - -| File | Change | -|------|--------| -| `core/archipelago/Cargo.toml` | Add `bip39`, `bitcoin` crates | -| `core/archipelago/src/seed.rs` | **NEW** — all seed logic | -| `core/archipelago/src/identity.rs` | Add `from_seed()` constructor | -| `core/archipelago/src/identity_manager.rs` | Seed-aware `create()`, add `derivation_index` | -| `core/archipelago/src/server.rs` | Startup state detection (seed/legacy/fresh) | -| `core/archipelago/src/api/rpc/seed.rs` | 
**NEW** — seed RPC handlers | -| `core/archipelago/src/api/rpc/dispatcher.rs` | Register seed.* endpoints | -| `neode-ui/src/views/OnboardingSeedGenerate.vue` | **NEW** — show 24 words | -| `neode-ui/src/views/OnboardingSeedVerify.vue` | **NEW** — verify written words | -| `neode-ui/src/views/OnboardingSeedRestore.vue` | **NEW** — enter 24 words to restore | -| `neode-ui/src/views/OnboardingOptions.vue` | Enable Restore path | -| `neode-ui/src/router/index.ts` | Update onboarding routes | -| `neode-ui/src/api/rpc-client.ts` | Add seed RPC methods | - ---- - -## Verification - -1. **Unit tests**: Deterministic derivation (same mnemonic -> same keys), invalid mnemonic rejection, index increment, zeroization -2. **Integration**: Fresh install flow end-to-end, restore flow (generate on node A, enter words on node B, verify same DID/npub) -3. **Security**: Grep seed.rs for tracing macros that interpolate seed vars, verify file permissions -4. **LND**: Derive entropy, init wallet, verify deterministic aezeed -5. **Bitcoin Core**: Derive xprv, import descriptors, verify addresses match -6. **Legacy**: Existing node without seed starts normally, can still create identities -7. 
**Type check**: `cd neode-ui && npx vue-tsc -b --noEmit` diff --git a/.claude/rules/backend.md b/.claude/rules/backend.md deleted file mode 100644 index 42649c2e..00000000 --- a/.claude/rules/backend.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -globs: - - "core/**/*.rs" - - "core/**/Cargo.toml" ---- - -# Backend Rules (Archipelago — Rust) - -- Backend binds `127.0.0.1` only — nginx handles external access -- Validate all input before path construction — reject `..`, `/`, null bytes -- Timeouts on all external operations (10s default, 30s for heavy like Bitcoin RPC) -- Use `anyhow::Result` for error propagation, not `.unwrap()` in handlers -- Log with `tracing`, never `println!` or `eprintln!` in production paths -- Container commands through `PodmanClient` (core/container/), never raw `Command::new("podman")` diff --git a/.claude/rules/containers.md b/.claude/rules/containers.md deleted file mode 100644 index 2e0e6339..00000000 --- a/.claude/rules/containers.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -globs: - - "**/container/**" - - "**/manifest*" - - "**/*podman*" - - "**/Containerfile" - - "**/Dockerfile" - - "**/first-boot*" - - "**/container-doctor*" ---- - -# Container Security Rules (Archipelago — Rootless Podman) - -## Rootless Podman Architecture -- Podman runs as `archipelago` user (UID 1000), NOT root — never use `sudo podman` -- UID namespace mapping via subuid: container UID N → host UID (100000 + N) -- Container images stored in `~/.local/share/containers/storage/` (NOT /var/lib/containers) -- Container subnet: `10.89.0.0/16` (rootless), not `10.88.0.0/16` (rootful) -- XDG_RUNTIME_DIR must be `/run/user/1000` — required for podman socket -- `loginctl enable-linger archipelago` required for containers to survive logout - -## Container Security (Non-Negotiable) -- Drop ALL capabilities, add only what's required (`--cap-drop=ALL --cap-add=...`) -- Set `--security-opt=no-new-privileges:true` on all containers -- Use `--read-only` + tmpfs where possible (safe apps: 
searxng, grafana, filebrowser, electrumx, nostr-rs-relay, ollama, indeedhub) -- Pin image versions — never use `:latest` tag -- Mount secrets as read-only files, never pass as environment variables when possible -- Set memory and CPU limits on all containers -- All containers must have `--restart unless-stopped` - -## Volume Ownership (Critical for Rootless) -- Volume directories must be owned by the MAPPED UID, not the container UID -- Formula: `host_uid = 100000 + container_uid` -- UID 0 (most apps) → `sudo chown -R 100000:100000 /var/lib/archipelago/{app}` -- UID 101 (bitcoin) → `sudo chown -R 100101:100101 /var/lib/archipelago/bitcoin` -- UID 70 (postgres) → `sudo chown -R 100070:100070 /var/lib/archipelago/postgres-*` -- UID 472 (grafana) → `sudo chown -R 100472:100472 /var/lib/archipelago/grafana` -- UID 999 (mariadb) → `sudo chown -R 100999:100999 /var/lib/archipelago/mysql-*` - -## Systemd Service Requirements -- `ProtectHome=no` — podman needs `~/.local/share/containers/` -- `PrivateTmp=no` — podman runtime uses `/tmp/podman-run-1000/` -- `RestrictNamespaces=` must NOT be set — rootless podman creates user namespaces -- `SystemCallFilter=` must NOT be set — rootless podman needs clone/unshare -- UFW `DEFAULT_FORWARD_POLICY="ACCEPT"` — required for LAN access to container ports - -## Network Rules -- Apps needing inter-container DNS: use `--network=archy-net` (bitcoin, lnd, electrumx, mempool, btcpay, fedimint) -- Standalone apps: default bridge network -- Tailscale only: `--network=host` + `NET_ADMIN` + `NET_RAW` + `/dev/net/tun` diff --git a/.claude/rules/frontend.md b/.claude/rules/frontend.md deleted file mode 100644 index 1f538cf1..00000000 --- a/.claude/rules/frontend.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -globs: - - "**/neode-ui/**" - - "**/*.vue" ---- - -# Frontend Rules (Archipelago) - -- Always use `'; - sub_filter_once on; - } - location = /nostr-provider.js { - alias /opt/archipelago/web-ui/nostr-provider.js; - } -} -``` - -### 3. 
Add to appLauncher.ts EXTERNAL_PROXY_PORT - -In `neode-ui/src/stores/appLauncher.ts`, add the domain-to-port mapping: - -```typescript -const EXTERNAL_PROXY_PORT: Record = { - // ... existing entries - '{DOMAIN}': {PORT}, -} -``` - -### 4. Add to Apps.vue WEB_ONLY_APP_URLS and WEB_ONLY_APPS - -In `neode-ui/src/views/Apps.vue`: -1. Add to `WEB_ONLY_APP_URLS`: `'{app-id}': 'https://{DOMAIN}'` -2. Add to `WEB_ONLY_APPS` with a synthetic `PackageDataEntry`: - - state: `'running'` - - manifest with id, title, version, description - - static-files with icon path - -### 5. Add to dummyApps.ts - -In `neode-ui/src/utils/dummyApps.ts`, add a full `PackageDataEntry` with: -- Long description (for detail page) -- Website URL in manifest -- Icon path - -### 6. Add to AppDetails.vue WEB_ONLY_APP_URLS - -In `neode-ui/src/views/AppDetails.vue`, add to the `WEB_ONLY_APP_URLS` map. - -### 7. Add app icon - -Place icon at `neode-ui/public/assets/img/app-icons/{app-id}.{png|webp|svg}` - -### 8. Deploy - -```bash -# Build frontend -cd neode-ui && npm run build - -# Deploy nginx config -scp image-recipe/configs/nginx-archipelago.conf archipelago@192.168.1.228:/tmp/ -ssh archipelago@192.168.1.228 "sudo cp /tmp/nginx-archipelago.conf /etc/nginx/sites-available/archipelago && sudo nginx -t && sudo systemctl reload nginx" - -# Deploy frontend -rsync -az --delete --exclude aiui --exclude claude-login.html web/dist/neode-ui/ archipelago@192.168.1.228:/opt/archipelago/web-ui/ -``` - -### 9. Verify - -1. Open Archipelago UI -2. Web-only app appears in My Apps (sorted alphabetically before container apps) -3. Click app card -> detail page with title, description, launch button, no container buttons -4. Click Launch -> iframe loads the external website correctly -5. All assets load (no 404s in Network tab) -6. 
`window.nostr` available in iframe console (NIP-07) - -## Files Modified - -| File | What to add | -|------|-------------| -| `image-recipe/configs/nginx-archipelago.conf` | New server block with proxy | -| `neode-ui/src/stores/appLauncher.ts` | EXTERNAL_PROXY_PORT entry | -| `neode-ui/src/views/Apps.vue` | WEB_ONLY_APP_URLS + WEB_ONLY_APPS entries | -| `neode-ui/src/views/AppDetails.vue` | WEB_ONLY_APP_URLS entry | -| `neode-ui/src/utils/dummyApps.ts` | Full PackageDataEntry for detail page | -| `neode-ui/public/assets/img/app-icons/` | App icon file | diff --git a/.claude/skills/bitcoin-conventions/SKILL.md b/.claude/skills/bitcoin-conventions/SKILL.md deleted file mode 100644 index 695cd9da..00000000 --- a/.claude/skills/bitcoin-conventions/SKILL.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -name: bitcoin-conventions -description: Bitcoin development conventions for Archipelago. Covers sats display (integers, never float), address type detection, Tor/onion endpoint preference, Bitcoin RPC error handling, and Lightning patterns. Use when working with Bitcoin amounts, addresses, RPC calls, Lightning channels, or onion services. ---- - -# Bitcoin Development Conventions - -## Critical Rules - -- **NEVER use floating point for Bitcoin amounts.** Sats are always `u64` (Rust) or `BigInt`/integer (TypeScript). -- **NEVER log private keys, seeds, or mnemonics.** Not even at debug/trace level. -- **Prefer Tor/onion endpoints** for all Bitcoin network services when available. 
- -## Amount Display - -### Rust -```rust -// Amount is always in sats as u64 -pub fn format_sats(sats: u64) -> String { - if sats >= 100_000_000 { - let btc = sats / 100_000_000; - let remainder = sats % 100_000_000; - if remainder == 0 { - format!("{} BTC", btc) - } else { - format!("{}.{:08} BTC", btc, remainder) - } - } else { - format!("{} sats", sats) - } -} -``` - -### TypeScript -```typescript -// Never: amount * 0.00000001 -// Always: integer arithmetic or BigInt -function formatSats(sats: number): string { - if (sats >= 100_000_000) { - const btc = Math.floor(sats / 100_000_000) - const remainder = sats % 100_000_000 - return remainder === 0 ? `${btc} BTC` : `${btc}.${String(remainder).padStart(8, '0')} BTC` - } - return `${sats.toLocaleString()} sats` -} -``` - -## Address Types - -Detect and display address type: -- `1...` — P2PKH (Legacy) -- `3...` — P2SH (SegWit-compatible) -- `bc1q...` — P2WPKH (Native SegWit) -- `bc1p...` — P2TR (Taproot) - -Always validate addresses before any operation. Use network-appropriate validation (mainnet `bc1`, testnet `tb1`, regtest `bcrt1`). - -## Bitcoin RPC Error Handling - -```rust -match rpc_response.error { - Some(err) => { - // Standard Bitcoin Core RPC error codes - match err.code { - -1 => /* miscellaneous error */, - -5 => /* invalid address or key */, - -6 => /* insufficient funds */, - -25 => /* transaction verification failed */, - -26 => /* transaction rejected by policy */, - -27 => /* transaction already in chain */, - -28 => /* client still warming up */, - _ => /* unknown error */, - } - } - None => { /* success */ } -} -``` - -Always set explicit timeouts on RPC calls (10s default, 30s for heavy operations like `rescanblockchain`). - -## Tor/Onion Preferences - -When configuring Bitcoin services: -1. Check for Tor SOCKS proxy (default: `127.0.0.1:9050`) -2. If available, route Bitcoin P2P and RPC through Tor -3. Prefer `.onion` endpoints for block explorers, electrum servers -4. 
Set `proxy=127.0.0.1:9050` in `bitcoin.conf` -5. Set `onlynet=onion` for maximum privacy (if full Tor mode) - -## Lightning (LND/CLN) Patterns - -### BOLT11 Invoice handling -- Always validate invoice before displaying to user -- Show: amount, description, expiry, destination pubkey -- Never auto-pay without user confirmation - -### Channel States -Display human-readable channel state: -- `PENDING_OPEN` → "Opening..." -- `OPEN` → "Active" -- `PENDING_CLOSE` / `FORCE_CLOSING` → "Closing..." -- `CLOSED` → "Closed" - -### Macaroon handling -- Never log macaroon contents -- Store with restrictive permissions (0600) -- Use read-only macaroon for queries, admin macaroon only for mutations - -## Container Images for Bitcoin Services - -- **Always pin by SHA256 digest**, never by tag alone -- Example: `docker.io/lnzap/lnd@sha256:abc123...` not `lnzap/lnd:latest` -- Verify image signatures when available (cosign/notary) diff --git a/.claude/skills/build-iso/SKILL.md b/.claude/skills/build-iso/SKILL.md deleted file mode 100644 index c08b07f2..00000000 --- a/.claude/skills/build-iso/SKILL.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -name: build-iso -description: Build Archipelago auto-installer ISOs. Custom debootstrap base (no Debian Live dependency), live-boot for squashfs root, hybrid BIOS+UEFI boot, Archipelago branding. Use when user says "build ISO", "build image", "create installer", or needs to work on the ISO build pipeline. -allowed-tools: Bash, Read, Edit, Write, Grep, Glob, Agent ---- - -# Build Archipelago ISO - -## Architecture (dev-iso branch) - -Custom debootstrap-based installer. NO Debian Live ISO download. 
- -| Component | Source | Size | -|-----------|--------|------| -| Installer squashfs | debootstrap --variant=minbase + live-boot | ~180MB | -| Target rootfs | Docker build (Debian bookworm, full stack) | ~1.5GB compressed | -| Kernel + initramfs | From debootstrap, with live-boot hooks | ~50MB | -| GRUB + ISOLINUX | Built from packages during Step 2 | ~1MB | -| **Total ISO** | **Unbundled** | **~2.2GB** | - -## Build Pipeline (6 Steps) - -**Step 1** (lines ~200-430): Build target rootfs via Docker -- Debian bookworm + all runtime packages (podman, nginx, tor, chromium, etc.) -- `--no-install-recommends` for size reduction -- Strips docs/man/locales -- Output: `archipelago-rootfs.tar` (~1.5GB) - -**Step 2** (lines ~430-710): Build installer environment via debootstrap -- `debootstrap --variant=minbase` inside a container -- Installs live-boot via chroot (NOT --include — minbase can't resolve it) -- Custom initramfs with live-boot hooks -- Builds GRUB EFI image with grub-mkimage -- Creates ISOLINUX files, EFI boot image -- Installs GRUB theme + background -- Output: vmlinuz, initrd.img, filesystem.squashfs, BOOTX64.EFI, efi.img, isolinux.bin - -**Step 3** (lines ~710-850): Add Archipelago components -- Backend binary, web UI, rootfs.tar, scripts, Plymouth theme - -**Step 3b** (lines ~850-1230): Bundle container images (skipped if UNBUNDLED=1) - -**Step 4** (lines ~1230-2380): Generate auto-install.sh -- Embedded installer script (~1100 lines) -- Disk detection, partitioning, LUKS encryption, GRUB install -- Installs GRUB + Plymouth theme on target - -**Step 5** (lines ~2380-2460): Configure boot loaders -- Write GRUB config (boot=live components) -- Write ISOLINUX config -- Both reference kernel at /live/vmlinuz - -**Step 6** (lines ~2460-2540): Create final ISO -- xorriso with hybrid BIOS+UEFI boot -- Uses proven MBR from `branding/isohdpfx.bin` -- `-partition_offset 16` for UEFI compatibility - -## CI Workflow - -**Branch**: `dev-iso` → 
`.gitea/workflows/build-iso-dev.yml` -**Branch**: `main` → `.gitea/workflows/build-iso.yml` - -Dev CI includes a smoke test step that verifies: -- All critical files present in ISO -- Initrd contains live-boot scripts -- grub.cfg has boot=live -- Fails build before copying to Builds if any check fails - -## Critical Rules - -1. **MBR**: Always use `branding/isohdpfx.bin` (Debian Live MBR, starts with `4552`). The ISOLINUX generic MBR (`33ed`) doesn't boot on all hardware. - -2. **live-boot**: Must be installed via `chroot /installer apt-get install` AFTER debootstrap completes. The `--include` flag silently fails for live-boot. - -3. **Initramfs**: `update-initramfs` needs `/proc`, `/sys`, `/dev` bind-mounted in the chroot. Without them, the initramfs is broken. - -4. **scripts/live is a FILE**: Verify with `[ -e ]` not `[ -d ]`. - -5. **Kernel params**: Must include `boot=live components`. Without `boot=live`, live-boot hooks never activate. - -6. **partition_offset 16**: Required in xorriso for UEFI firmware to recognize the USB. - -7. **Never push during a running CI build**: The gitea-runner kills in-progress builds when a new commit arrives on the same branch. 
- -## Quick Commands - -```bash -# Build locally (on .228): -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 -cd ~/archy/image-recipe -sudo UNBUNDLED=1 DEV_SERVER=localhost BUILD_FROM_SOURCE=0 ./build-auto-installer-iso.sh - -# Check build status: -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 \ - "ps aux | grep build-auto | grep -v grep" - -# Check latest ISO: -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 \ - "ls -lt /var/lib/archipelago/filebrowser/Builds/archipelago-dev-*.iso | head -3" - -# Verify ISO: -# See /iso-debug skill for the full verification checklist - -# Iterate on branding without rebuilding: -./image-recipe/dev-branding.sh [path-to-iso] -# Or: ./scripts/dev-start.sh → option 0 -``` - -## Key Files - -| File | Role | -|------|------| -| `image-recipe/build-auto-installer-iso.sh` | Main build script (~2600 lines) | -| `image-recipe/build-unbundled-iso.sh` | Wrapper: sets UNBUNDLED=1 | -| `image-recipe/branding/isohdpfx.bin` | Proven MBR (432 bytes) | -| `image-recipe/branding/grub-theme/` | GRUB theme + background | -| `image-recipe/branding/plymouth-theme/` | Plymouth boot splash | -| `scripts/image-versions.sh` | Pinned container image versions | -| `.gitea/workflows/build-iso-dev.yml` | CI for dev-iso branch | -| `image-recipe/test-iso-qemu.sh` | QEMU test script | -| `image-recipe/dev-branding.sh` | Quick branding iteration | diff --git a/.claude/skills/build-iso/references/branding.md b/.claude/skills/build-iso/references/branding.md deleted file mode 100644 index f7c21fe8..00000000 --- a/.claude/skills/build-iso/references/branding.md +++ /dev/null @@ -1,80 +0,0 @@ -# ISO Boot Branding — Archipelago - -Design and build the visual boot experience from USB power-on to web UI. - -## Brand Identity - -**Archipelago** = self-sovereign Bitcoin node OS. Floating islands in the sky. 
- -| Element | Value | -|---------|-------| -| Primary accent | `#fb923c` (Bitcoin orange) | -| Secondary accent | `#f7931a` (deeper orange) | -| Success | `#4ade80` (green) | -| Background | `#0a0a0a` -> `#050505` (near-black) | -| Text | `#ffffff` (white), `#aaaaaa` (dim), `#555555` (subtle) | -| Glass | `rgba(255,255,255,0.06)` frost overlay | -| Style | Pixel art cyberpunk, dark glass morphism, CRT scanlines | -| Logo | Pixel-art lowercase "a" (from SVG favicon) | - -## Boot Stages & What's Customizable - -### 1. GRUB Menu (UEFI boot) -- **Background**: `branding/grub-theme/background.png` — any PNG, GRUB scales it -- **Theme**: `branding/grub-theme/theme.txt` — colors, layout, labels -- **Fonts**: Generated with `grub-mkfont` during build, .pf2 format -- **Config**: Written by build script in Step 5 (`grub.cfg` heredoc) - -GRUB theme.txt properties that work: -``` -desktop-color: "#rrggbb" -desktop-image: "background.png" -title-text: "" - -+ boot_menu { left/top/width/height = N%; item_color/selected_item_color = "#rrggbb" } -+ label { left/top/width = N%; text = "string"; color = "#rrggbb"; align = "center" } -``` - -**IMPORTANT**: Do NOT reference font names in theme.txt unless you know the exact internal name from grub-mkfont output. - -### 2. ISOLINUX Menu (BIOS boot) -- Text-only ANSI-style `MENU COLOR` directives -- Use `vesamenu.c32` for graphical, `menu.c32` for compatibility - -### 3. Plymouth Splash (kernel boot -> login) -- Theme: `branding/plymouth-theme/archipelago.script` -- Logo: `branding/plymouth-theme/logo.png` (PNG with transparency) -- Config: `branding/plymouth-theme/archipelago.plymouth` -- Kernel param `splash` must be present - -### 4. Console Banner (TTY login) -- ASCII art in `/etc/profile.d/archipelago.sh` -- Uses ANSI escape codes for color - -### 5. 
Installer Prompt -- In systemd service wrapper: `/usr/local/bin/archipelago-start-installer` - -## Image Specs - -| Asset | Format | Size | Notes | -|-------|--------|------|-------| -| GRUB background | PNG | 1024x768 recommended | Large images slow boot | -| Plymouth logo | PNG (RGBA) | 256x256 recommended | Transparent background | -| GRUB fonts | .pf2 | Generated | `grub-mkfont -s SIZE -o out.pf2 input.ttf` | - -## Build Integration - -GRUB theme: Step 2 (copied from `branding/grub-theme/`, fonts generated with `grub-mkfont`) -Plymouth theme: Step 3 (component copy) + Step 4 (auto-install.sh copies to target) -GRUB on target: auto-install.sh copies to `/mnt/target/boot/grub/themes/archipelago/` - -## What to Edit - -| File | Affects | -|------|---------| -| `branding/grub-theme/background.png` | GRUB boot screen image | -| `branding/grub-theme/theme.txt` | GRUB menu colors, layout | -| `branding/plymouth-theme/logo.png` | Plymouth boot logo | -| `branding/plymouth-theme/archipelago.script` | Plymouth animation/progress | -| `branding/generate-grub-background.py` | Procedural background generator | -| `branding/generate-plymouth-logo.py` | Procedural logo generator | diff --git a/.claude/skills/check-server/SKILL.md b/.claude/skills/check-server/SKILL.md deleted file mode 100644 index 580ccb54..00000000 --- a/.claude/skills/check-server/SKILL.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -name: check-server -description: Quick health check of the live Archipelago server -allowed-tools: Bash ---- - -Quick health check of the live server. SSH into `archipelago@192.168.1.228` (password: `EwPDR8q45l0Upx@`) and run: - -1. `systemctl is-active archipelago nginx` — are services running? -2. `sudo podman ps --format '{{.Names}} {{.Status}}'` — what containers are up? -3. `curl -s http://127.0.0.1:5678/health` — is the backend responding? -4. `sudo journalctl -u archipelago -n 10 --no-pager` — any recent errors? - -Report a brief one-paragraph status summary. 
diff --git a/.claude/skills/deploy-both/SKILL.md b/.claude/skills/deploy-both/SKILL.md deleted file mode 100644 index f1705fee..00000000 --- a/.claude/skills/deploy-both/SKILL.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: deploy-both -description: Deploy all changes to both Archipelago servers -disable-model-invocation: true -allowed-tools: Bash, Read ---- - -Deploy all changes to BOTH servers (primary: 192.168.1.228, secondary: 192.168.1.198). - -## Steps - -1. Run: - ```bash - ./scripts/deploy-to-target.sh --both - ``` - -2. This builds on the primary server first, then copies built artifacts to the secondary. - -3. Verify both servers respond: - ```bash - ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 'systemctl is-active archipelago' - ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.198 'systemctl is-active archipelago' - ``` diff --git a/.claude/skills/deploy/SKILL.md b/.claude/skills/deploy/SKILL.md deleted file mode 100644 index e0aeb65f..00000000 --- a/.claude/skills/deploy/SKILL.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -name: deploy -description: Deploy all changes to the live Archipelago server -disable-model-invocation: true -allowed-tools: Bash, Read ---- - -Deploy all changes to the live server (192.168.1.228). - -## Steps - -1. Run the deploy script from the project root: - ```bash - ./scripts/deploy-to-target.sh --live - ``` - -2. This syncs frontend and backend code, builds the Rust backend **on the server** (never locally on macOS), deploys frontend to `/opt/archipelago/web-ui/`, deploys backend binary to `/usr/local/bin/archipelago`, and restarts systemd + nginx. - -3. After deploy completes, verify the server is healthy: - ```bash - ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 'systemctl is-active archipelago nginx && sudo journalctl -u archipelago -n 10 --no-pager' - ``` - -4. Report whether the deploy succeeded and if any errors appeared in the logs. 
diff --git a/.claude/skills/design-pixel-retro/SKILL.md b/.claude/skills/design-pixel-retro/SKILL.md deleted file mode 100644 index a8e83036..00000000 --- a/.claude/skills/design-pixel-retro/SKILL.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -name: design-pixel-retro -description: > - Pixel Art Retro design system — ChonkyPixels font, neon glow CTAs, pixel - dot animations, and dark foundation theme. Use when building retro/pixel art - UIs, foundation sites, when user says "pixel art", "retro design", "8-bit - aesthetic", "neon glow buttons", "pixel font", or "retro foundation style". -metadata: - author: dorian - version: 1.0.0 - category: design-system - tags: [pixel-art, retro, 8-bit, neon, dark-theme, foundation] ---- - -# Pixel Art Retro Design System - -Extracted from Archipelago Foundation. Pixel-perfect aesthetics with modern -web technology, neon glow accents, and playful retro energy. - -## Design Identity - -**Name:** Pixel Art Retro -**Mood:** Playful retro, 8-bit nostalgia with modern polish -**Background:** Dark (#0A0A0A) with pixel texture overlays -**Accent:** Bitcoin orange (#F7931A) with radial neon glow - -## Typography - -```css ---font-pixel: 'ChonkyPixels', monospace; /* Display/headings — CRITICAL */ ---font-body: 'Avenir Next', system-ui, sans-serif; ---font-mono: 'Courier New', monospace; -``` - -**Rule:** ChonkyPixels must be loaded with `font-synthesis: none` and -`!important` on headings to prevent browser synthesis of bold/italic. 
- -## Color Palette - -Same dark base as Glassmorphism, but with neon glow effects: -```css ---bg-primary: #0A0A0A; ---accent: #F7931A; ---accent-glow: radial-gradient(circle, rgba(247,147,26,0.4) 0%, transparent 70%); ---neon-green: #39ff14; ---neon-pink: #ff6ec7; ---neon-blue: #04d9ff; -``` - -## Key Components - -### Neon Glow CTA -```css -.neon-cta { - background: linear-gradient(135deg, #f7931a, #e68a00); - border: 2px solid rgba(247, 147, 26, 0.5); - border-radius: 4px; /* Sharp corners — pixel aesthetic */ - padding: 12px 32px; - font-family: var(--font-pixel); - text-transform: uppercase; - position: relative; -} -.neon-cta::after { - content: ''; - position: absolute; - inset: -8px; - background: var(--accent-glow); - opacity: 0; - transition: opacity 0.3s; - z-index: -1; -} -.neon-cta:hover::after { opacity: 1; } -``` - -### Pixel Dot Animation -```css -@keyframes pixel-dot-bounce { - 0%, 100% { transform: translateY(0); } - 50% { transform: translateY(-4px); } -} -.pixel-dot { animation: pixel-dot-bounce 0.6s steps(2) infinite; } -``` - -### Intro Sequence -```css -.intro-container { animation: intro-container 0.6s ease-out; transform-origin: center; } -.intro-corners { animation: intro-corners 0.5s ease-out 0.35s both; } -.intro-logo { animation: fadeIn 0.5s ease-out 0.7s both; } - -@keyframes intro-container { from { transform: scale(0.97); opacity: 0; } } -@keyframes intro-corners { from { transform: scale(0.8); opacity: 0; } } -``` - -## UI Approach - -- Sharp corners (2-4px radius) — pixel aesthetic, not rounded -- Stepped animations (`steps(N)`) where possible for pixel feel -- Monospace alignment for data displays -- Donation modal: max-width 480px, QR code on white background -- Theme toggle: smooth dark/light with inverted logo filter - -## Modular Architecture - -- Pixel font loaded via `@font-face` with subset for performance -- Glow effects via CSS pseudo-elements (no extra DOM) -- Animation keyframes in global stylesheet -- Component-scoped 
overrides only diff --git a/.claude/skills/diagnose/SKILL.md b/.claude/skills/diagnose/SKILL.md deleted file mode 100644 index 1aa56313..00000000 --- a/.claude/skills/diagnose/SKILL.md +++ /dev/null @@ -1,21 +0,0 @@ ---- -name: diagnose -description: Run a full diagnostic check on the Archipelago dev server -allowed-tools: Bash ---- - -SSH into the dev server and run a comprehensive diagnostic. Use `ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228` for all commands. - -## Checks to run - -1. **Services**: `systemctl is-active archipelago nginx` -2. **Backend status**: `sudo systemctl status archipelago --no-pager` -3. **Containers**: `sudo podman ps -a` -4. **Backend logs** (last 50): `sudo journalctl -u archipelago -n 50 --no-pager` -5. **Nginx errors**: `sudo tail -20 /var/log/nginx/error.log` -6. **RPC test**: `curl -s -X POST http://127.0.0.1:5678/rpc/v1 -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1,"method":"echo","params":{}}'` -7. **Tor hostname**: `sudo cat /var/lib/archipelago/tor/hidden_service_archipelago/hostname` -8. **Disk space**: `df -h /` -9. **Memory**: `free -h` - -Report findings clearly and suggest fixes for any issues found. If $ARGUMENTS is provided, focus the diagnosis on that specific area. diff --git a/.claude/skills/frontend-dev/SKILL.md b/.claude/skills/frontend-dev/SKILL.md deleted file mode 100644 index 242a81cc..00000000 --- a/.claude/skills/frontend-dev/SKILL.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -name: frontend-dev -description: Start the local frontend development environment for Archipelago -disable-model-invocation: true -allowed-tools: Bash ---- - -Start the local frontend development environment. - -```bash -cd neode-ui && npm start -``` - -This starts: -- **Mock backend** on port 5959 (simulates the Rust backend API) -- **Vite dev server** on port 8100 - -Access at http://localhost:8100 (password: `password123`) - -The mock backend lets you develop the UI without needing the live server. 
diff --git a/.claude/skills/gamepad-nav/SKILL.md b/.claude/skills/gamepad-nav/SKILL.md deleted file mode 100644 index c0337633..00000000 --- a/.claude/skills/gamepad-nav/SKILL.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -name: gamepad-nav -description: Expert-level gamepad/controller navigation for Archipelago's console-style UI. Use when working on D-pad navigation, focus management, spatial navigation, controller support, or 10-foot UI design. ---- - -# Gamepad Navigation Expert - -When working on gamepad/controller navigation in Archipelago, apply these console-derived patterns. - -## Architecture - -**File**: `neode-ui/src/composables/useControllerNav.ts` -**Styles**: `neode-ui/src/style.css` (focus-visible rules) - -The system uses `data-` attributes for navigation zones: -- `data-controller-zone="sidebar"` / `"main"` — navigation zones -- `data-controller-container` — focusable card/group (Enter drills in, Escape exits) -- `data-controller-focusable` — marks element as focusable -- `data-controller-ignore` — excludes from navigation -- `data-controller-install` / `data-controller-launch` — app-specific actions - -## Core Navigation Rules (Xbox/PS5/Switch consensus) - -### D-pad Movement -- **4 directions only** — Up/Down/Left/Right, one element per press -- **Spatial navigation** — find nearest focusable in direction using bounding rect geometry -- **Distance formula**: `euclidean + displacement - alignment` with overlap scoring -- **Tiebreaker for up/down**: prefer leftmost element (visual consistency in grids) - -### Wrapping -- **Linear lists (1D)**: WRAP (last to first, first to last) — sidebar menu, tab bars -- **Grids (2D)**: NO WRAP — stops at edges, prevents disorientation - -### Zone Transitions -- **Right from sidebar** -> first focusable in main content (topmost) -- **Left from main's leftmost** -> sidebar's active tab (`.nav-tab-active`) -- **Focus memory**: remember last-focused element per zone, restore on re-entry - -### Container Navigation -- 
**Enter/A**: drill into container (focus first inner element) -- **Escape/B**: exit container (focus the container itself) -- **D-pad inside container**: navigate among inner elements spatially -- **D-pad at container edge**: exit and navigate to adjacent container - -### Text Input Handling -- **Up/Down arrows**: EXIT input, navigate to nearest element above/below -- **Left/Right arrows**: stay in input (cursor movement) -- **Enter**: if next focusable is a button, click it directly (submit) -- **Escape**: blur input, navigate out - -### Button Mapping -| Action | Xbox | PlayStation | Switch | Keyboard | -|--------|------|------------|--------|----------| -| Confirm | A | Cross | A | Enter | -| Back | B | Circle | B | Escape | -| Navigate | D-pad | D-pad | D-pad | Arrow keys | - -## Focus Visual Design - -### Console standard (10-foot viewing distance) -- **Minimum 2px** border/outline (1px flickers on interlaced TVs) -- **3:1 contrast ratio** against adjacent colors (WCAG 2.4.7) -- **Smooth transitions**: 150-200ms ease-out -- **GPU compositing**: use `translateZ(0)` on animated elements -- **Never pure white** (#f1f1f1 prevents TV halo effects) - -### Archipelago Focus Patterns -```css -/* Global — subtle outline that follows border-radius */ -*:focus-visible { - outline: 2px solid rgba(251, 146, 60, 0.6); - outline-offset: 2px; -} - -/* Containers — soft glow + slight scale */ -[data-controller-container]:focus-visible { - outline: none; - transform: scale(1.01); - box-shadow: 0 0 0 1.5px rgba(251, 146, 60, 0.5), - 0 0 20px rgba(251, 146, 60, 0.15); -} - -/* Sidebar items — background tint + thin ring */ -.sidebar-nav-item:focus-visible { - outline: none; - background: rgba(251, 146, 60, 0.12); - box-shadow: 0 0 0 1.5px rgba(251, 146, 60, 0.45); -} -``` - -## Gamepad API Integration - -### Polling -- Poll `navigator.getGamepads()` in `requestAnimationFrame` loop (cheap, returns snapshot) -- Apply deadzone: `Math.abs(axis) > 0.2` before registering input -- 
D-pad repeat: 400ms initial delay, 150ms interval (gamepads don't auto-repeat) - -### Button indices (W3C Standard Mapping) -- 0=A, 1=B, 2=X, 3=Y, 4=LB, 5=RB, 12=DUp, 13=DDown, 14=DLeft, 15=DRight - -## When Investigating Issues - -1. Check `useControllerNav.ts` for the `handleKeyDown` function -2. Check `data-controller-*` attributes in the view's template -3. Verify focusable elements are in the right `data-controller-zone` -4. Test with: arrow keys on keyboard (simulates D-pad) -5. Check `style.css` for `focus-visible` rules - -## Key Sources -- [Xbox Accessibility Guideline 112](https://learn.microsoft.com/en-us/gaming/accessibility/xbox-accessibility-guidelines/112) -- [Microsoft: Gamepad and remote interactions](https://learn.microsoft.com/en-us/windows/apps/design/input/gamepad-and-remote-interactions) -- [W3C CSS Spatial Navigation](https://www.w3.org/TR/css-nav-1/) -- [W3C Gamepad Spec](https://w3c.github.io/gamepad/) -- [Norigin Spatial Navigation (React reference)](https://github.com/NoriginMedia/Norigin-Spatial-Navigation) diff --git a/.claude/skills/harden/SKILL.md b/.claude/skills/harden/SKILL.md deleted file mode 100644 index 6736cece..00000000 --- a/.claude/skills/harden/SKILL.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -name: harden -description: Security hardening review and fixes for Archipelago code and infrastructure -disable-model-invocation: true -allowed-tools: Read, Edit, Write, Glob, Grep, Bash -argument-hint: "[area: backend|frontend|containers|scripts|all]" ---- - -Perform a security hardening pass on $ARGUMENTS (default: all). 
- -## Backend Hardening (Rust) - -- [ ] No hardcoded credentials — check for Base64-encoded auth strings, passwords in source -- [ ] Secrets use `core/security/secrets_manager.rs` — verify encryption is implemented (not plaintext) -- [ ] All RPC endpoints validate inputs before processing -- [ ] No `unwrap()` on user-supplied data — handle errors gracefully -- [ ] Rate limiting on auth endpoints (login, password change) -- [ ] Session tokens have proper expiry and rotation -- [ ] File permissions: keys at 0o600, dirs at 0o700 -- [ ] Tracing never logs secrets, passwords, keys, or tokens - -## Frontend Hardening (Vue/TypeScript) - -- [ ] No secrets in source (API keys, passwords, tokens) -- [ ] No `eval()` or `innerHTML` with untrusted content -- [ ] XSS prevention — sanitize all user inputs -- [ ] CSRF protection on state-changing requests -- [ ] Credentials use `credentials: 'include'` not localStorage tokens -- [ ] No sensitive data in console.log statements - -## Container Hardening - -- [ ] All manifests: `readonly_root: true` (unless documented exception) -- [ ] All manifests: capabilities dropped, only required ones added -- [ ] All manifests: non-root user (UID > 1000) -- [ ] All manifests: `no-new-privileges: true` -- [ ] All images pinned to specific versions (no `:latest`) -- [ ] Network isolation — no `host` network unless required and documented -- [ ] AppArmor profiles defined and enforced - -## Script Hardening - -- [ ] All scripts use `set -euo pipefail` -- [ ] No hardcoded passwords (use deploy-config.sh or env vars) -- [ ] SSH uses proper key-based auth where possible -- [ ] No `chmod 777` or overly permissive permissions -- [ ] Temp files use `mktemp` not predictable paths - -Report all findings with file paths and line numbers. Fix issues directly where safe to do so. Flag anything that needs discussion. 
diff --git a/.claude/skills/iso-branding/SKILL.md b/.claude/skills/iso-branding/SKILL.md deleted file mode 100644 index 708432ba..00000000 --- a/.claude/skills/iso-branding/SKILL.md +++ /dev/null @@ -1,146 +0,0 @@ ---- -name: iso-branding -description: Design and implement Archipelago boot visuals — GRUB theme, Plymouth splash, ISOLINUX menu, console banners. Handles pixel-art cyberpunk aesthetic with Bitcoin orange accents. Use when working on boot screen design, splash animations, GRUB backgrounds, or installer UI appearance. -allowed-tools: Bash, Read, Write, Edit, Grep, Glob, Agent ---- - -# ISO Boot Branding — Archipelago - -Design and build the visual boot experience from USB power-on to web UI. - -## Brand Identity - -**Archipelago** = self-sovereign Bitcoin node OS. Floating islands in the sky. - -| Element | Value | -|---------|-------| -| Primary accent | `#fb923c` (Bitcoin orange) | -| Secondary accent | `#f7931a` (deeper orange) | -| Success | `#4ade80` (green) | -| Background | `#0a0a0a` → `#050505` (near-black) | -| Text | `#ffffff` (white), `#aaaaaa` (dim), `#555555` (subtle) | -| Glass | `rgba(255,255,255,0.06)` frost overlay | -| Style | Pixel art cyberpunk, dark glass morphism, CRT scanlines | -| Logo | Pixel-art lowercase "a" (from SVG favicon) | - -## Boot Stages & What's Customizable - -### 1. 
GRUB Menu (UEFI boot) -- **Background**: `branding/grub-theme/background.png` — any PNG, GRUB scales it -- **Theme**: `branding/grub-theme/theme.txt` — colors, layout, labels -- **Fonts**: Generated with `grub-mkfont` during build, .pf2 format -- **Config**: Written by build script in Step 5 (`grub.cfg` heredoc) - -GRUB theme.txt properties that work: -``` -desktop-color: "#rrggbb" # Fallback if no background -desktop-image: "background.png" # Background image -title-text: "" # Empty = no title - -+ boot_menu { - left/top/width/height = N% - item_color = "#rrggbb" - selected_item_color = "#rrggbb" - item_height = N - item_spacing = N - scrollbar = false -} - -+ label { - left/top/width = N% - text = "string" - color = "#rrggbb" - align = "center" -} -``` - -**IMPORTANT**: Do NOT reference font names in theme.txt unless you know the exact internal name from grub-mkfont output. GRUB falls back to default if a font reference fails, which causes the ENTIRE theme to not load. - -### 2. ISOLINUX Menu (BIOS boot) -- **Config**: Written by build script in Step 5 (`isolinux.cfg` heredoc) -- **Colors**: ANSI-style color codes in `MENU COLOR` directives -- **Title**: `MENU TITLE` string -- Text-only — no background image (use `vesamenu.c32` for graphical, but `menu.c32` is more compatible) - -### 3. 
Plymouth Splash (kernel boot → login) -- **Theme**: `branding/plymouth-theme/archipelago.script` -- **Logo**: `branding/plymouth-theme/logo.png` (PNG with transparency) -- **Config**: `branding/plymouth-theme/archipelago.plymouth` -- Supports: animated progress bar, logo sprites, LUKS password prompt -- Kernel param `splash` must be present (added to GRUB_CMDLINE_LINUX_DEFAULT) - -Plymouth script language: -```javascript -Window.SetBackgroundTopColor(r, g, b); // 0.0-1.0 -logo = Image("logo.png"); -sprite = Sprite(logo); -sprite.SetX(x); sprite.SetY(y); -Plymouth.SetRefreshFunction(callback); -Plymouth.SetBootProgressFunction(callback); -Plymouth.SetDisplayPasswordFunction(callback); -``` - -### 4. Console Banner (TTY login) -- ASCII art + system info in `/etc/profile.d/archipelago.sh` -- Generated in auto-install.sh (Step 4, the INSTALLER_SCRIPT heredoc) -- Uses ANSI escape codes for color - -### 5. Installer Prompt -- "ARCHIPELAGO BITCOIN NODE OS / Automatic Installer" -- In the systemd service wrapper: `/usr/local/bin/archipelago-start-installer` -- Built inside the debootstrap container in Step 2 - -## Dev Workflow - -### Quick preview (no ISO needed) -```bash -# Edit background, see it instantly: -open image-recipe/branding/grub-theme/background.png - -# Generate procedural background: -python3 image-recipe/branding/generate-grub-background.py /tmp/bg.png && open /tmp/bg.png - -# Generate Plymouth logo: -python3 image-recipe/branding/generate-plymouth-logo.py /tmp/logo.png && open /tmp/logo.png -``` - -### Full boot test (needs base ISO) -```bash -./image-recipe/dev-branding.sh [path-to-iso] -# Or via dev-start.sh option 0 -``` -Extracts ISO → patches branding → repackages → boots QEMU. ~30 seconds. 
- -### What to edit -| File | Affects | -|------|---------| -| `branding/grub-theme/background.png` | GRUB boot screen image | -| `branding/grub-theme/theme.txt` | GRUB menu colors, layout | -| `branding/plymouth-theme/logo.png` | Plymouth boot logo | -| `branding/plymouth-theme/archipelago.script` | Plymouth animation/progress | -| `branding/generate-grub-background.py` | Procedural background generator | -| `branding/generate-plymouth-logo.py` | Procedural logo generator | - -## Image Specs - -| Asset | Format | Size | Notes | -|-------|--------|------|-------| -| GRUB background | PNG | 1024x768 recommended | GRUB scales any size, but large images slow boot | -| Plymouth logo | PNG (RGBA) | 256x256 recommended | Transparent background | -| GRUB fonts | .pf2 | Generated | `grub-mkfont -s SIZE -o out.pf2 input.ttf` | - -## Build Integration - -GRUB theme is installed in Step 2 (after artifacts placed): -- Static `background.png` copied from `branding/grub-theme/` -- Falls back to Python generator if static file missing -- Fonts generated in debootstrap container with `grub-mkfont` - -Plymouth theme installed in Step 3 (component copy) + Step 4 (auto-install.sh): -- Files copied to `$ARCH_DIR/plymouth-theme/` in ISO -- Auto-install.sh copies to target at `/usr/share/plymouth/themes/archipelago/` -- Sets as default via `plymouth-set-default-theme` - -GRUB theme also installed on TARGET system (not just installer): -- Auto-install.sh copies theme to `/mnt/target/boot/grub/themes/archipelago/` -- Adds `GRUB_THEME=` to `/mnt/target/etc/default/grub` diff --git a/.claude/skills/iso-debug/SKILL.md b/.claude/skills/iso-debug/SKILL.md deleted file mode 100644 index 99426a3b..00000000 --- a/.claude/skills/iso-debug/SKILL.md +++ /dev/null @@ -1,175 +0,0 @@ ---- -name: iso-debug -description: Diagnose and fix Archipelago ISO boot failures. 
Covers hybrid MBR/GPT, UEFI/BIOS boot chains, live-boot initramfs, GRUB/ISOLINUX configuration, xorriso packaging, and USB boot compatibility. Use when ISO doesn't boot, installer doesn't start, kernel panics, or USB isn't recognized by BIOS/UEFI. -allowed-tools: Bash, Read, Grep, Glob, Agent, Edit ---- - -# ISO Boot Debugging — Archipelago Custom Base - -Systematic diagnosis of ISO boot failures for the Archipelago debootstrap-based installer. - -## Architecture - -The ISO boot chain has 5 stages. Failure at any stage has distinct symptoms: - -| Stage | Component | Symptom if broken | -|-------|-----------|-------------------| -| 1. BIOS/UEFI recognition | Hybrid MBR + GPT | USB not in boot menu at all | -| 2. Bootloader | ISOLINUX (BIOS) or GRUB EFI (UEFI) | Black screen after selecting USB | -| 3. Kernel + initramfs | vmlinuz + initrd.img with live-boot | Kernel panic or initramfs shell | -| 4. Root filesystem | live-boot mounts filesystem.squashfs | "No root device" or blank screen | -| 5. Installer | systemd service + auto-install.sh | Boots to shell but no installer prompt | - -## Stage 1: USB Not Recognized - -**Most common cause**: Wrong MBR code in the ISO hybrid boot sector. - -### Diagnosis -```bash -# Compare first 16 bytes of working vs broken ISO -xxd -l 16 working.iso -xxd -l 16 broken.iso - -# Check for valid boot signature at offset 510 -xxd -s 510 -l 2 broken.iso -# Must show: 55aa -``` - -### Known MBR codes -- `4552` — Debian Live MBR (extracted from Debian Live ISO). **Works on all tested hardware.** -- `33ed` — ISOLINUX package generic isohdpfx.bin. **Does NOT work on some UEFI hardware.** - -### Fix -The project ships the proven MBR at `image-recipe/branding/isohdpfx.bin` (432 bytes, starts with `4552`). 
-Build script uses it via: `-isohybrid-mbr "$SCRIPT_DIR/branding/isohdpfx.bin"` - -### xorriso flags that matter -- `-isohybrid-mbr ` — Embeds MBR code for USB hybrid boot -- `-isohybrid-gpt-basdat` — Adds GPT partition entry for EFI (REQUIRED for UEFI USB boot) -- `-partition_offset 16` — Reserves space for GPT table (REQUIRED — without this some UEFI firmware won't see the USB) -- `-eltorito-alt-boot -e boot/grub/efi.img -no-emul-boot` — EFI boot catalog entry - -### Balena Etcher -Writes raw ISO to USB — no special formatting. If the ISO boots in QEMU but not on hardware, the MBR code is the issue, not Etcher. - -## Stage 2: Bootloader Failure - -### BIOS path: ISOLINUX -Required files in ISO: `isolinux/isolinux.bin`, `isolinux/ldlinux.c32`, `isolinux/boot.cat` -Config: `isolinux/isolinux.cfg` - -### UEFI path: GRUB -Required files: `EFI/BOOT/BOOTX64.EFI`, `boot/grub/efi.img`, `boot/grub/grub.cfg` -The EFI image is a FAT32 filesystem containing the GRUB binary, built with: -```bash -grub-mkimage -O x86_64-efi -o BOOTX64.EFI -p /boot/grub \ - part_gpt part_msdos fat iso9660 udf normal boot linux search \ - search_fs_uuid search_fs_file search_label configfile echo cat \ - ls test true loopback gfxterm gfxmenu font png all_video video \ - video_bochs video_cirrus efi_gop efi_uga -``` -**Critical**: `all_video`, `efi_gop`, `efi_uga` needed for display on real hardware. - -### Diagnosis -```bash -# Mount ISO and verify files -sudo mount -o loop,ro broken.iso /mnt -ls -la /mnt/isolinux/ -ls -la /mnt/EFI/BOOT/ -cat /mnt/boot/grub/grub.cfg -cat /mnt/isolinux/isolinux.cfg -sudo umount /mnt -``` - -## Stage 3: Kernel / Initramfs - -### live-boot -The initramfs must contain live-boot hooks. Without them, the kernel boots but can't find root. 
- -**Kernel params required**: `boot=live components` -- `boot=live` — triggers live-boot's initramfs scripts -- `components` — tells live-boot to scan live/ for squashfs files - -### Verify initramfs has live-boot -```bash -TMPDIR=$(mktemp -d) -unmkinitramfs /path/to/initrd.img $TMPDIR -# live-boot installs scripts/live as a FILE (not directory) -ls -la $TMPDIR/scripts/live # or $TMPDIR/main/scripts/live -file $TMPDIR/scripts/live # Should say "ASCII text" -``` - -### Common initramfs failures -1. **live-boot not installed**: debootstrap `--include` can't resolve its deps. Must install via `chroot apt-get` after debootstrap. -2. **Broken initramfs from container build**: `update-initramfs` needs `/proc`, `/sys`, `/dev` mounted in the chroot. -3. **scripts/live is a FILE not directory**: Verification code must use `[ -e ]` not `[ -d ]`. - -## Stage 4: Root Filesystem - -live-boot searches for squashfs files in `live/` on the boot media. -- Mounts boot media (USB/CDROM) at `/run/live/medium` -- Finds `live/filesystem.squashfs` -- Mounts it read-only, creates tmpfs overlay -- pivot_root into the combined root - -### Diagnosis -If you get an initramfs shell prompt `(initramfs)`: -```bash -# Inside initramfs shell: -ls /run/live/medium/ # Is boot media mounted? -ls /run/live/medium/live/ # Is squashfs there? -cat /proc/cmdline # Does it have boot=live? -``` - -## Stage 5: Installer Not Starting - -The installer auto-starts via: -1. Getty auto-login on tty1 (root, no password) -2. systemd service `archipelago-installer.service` -3. 
Wrapper script searches for boot media at: `/run/live/medium`, `/run/archiso`, `/cdrom` - -### Diagnosis -If you get a shell but no installer prompt: -```bash -systemctl status archipelago-installer.service -cat /usr/local/bin/archipelago-start-installer -ls /run/live/medium/archipelago/auto-install.sh -``` - -## Quick Verification Checklist - -Run against any ISO before flashing: -```bash -ISO=path/to/iso -MNT=$(mktemp -d) -sudo mount -o loop,ro $ISO $MNT - -echo "=== MBR ===" && xxd -l 4 $ISO -echo "=== Boot sig ===" && xxd -s 510 -l 2 $ISO -echo "=== Files ===" && for f in live/vmlinuz live/initrd.img live/filesystem.squashfs isolinux/isolinux.bin EFI/BOOT/BOOTX64.EFI boot/grub/grub.cfg archipelago/auto-install.sh; do [ -e $MNT/$f ] && echo "OK: $f" || echo "MISSING: $f"; done -echo "=== Kernel params ===" && grep "boot=live" $MNT/boot/grub/grub.cfg && echo OK || echo MISSING -echo "=== live-boot ===" && INITRD=$(mktemp -d) && unmkinitramfs $MNT/live/initrd.img $INITRD 2>/dev/null && ([ -e $INITRD/scripts/live ] && echo "OK" || echo "MISSING") - -sudo umount $MNT -``` - -## Key Files - -| File | Purpose | -|------|---------| -| `image-recipe/build-auto-installer-iso.sh` | Main build script (~2600 lines) | -| `image-recipe/branding/isohdpfx.bin` | Proven MBR code (432 bytes) | -| `image-recipe/branding/grub-theme/` | GRUB theme (theme.txt + background.png) | -| `image-recipe/branding/plymouth-theme/` | Plymouth boot splash | -| `.gitea/workflows/build-iso-dev.yml` | CI workflow with smoke test | -| `image-recipe/test-iso-qemu.sh` | QEMU testing script | -| `image-recipe/dev-branding.sh` | Quick branding iteration (patch + repackage) | - -## Infrastructure - -| What | Where | -|------|-------| -| CI runner | gitea-runner.service on 192.168.1.228 | -| ISO builds | FileBrowser at http://192.168.1.228:8083 → Builds/ | -| Dev branch | dev-iso (separate CI: build-iso-dev.yml) | -| Main branch | main (CI: build-iso.yml) — DO NOT break | diff --git 
a/.claude/skills/iso-debug/references/boot-chain-reference.md b/.claude/skills/iso-debug/references/boot-chain-reference.md deleted file mode 100644 index 93b86118..00000000 --- a/.claude/skills/iso-debug/references/boot-chain-reference.md +++ /dev/null @@ -1,383 +0,0 @@ -# Custom Debian ISO Boot Chain — Technical Reference - -Expert reference for building and debugging custom bootable Debian-based ISOs. -Covers hybrid MBR/GPT, live-boot, debootstrap, GRUB, ISOLINUX, Plymouth, and xorriso. - ---- - -## 1. Hybrid MBR/GPT for USB Boot - -### What is isohdpfx.bin? -The first 432 bytes of a hybrid-bootable ISO. Contains the Master Boot Record code that BIOS firmware executes when booting from USB. Different sources produce different MBR code: - -| Source | First bytes | Compatibility | -|--------|-------------|---------------| -| Debian Live ISO (`dd if=debian-live.iso bs=1 count=432`) | `45 52` | Best — works on all tested hardware | -| `/usr/lib/ISOLINUX/isohdpfx.bin` | `33 ed` | Generic — fails on some UEFI hardware | -| Manually built with `isohybrid` | Varies | Unpredictable | - -**Rule**: Always extract MBR from a known-working ISO. Never rely on the generic ISOLINUX one. - -### CRITICAL: Embedded vs Appended EFI — Real Hardware Impact - -Two approaches for EFI boot in xorriso. They produce DIFFERENT hybrid structures: - -| Approach | xorriso flag | cyl-align | CHS geometry | Real hardware | -|----------|-------------|-----------|--------------|---------------| -| **Embedded** | `-e boot/grub/efi.img` | `cyl-align-on` | Non-zero (e.g. 244/32) | **WORKS** | -| **Appended** | `-append_partition 2 ... -e --interval:appended_partition_2:all::` | `cyl-align-off` | `0/0` | **FAILS** | - -The Will Haley guide recommends appended, but on our Dell hardware only embedded works. -Use `xorriso -indev image.iso -report_system_area plain` to check which mode an ISO uses. - -### Common gotcha: installer minbase missing sudo -debootstrap --variant=minbase does NOT include sudo. 
If the installer runs as root -(via auto-login), do NOT use sudo in scripts. `bash: sudo: command not found` is the symptom. - -### xorriso flags for hybrid boot -```bash -xorriso -as mkisofs -o output.iso \ - -isohybrid-mbr isohdpfx.bin \ # Embeds MBR for BIOS USB boot - -c isolinux/boot.cat \ # El Torito boot catalog - -b isolinux/isolinux.bin \ # BIOS bootloader - -no-emul-boot -boot-load-size 4 -boot-info-table \ - -eltorito-alt-boot \ # Second boot entry (EFI) - -e boot/grub/efi.img \ # EFI boot image - -no-emul-boot \ - -isohybrid-gpt-basdat \ # Adds GPT partition for EFI - -partition_offset 16 \ # Space for GPT table — REQUIRED for UEFI - /path/to/iso/contents -``` - -**Critical flags**: -- `-isohybrid-gpt-basdat`: Without this, UEFI firmware won't see the EFI partition -- `-partition_offset 16`: Reserves 16 sectors for GPT. Without it, some UEFI firmware ignores the USB entirely -- `-isohybrid-mbr`: Without this, the ISO won't boot from USB at all (only CD-ROM) - -### Balena Etcher -Writes the ISO byte-for-byte to USB — no reformatting, no special partition creation. If the ISO works with `dd`, it works with Etcher. If BIOS doesn't see the USB, the MBR code is wrong, not Etcher. - -### Verifying hybrid structure -```bash -xxd -l 4 image.iso # MBR code (should be 45 52 for Debian Live) -xxd -s 510 -l 2 image.iso # Boot signature (must be 55 aa) -xxd -s 512 -l 8 image.iso # GPT signature at LBA 1 (should be "EFI PART") -file image.iso # Should say "DOS/MBR boot sector" and "bootable" -``` - ---- - -## 2. live-boot Package - -### What it does -Provides initramfs hooks that mount a squashfs file as the root filesystem using overlayfs. This is how every Debian/Ubuntu live ISO works. 
- -Boot flow: kernel → initramfs → live-boot scripts → find squashfs → mount overlayfs → pivot_root → systemd - -### Package structure -- `live-boot` (~29KB): Main package, boot scripts -- `live-boot-initramfs-tools` (~6KB): Initramfs hooks that get baked into initrd.img - -**Critical**: `scripts/live` is a **FILE**, not a directory. Verification must use `[ -e ]` not `[ -d ]`. - -### Kernel parameters -| Parameter | Required | Effect | -|-----------|----------|--------| -| `boot=live` | YES | Activates live-boot's initramfs hooks | -| `components` | YES | Scans live/ for additional squashfs modules | -| `toram` | No | Copies squashfs to RAM (faster, allows USB removal) | -| `persistence` | No | Enables writable overlay on a partition labeled "persistence" | -| `quiet` | No | Suppresses boot messages | -| `splash` | No | Enables Plymouth splash screen | -| `console=ttyS0,115200` | No | Serial console for QEMU debugging | - -### Where live-boot mounts things -- `/run/live/medium` — The boot media (USB/CDROM) mount point -- `/run/live/rootfs/filesystem.squashfs` — The mounted squashfs -- `/run/live/overlay` — The tmpfs overlay for writes - -### Verifying live-boot in initramfs -```bash -TMPDIR=$(mktemp -d) -unmkinitramfs /path/to/initrd.img $TMPDIR -# Check for live-boot scripts -file $TMPDIR/scripts/live # Should be "ASCII text" -# OR (some initramfs have main/ prefix) -file $TMPDIR/main/scripts/live -``` - -### Common failures -1. **live-boot not in initrd**: Installed in rootfs but initramfs not regenerated after -2. **Missing kernel params**: `boot=live` not in GRUB/ISOLINUX config -3. **Broken initramfs**: Built without /proc /sys /dev mounted in chroot -4. **Wrong verification**: `[ -d scripts/live ]` fails because it's a file - ---- - -## 3. debootstrap for Installer Environments - -### Variants -- `--variant=minbase`: Absolute minimum (~150MB). Only essential + apt. Good for installer squashfs. -- Default (no variant): Full base system (~300MB). 
More packages, fewer missing deps. - -### --include limitations -debootstrap's minbase resolver is simplified and **cannot resolve complex dependency chains**. Packages like `live-boot` that depend on `initramfs-tools` which depends on many other packages will silently fail or be skipped. - -**Fix**: Install complex packages via `chroot apt-get` after debootstrap completes: -```bash -debootstrap --variant=minbase --include=basic,packages bookworm /installer http://deb.debian.org/debian -# Then: -mount --bind /proc /installer/proc -mount --bind /sys /installer/sys -mount --bind /dev /installer/dev -chroot /installer apt-get update -chroot /installer apt-get install -y live-boot live-boot-initramfs-tools -umount /installer/dev /installer/sys /installer/proc -``` - -### Initramfs generation inside containers -`update-initramfs` REQUIRES `/proc`, `/sys`, `/dev` to be mounted in the chroot. Without them: -- Module detection fails (can't read /proc/modules) -- Device nodes missing (can't detect hardware) -- The resulting initramfs boots but can't load kernel modules - -### Container-in-container considerations -When running debootstrap inside a Podman/Docker container on a CI runner: -- `--privileged` flag needed for chroot to work -- The container runtime may kill the container after debootstrap exits if using `set -e` -- proc/sys/dev mounts inside the debootstrapped chroot work fine with `--privileged` - ---- - -## 4. 
GRUB Theming - -### theme.txt format -``` -desktop-color: "#0a0a0a" # Fallback background color -desktop-image: "background.png" # Background image (any PNG, GRUB scales) -title-text: "" # Empty = hide title - -+ boot_menu { - left = 25% - top = 40% - width = 50% - height = 30% - item_color = "#aaaaaa" # Normal menu item color - selected_item_color = "#fb923c" # Selected item color - item_height = 36 - item_spacing = 8 - scrollbar = false -} - -+ label { - left = 25% - top = 20% - width = 50% - text = "Some Text" - color = "#f7931a" - align = "center" -} -``` - -**IMPORTANT**: Do NOT specify `font = "Name Size"` in theme elements unless you know the exact internal font name. If GRUB can't find the font, the ENTIRE theme fails to load and you get the ugly default. - -### Font handling -```bash -# Generate .pf2 font file -grub-mkfont -s 16 -o dejavu_16.pf2 /usr/share/fonts/truetype/dejavu/DejaVuSans.ttf - -# In grub.cfg, load fonts BEFORE setting theme: -loadfont /boot/grub/font.pf2 -loadfont /boot/grub/themes/archipelago/dejavu_16.pf2 -set theme=/boot/grub/themes/archipelago/theme.txt -``` - -### Background images -- Any PNG works, GRUB scales to screen resolution -- Smaller images (1024x768) load faster -- Large images (3000x2000+) add seconds to boot and may fail on limited GRUB heap - -### grub-mkimage — essential modules for ISO boot -```bash -grub-mkimage -O x86_64-efi -o BOOTX64.EFI -p /boot/grub \ - part_gpt part_msdos fat iso9660 udf \ # Filesystem access - normal boot linux search search_fs_uuid search_fs_file search_label \ - configfile echo cat ls test true \ # Basic commands - loopback \ # Loop device support - gfxterm gfxmenu font png \ # Graphical display - all_video video video_bochs video_cirrus \ # Video drivers - efi_gop efi_uga # EFI display protocols -``` - -Missing `all_video`/`efi_gop` = black screen on real hardware (works in QEMU). 
- -### EFI boot image creation -```bash -dd if=/dev/zero of=efi.img bs=1M count=4 -mkfs.vfat efi.img -mmd -i efi.img ::/EFI ::/EFI/BOOT -mcopy -i efi.img BOOTX64.EFI ::/EFI/BOOT/BOOTX64.EFI -``` - ---- - -## 5. Plymouth Boot Splash - -### Theme types -- **script**: Most flexible. Lua-like scripting with sprites, animations, callbacks. -- **two-step**: Simple logo + spinner. Less customizable but easier. -- **fade-in**: Logo fades in. Minimal. - -### Script theme structure -``` -/usr/share/plymouth/themes/mytheme/ - mytheme.plymouth # Theme metadata - mytheme.script # Animation script - logo.png # Logo image (PNG with alpha) -``` - -### mytheme.plymouth -```ini -[Plymouth Theme] -Name=MyTheme -Description=Custom boot splash -ModuleName=script - -[script] -ImageDir=/usr/share/plymouth/themes/mytheme -ScriptFile=/usr/share/plymouth/themes/mytheme/mytheme.script -``` - -### Script language key functions -```javascript -Window.SetBackgroundTopColor(r, g, b); // 0.0-1.0 floats -Window.SetBackgroundBottomColor(r, g, b); -image = Image("logo.png"); -sprite = Sprite(image); -sprite.SetX(x); sprite.SetY(y); sprite.SetOpacity(0.0-1.0); -Plymouth.SetRefreshFunction(fn); // Called every frame -Plymouth.SetBootProgressFunction(fn); // fn(duration, progress) -Plymouth.SetDisplayPasswordFunction(fn); // fn(prompt, bullets) -Plymouth.SetQuitFunction(fn); -screen_w = Window.GetWidth(); -screen_h = Window.GetHeight(); -``` - -### Setting default theme -```bash -plymouth-set-default-theme mytheme -# OR manually: -ln -sf /usr/share/plymouth/themes/mytheme/mytheme.plymouth /etc/alternatives/default.plymouth -``` - -### Kernel params -- `splash` in GRUB_CMDLINE_LINUX_DEFAULT enables Plymouth -- `quiet` suppresses text that would overlay Plymouth - ---- - -## 6. 
ISOLINUX/SYSLINUX - -### Required files -| File | Source | Purpose | -|------|--------|---------| -| `isolinux.bin` | `/usr/lib/ISOLINUX/isolinux.bin` | BIOS bootloader | -| `ldlinux.c32` | `/usr/lib/syslinux/modules/bios/ldlinux.c32` | Core library (REQUIRED) | -| `menu.c32` | `/usr/lib/syslinux/modules/bios/menu.c32` | Text menu UI | -| `libutil.c32` | `/usr/lib/syslinux/modules/bios/libutil.c32` | Utility library | -| `boot.cat` | Auto-generated by xorriso | El Torito boot catalog | -| `isohdpfx.bin` | Extracted from working ISO | Hybrid MBR code | - -### Configuration (isolinux.cfg) -``` -UI menu.c32 -PROMPT 0 -TIMEOUT 50 # 5 seconds (units of 1/10 second) -DEFAULT install - -MENU TITLE MY INSTALLER -MENU COLOR border 30;44 #40ffffff #00000000 std -MENU COLOR title 1;36;44 #ff00b7ff #00000000 std -MENU COLOR sel 7;37;40 #ffffffff #ff333333 std -MENU COLOR unsel 37;44 #ffaaaaaa #00000000 std - -LABEL install - MENU LABEL Install System - KERNEL /live/vmlinuz - APPEND initrd=/live/initrd.img boot=live components quiet - MENU DEFAULT -``` - -### menu.c32 vs vesamenu.c32 -- `menu.c32`: Text-mode menu. More compatible, no background image. -- `vesamenu.c32`: VESA graphical menu. Supports background PNG, but some hardware/VMs don't support VESA. - ---- - -## 7. 
Testing Without Real Hardware - -### QEMU UEFI boot -```bash -qemu-system-x86_64 \ - -machine q35 \ - -drive if=pflash,format=raw,readonly=on,file=/path/to/OVMF_CODE.fd \ - -m 4G -smp 2 \ - -boot d -cdrom image.iso \ - -drive if=virtio,format=qcow2,file=test-disk.qcow2 \ - -vga virtio -display default -``` - -### QEMU BIOS boot (sees ISOLINUX) -```bash -qemu-system-x86_64 \ - -machine pc \ - -m 4G -smp 2 \ - -boot d -cdrom image.iso \ - -drive if=virtio,format=qcow2,file=test-disk.qcow2 \ - -vga virtio -display default -``` - -### Serial console capture -Add to QEMU: `-serial file:/tmp/serial.log` -Add to kernel params: `console=ttyS0,115200 console=tty0` - -### ISO structure verification (no boot required) -```bash -MNT=$(mktemp -d) -sudo mount -o loop,ro image.iso $MNT - -# Check all critical files -for f in live/vmlinuz live/initrd.img live/filesystem.squashfs \ - isolinux/isolinux.bin EFI/BOOT/BOOTX64.EFI boot/grub/grub.cfg; do - [ -e $MNT/$f ] && echo "OK: $f" || echo "MISSING: $f" -done - -# Check initramfs for live-boot -INITRD=$(mktemp -d) -unmkinitramfs $MNT/live/initrd.img $INITRD -[ -e $INITRD/scripts/live ] && echo "live-boot: OK" || echo "live-boot: MISSING" - -# Check kernel params -grep "boot=live" $MNT/boot/grub/grub.cfg && echo "params: OK" - -sudo umount $MNT -``` - ---- - -## 8. 
Security Considerations for Custom ISOs - -### Supply chain -- Pin the Debian mirror URL (don't use redirectors in production) -- Verify package signatures (debootstrap does this by default) -- Pin kernel and GRUB package versions for reproducibility - -### Installer security -- Auto-install.sh runs as root — validate all inputs before path construction -- LUKS key generation must use CSPRNG (`/dev/urandom`, never `/dev/random` which blocks) -- Drop the LUKS key file after writing to crypttab (or store in root-only location with 0400) - -### Boot security -- Secure Boot requires signed GRUB EFI binary (shim-signed package) -- Without Secure Boot, the unsigned BOOTX64.EFI works but users must disable Secure Boot in BIOS -- The MBR code (isohdpfx.bin) is not signed — Secure Boot only validates EFI path diff --git a/.claude/skills/lint/SKILL.md b/.claude/skills/lint/SKILL.md deleted file mode 100644 index 5bf386a2..00000000 --- a/.claude/skills/lint/SKILL.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -name: lint -description: Run all linters and type checks for the Archipelago project -allowed-tools: Bash, Read, Grep -argument-hint: "[backend|frontend|all]" ---- - -Run linters and type-checks for $ARGUMENTS (default: all). 
- -## Frontend Linting - -```bash -cd neode-ui - -# Type check -npm run type-check 2>&1 - -# Check for any `any` types (should be zero) -grep -rn ': any' src/ --include='*.ts' --include='*.vue' | grep -v node_modules | grep -v '.d.ts' - -# Check for inline Tailwind violations (long class strings) -grep -rn 'class="[^"]\{100,\}"' src/ --include='*.vue' - -# Check for TODO/FIXME -grep -rn 'TODO\|FIXME' src/ --include='*.ts' --include='*.vue' - -# Check for console.log (should be cleaned before production) -grep -rn 'console\.\(log\|warn\|error\)' src/ --include='*.ts' --include='*.vue' | wc -l -``` - -## Backend Linting (on dev server) - -```bash -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 \ - 'source ~/.cargo/env && cd ~/archy/core && cargo clippy --all-targets --all-features 2>&1 && cargo fmt --all -- --check 2>&1' -``` - -## Script Linting - -```bash -# Check for scripts missing set -e -for f in scripts/*.sh; do - if ! head -5 "$f" | grep -q 'set -e'; then - echo "MISSING set -e: $f" - fi -done - -# Check for hardcoded IPs (should use variables) -grep -rn '192\.168\.1\.' scripts/ --include='*.sh' | grep -v deploy-config -``` - -Report all issues found with severity (critical/warning/info). diff --git a/.claude/skills/mesh/SKILL.md b/.claude/skills/mesh/SKILL.md deleted file mode 100644 index 656f8e1d..00000000 --- a/.claude/skills/mesh/SKILL.md +++ /dev/null @@ -1,155 +0,0 @@ ---- -name: mesh -description: Mesh networking development for Archipelago — protocol, crypto, serial driver, transport abstraction, and LoRa chat. Use when working on mesh radio, Meshcore protocol, LoRa messaging, transport layers, peer discovery, or off-grid communication features. ---- - -# Mesh Networking Skill - -## Architecture - -The mesh subsystem enables offline peer discovery and end-to-end encrypted messaging between Archipelago nodes via Meshcore LoRa radio devices (Heltec V3, T-Beam, RAK WisBlock). 
- -``` -USB Meshcore Device (115200 baud) - ↕ serial2-tokio -core/archipelago/src/mesh/ -├── mod.rs — MeshService: lifecycle, config, public API -├── types.rs — MeshPeer, MeshMessage, MeshStatus, MeshEvent -├── protocol.rs — Meshcore binary frame protocol (encode/decode) -├── serial.rs — MeshcoreDevice: async serial driver -├── crypto.rs — X25519 ECDH + ChaCha20-Poly1305 encryption -└── listener.rs — Background tokio task: serial reader + dispatcher - ↕ RPC -core/archipelago/src/api/rpc/mesh.rs — 6 endpoints - ↕ HTTP -neode-ui/src/stores/mesh.ts — Pinia store -neode-ui/src/views/Mesh.vue — Two-column chat UI -``` - -## Key Files - -### Backend (Rust) -- `core/archipelago/src/mesh/mod.rs` — MeshService (start/stop/status/peers/messages/send/configure) -- `core/archipelago/src/mesh/types.rs` — All shared types -- `core/archipelago/src/mesh/protocol.rs` — Binary frame format, command builders, response parsers (12 unit tests) -- `core/archipelago/src/mesh/serial.rs` — USB serial driver, handshake, device detection -- `core/archipelago/src/mesh/crypto.rs` — X25519 key agreement + ChaCha20-Poly1305 (7 unit tests) -- `core/archipelago/src/mesh/listener.rs` — Background event loop, auto-reconnect, peer cache -- `core/archipelago/src/api/rpc/mesh.rs` — RPC handlers (mesh.status/peers/messages/send/broadcast/configure) -- `core/archipelago/src/server.rs` — MeshService initialization (non-blocking) -- `core/archipelago/src/identity.rs` — Ed25519 keypair, DID, X25519 derivation - -### Frontend (Vue 3 + TypeScript) -- `neode-ui/src/stores/mesh.ts` — Pinia store with unread tracking -- `neode-ui/src/views/Mesh.vue` — Full chat UI (~1000 lines) -- `neode-ui/src/router/index.ts` — Route: `/dashboard/mesh` - -### Mock Backend -- `neode-ui/mock-backend.js` — Dev mode mesh RPC responses (mesh.status/peers/messages/send/broadcast/configure) - -## Protocol Reference - -### Meshcore Frame Format -- Outbound: `<` (0x3C) + 2-byte LE length + data -- Inbound: `>` (0x3E) + 2-byte LE length 
+ data -- Max LoRa payload: 160 bytes -- Baud: 115200, 8N1 - -### Key Commands -| Byte | Command | Description | -|------|---------|-------------| -| 0x01 | APP_START | Init session with version negotiation | -| 0x02 | SEND_TXT_MSG | Direct message (6-byte pubkey prefix) | -| 0x03 | SEND_CHANNEL_TXT_MSG | Broadcast on channel | -| 0x04 | GET_CONTACTS | Fetch contact list | -| 0x06 | SET_DEVICE_TIME | Sync device clock | -| 0x07 | SEND_SELF_ADVERT | Broadcast identity | -| 0x0A | SYNC_NEXT_MESSAGE | Retrieve queued messages | - -### Identity Wire Format -`ARCHY:2:{ed25519_hex_64}:{x25519_hex_64}` (137 bytes, fits 160) - -### Encryption -- X25519 Diffie-Hellman from Ed25519 keys (RFC 7748 clamping) -- ChaCha20-Poly1305 AEAD with random 12-byte nonce -- Wire: `[nonce 12B] + [ciphertext + tag 16B]` — max 132B plaintext - -## RPC Endpoints - -| Method | Params | Returns | -|--------|--------|---------| -| `mesh.status` | — | MeshStatus | -| `mesh.peers` | — | `{peers, count}` | -| `mesh.messages` | `{limit?}` | `{messages, count}` | -| `mesh.send` | `{contact_id, message}` | `{sent, message_id, encrypted}` | -| `mesh.broadcast` | — | `{broadcast}` | -| `mesh.configure` | `{enabled?, device_path?, channel_name?, broadcast_identity?, advert_name?}` | `{configured}` | - -## Development Workflow - -### Building & Testing (on dev server, NOT macOS) -```bash -# Deploy mesh changes -./scripts/deploy-to-target.sh --live - -# Run mesh unit tests on server -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 \ - 'cd ~/archy/core && cargo test --all-features -- mesh' - -# Check device is detected -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 \ - 'ls -la /dev/ttyUSB* /dev/ttyACM* 2>/dev/null' - -# Watch mesh logs -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 \ - 'sudo journalctl -u archipelago -f | grep -i mesh' -``` - -### Frontend Dev (local, mock backend) -```bash -cd neode-ui && npm start -# Mesh mock data at 
http://localhost:8100/dashboard/mesh -``` - -## Roadmap Phases - -### Phase 1: Core Implementation (COMPLETE) -- Meshcore binary protocol, serial driver, crypto, listener, RPC, Vue UI - -### Phase 2: Mesh as Federation Transport -- NodeTransport trait abstraction (mesh/tor/lan backends) -- Transport priority: Mesh (1) > LAN/mDNS (2) > Tor (3) -- Chunked message protocol for >160B payloads (Reed-Solomon FEC) -- CBOR delta sync instead of full JSON state -- Transport indicator per peer in federation UI -- "Mesh only" off-grid mode -- Dependencies: `ciborium` (CBOR), `reed-solomon-erasure` (FEC), `mdns-sd` (LAN discovery) - -### Phase 3: Encrypted Mesh Messaging -- Double Ratchet (Signal protocol) over LoRa -- X3DH key agreement using existing Ed25519/X25519 -- Store-and-forward relay for offline peers (24h TTL) -- Message types: TEXT, ALERT, INVOICE (bolt11), PSBT_HASH, COORDINATE -- Per-peer chat threads, delivery status, offline indicators - -### Phase 4: Off-Grid Bitcoin Operations -- Compact block headers over mesh (SPV verification) -- Transaction relay via internet-connected mesh peer -- Lightning payment coordination over mesh -- Emergency alert system (signed alerts, GPS, dead man's switch) - -### Phase 5: Mesh Network Intelligence -- Adaptive routing, signal strength mapping, spreading factor adjustment -- Multi-path routing for reliability -- Steganographic modes -- Additional hardware: T-Beam, RAK WisBlock, WiFi mesh (802.11s), BLE, Blockstream Satellite - -## Conventions - -- All crypto uses existing identity infrastructure (Ed25519 signing key → X25519 derivation) -- Mesh init is non-blocking — errors logged but don't crash server -- Config persists to `{data_dir}/mesh-config.json` -- Message buffer: circular, max 100 messages -- Never build Rust on macOS — always deploy to server -- USB device paths: `/dev/ttyUSB*` and `/dev/ttyACM*` -- `archipelago` user must be in `dialout` group for serial access diff --git a/.claude/skills/podman-doctor/SKILL.md 
b/.claude/skills/podman-doctor/SKILL.md deleted file mode 100644 index 38564b70..00000000 --- a/.claude/skills/podman-doctor/SKILL.md +++ /dev/null @@ -1,275 +0,0 @@ ---- -name: podman-doctor -description: > - Comprehensive Podman container diagnostic for Archipelago. Audits all running containers, - port mappings, network connectivity, health status, restart policies, and config consistency - across all 4 layers (backend Rust, Podman runtime, Nginx proxy, frontend routing). - Handles rootless Podman (user: archipelago, UID 1000, subuid 100000:65536). - Use when asked to "diagnose containers", "check podman", "why is app not working", - "container health check", "port not reachable", "audit containers", "podman status", - or when any container/app is misbehaving. -allowed-tools: Bash Read Glob Grep ---- - -# Podman Doctor — Container Infrastructure Diagnostics - -Systematic diagnostic for Archipelago's **rootless Podman** container stack. Catches port conflicts, network misconfigurations, health failures, missing restart policies, UID mapping issues, and config drift across all layers. - -**SSH command**: `ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228` - -> **ROOTLESS PODMAN**: Archipelago runs Podman as the `archipelago` user (UID 1000), NOT root. -> Never use `sudo podman` — use plain `podman` after SSH'ing in as the `archipelago` user. -> Container UIDs are mapped via subuid: container UID N → host UID (100000 + N). - -If $ARGUMENTS is provided, focus diagnosis on that specific app/container. Otherwise run full audit. 
- -## Workflow - -### Step 1: Gather Runtime State - -Run these on the server (as `archipelago` user — NO sudo): - -```bash -# All containers with status, ports, networks -podman ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}\t{{.Networks}}" - -# Check for port conflicts on known ports -ss -tlnp | grep -E ":(80|443|3000|4080|5678|8080|8081|8082|8083|8085|8096|8123|8173|8174|8175|8240|8332|8333|8334|8888|9735|10009|11434|23000|50001)\b" -``` - -### Step 2: Rootless Podman Health Check - -Rootless Podman has specific requirements that must be verified: - -```bash -# Verify running as archipelago user (NOT root) -whoami # Must be "archipelago" -id # Must show uid=1000(archipelago) - -# Check XDG_RUNTIME_DIR is set (required for rootless podman socket) -echo "XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR" # Must be /run/user/1000 - -# Verify subuid/subgid mapping exists -grep archipelago /etc/subuid # Must show: archipelago:100000:65536 -grep archipelago /etc/subgid # Must show: archipelago:100000:65536 - -# Verify user lingering is enabled (keeps user services after logout) -ls /var/lib/systemd/linger/ | grep archipelago # Must exist - -# Check podman storage is accessible -podman info --format "{{.Store.GraphRoot}}" # ~/.local/share/containers/storage -ls -la ~/.local/share/containers/storage/ 2>/dev/null || echo "ERROR: Storage not accessible" - -# Check podman socket -ls -la /run/user/1000/podman/ 2>/dev/null || echo "WARNING: No podman socket directory" -``` - -### Step 3: Check Restart Policies - -Every container MUST have `--restart unless-stopped`. This is the #1 cause of downtime after reboots. - -```bash -for c in $(podman ps -a --format "{{.Names}}"); do - echo -n "$c: " - podman inspect "$c" --format "{{.HostConfig.RestartPolicy.Name}}" -done -``` - -**Red flag**: `no` or empty = container won't survive reboot. - -### Step 4: Volume Ownership Audit (Rootless UID Mapping) - -Rootless Podman maps container UIDs via subuid. 
Volume directories must be owned by the MAPPED UID, not the container UID. Formula: `host_uid = 100000 + container_uid` - -```bash -echo "=== Volume Ownership Check ===" - -# Default containers (run as root inside = UID 0 → host UID 100000) -for dir in lnd fedimint homeassistant jellyfin vaultwarden photoprism ollama filebrowser electrumx btcpay immich; do - if [ -d "/var/lib/archipelago/$dir" ]; then - owner=$(stat -c '%u:%g' "/var/lib/archipelago/$dir" 2>/dev/null) - if [ "$owner" != "100000:100000" ]; then - echo "WRONG: /var/lib/archipelago/$dir owned by $owner (should be 100000:100000)" - else - echo " OK: $dir → $owner" - fi - fi -done - -# Bitcoin Knots (container UID 101 → host UID 100101) -if [ -d "/var/lib/archipelago/bitcoin" ]; then - owner=$(stat -c '%u:%g' "/var/lib/archipelago/bitcoin") - [ "$owner" != "100101:100101" ] && echo "WRONG: bitcoin owned by $owner (should be 100101:100101)" || echo " OK: bitcoin → $owner" -fi - -# PostgreSQL (container UID 70 → host UID 100070) -for dir in /var/lib/archipelago/*-db /var/lib/archipelago/postgres-*; do - if [ -d "$dir" ]; then - owner=$(stat -c '%u:%g' "$dir") - [ "$owner" != "100070:100070" ] && echo "WRONG: $dir owned by $owner (should be 100070:100070)" || echo " OK: $(basename $dir) → $owner" - fi -done - -# Grafana (container UID 472 → host UID 100472) -if [ -d "/var/lib/archipelago/grafana" ]; then - owner=$(stat -c '%u:%g' "/var/lib/archipelago/grafana") - [ "$owner" != "100472:100472" ] && echo "WRONG: grafana owned by $owner (should be 100472:100472)" || echo " OK: grafana → $owner" -fi - -# MariaDB/MySQL (container UID 999 → host UID 100999) -if [ -d "/var/lib/archipelago/mysql-mempool" ]; then - owner=$(stat -c '%u:%g' "/var/lib/archipelago/mysql-mempool") - [ "$owner" != "100999:100999" ] && echo "WRONG: mysql-mempool owned by $owner (should be 100999:100999)" || echo " OK: mysql-mempool → $owner" -fi -``` - -### Step 5: Verify Port Mapping Consistency - -Cross-reference these 4 layers — 
mismatches between ANY two cause "app not loading" bugs: - -**Layer 1 — Backend Config (Rust)**: Read `core/archipelago/src/api/rpc/package.rs`, look at `get_app_config()` port mappings. - -**Layer 2 — Podman Runtime**: `podman ps --format "{{.Names}}: {{.Ports}}"` - -**Layer 3 — Nginx Proxy**: Read these for `/app/{id}/` location blocks: -- `image-recipe/configs/nginx-archipelago.conf` (HTTP) -- `image-recipe/configs/snippets/archipelago-https-app-proxies.conf` (HTTPS) - -**Layer 4 — Frontend Routing**: Read `neode-ui/src/stores/appLauncher.ts` — `PORT_TO_APP_ID` map. - -| Symptom | Root Cause | -|---------|-----------| -| App iframe shows 502/504 | Nginx proxies to wrong port, or container not running | -| App loads wrong content | Port collision — two containers on same host port | -| Works on port but not /app/ path | Missing nginx location block | -| Frontend can't find app | PORT_TO_APP_ID missing in appLauncher.ts | - -### Step 6: Network Connectivity Audit - -```bash -# Networks and their containers -podman network ls -podman network inspect archy-net 2>/dev/null || echo "WARNING: archy-net missing!" - -# Check container subnet (rootless uses 10.89.x.x, NOT 10.88.x.x) -podman network inspect archy-net --format "{{range .Subnets}}{{.Subnet}}{{end}}" 2>/dev/null -``` - -**Must be on archy-net**: bitcoin-knots, lnd, electrs/electrumx, mempool, btcpay-server, nbxplorer, fedimint, fedimint-gateway, nostr-rs-relay, indeedhub, ollama, open-webui - -**Must NOT be on archy-net**: grafana, nextcloud, filebrowser, vaultwarden, bitcoin-ui, lnd-ui, tailscale (host network) - -### Step 7: UFW Forward Policy Check - -Rootless Podman requires `DEFAULT_FORWARD_POLICY="ACCEPT"` in UFW, otherwise container ports are unreachable from LAN. 
- -```bash -grep DEFAULT_FORWARD_POLICY /etc/default/ufw -# Must be "ACCEPT", NOT "DROP" -# If DROP: containers work locally but NOT from other machines on the network -``` - -### Step 8: Systemd Service Sandbox Check - -The `archipelago.service` must have specific settings relaxed for rootless Podman: - -```bash -# Check critical settings -systemctl cat archipelago.service | grep -E "ProtectHome|PrivateTmp|RestrictNamespaces|ReadWritePaths|XDG_RUNTIME_DIR" -``` - -**Required settings for rootless Podman**: -- `ProtectHome=no` — podman stores images in `~/.local/share/containers/` -- `PrivateTmp=no` or disabled — podman runtime uses `/tmp/podman-run-1000/` -- `RestrictNamespaces=` must NOT be set — rootless podman needs user namespaces -- `ReadWritePaths=` must include `/var/lib/archipelago /run/user /tmp` -- `Environment=XDG_RUNTIME_DIR=/run/user/1000` - -### Step 9: Health Check Status - -```bash -# Containers with health checks — are they passing? -for c in $(podman ps --format "{{.Names}}"); do - health=$(podman inspect "$c" --format "{{.State.Health.Status}}" 2>/dev/null) - if [ -n "$health" ] && [ "$health" != "" ]; then - echo "$c: $health" - fi -done - -# Containers WITHOUT health checks (gap in monitoring) -for c in $(podman ps --format "{{.Names}}"); do - hc=$(podman inspect "$c" --format "{{.Config.Healthcheck}}" 2>/dev/null) - if [ "$hc" = "" ] || [ -z "$hc" ]; then - echo "NO HEALTHCHECK: $c" - fi -done -``` - -### Step 10: Resource & Failure Analysis - -```bash -# Resource usage -podman stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}" - -# Recent deaths (last 24h) -podman events --filter event=died --since 24h 2>/dev/null | tail -20 - -# OOM kills -podman ps -a --format "{{.Names}}" | while read c; do - oom=$(podman inspect "$c" --format "{{.State.OOMKilled}}" 2>/dev/null) - [ "$oom" = "true" ] && echo "OOM KILLED: $c" -done - -# Non-zero exits -podman ps -a --filter status=exited --format 
"{{.Names}}\t{{.Status}}" -``` - -### Step 11: Systemd Integration - -```bash -systemctl is-active archipelago nginx -systemctl --user list-units --type=service 2>/dev/null | grep -i podman -systemctl list-timers --all | grep -i -E "podman|container|archipelago" -``` - -### Step 12: Generate Report - -Produce a structured report: - -``` -## Container Diagnostic Report - -### Rootless Podman Status -- User: archipelago (UID 1000) -- Subuid mapping: [OK/MISSING] -- XDG_RUNTIME_DIR: [OK/MISSING] -- User linger: [enabled/disabled] -- UFW forward policy: [ACCEPT/DROP] - -### Summary -- Total containers: X running, Y stopped, Z unhealthy -- Port conflicts: [list or "none"] -- Missing restart policies: [list or "none"] -- Network issues: [list or "none"] -- UID mapping issues: [list or "none"] -- Health check gaps: [list] - -### Critical Issues (fix immediately) -1. ... - -### Warnings (fix soon) -1. ... - -### Recommended Actions -1. ... -``` - -After diagnosis, suggest running `/podman-fix` for any issues found. - -## Port Reference - -See `references/port-map.md` for the canonical port assignment table across all 4 layers. - -## UID Mapping Reference - -See `references/uid-mapping.md` for the complete rootless UID mapping table. 
diff --git a/.claude/skills/podman-doctor/references/common-failures.md b/.claude/skills/podman-doctor/references/common-failures.md deleted file mode 100644 index 983a58fc..00000000 --- a/.claude/skills/podman-doctor/references/common-failures.md +++ /dev/null @@ -1,102 +0,0 @@ -# Common Podman Failure Patterns - -## Rootless Podman Specific Failures - -| Error | Cause | Fix | -|-------|-------|-----| -| `ERRO[0000] cannot find UID/GID for user` | subuid/subgid not configured | Add `archipelago:100000:65536` to `/etc/subuid` and `/etc/subgid` | -| `Error: unshare: operation not permitted` | Systemd `RestrictNamespaces` blocks user namespaces | Remove `RestrictNamespaces=` from `archipelago.service` | -| `Error: could not get runtime: creating runtime` | XDG_RUNTIME_DIR not set or /run/user/1000 missing | Set `Environment=XDG_RUNTIME_DIR=/run/user/1000` in service, ensure `loginctl enable-linger archipelago` | -| `permission denied` on volume mount | Wrong UID ownership — must use mapped UIDs | `sudo chown -R 100000:100000 /var/lib/archipelago/APP` (see UID mapping table) | -| `ERRO[0000] rootless containers not supported` | Podman not configured for rootless | Run `podman system migrate`, check `/etc/subuid` | -| `Error: creating container storage: layer not known` | Corrupted rootless storage | `podman system reset` (destroys all containers — last resort) | -| `Error: stat /tmp/podman-run-1000/...: no such file` | PrivateTmp=yes in systemd isolates /tmp | Set `PrivateTmp=no` in `archipelago.service` | -| Container ports unreachable from LAN | UFW DEFAULT_FORWARD_POLICY="DROP" | Change to "ACCEPT" in `/etc/default/ufw`, then `sudo ufw reload` | -| `Error: error creating network namespace` | Systemd `SystemCallFilter` blocks clone/unshare | Remove `SystemCallFilter=` from `archipelago.service` | -| Containers lose network after service restart | podman runtime dir in /tmp cleaned | Ensure `PrivateTmp=no` so /tmp/podman-run-1000/ persists | - -## Container Won't 
Start - -| Error | Cause | Fix | -|-------|-------|-----| -| `exec format error` | Binary built on wrong arch | Rebuild on the Linux server | -| `address already in use` | Port conflict | `ss -tlnp \| grep :PORT` to find offender | -| `permission denied` | Missing capability, wrong UID ownership, or read-only root | Check capabilities, check volume ownership with mapped UID, add tmpfs | -| `OCI runtime error` | Corrupt container state | `podman rm -f NAME && recreate` | -| `image not known` | Image not pulled | `podman pull IMAGE:TAG` | -| `no such network` | Network missing | `podman network create archy-net` | -| `Error: netavark: ...subnet overlap` | Network CIDR conflict | `podman network rm archy-net && podman network create archy-net` | - -## Container Starts But App Unreachable - -| Symptom | Check Layer | Fix | -|---------|------------|-----| -| Direct port works, /app/ doesn't | Nginx config | Add `/app/{id}/` location block | -| Neither works | Podman ports | `podman port NAME` — verify mapping exists | -| Port mapped but refused | Container logs | App crashing internally — check logs | -| Works sometimes | Resources | Check OOM kills, CPU, disk space | -| 502 Bad Gateway | Nginx→Container | Wrong port in proxy_pass or container restarted | -| Works locally but not from LAN | UFW forward policy | Set `DEFAULT_FORWARD_POLICY="ACCEPT"` in `/etc/default/ufw` | - -## Container Keeps Dying - -| Pattern | Cause | Fix | -|---------|-------|-----| -| Exits immediately (code 1) | Config error | Check `podman logs NAME` | -| Dies after minutes | OOM killed | Increase `--memory` limit | -| Dies when dep restarts | No restart policy | Add `--restart unless-stopped` | -| Crash loop | Repeated crash | Fix root cause, don't just restart | -| Exit code 127 | Missing binary in container | Wrong image tag or corrupted image — re-pull | -| Exit code 137 | Killed by OOM or signal | Check `dmesg` for OOM kill, check `podman inspect` for OOMKilled | - -## Network Issues - -| 
Problem | Cause | Fix | -|---------|-------|-----| -| Can't resolve container names | Not on archy-net | Recreate with `--network=archy-net` | -| Can't reach internet | DNS missing | Add `--dns 1.1.1.1` | -| Container-to-container timeout | Different networks | Put both on same network | -| Bitcoin RPC refused from container | rpcallowip wrong subnet | Use `rpcallowip=0.0.0.0/0` (safe: port mapped, not exposed) | -| Old containers can't find new network | Subnet changed (rootful→rootless) | Recreate containers on new archy-net (rootless uses 10.89.x.x) | - -## Volume Permission Patterns (Rootless UID Mapping) - -Formula: **host_uid = 100000 + container_uid** - -| Container UID | Host UID | Apps | Data Directory | -|---|---|---|---| -| 0 (root) | 100000 | lnd, fedimint, homeassistant, jellyfin, vaultwarden, photoprism, ollama, filebrowser, electrumx, btcpay, immich | `/var/lib/archipelago/{app}` | -| 70 | 100070 | postgres (btcpay-db, immich-db, penpot-postgres) | `/var/lib/archipelago/postgres-*` | -| 101 | 100101 | bitcoin-knots | `/var/lib/archipelago/bitcoin` | -| 472 | 100472 | grafana | `/var/lib/archipelago/grafana` | -| 999 | 100999 | MariaDB (mysql-mempool) | `/var/lib/archipelago/mysql-mempool` | - -## Capability Reference - -| Capability | Apps That Need It | Failure Mode | -|-----------|------------------|-------------| -| CHOWN | nextcloud, homeassistant, btcpay, jellyfin, portainer | Can't chown during setup | -| SETUID/SETGID | nextcloud, homeassistant, btcpay, jellyfin | Can't switch to service user | -| DAC_OVERRIDE | nextcloud, homeassistant, btcpay | Can't access cross-UID files | -| FOWNER | bitcoin-knots, lnd, fedimint | Can't modify data dir perms | -| NET_BIND_SERVICE | nginx-proxy-manager, vaultwarden | Can't bind ports <1024 | -| NET_ADMIN + NET_RAW | tailscale | Can't create TUN device or manage routes | - -## Read-Only Safe Apps - -Only these apps can run with `--read-only` + tmpfs: searxng, grafana, filebrowser, electrumx, 
mempool-electrs, electrs, nostr-rs-relay, ollama, indeedhub - -All others need writable root or will fail silently. - -## Systemd Sandbox Requirements for Rootless Podman - -These systemd service settings MUST be configured for rootless Podman to work: - -| Setting | Required Value | Why | -|---------|---------------|-----| -| `ProtectHome=` | `no` | Podman stores images in `~/.local/share/containers/` | -| `PrivateTmp=` | `no` | Podman runtime lives in `/tmp/podman-run-1000/` | -| `RestrictNamespaces=` | NOT SET | Rootless podman creates user namespaces | -| `SystemCallFilter=` | NOT SET | Rootless podman needs clone/unshare syscalls | -| `ReadWritePaths=` | Include `/var/lib/archipelago /run/user /tmp /etc/containers /var/lib/containers /run/containers` | Volume data + podman runtime paths | -| `Environment=` | `XDG_RUNTIME_DIR=/run/user/1000` | Podman socket location | diff --git a/.claude/skills/podman-doctor/references/port-map.md b/.claude/skills/podman-doctor/references/port-map.md deleted file mode 100644 index ad960883..00000000 --- a/.claude/skills/podman-doctor/references/port-map.md +++ /dev/null @@ -1,71 +0,0 @@ -# Archipelago Canonical Port Map - -All port assignments across the 4 configuration layers. When adding or debugging an app, every row must be consistent across all columns. 
- -## Bitcoin Stack - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| bitcoin-knots | 8332, 8333 | 8332, 8333 | archy-net | /app/bitcoin-knots/ | 8332→bitcoin-knots | -| bitcoin-ui | 8334 | 80 | bridge | /app/bitcoin-ui/ | 8334→bitcoin-knots | -| electrs | 50001 | 50001 | archy-net | /app/electrs/ | 50001→electrs | -| lnd | 9735, 10009, 8080 | 9735, 10009, 8080 | archy-net | /app/lnd/ | 10009→lnd | -| lnd-ui (RTL) | 8081 | 80 | bridge | /app/lnd-ui/ | 8081→lnd | - -## Lightning & Payment - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| btcpay-server | 23000 | 49392 | archy-net | /app/btcpay/ | 23000→btcpay-server | -| nbxplorer | 24444 | 32838 | archy-net | N/A (internal) | N/A | -| fedimint | 8173, 8174, 8175 | 8173, 8174, 8175 | archy-net | /app/fedimint/ | 8174→fedimint | -| fedimint-gateway | 8175 | 8175 | archy-net | /app/fedimint-gateway/ | 8175→fedimint-gateway | - -## Explorer & Monitoring - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| mempool | 4080 | 8080 | archy-net | /app/mempool/ | 4080→mempool | -| grafana | 3000 | 3000 | bridge | /app/grafana/ | 3000→grafana (new tab) | - -## Self-Hosted Apps - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| nextcloud | 8085 | 80 | bridge | /app/nextcloud/ | 8085→nextcloud | -| vaultwarden | 8082 | 80 | bridge | /app/vaultwarden/ | 8082→vaultwarden (new tab) | -| filebrowser | 8083 | 80 | bridge | /app/filebrowser/ | 8083→filebrowser | -| searxng | 8888 | 8080 | bridge | /app/searxng/ | 8888→searxng | -| photoprism | 2342 | 2342 | bridge | 
/app/photoprism/ | 2342→photoprism (new tab) | -| jellyfin | 8096 | 8096 | bridge | /app/jellyfin/ | 8096→jellyfin | -| homeassistant | 8123 | 8123 | bridge | /app/homeassistant/ | 8123→homeassistant (new tab) | -| ollama | 11434 | 11434 | archy-net | /app/ollama/ | 11434→ollama | -| open-webui | 3080 | 8080 | archy-net | /app/open-webui/ | 3080→open-webui | - -## Nostr & Social - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| nostr-rs-relay | 7000 | 8080 | archy-net | /app/nostr-rs-relay/ | 7000→nostr-rs-relay | -| indeedhub | 3001 | 3000 | archy-net | /app/indeedhub/ | 3001→indeedhub | - -## System - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| tailscale | 8240 | 8240 | host | /app/tailscale/ | N/A | -| nginx-proxy-manager | 81, 8443 | 81, 443 | bridge | N/A | 81→nginx-proxy-manager | - -## Multi-Container Stacks - -**Immich**: immich-server (2283), immich-postgres (internal 5432), immich-redis (internal 6379) — all on immich-net -**Penpot**: penpot-frontend (9001→80), penpot-backend, penpot-exporter, penpot-postgres, penpot-mailcatch — all on penpot-net -**Mempool**: mempool (4080→8080), mempool-db (internal 3306) — on archy-net -**BTCPay**: btcpay-server (23000→49392), nbxplorer (24444→32838), btcpay-postgres (internal 5432) — on archy-net - -## Key Notes - -- **archy-net apps** resolve each other by container name (e.g., `bitcoin-knots:8332`) -- **bridge apps** are standalone — access services via host IP/port -- **host network** (tailscale only) — shares host namespace, no port mapping -- **New tab apps**: btcpay (23000), grafana (3000), vaultwarden (8082), photoprism (2342), homeassistant (8123) — X-Frame-Options blocks iframe diff --git a/.claude/skills/podman-doctor/references/uid-mapping.md 
b/.claude/skills/podman-doctor/references/uid-mapping.md deleted file mode 100644 index a8338720..00000000 --- a/.claude/skills/podman-doctor/references/uid-mapping.md +++ /dev/null @@ -1,93 +0,0 @@ -# Rootless Podman UID Mapping Reference - -## How Rootless UID Mapping Works - -When Podman runs as the `archipelago` user (UID 1000), container processes don't run as their "apparent" UID on the host. Instead, Linux user namespaces remap UIDs. - -**Mapping formula**: `host_uid = 100000 + container_uid` - -This is configured in `/etc/subuid` and `/etc/subgid`: -``` -archipelago:100000:65536 -``` - -This means: -- Container UID 0 (root inside container) → Host UID 100000 (unprivileged on host) -- Container UID 70 (postgres) → Host UID 100070 -- Container UID 101 (bitcoin) → Host UID 100101 -- etc. - -## Why This Matters - -Volume directories (bind mounts) on the host must be owned by the **mapped** UID, not the container UID. If Bitcoin runs as UID 101 inside its container, the host directory must be owned by UID 100101. - -If ownership is wrong, the container gets `permission denied` when trying to read/write its data. 
- -## Complete UID Mapping Table - -| Container UID | Host UID | Containers | Fix Command | -|---|---|---|---| -| 0 (root) | 100000 | lnd, fedimint, fedimint-gateway, homeassistant, jellyfin, vaultwarden, photoprism, ollama, filebrowser, electrumx, btcpay-server, nbxplorer, immich, nostr-rs-relay, strfry, nextcloud, searxng, onlyoffice, tailscale, uptime-kuma | `sudo chown -R 100000:100000 /var/lib/archipelago/{app}` | -| 70 | 100070 | postgres (btcpay-db, immich-db, penpot-postgres) | `sudo chown -R 100070:100070 /var/lib/archipelago/postgres-*` | -| 101 | 100101 | bitcoin-knots, bitcoin-core | `sudo chown -R 100101:100101 /var/lib/archipelago/bitcoin` | -| 472 | 100472 | grafana | `sudo chown -R 100472:100472 /var/lib/archipelago/grafana` | -| 999 | 100999 | MariaDB (mysql-mempool) | `sudo chown -R 100999:100999 /var/lib/archipelago/mysql-mempool` | - -## How to Find a Container's UID - -If you encounter a new container with permission issues: - -```bash -# Check what user the container runs as -podman inspect CONTAINER_NAME --format "{{.Config.User}}" - -# If empty, it runs as root (UID 0) → host UID 100000 - -# If it shows a username, find the UID inside the image -podman run --rm IMAGE_NAME id - -# Then calculate: host_uid = 100000 + container_uid -``` - -## Fix Script - -Run this after any fresh install, migration, or when containers have permission errors: - -```bash -#!/bin/bash -# Fix all rootless podman volume ownership - -# UID 0 → 100000 (most containers) -for dir in lnd fedimint fedimint-gateway homeassistant jellyfin vaultwarden photoprism \ - ollama filebrowser electrumx btcpay nbxplorer immich nostr-rs-relay nextcloud \ - searxng onlyoffice uptime-kuma; do - [ -d "/var/lib/archipelago/$dir" ] && sudo chown -R 100000:100000 "/var/lib/archipelago/$dir" -done - -# UID 101 → 100101 (Bitcoin) -[ -d "/var/lib/archipelago/bitcoin" ] && sudo chown -R 100101:100101 /var/lib/archipelago/bitcoin - -# UID 70 → 100070 (PostgreSQL) -for dir in 
/var/lib/archipelago/postgres-* /var/lib/archipelago/btcpay-db /var/lib/archipelago/immich-db; do - [ -d "$dir" ] && sudo chown -R 100070:100070 "$dir" -done - -# UID 999 → 100999 (MariaDB) -[ -d "/var/lib/archipelago/mysql-mempool" ] && sudo chown -R 100999:100999 /var/lib/archipelago/mysql-mempool - -# UID 472 → 100472 (Grafana) -[ -d "/var/lib/archipelago/grafana" ] && sudo chown -R 100472:100472 /var/lib/archipelago/grafana -``` - -## Rootful vs Rootless Comparison - -| Aspect | Rootful (old) | Rootless (current) | -|--------|---------------|-------------------| -| Podman command | `sudo podman` | `podman` (as archipelago user) | -| Container storage | `/var/lib/containers/storage` | `~/.local/share/containers/storage` | -| Container subnet | `10.88.0.0/16` | `10.89.0.0/16` | -| Volume ownership | Container UID directly | Mapped UID (100000 + container_uid) | -| Requires root? | Yes | No (except fixing volume ownership) | -| XDG_RUNTIME_DIR | Not needed | Required: `/run/user/1000` | -| User lingering | Not needed | Required: `loginctl enable-linger` | -| Systemd restrictions | All can be enabled | Must disable: RestrictNamespaces, SystemCallFilter | diff --git a/.claude/skills/podman-fix/SKILL.md b/.claude/skills/podman-fix/SKILL.md deleted file mode 100644 index 15a4a789..00000000 --- a/.claude/skills/podman-fix/SKILL.md +++ /dev/null @@ -1,338 +0,0 @@ ---- -name: podman-fix -description: > - Fix Podman container issues on Archipelago — restart failed containers, repair port bindings, - fix network connectivity, add missing restart policies, fix rootless UID mapping, and resolve - config drift. Handles rootless Podman (user: archipelago, UID 1000, subuid 100000:65536). - Use when asked to "fix container", "restart app", "fix port mapping", "container not working", - "app won't start", "fix podman", "repair container", "container down", "permission denied", - or after /podman-doctor identifies issues to fix. 
-allowed-tools: Bash Read Edit Write Glob Grep ---- - -# Podman Fix — Container Remediation - -Targeted fix workflow for **rootless Podman** container issues on Archipelago. Given a specific problem (from /podman-doctor or user report), diagnose the root cause and fix it. - -**SSH command**: `ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228` - -> **ROOTLESS PODMAN**: All `podman` commands run as the `archipelago` user — NO sudo. -> Only use `sudo` for: chown on volume directories, UFW changes, systemd service edits, nginx reload. -> Container UIDs are mapped via subuid: container UID N → host UID (100000 + N). - -If $ARGUMENTS is provided, fix that specific app/issue. Otherwise ask what needs fixing. - -## Fix Procedures - -### Fix 1: Container Not Running - -```bash -# Check why it stopped -podman logs --tail 50 CONTAINER_NAME -podman inspect CONTAINER_NAME --format "{{.State.ExitCode}} {{.State.Error}}" - -# If clean exit or crash — just restart -podman start CONTAINER_NAME - -# If corrupt state — remove and recreate -podman rm -f CONTAINER_NAME -# Then recreate using the install flow (trigger from UI or re-run creation command) -``` - -**If container keeps crashing**, check logs for the actual error. Common causes: -- Missing config file → check if volume mount has the config -- Wrong permissions → fix UID mapping (see Fix 8 below) -- Dependency not ready → start dependency first, wait, then start this container -- Exit code 127 → missing binary in container image, re-pull the image - -### Fix 2: Missing Restart Policy - -The most common uptime killer. 
Fix for ALL containers at once: - -```bash -# Fix a single container -podman update --restart unless-stopped CONTAINER_NAME - -# Fix ALL containers that have no restart policy -for c in $(podman ps -a --format "{{.Names}}"); do - policy=$(podman inspect "$c" --format "{{.HostConfig.RestartPolicy.Name}}") - if [ "$policy" = "no" ] || [ -z "$policy" ]; then - echo "Fixing restart policy for: $c" - podman update --restart unless-stopped "$c" - fi -done -``` - -**Also update the Rust source** so new installs get it right: -- Check `core/archipelago/src/api/rpc/package.rs` `get_app_config()` for the app -- Ensure `--restart` flag is in the podman run args - -### Fix 3: Port Mapping Issues - -#### Port conflict (address already in use) -```bash -# Find what's using the port -ss -tlnp | grep :PORT_NUMBER - -# If it's another container, either change one's port or stop the conflicting one -podman stop CONFLICTING_CONTAINER - -# If it's a host process (e.g., system tor vs container tor) -sudo systemctl stop tor # Stop system service if container needs the port -sudo systemctl disable tor -``` - -#### Port not mapped (container running but port unreachable) -```bash -# Check current port mappings -podman port CONTAINER_NAME - -# Can't add ports to running container — must recreate -podman stop CONTAINER_NAME -podman rm CONTAINER_NAME -# Recreate with correct -p flags (use the Rust install flow or manual podman run) -``` - -#### Nginx proxy missing or wrong -Read and fix the nginx config: -- HTTP: `image-recipe/configs/nginx-archipelago.conf` -- HTTPS: `image-recipe/configs/snippets/archipelago-https-app-proxies.conf` - -Add a location block: -```nginx -location /app/APP_ID/ { - proxy_pass http://127.0.0.1:HOST_PORT/; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - 
proxy_set_header Connection $connection_upgrade; - # Hide X-Frame-Options so it works in our iframe - proxy_hide_header X-Frame-Options; - proxy_hide_header Content-Security-Policy; -} -``` - -After editing nginx config, deploy and reload: -```bash -# On server -sudo nginx -t && sudo systemctl reload nginx -``` - -#### Frontend routing missing -Edit `neode-ui/src/stores/appLauncher.ts`: -- Add entry to `PORT_TO_APP_ID` map -- If app blocks iframes, add port to the new-tab list in `resolveAppIdFromUrl()` - -### Fix 4: Network Issues - -#### Container not on archy-net (can't resolve other containers) -```bash -# Connect to archy-net without recreating -podman network connect archy-net CONTAINER_NAME - -# Verify -podman inspect CONTAINER_NAME --format "{{.NetworkSettings.Networks}}" -``` - -#### archy-net doesn't exist -```bash -podman network create archy-net -# Then reconnect all containers that need it -``` - -#### DNS not working inside container -```bash -# Test DNS from inside container -podman exec CONTAINER_NAME nslookup bitcoin-knots 2>/dev/null || \ -podman exec CONTAINER_NAME ping -c1 bitcoin-knots - -# If DNS fails, check the container's resolv.conf -podman exec CONTAINER_NAME cat /etc/resolv.conf - -# If DNS fails, recreate container with explicit DNS -# Add --dns 1.1.1.1 to the podman run command -``` - -#### Container subnet changed (rootful → rootless migration) -```bash -# Old rootful subnet: 10.88.0.0/16 -# New rootless subnet: 10.89.0.0/16 -# Bitcoin RPC rpcallowip must be updated if using subnet-specific allowlist - -# Check current archy-net subnet -podman network inspect archy-net --format "{{range .Subnets}}{{.Subnet}}{{end}}" - -# If Bitcoin RPC refuses connections from containers: -# Update bitcoin.conf rpcallowip to 0.0.0.0/0 (safe: only accessible via port mapping) -``` - -### Fix 5: Health Check Issues - -#### Add missing health check to running container -Can't add to running container — must recreate with health check flags: -```bash -# 
Example for a web app -podman run ... \ - --health-cmd "curl -f http://localhost:PORT/health || exit 1" \ - --health-interval 30s \ - --health-timeout 5s \ - --health-retries 3 \ - --health-start-period 60s \ - IMAGE -``` - -#### Fix unhealthy container -```bash -# See what the health check is actually running -podman inspect CONTAINER_NAME --format "{{.Config.Healthcheck.Test}}" - -# Run the health check manually to see the error -podman exec CONTAINER_NAME HEALTH_CHECK_COMMAND - -# Common fixes: -# - curl not installed in container → use wget or nc instead -# - Wrong port in health check → fix the check command -# - App takes too long to start → increase --health-start-period -``` - -### Fix 6: Permission/Capability Issues - -```bash -# Check what capabilities container has -podman inspect CONTAINER_NAME --format "{{.HostConfig.CapAdd}}" - -# If missing required caps, must recreate with correct --cap-add flags -# Refer to the capability reference in /podman-doctor references -``` - -### Fix 7: Full Config Consistency Fix - -When port map is inconsistent across layers, fix ALL layers: - -1. **Decide the correct port** (usually what's in package.rs) -2. **Fix Podman**: recreate container with correct `-p` flags -3. **Fix Nginx**: update location block's `proxy_pass` port -4. **Fix Frontend**: update `PORT_TO_APP_ID` in appLauncher.ts -5. **Deploy**: `./scripts/deploy-to-target.sh --live` -6. **Verify**: `curl -I http://192.168.1.228/app/APP_ID/` - -### Fix 8: Rootless UID Mapping (Permission Denied on Volumes) - -This is the #1 rootless-specific issue. Container UIDs are remapped by user namespaces. 
- -**Formula**: `host_uid = 100000 + container_uid` - -```bash -# Fix UID 0 containers (most apps — run as root inside, mapped to 100000 on host) -sudo chown -R 100000:100000 /var/lib/archipelago/APP_NAME - -# Fix Bitcoin (container UID 101 → host UID 100101) -sudo chown -R 100101:100101 /var/lib/archipelago/bitcoin - -# Fix PostgreSQL (container UID 70 → host UID 100070) -sudo chown -R 100070:100070 /var/lib/archipelago/postgres-APP_NAME - -# Fix Grafana (container UID 472 → host UID 100472) -sudo chown -R 100472:100472 /var/lib/archipelago/grafana - -# Fix MariaDB (container UID 999 → host UID 100999) -sudo chown -R 100999:100999 /var/lib/archipelago/mysql-mempool -``` - -**How to find the right UID for a new container:** -```bash -# Check what user the container image runs as -podman inspect IMAGE_NAME --format "{{.Config.User}}" -# If empty = root (UID 0) → host UID 100000 -# If number → host UID = 100000 + that number -# If username → run: podman run --rm IMAGE_NAME id -``` - -After fixing ownership, restart the container: -```bash -podman restart CONTAINER_NAME -``` - -### Fix 9: UFW Forward Policy (LAN Access Broken) - -If containers work locally but not from other machines on the network: - -```bash -# Check current policy -grep DEFAULT_FORWARD_POLICY /etc/default/ufw - -# Fix: change DROP to ACCEPT -sudo sed -i 's/DEFAULT_FORWARD_POLICY="DROP"/DEFAULT_FORWARD_POLICY="ACCEPT"/' /etc/default/ufw -sudo ufw reload -``` - -### Fix 10: Systemd Sandbox Too Restrictive - -If the Rust backend can't scan/manage containers after a systemd update: - -```bash -# Check what's blocked -sudo journalctl -u archipelago --since "10 min ago" | grep -i "denied\|permission\|namespace\|syscall" - -# The archipelago.service MUST have these for rootless podman: -# ProtectHome=no -# PrivateTmp=no (or disabled) -# RestrictNamespaces= (NOT SET — don't restrict) -# SystemCallFilter= (NOT SET — don't filter) -# ReadWritePaths=/var/lib/archipelago /etc/containers /var/lib/containers 
/run/containers /run/user /tmp -# Environment=XDG_RUNTIME_DIR=/run/user/1000 -``` - -Edit the service file: -```bash -sudo systemctl edit archipelago.service -# Add overrides, then: -sudo systemctl daemon-reload -sudo systemctl restart archipelago -``` - -### Fix 11: Stale Podman Processes - -If `podman ps` hangs or is very slow: - -```bash -# Kill stuck podman processes (>10 of them = something is wrong) -stuck=$(pgrep -c -f "podman ps\|podman stats" 2>/dev/null || echo 0) -if [ "$stuck" -gt 10 ]; then - pkill -f "podman ps\|podman stats" - echo "Killed $stuck stuck podman processes" -fi - -# Kill orphaned conmon processes holding ports -for pid in $(pgrep conmon); do - container=$(cat /proc/$pid/cmdline 2>/dev/null | tr '\0' ' ' | grep -oP '(?<=--cid )\S+') - if [ -n "$container" ] && ! podman ps -a --format "{{.ID}}" | grep -q "${container:0:12}"; then - kill "$pid" 2>/dev/null && echo "Killed orphan conmon $pid" - fi -done -``` - -## After Fixing - -Always verify the fix: -```bash -# Container running? -podman ps --filter name=CONTAINER_NAME - -# Port reachable? -curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:PORT/ - -# Via nginx proxy? -curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1/app/APP_ID/ - -# Health check passing? -podman inspect CONTAINER_NAME --format "{{.State.Health.Status}}" - -# Volume permissions correct? (rootless check) -podman exec CONTAINER_NAME ls -la /data/ 2>/dev/null || echo "Check container data path" -``` - -Run `/podman-doctor` again to confirm all issues are resolved. diff --git a/.claude/skills/podman-uptime/SKILL.md b/.claude/skills/podman-uptime/SKILL.md deleted file mode 100644 index 7142f7d2..00000000 --- a/.claude/skills/podman-uptime/SKILL.md +++ /dev/null @@ -1,410 +0,0 @@ ---- -name: podman-uptime -description: > - Ensure 100% container uptime on Archipelago. Sets up systemd watchdog timers, verifies - restart policies, creates health check monitors, and configures auto-recovery for all - containers. 
Handles rootless Podman (user: archipelago, UID 1000, subuid 100000:65536). - Use when asked to "ensure uptime", "containers keep dying", "auto-restart", - "watchdog", "container monitoring", "uptime guarantee", "keep containers running", - "survive reboot", or to harden container reliability. -allowed-tools: Bash Read Edit Write Glob Grep ---- - -# Podman Uptime — Container Reliability Guardian - -Ensures every Archipelago container survives reboots, recovers from crashes, and stays healthy. Sets up the three layers of uptime defense: restart policies, systemd watchdog, and health-based auto-recovery. - -**SSH command**: `ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228` - -> **ROOTLESS PODMAN**: All `podman` commands run as the `archipelago` user — NO sudo. -> Only use `sudo` for: systemd unit files, chown on volumes, UFW changes. -> The archipelago user runs containers directly via user namespaces. - -## Prerequisites for Rootless Uptime - -Before setting up uptime infrastructure, verify rootless Podman basics are working: - -```bash -# Must be the archipelago user -whoami # archipelago - -# User lingering must be enabled (keeps user services running after logout) -ls /var/lib/systemd/linger/ | grep archipelago || sudo loginctl enable-linger archipelago - -# XDG_RUNTIME_DIR must be set -echo $XDG_RUNTIME_DIR # /run/user/1000 - -# Subuid/subgid must be configured -grep archipelago /etc/subuid # archipelago:100000:65536 - -# UFW forward policy must be ACCEPT (for LAN access to containers) -grep DEFAULT_FORWARD_POLICY /etc/default/ufw # Must be "ACCEPT" -``` - -## Layer 1: Restart Policies (Survive Reboots) - -Every container MUST have `--restart unless-stopped`. This is non-negotiable. 
- -### Audit and fix all containers - -```bash -# Audit -for c in $(podman ps -a --format "{{.Names}}"); do - policy=$(podman inspect "$c" --format "{{.HostConfig.RestartPolicy.Name}}") - echo "$c: $policy" -done - -# Fix any with "no" or empty policy -for c in $(podman ps -a --format "{{.Names}}"); do - policy=$(podman inspect "$c" --format "{{.HostConfig.RestartPolicy.Name}}") - if [ "$policy" = "no" ] || [ -z "$policy" ]; then - echo "Fixing: $c" - podman update --restart unless-stopped "$c" - fi -done -``` - -### Ensure podman auto-starts containers on boot - -For rootless Podman, containers with restart policies are auto-started by `podman-restart` as a **user** service: - -```bash -# Enable the rootless podman-restart user service -systemctl --user enable podman-restart.service 2>/dev/null - -# If the user service doesn't exist, create a system-level one -# (runs as archipelago user via User= directive) -cat <<'EOF' | sudo tee /etc/systemd/system/podman-restart.service -[Unit] -Description=Podman Start All Containers With Restart Policy -After=network-online.target -Wants=network-online.target - -[Service] -Type=oneshot -User=archipelago -Group=archipelago -Environment=XDG_RUNTIME_DIR=/run/user/1000 -ExecStart=/usr/bin/podman start --all --filter restart-policy=unless-stopped -RemainAfterExit=yes -TimeoutStartSec=300 - -[Install] -WantedBy=multi-user.target -EOF - -sudo systemctl daemon-reload -sudo systemctl enable podman-restart.service -``` - -## Layer 2: Systemd Watchdog (Detect and Recover) - -Create a systemd timer that checks container health every 2 minutes and restarts unhealthy or stopped containers. 
- -### Create the watchdog script - -```bash -cat <<'SCRIPT' | sudo tee /usr/local/bin/archipelago-container-watchdog.sh -#!/bin/bash -# Archipelago Container Watchdog (Rootless Podman) -# Runs as archipelago user — NO sudo for podman commands - -LOG_TAG="container-watchdog" - -# Run podman as the archipelago user with correct XDG path -export XDG_RUNTIME_DIR=/run/user/1000 -PODMAN="/usr/bin/podman" - -# Restart any stopped containers that should be running (have restart policy) -for c in $($PODMAN ps -a --filter status=exited --filter restart-policy=unless-stopped --format "{{.Names}}" 2>/dev/null); do - logger -t "$LOG_TAG" "Restarting stopped container: $c" - $PODMAN start "$c" 2>&1 | logger -t "$LOG_TAG" -done - -# Restart unhealthy containers -for c in $($PODMAN ps --filter health=unhealthy --format "{{.Names}}" 2>/dev/null); do - logger -t "$LOG_TAG" "Restarting unhealthy container: $c" - $PODMAN restart "$c" 2>&1 | logger -t "$LOG_TAG" -done - -# Check for containers in "created" state (never started) -for c in $($PODMAN ps -a --filter status=created --format "{{.Names}}" 2>/dev/null); do - logger -t "$LOG_TAG" "Starting created container: $c" - $PODMAN start "$c" 2>&1 | logger -t "$LOG_TAG" -done -SCRIPT - -sudo chmod +x /usr/local/bin/archipelago-container-watchdog.sh -``` - -### Create the systemd timer - -```bash -# Service unit — runs as archipelago user for rootless podman -cat <<'EOF' | sudo tee /etc/systemd/system/archipelago-watchdog.service -[Unit] -Description=Archipelago Container Watchdog -After=podman-restart.service - -[Service] -Type=oneshot -User=archipelago -Group=archipelago -Environment=XDG_RUNTIME_DIR=/run/user/1000 -ExecStart=/usr/local/bin/archipelago-container-watchdog.sh -EOF - -# Timer unit — runs every 2 minutes -cat <<'EOF' | sudo tee /etc/systemd/system/archipelago-watchdog.timer -[Unit] -Description=Run Archipelago Container Watchdog every 2 minutes - -[Timer] -OnBootSec=120 -OnUnitActiveSec=120 -AccuracySec=30 - -[Install] 
-WantedBy=timers.target -EOF - -sudo systemctl daemon-reload -sudo systemctl enable --now archipelago-watchdog.timer -``` - -### Verify watchdog is running - -```bash -sudo systemctl status archipelago-watchdog.timer -sudo systemctl list-timers | grep archipelago -# Check watchdog logs -sudo journalctl -t container-watchdog --since "1 hour ago" --no-pager -``` - -## Layer 3: Dependency-Aware Startup Order - -Some containers depend on others. The watchdog handles restarts, but initial boot order matters. - -### Create ordered startup script - -```bash -cat <<'SCRIPT' | sudo tee /usr/local/bin/archipelago-ordered-start.sh -#!/bin/bash -# Ordered container startup for Archipelago (Rootless Podman) -# Runs as archipelago user — NO sudo for podman commands -# Respects dependency chain: bitcoin → electrs/lnd → mempool/btcpay - -LOG_TAG="ordered-start" -export XDG_RUNTIME_DIR=/run/user/1000 -PODMAN="/usr/bin/podman" - -wait_for_container() { - local name=$1 - local max_wait=${2:-60} - local waited=0 - while [ $waited -lt $max_wait ]; do - status=$($PODMAN inspect "$name" --format "{{.State.Running}}" 2>/dev/null) - if [ "$status" = "true" ]; then - logger -t "$LOG_TAG" "$name is running" - return 0 - fi - sleep 5 - waited=$((waited + 5)) - done - logger -t "$LOG_TAG" "WARNING: $name not running after ${max_wait}s" - return 1 -} - -# Tier 0: Infrastructure -logger -t "$LOG_TAG" "Starting Tier 0: Infrastructure" -$PODMAN start tailscale 2>/dev/null - -# Tier 1: Databases (must start before services that depend on them) -logger -t "$LOG_TAG" "Starting Tier 1: Databases" -$PODMAN start mempool-db 2>/dev/null -$PODMAN start btcpay-postgres 2>/dev/null -$PODMAN start immich_postgres 2>/dev/null -sleep 5 - -# Tier 2: Bitcoin (foundation for Lightning and explorers) -logger -t "$LOG_TAG" "Starting Tier 2: Bitcoin" -$PODMAN start bitcoin-knots 2>/dev/null -wait_for_container bitcoin-knots 120 - -# Tier 3: Bitcoin-dependent services -logger -t "$LOG_TAG" "Starting Tier 3: 
Bitcoin-dependent" -$PODMAN start electrumx 2>/dev/null -$PODMAN start lnd 2>/dev/null -wait_for_container electrumx 90 -wait_for_container lnd 90 - -# Tier 4: Services depending on Tier 3 -logger -t "$LOG_TAG" "Starting Tier 4: Second-order dependencies" -$PODMAN start mempool 2>/dev/null -$PODMAN start nbxplorer 2>/dev/null -sleep 10 -$PODMAN start btcpay-server 2>/dev/null -$PODMAN start fedimint 2>/dev/null -$PODMAN start fedimint-gateway 2>/dev/null - -# Tier 5: Independent apps (start all remaining) -logger -t "$LOG_TAG" "Starting Tier 5: Independent apps" -$PODMAN start --all 2>/dev/null - -# Tier 6: UI containers (need parent apps running first) -logger -t "$LOG_TAG" "Starting Tier 6: UI containers" -$PODMAN start bitcoin-ui 2>/dev/null -$PODMAN start lnd-ui 2>/dev/null -$PODMAN start electrs-ui 2>/dev/null - -logger -t "$LOG_TAG" "Startup sequence complete" -SCRIPT - -sudo chmod +x /usr/local/bin/archipelago-ordered-start.sh -``` - -### Wire into boot sequence - -```bash -# Runs as archipelago user for rootless podman -cat <<'EOF' | sudo tee /etc/systemd/system/archipelago-containers.service -[Unit] -Description=Archipelago Ordered Container Startup -After=network-online.target -Wants=network-online.target -Before=archipelago.service - -[Service] -Type=oneshot -User=archipelago -Group=archipelago -Environment=XDG_RUNTIME_DIR=/run/user/1000 -ExecStart=/usr/local/bin/archipelago-ordered-start.sh -RemainAfterExit=yes -TimeoutStartSec=600 - -[Install] -WantedBy=multi-user.target -EOF - -sudo systemctl daemon-reload -sudo systemctl enable archipelago-containers.service -``` - -## Rootless-Specific Uptime Considerations - -### Volume ownership survives reboots -Volume ownership doesn't change on reboot, but if a container image is updated (re-pulled), the new container may run as a different UID. 
Always verify after image updates: - -```bash -# Quick ownership audit after image pull -podman inspect CONTAINER_NAME --format "{{.Config.User}}" -# Then verify: sudo stat -c '%u:%g' /var/lib/archipelago/APP_NAME -# Formula: host_uid = 100000 + container_uid -``` - -### XDG_RUNTIME_DIR on boot -Rootless Podman requires `/run/user/1000` to exist. This is created by `pam_systemd` when the user logs in, or by `loginctl enable-linger`. If it's missing after boot, containers won't start. - -```bash -# Verify it exists -ls -la /run/user/1000/ || echo "CRITICAL: /run/user/1000 missing — run: sudo loginctl enable-linger archipelago" -``` - -### Systemd sandbox must not block podman -If the archipelago.service sandbox blocks namespace/syscall operations, the Rust backend can't scan containers. See Fix 10 in /podman-fix. - -## Verification Checklist - -After setting up all 3 layers, verify: - -```bash -echo "=== Rootless Podman Prerequisites ===" -echo "User: $(whoami)" -echo "XDG_RUNTIME_DIR: $XDG_RUNTIME_DIR" -grep archipelago /etc/subuid | head -1 -ls /var/lib/systemd/linger/ | grep archipelago && echo "Linger: enabled" || echo "Linger: DISABLED" -grep DEFAULT_FORWARD_POLICY /etc/default/ufw - -echo "" -echo "=== Layer 1: Restart Policies ===" -for c in $(podman ps -a --format "{{.Names}}"); do - policy=$(podman inspect "$c" --format "{{.HostConfig.RestartPolicy.Name}}") - echo " $c: $policy" -done - -echo "" -echo "=== Layer 2: Watchdog Timer ===" -sudo systemctl is-active archipelago-watchdog.timer -sudo systemctl list-timers | grep archipelago - -echo "" -echo "=== Layer 3: Boot Services ===" -sudo systemctl is-enabled podman-restart.service 2>/dev/null || echo "podman-restart: not found" -sudo systemctl is-enabled archipelago-containers.service 2>/dev/null || echo "ordered-start: not found" -sudo systemctl is-enabled archipelago-watchdog.timer 2>/dev/null || echo "watchdog: not found" - -echo "" -echo "=== Container Health Summary ===" -total=$(podman ps -a --format 
"{{.Names}}" | wc -l) -running=$(podman ps --format "{{.Names}}" | wc -l) -stopped=$((total - running)) -unhealthy=$(podman ps --filter health=unhealthy --format "{{.Names}}" | wc -l) -echo " Total: $total | Running: $running | Stopped: $stopped | Unhealthy: $unhealthy" - -echo "" -echo "=== Volume Ownership Spot Check ===" -for dir in bitcoin lnd grafana; do - if [ -d "/var/lib/archipelago/$dir" ]; then - echo " $dir: $(stat -c '%u:%g' /var/lib/archipelago/$dir)" - fi -done -``` - -## Reboot Test - -The ultimate uptime test — reboot the server and verify everything comes back: - -```bash -# Before reboot: record running containers -podman ps --format "{{.Names}}" | sort > /tmp/before-reboot.txt - -# Reboot -sudo reboot - -# After reboot (wait ~3 minutes, then SSH back in): -podman ps --format "{{.Names}}" | sort > /tmp/after-reboot.txt - -# Compare -diff /tmp/before-reboot.txt /tmp/after-reboot.txt -# Should show no differences - -# Also verify XDG_RUNTIME_DIR survived reboot -ls /run/user/1000/ || echo "CRITICAL: lingering not working" -``` - -## Monitoring - -Check uptime status anytime: -```bash -# Quick status -podman ps -a --format "table {{.Names}}\t{{.Status}}" | sort - -# Watchdog activity -sudo journalctl -t container-watchdog --since "24 hours ago" --no-pager - -# Container events (starts, stops, deaths) -podman events --since 24h --filter event=start --filter event=stop --filter event=died 2>/dev/null | tail -30 - -# Check for permission denied errors (rootless UID mapping issue) -podman ps -a --filter status=exited --format "{{.Names}}" | while read c; do - podman logs --tail 5 "$c" 2>&1 | grep -i "permission denied" && echo " ^ UID mapping issue in: $c" -done -``` - -## Integration - -- Run `/podman-doctor` first to identify issues (includes rootless health checks) -- Run `/podman-fix` for specific container repairs (includes UID mapping fixes) -- Run `/podman-uptime` to set up permanent reliability infrastructure -- Add to ISO build: copy watchdog 
scripts to `image-recipe/configs/` and enable in first-boot diff --git a/.claude/skills/podman/SKILL.md b/.claude/skills/podman/SKILL.md deleted file mode 100644 index dc4d7916..00000000 --- a/.claude/skills/podman/SKILL.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -name: podman -description: Rootless Podman container management — diagnose, fix, and harden uptime. Use for container issues, port problems, UID mapping, health checks, or uptime hardening. -disable-model-invocation: true -allowed-tools: Bash, Read, Edit, Write, Glob, Grep -argument-hint: "[diagnose|fix|uptime] [container-name]" ---- - -# Podman — Container Management - -Archipelago runs rootless Podman as `archipelago` user (UID 1000). All `podman` commands run without sudo. UID mapping: container UID N → host UID (100000 + N). - -**SSH**: `ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228` - -## Diagnose - -```bash -# Container status -podman ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}\t{{.Networks}}" - -# Restart policies (must be "unless-stopped") -for c in $(podman ps -a --format "{{.Names}}"); do - echo -n "$c: "; podman inspect "$c" --format "{{.HostConfig.RestartPolicy.Name}}" -done - -# Health checks -for c in $(podman ps --format "{{.Names}}"); do - health=$(podman inspect "$c" --format "{{.State.Health.Status}}" 2>/dev/null) - [ -n "$health" ] && [ "$health" != "" ] && echo "$c: $health" -done - -# Resource usage + recent deaths -podman stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}" -podman events --filter event=died --since 24h 2>/dev/null | tail -10 - -# Rootless prerequisites -echo "XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR" # must be /run/user/1000 -grep archipelago /etc/subuid # must show archipelago:100000:65536 -ls /var/lib/systemd/linger/ | grep archipelago # must exist -grep DEFAULT_FORWARD_POLICY /etc/default/ufw # must be ACCEPT -``` - -Cross-check 4 layers for port consistency: Backend config (package.rs) → Podman ports → Nginx proxy → Frontend 
appLauncher.ts. See `references/port-map.md`. - -## Fix - -**Restart policy missing**: `podman update --restart unless-stopped CONTAINER_NAME` - -**UID mapping (permission denied)**: `sudo chown -R HOST_UID:HOST_UID /var/lib/archipelago/APP`. Formula: host_uid = 100000 + container_uid. See `references/uid-mapping.md`. - -**Port conflict**: `ss -tlnp | grep :PORT` to find offender. Can't add ports to running container — must recreate. - -**Network missing**: `podman network connect archy-net CONTAINER_NAME` - -**UFW blocking LAN**: `sudo sed -i 's/DEFAULT_FORWARD_POLICY="DROP"/DEFAULT_FORWARD_POLICY="ACCEPT"/' /etc/default/ufw && sudo ufw reload` - -**Stale processes**: `pgrep -c -f "podman ps"` — if >10, kill stuck processes. - -See `references/common-failures.md` for the full error→cause→fix lookup table. - -## Uptime Hardening - -### Layer 1: Restart policies -```bash -for c in $(podman ps -a --format "{{.Names}}"); do - policy=$(podman inspect "$c" --format "{{.HostConfig.RestartPolicy.Name}}") - [ "$policy" = "no" ] || [ -z "$policy" ] && podman update --restart unless-stopped "$c" -done -``` - -### Layer 2: Watchdog timer -Create `/usr/local/bin/archipelago-container-watchdog.sh` that restarts stopped/unhealthy containers every 2 minutes via systemd timer. Script runs as archipelago user with `XDG_RUNTIME_DIR=/run/user/1000`. - -### Layer 3: Ordered startup -Bitcoin stack has dependency chain: bitcoin-knots → electrumx + lnd → mempool + btcpay + fedimint → UI containers. Create `/usr/local/bin/archipelago-ordered-start.sh` with wait-for-container logic between tiers. 
- -### Verification -```bash -sudo reboot # then SSH back after 3 min -podman ps --format "{{.Names}}" | sort # should match pre-reboot list -``` - -## Systemd Requirements - -The archipelago.service needs these for rootless Podman: -- `ProtectHome=no` (podman stores in ~/.local/share/containers/) -- `PrivateTmp=no` (runtime in /tmp/podman-run-1000/) -- Do not set `RestrictNamespaces=` or `SystemCallFilter=` -- `Environment=XDG_RUNTIME_DIR=/run/user/1000` diff --git a/.claude/skills/podman/references/common-failures.md b/.claude/skills/podman/references/common-failures.md deleted file mode 100644 index 983a58fc..00000000 --- a/.claude/skills/podman/references/common-failures.md +++ /dev/null @@ -1,102 +0,0 @@ -# Common Podman Failure Patterns - -## Rootless Podman Specific Failures - -| Error | Cause | Fix | -|-------|-------|-----| -| `ERRO[0000] cannot find UID/GID for user` | subuid/subgid not configured | Add `archipelago:100000:65536` to `/etc/subuid` and `/etc/subgid` | -| `Error: unshare: operation not permitted` | Systemd `RestrictNamespaces` blocks user namespaces | Remove `RestrictNamespaces=` from `archipelago.service` | -| `Error: could not get runtime: creating runtime` | XDG_RUNTIME_DIR not set or /run/user/1000 missing | Set `Environment=XDG_RUNTIME_DIR=/run/user/1000` in service, ensure `loginctl enable-linger archipelago` | -| `permission denied` on volume mount | Wrong UID ownership — must use mapped UIDs | `sudo chown -R 100000:100000 /var/lib/archipelago/APP` (see UID mapping table) | -| `ERRO[0000] rootless containers not supported` | Podman not configured for rootless | Run `podman system migrate`, check `/etc/subuid` | -| `Error: creating container storage: layer not known` | Corrupted rootless storage | `podman system reset` (destroys all containers — last resort) | -| `Error: stat /tmp/podman-run-1000/...: no such file` | PrivateTmp=yes in systemd isolates /tmp | Set `PrivateTmp=no` in `archipelago.service` | -| Container ports unreachable 
from LAN | UFW DEFAULT_FORWARD_POLICY="DROP" | Change to "ACCEPT" in `/etc/default/ufw`, then `sudo ufw reload` | -| `Error: error creating network namespace` | Systemd `SystemCallFilter` blocks clone/unshare | Remove `SystemCallFilter=` from `archipelago.service` | -| Containers lose network after service restart | podman runtime dir in /tmp cleaned | Ensure `PrivateTmp=no` so /tmp/podman-run-1000/ persists | - -## Container Won't Start - -| Error | Cause | Fix | -|-------|-------|-----| -| `exec format error` | Binary built on wrong arch | Rebuild on the Linux server | -| `address already in use` | Port conflict | `ss -tlnp \| grep :PORT` to find offender | -| `permission denied` | Missing capability, wrong UID ownership, or read-only root | Check capabilities, check volume ownership with mapped UID, add tmpfs | -| `OCI runtime error` | Corrupt container state | `podman rm -f NAME && recreate` | -| `image not known` | Image not pulled | `podman pull IMAGE:TAG` | -| `no such network` | Network missing | `podman network create archy-net` | -| `Error: netavark: ...subnet overlap` | Network CIDR conflict | `podman network rm archy-net && podman network create archy-net` | - -## Container Starts But App Unreachable - -| Symptom | Check Layer | Fix | -|---------|------------|-----| -| Direct port works, /app/ doesn't | Nginx config | Add `/app/{id}/` location block | -| Neither works | Podman ports | `podman port NAME` — verify mapping exists | -| Port mapped but refused | Container logs | App crashing internally — check logs | -| Works sometimes | Resources | Check OOM kills, CPU, disk space | -| 502 Bad Gateway | Nginx→Container | Wrong port in proxy_pass or container restarted | -| Works locally but not from LAN | UFW forward policy | Set `DEFAULT_FORWARD_POLICY="ACCEPT"` in `/etc/default/ufw` | - -## Container Keeps Dying - -| Pattern | Cause | Fix | -|---------|-------|-----| -| Exits immediately (code 1) | Config error | Check `podman logs NAME` | -| Dies after 
minutes | OOM killed | Increase `--memory` limit | -| Dies when dep restarts | No restart policy | Add `--restart unless-stopped` | -| Crash loop | Repeated crash | Fix root cause, don't just restart | -| Exit code 127 | Missing binary in container | Wrong image tag or corrupted image — re-pull | -| Exit code 137 | Killed by OOM or signal | Check `dmesg` for OOM kill, check `podman inspect` for OOMKilled | - -## Network Issues - -| Problem | Cause | Fix | -|---------|-------|-----| -| Can't resolve container names | Not on archy-net | Recreate with `--network=archy-net` | -| Can't reach internet | DNS missing | Add `--dns 1.1.1.1` | -| Container-to-container timeout | Different networks | Put both on same network | -| Bitcoin RPC refused from container | rpcallowip wrong subnet | Use `rpcallowip=0.0.0.0/0` (safe: port mapped, not exposed) | -| Old containers can't find new network | Subnet changed (rootful→rootless) | Recreate containers on new archy-net (rootless uses 10.89.x.x) | - -## Volume Permission Patterns (Rootless UID Mapping) - -Formula: **host_uid = 100000 + container_uid** - -| Container UID | Host UID | Apps | Data Directory | -|---|---|---|---| -| 0 (root) | 100000 | lnd, fedimint, homeassistant, jellyfin, vaultwarden, photoprism, ollama, filebrowser, electrumx, btcpay, immich | `/var/lib/archipelago/{app}` | -| 70 | 100070 | postgres (btcpay-db, immich-db, penpot-postgres) | `/var/lib/archipelago/postgres-*` | -| 101 | 100101 | bitcoin-knots | `/var/lib/archipelago/bitcoin` | -| 472 | 100472 | grafana | `/var/lib/archipelago/grafana` | -| 999 | 100999 | MariaDB (mysql-mempool) | `/var/lib/archipelago/mysql-mempool` | - -## Capability Reference - -| Capability | Apps That Need It | Failure Mode | -|-----------|------------------|-------------| -| CHOWN | nextcloud, homeassistant, btcpay, jellyfin, portainer | Can't chown during setup | -| SETUID/SETGID | nextcloud, homeassistant, btcpay, jellyfin | Can't switch to service user | -| DAC_OVERRIDE | 
nextcloud, homeassistant, btcpay | Can't access cross-UID files | -| FOWNER | bitcoin-knots, lnd, fedimint | Can't modify data dir perms | -| NET_BIND_SERVICE | nginx-proxy-manager, vaultwarden | Can't bind ports <1024 | -| NET_ADMIN + NET_RAW | tailscale | Can't create TUN device or manage routes | - -## Read-Only Safe Apps - -Only these apps can run with `--read-only` + tmpfs: searxng, grafana, filebrowser, electrumx, mempool-electrs, electrs, nostr-rs-relay, ollama, indeedhub - -All others need writable root or will fail silently. - -## Systemd Sandbox Requirements for Rootless Podman - -These systemd service settings MUST be configured for rootless Podman to work: - -| Setting | Required Value | Why | -|---------|---------------|-----| -| `ProtectHome=` | `no` | Podman stores images in `~/.local/share/containers/` | -| `PrivateTmp=` | `no` | Podman runtime lives in `/tmp/podman-run-1000/` | -| `RestrictNamespaces=` | NOT SET | Rootless podman creates user namespaces | -| `SystemCallFilter=` | NOT SET | Rootless podman needs clone/unshare syscalls | -| `ReadWritePaths=` | Include `/var/lib/archipelago /run/user /tmp /etc/containers /var/lib/containers /run/containers` | Volume data + podman runtime paths | -| `Environment=` | `XDG_RUNTIME_DIR=/run/user/1000` | Podman socket location | diff --git a/.claude/skills/podman/references/port-map.md b/.claude/skills/podman/references/port-map.md deleted file mode 100644 index ad960883..00000000 --- a/.claude/skills/podman/references/port-map.md +++ /dev/null @@ -1,71 +0,0 @@ -# Archipelago Canonical Port Map - -All port assignments across the 4 configuration layers. When adding or debugging an app, every row must be consistent across all columns. 
- -## Bitcoin Stack - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| bitcoin-knots | 8332, 8333 | 8332, 8333 | archy-net | /app/bitcoin-knots/ | 8332→bitcoin-knots | -| bitcoin-ui | 8334 | 80 | bridge | /app/bitcoin-ui/ | 8334→bitcoin-knots | -| electrs | 50001 | 50001 | archy-net | /app/electrs/ | 50001→electrs | -| lnd | 9735, 10009, 8080 | 9735, 10009, 8080 | archy-net | /app/lnd/ | 10009→lnd | -| lnd-ui (RTL) | 8081 | 80 | bridge | /app/lnd-ui/ | 8081→lnd | - -## Lightning & Payment - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| btcpay-server | 23000 | 49392 | archy-net | /app/btcpay/ | 23000→btcpay-server | -| nbxplorer | 24444 | 32838 | archy-net | N/A (internal) | N/A | -| fedimint | 8173, 8174, 8175 | 8173, 8174, 8175 | archy-net | /app/fedimint/ | 8174→fedimint | -| fedimint-gateway | 8175 | 8175 | archy-net | /app/fedimint-gateway/ | 8175→fedimint-gateway | - -## Explorer & Monitoring - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| mempool | 4080 | 8080 | archy-net | /app/mempool/ | 4080→mempool | -| grafana | 3000 | 3000 | bridge | /app/grafana/ | 3000→grafana (new tab) | - -## Self-Hosted Apps - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| nextcloud | 8085 | 80 | bridge | /app/nextcloud/ | 8085→nextcloud | -| vaultwarden | 8082 | 80 | bridge | /app/vaultwarden/ | 8082→vaultwarden (new tab) | -| filebrowser | 8083 | 80 | bridge | /app/filebrowser/ | 8083→filebrowser | -| searxng | 8888 | 8080 | bridge | /app/searxng/ | 8888→searxng | -| photoprism | 2342 | 2342 | bridge | 
/app/photoprism/ | 2342→photoprism (new tab) | -| jellyfin | 8096 | 8096 | bridge | /app/jellyfin/ | 8096→jellyfin | -| homeassistant | 8123 | 8123 | bridge | /app/homeassistant/ | 8123→homeassistant (new tab) | -| ollama | 11434 | 11434 | archy-net | /app/ollama/ | 11434→ollama | -| open-webui | 3080 | 8080 | archy-net | /app/open-webui/ | 3080→open-webui | - -## Nostr & Social - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| nostr-rs-relay | 7000 | 8080 | archy-net | /app/nostr-rs-relay/ | 7000→nostr-rs-relay | -| indeedhub | 3001 | 3000 | archy-net | /app/indeedhub/ | 3001→indeedhub | - -## System - -| App | Host Port(s) | Container Port(s) | Network | Nginx Path | Frontend Map | -|-----|-------------|-------------------|---------|------------|-------------| -| tailscale | 8240 | 8240 | host | /app/tailscale/ | N/A | -| nginx-proxy-manager | 81, 8443 | 81, 443 | bridge | N/A | 81→nginx-proxy-manager | - -## Multi-Container Stacks - -**Immich**: immich-server (2283), immich-postgres (internal 5432), immich-redis (internal 6379) — all on immich-net -**Penpot**: penpot-frontend (9001→80), penpot-backend, penpot-exporter, penpot-postgres, penpot-mailcatch — all on penpot-net -**Mempool**: mempool (4080→8080), mempool-db (internal 3306) — on archy-net -**BTCPay**: btcpay-server (23000→49392), nbxplorer (24444→32838), btcpay-postgres (internal 5432) — on archy-net - -## Key Notes - -- **archy-net apps** resolve each other by container name (e.g., `bitcoin-knots:8332`) -- **bridge apps** are standalone — access services via host IP/port -- **host network** (tailscale only) — shares host namespace, no port mapping -- **New tab apps**: btcpay (23000), grafana (3000), vaultwarden (8082), photoprism (2342), homeassistant (8123) — X-Frame-Options blocks iframe diff --git a/.claude/skills/podman/references/uid-mapping.md 
b/.claude/skills/podman/references/uid-mapping.md deleted file mode 100644 index a8338720..00000000 --- a/.claude/skills/podman/references/uid-mapping.md +++ /dev/null @@ -1,93 +0,0 @@ -# Rootless Podman UID Mapping Reference - -## How Rootless UID Mapping Works - -When Podman runs as the `archipelago` user (UID 1000), container processes don't run as their "apparent" UID on the host. Instead, Linux user namespaces remap UIDs. - -**Mapping formula**: `host_uid = 100000 + container_uid` - -This is configured in `/etc/subuid` and `/etc/subgid`: -``` -archipelago:100000:65536 -``` - -This means: -- Container UID 0 (root inside container) → Host UID 100000 (unprivileged on host) -- Container UID 70 (postgres) → Host UID 100070 -- Container UID 101 (bitcoin) → Host UID 100101 -- etc. - -## Why This Matters - -Volume directories (bind mounts) on the host must be owned by the **mapped** UID, not the container UID. If Bitcoin runs as UID 101 inside its container, the host directory must be owned by UID 100101. - -If ownership is wrong, the container gets `permission denied` when trying to read/write its data. 
- -## Complete UID Mapping Table - -| Container UID | Host UID | Containers | Fix Command | -|---|---|---|---| -| 0 (root) | 100000 | lnd, fedimint, fedimint-gateway, homeassistant, jellyfin, vaultwarden, photoprism, ollama, filebrowser, electrumx, btcpay-server, nbxplorer, immich, nostr-rs-relay, strfry, nextcloud, searxng, onlyoffice, tailscale, uptime-kuma | `sudo chown -R 100000:100000 /var/lib/archipelago/{app}` | -| 70 | 100070 | postgres (btcpay-db, immich-db, penpot-postgres) | `sudo chown -R 100070:100070 /var/lib/archipelago/postgres-*` | -| 101 | 100101 | bitcoin-knots, bitcoin-core | `sudo chown -R 100101:100101 /var/lib/archipelago/bitcoin` | -| 472 | 100472 | grafana | `sudo chown -R 100472:100472 /var/lib/archipelago/grafana` | -| 999 | 100999 | MariaDB (mysql-mempool) | `sudo chown -R 100999:100999 /var/lib/archipelago/mysql-mempool` | - -## How to Find a Container's UID - -If you encounter a new container with permission issues: - -```bash -# Check what user the container runs as -podman inspect CONTAINER_NAME --format "{{.Config.User}}" - -# If empty, it runs as root (UID 0) → host UID 100000 - -# If it shows a username, find the UID inside the image -podman run --rm IMAGE_NAME id - -# Then calculate: host_uid = 100000 + container_uid -``` - -## Fix Script - -Run this after any fresh install, migration, or when containers have permission errors: - -```bash -#!/bin/bash -# Fix all rootless podman volume ownership - -# UID 0 → 100000 (most containers) -for dir in lnd fedimint fedimint-gateway homeassistant jellyfin vaultwarden photoprism \ - ollama filebrowser electrumx btcpay nbxplorer immich nostr-rs-relay nextcloud \ - searxng onlyoffice uptime-kuma; do - [ -d "/var/lib/archipelago/$dir" ] && sudo chown -R 100000:100000 "/var/lib/archipelago/$dir" -done - -# UID 101 → 100101 (Bitcoin) -[ -d "/var/lib/archipelago/bitcoin" ] && sudo chown -R 100101:100101 /var/lib/archipelago/bitcoin - -# UID 70 → 100070 (PostgreSQL) -for dir in 
/var/lib/archipelago/postgres-* /var/lib/archipelago/btcpay-db /var/lib/archipelago/immich-db; do - [ -d "$dir" ] && sudo chown -R 100070:100070 "$dir" -done - -# UID 999 → 100999 (MariaDB) -[ -d "/var/lib/archipelago/mysql-mempool" ] && sudo chown -R 100999:100999 /var/lib/archipelago/mysql-mempool - -# UID 472 → 100472 (Grafana) -[ -d "/var/lib/archipelago/grafana" ] && sudo chown -R 100472:100472 /var/lib/archipelago/grafana -``` - -## Rootful vs Rootless Comparison - -| Aspect | Rootful (old) | Rootless (current) | -|--------|---------------|-------------------| -| Podman command | `sudo podman` | `podman` (as archipelago user) | -| Container storage | `/var/lib/containers/storage` | `~/.local/share/containers/storage` | -| Container subnet | `10.88.0.0/16` | `10.89.0.0/16` | -| Volume ownership | Container UID directly | Mapped UID (100000 + container_uid) | -| Requires root? | Yes | No (except fixing volume ownership) | -| XDG_RUNTIME_DIR | Not needed | Required: `/run/user/1000` | -| User lingering | Not needed | Required: `loginctl enable-linger` | -| Systemd restrictions | All can be enabled | Must disable: RestrictNamespaces, SystemCallFilter | diff --git a/.claude/skills/polish-backend/SKILL.md b/.claude/skills/polish-backend/SKILL.md deleted file mode 100644 index 327f4202..00000000 --- a/.claude/skills/polish-backend/SKILL.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -name: polish-backend -description: Fix Rust backend quality issues in Archipelago. Eliminates panics/unwraps, adds timeouts, implements connection pooling, fixes clippy warnings. Use when user says "polish backend", "fix unwraps", "backend quality", or "eliminate panics". ---- - -# Skill: Polish Backend Quality - -Fix Rust backend quality issues: eliminate panics, add timeouts, implement connection pooling, fix clippy warnings. The backend must never crash in production. 
- -## Priority 1: Eliminate Panics - -### Find all unwrap/expect in production code -```bash -ssh archipelago@192.168.1.228 "cd ~/archy && grep -rn 'unwrap()\|\.expect(' core/archipelago/src/ core/container/src/ core/security/src/ core/performance/src/ --include='*.rs' | grep -v test | grep -v '#\[test\]' | grep -v '_test.rs'" -``` - -### Fix patterns: - -**Response builder unwraps** (handler.rs): -```rust -// BAD -Response::builder().body(body).unwrap() - -// GOOD -Response::builder().body(body).map_err(|e| { - tracing::error!("Failed to build response: {}", e); - // Return a minimal 500 response -})? -``` - -**Socket address parsing** (main.rs): -```rust -// BAD -addr.parse().expect("Invalid bind address") - -// GOOD -addr.parse().context("Invalid bind address")? -``` - -**TOTP secret creation** (totp.rs): -```rust -// BAD -TOTP::new(...).unwrap() - -// GOOD -TOTP::new(...).map_err(|e| anyhow::anyhow!("Failed to create TOTP: {}", e))? -``` - -**Cosign URL parsing** (image_verifier.rs): -```rust -// BAD -sig_url.strip_prefix("cosign://").unwrap() - -// GOOD -sig_url.strip_prefix("cosign://") - .ok_or_else(|| anyhow::anyhow!("Invalid cosign URL format: {}", sig_url))? -``` - -## Priority 2: Add Timeouts - -Every external call must have an explicit timeout: - -```rust -// Container operations -tokio::time::timeout(Duration::from_secs(30), podman_operation()).await - .context("Container operation timed out after 30s")??; - -// HTTP calls (Bitcoin RPC, LND proxy) -let client = reqwest::Client::builder() - .timeout(Duration::from_secs(10)) - .build()?; - -// Nostr operations -tokio::time::timeout(Duration::from_secs(15), nostr_publish()).await - .context("Nostr publish timed out")?; -``` - -## Priority 3: Connection Pooling - -Store a reusable `reqwest::Client` in `RpcHandler`: -```rust -pub struct RpcHandler { - // ... existing fields - http_client: reqwest::Client, -} - -impl RpcHandler { - pub fn new(...) 
-> Self { - let http_client = reqwest::Client::builder() - .timeout(Duration::from_secs(10)) - .pool_max_idle_per_host(5) - .build() - .expect("Failed to create HTTP client"); - // ... - } -} -``` - -Use `self.http_client` everywhere instead of creating new clients per request. - -## Priority 4: Fix Clippy Warnings - -Run on dev server: -```bash -ssh archipelago@192.168.1.228 "cd ~/archy && cargo clippy --all-targets --all-features 2>&1" -``` - -Known warnings to fix: -- `should_implement_trait`: Implement `FromStr` for `AppManifest` -- `get_first` → `.first()` -- `assign_op_pattern` → use `+=` -- `wildcard_in_or_patterns` → remove redundant `_` -- `redundant_field_names` → shorthand -- `very_complex_type` → type alias -- `if_else_collapse` → simplify - -## Priority 5: Replace println with tracing - -```bash -ssh archipelago@192.168.1.228 "cd ~/archy && grep -rn 'println!\|eprintln!' core/ --include='*.rs' | grep -v test | grep -v target/" -``` - -Replace: -- `println!("...")` → `tracing::info!("...")` -- `eprintln!("...")` → `tracing::warn!("...")` - -## Priority 6: Remove Dead Code - -- Remove `#[allow(dead_code)]` annotations, verify if types are actually used -- Remove unused fields (e.g., `identity_dir` in NodeIdentity) -- Remove unused methods (e.g., `verify()`, `did_key()` in NodeIdentity) - -## Verification - -```bash -ssh archipelago@192.168.1.228 "cd ~/archy && cargo clippy --all-targets --all-features 2>&1 | grep -c 'warning'" -# Should be 0 - -ssh archipelago@192.168.1.228 "cd ~/archy && grep -rn 'unwrap()\|\.expect(' core/archipelago/src/ --include='*.rs' | grep -v test | grep -v '_test.rs' | wc -l" -# Should be 0 (or near-zero with justified exceptions) - -ssh archipelago@192.168.1.228 "cd ~/archy && grep -rn 'println!\|eprintln!' 
core/ --include='*.rs' | grep -v test | grep -v target/ | wc -l" -# Should be 0 -``` - -## Build & Deploy - -All Rust changes MUST be built on the dev server, never macOS: -```bash -./scripts/deploy-to-target.sh --live -``` - -After deploy, verify: -```bash -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 "systemctl status archipelago && curl -s http://localhost:5678/health" -``` diff --git a/.claude/skills/polish-deploy/SKILL.md b/.claude/skills/polish-deploy/SKILL.md deleted file mode 100644 index 896ba4bb..00000000 --- a/.claude/skills/polish-deploy/SKILL.md +++ /dev/null @@ -1,181 +0,0 @@ ---- -name: polish-deploy -description: Harden Archipelago deployment pipeline with rollback capability, pre-deploy checks, post-deploy health verification, and deployment locking. Use when user says "polish deploy", "harden deployment", "add rollback", or "deploy safety". ---- - -# Skill: Polish Deployment Pipeline - -Harden deploy-to-target.sh with rollback capability, pre-deploy checks, post-deploy health verification, and deployment locking. - -## 1. Pre-Deploy Checks - -Add to the beginning of deploy-to-target.sh: - -```bash -pre_deploy_checks() { - echo "Running pre-deploy checks..." - - # SSH key exists - if [ ! -f "$SSH_KEY" ]; then - echo "ERROR: SSH key not found at $SSH_KEY" - exit 1 - fi - - # Target reachable - ssh $SSH_OPTS "$TARGET_HOST" "echo ok" >/dev/null 2>&1 || { - echo "ERROR: Cannot reach $TARGET_HOST" - exit 1 - } - - # Disk space (need 2GB free) - local free_kb=$(ssh $SSH_OPTS "$TARGET_HOST" "df /home | tail -1 | awk '{print \$4}'") - if [ "$free_kb" -lt 2097152 ]; then - echo "ERROR: Need 2GB free disk space, have $(( free_kb / 1024 ))MB" - exit 1 - fi - - echo "Pre-deploy checks passed" -} -``` - -## 2. Backup Before Deploy - -Before overwriting binary or frontend: - -```bash -backup_current() { - echo "Backing up current deployment..." 
- ssh $SSH_OPTS "$TARGET_HOST" " - # Backup binary - if [ -f /usr/local/bin/archipelago ]; then - sudo cp /usr/local/bin/archipelago /usr/local/bin/archipelago.backup - fi - # Backup frontend - if [ -d /opt/archipelago/web-ui ]; then - sudo cp -a /opt/archipelago/web-ui /opt/archipelago/web-ui.backup - fi - # Backup nginx config - if [ -f /etc/nginx/sites-available/archipelago ]; then - sudo cp /etc/nginx/sites-available/archipelago /etc/nginx/sites-available/archipelago.backup - fi - " - echo "Backup complete" -} -``` - -## 3. Post-Deploy Health Check - -After restarting services: - -```bash -health_check() { - echo "Running post-deploy health check..." - local max_attempts=15 - local attempt=0 - - while [ $attempt -lt $max_attempts ]; do - attempt=$((attempt + 1)) - local status=$(ssh $SSH_OPTS "$TARGET_HOST" "curl -s -o /dev/null -w '%{http_code}' http://localhost:5678/health" 2>/dev/null) - if [ "$status" = "200" ]; then - echo "Health check passed (attempt $attempt)" - return 0 - fi - echo "Health check attempt $attempt/$max_attempts (status: $status)" - sleep 2 - done - - echo "ERROR: Health check failed after $max_attempts attempts" - return 1 -} -``` - -## 4. Rollback on Failure - -If health check fails: - -```bash -rollback() { - echo "ROLLING BACK deployment..." - ssh $SSH_OPTS "$TARGET_HOST" " - # Restore binary - if [ -f /usr/local/bin/archipelago.backup ]; then - sudo cp /usr/local/bin/archipelago.backup /usr/local/bin/archipelago - fi - # Restore frontend - if [ -d /opt/archipelago/web-ui.backup ]; then - sudo rm -rf /opt/archipelago/web-ui - sudo mv /opt/archipelago/web-ui.backup /opt/archipelago/web-ui - fi - # Restore nginx - if [ -f /etc/nginx/sites-available/archipelago.backup ]; then - sudo cp /etc/nginx/sites-available/archipelago.backup /etc/nginx/sites-available/archipelago - sudo nginx -t && sudo systemctl reload nginx - fi - # Restart with old binary - sudo systemctl restart archipelago - " - echo "Rollback complete. 
Previous version restored." -} -``` - -## 5. Deployment Lock - -Prevent concurrent deploys: - -```bash -LOCK_FILE="/tmp/archipelago-deploy.lock" - -acquire_lock() { - exec 9>"$LOCK_FILE" - flock -n 9 || { - echo "ERROR: Another deployment is in progress" - exit 1 - } - trap "flock -u 9; rm -f $LOCK_FILE" EXIT -} -``` - -## 6. Nginx Config Validation - -Before reloading nginx: - -```bash -validate_nginx() { - ssh $SSH_OPTS "$TARGET_HOST" "sudo nginx -t" 2>&1 || { - echo "ERROR: Nginx config invalid. Restoring backup..." - ssh $SSH_OPTS "$TARGET_HOST" " - sudo cp /etc/nginx/sites-available/archipelago.backup /etc/nginx/sites-available/archipelago - sudo nginx -t && sudo systemctl reload nginx - " - return 1 - } -} -``` - -## Integration - -The deploy flow becomes: -1. `acquire_lock` -2. `pre_deploy_checks` -3. `backup_current` -4. Build + deploy (existing logic) -5. `validate_nginx` -6. Restart services -7. `health_check || rollback` - -## Verification - -Test the rollback: -1. Deploy a working version -2. Intentionally break the binary (e.g., truncate it) -3. Deploy the broken version -4. Verify rollback triggers and previous version is restored -5. Verify service is healthy after rollback - -## Deploy - -```bash -./scripts/deploy-to-target.sh --live -``` - -After modifying the deploy script itself, test with a known-good deploy first. diff --git a/.claude/skills/polish-errors/SKILL.md b/.claude/skills/polish-errors/SKILL.md deleted file mode 100644 index 85ac5a81..00000000 --- a/.claude/skills/polish-errors/SKILL.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -name: polish-errors -description: Fix silent error handling across Archipelago codebase. Replaces empty catch blocks, adds user-visible error feedback for all async operations. Use when user says "polish errors", "fix error handling", "silent catches", or "error feedback". ---- - -# Skill: Polish Error Handling - -Fix silent error handling patterns across the entire codebase. 
Every async operation must have visible, actionable error feedback for the user. - -## What to Fix - -### Frontend (neode-ui/src/) - -1. **Silent catch blocks**: Find and replace all `.catch(() => {})` patterns - - Search: `grep -rn "catch.*=>.*{}" --include="*.vue" --include="*.ts" src/` - - Replace with: proper error logging + user-visible feedback (toast, inline error, or modal) - - Pattern: - ```typescript - .catch((err) => { - console.error('[ComponentName] operation failed:', err) - errorMessage.value = formatError(err) - }) - ``` - -2. **Unhandled router.push**: Find `router.push(...).catch(() => {})` - - Replace with: `router.push(...).catch(console.error)` minimum - - Or handle NavigationDuplicated gracefully - -3. **Silent try/catch**: Find `try { ... } catch { /* empty */ }` - - Every catch block must either: log the error, show user feedback, or explicitly comment why it's safe to ignore - -4. **Missing error states**: For each view, verify: - - `ref` error variable exists - - Error is displayed in template (inline message, not just console) - - Error clears on retry or navigation - -### Backend (core/) - -5. **Silent error swallowing**: Find `unwrap_or_default()` on serialization - - Replace with proper error propagation or logging - - Pattern: `.map_err(|e| anyhow::anyhow!("Serialization failed: {}", e))?` - -6. **Error response consistency**: All RPC errors should use: - - Consistent error codes (not random negative numbers) - - Human-readable messages - - Consistent JSON structure - -## Verification - -After fixes, run: -```bash -# Zero silent catches -grep -rn "catch.*=>.*{}\|catch\s*{" neode-ui/src/ --include="*.vue" --include="*.ts" | grep -v node_modules | grep -v "console\|error\|log\|warn" - -# Zero empty catch blocks -grep -rn "catch.*{$" neode-ui/src/ --include="*.vue" --include="*.ts" -A1 | grep -P "^\d+-\s*\}" -``` - -Both should return zero results. 
- -## Error Display Pattern - -Use this consistent pattern for user-facing errors: -```typescript -const errorMessage = ref(null) - -async function doAction() { - errorMessage.value = null - try { - await rpcClient.someCall() - } catch (err) { - errorMessage.value = err instanceof Error ? err.message : 'Operation failed' - } -} -``` - -Template: -```vue -

{{ errorMessage }}

-``` - -## Deploy After Fixes - -Always deploy and verify on live server after making changes: -```bash -./scripts/deploy-to-target.sh --live -``` diff --git a/.claude/skills/polish-forms/SKILL.md b/.claude/skills/polish-forms/SKILL.md deleted file mode 100644 index c11a64e5..00000000 --- a/.claude/skills/polish-forms/SKILL.md +++ /dev/null @@ -1,125 +0,0 @@ ---- -name: polish-forms -description: Improve form validation across Archipelago UI with real-time feedback, input sanitization, disabled states during submission, and consistent error messaging. Use when user says "polish forms", "form validation", "input validation", or "fix forms". ---- - -# Skill: Polish Form Validation - -Improve all form inputs to have real-time validation feedback, proper trimming, disabled states during submission, and consistent error messaging. - -## Forms to Polish - -### 1. Login.vue — Password Setup -- Real-time validation as user types (debounced 300ms): - - Length >= 8 chars (show checkmark/X) - - Passwords match (show match indicator) -- Trim input on submit -- Disable submit button while `isSubmitting` -- Clear error on new input - -### 2. Login.vue — TOTP Verification -- `inputmode="numeric"` + `pattern="[0-9]*"` -- Auto-submit when 6 digits entered -- Show session timeout countdown if applicable -- Trim and strip non-numeric characters on paste - -### 3. Settings.vue — Password Change -- Real-time strength validation: - - 12+ characters - - Has uppercase, lowercase, digit, special char - - New password matches confirmation -- Show strength meter (weak/medium/strong) -- Disable button during submission -- Show spinner in button during async operation - -### 4. 
Any other form inputs found across views - -## Validation Pattern - -```typescript -const password = ref('') -const confirmPassword = ref('') -const isSubmitting = ref(false) - -const passwordErrors = computed(() => { - const errors: string[] = [] - if (password.value.length > 0 && password.value.length < 8) - errors.push('Must be at least 8 characters') - return errors -}) - -const passwordsMatch = computed(() => - confirmPassword.value.length > 0 && password.value === confirmPassword.value -) - -async function submit() { - if (isSubmitting.value) return - isSubmitting.value = true - try { - await rpcClient.call(...) - } catch (err) { - errorMessage.value = formatError(err) - } finally { - isSubmitting.value = false - } -} -``` - -## Template Pattern - -```vue - -
    -
  • {{ err }}
  • -
- - -``` - -## Input Trimming - -All text inputs should be trimmed before submission: -```typescript -const trimmed = password.value.trim() -``` - -## Error Message Consistency - -Create or use a `formatError` utility: -```typescript -function formatError(err: unknown): string { - if (err instanceof Error) { - if (err.message.includes('fetch') || err.message.includes('network')) - return 'Unable to reach server. Check your connection.' - if (err.message.includes('401') || err.message.includes('unauthorized')) - return 'Session expired. Please log in again.' - return err.message - } - return 'Something went wrong. Please try again.' -} -``` - -## Verification - -For each form: -- [ ] Real-time validation shows feedback as user types -- [ ] Submit button disabled during operation -- [ ] Submit button disabled when validation fails -- [ ] Inputs trimmed before submission -- [ ] Error messages are user-friendly (no raw error strings) -- [ ] Success feedback shown after completion - -## Deploy After Fixes - -```bash -./scripts/deploy-to-target.sh --live -``` - -Test each form with: valid input, invalid input, empty input, whitespace-only input, rapid double-click on submit. diff --git a/.claude/skills/polish-loading/SKILL.md b/.claude/skills/polish-loading/SKILL.md deleted file mode 100644 index 04db16c8..00000000 --- a/.claude/skills/polish-loading/SKILL.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -name: polish-loading -description: Add skeleton loaders, loading indicators, timeout warnings, and empty states to all Archipelago async views. Use when user says "polish loading", "add skeletons", "loading states", "empty states", or "blank screen fix". ---- - -# Skill: Polish Loading States - -Add skeleton loaders, loading indicators, timeout warnings, and empty states to all async views. No view should ever show a blank screen. 
- -## Skeleton Loader Component - -Create or use a `SkeletonLoader.vue` component with the glassmorphism style: -- Background: `bg-white/5` with shimmer animation -- Rounded corners matching the card it replaces -- Animate with CSS `@keyframes shimmer` (translate gradient left to right) -- Must use global classes from style.css, not inline Tailwind - -## Views to Fix - -For EACH view in `neode-ui/src/views/`, verify these states exist: - -### 1. Loading State -- Show skeleton placeholders immediately on mount -- Pattern: - ```vue - - ``` - -### 2. Empty State -- When data loads but is empty (zero items) -- Show helpful message with CTA -- Pattern: - ```vue -
-

No apps installed yet

- Browse Marketplace -
- ``` - -### 3. Timeout Warning -- After 15 seconds of loading, show "Taking longer than expected..." -- After 30 seconds, show troubleshooting options -- Pattern: - ```typescript - const loadingTooLong = ref(false) - let timeout: ReturnType - - onMounted(() => { - timeout = setTimeout(() => { loadingTooLong.value = true }, 15000) - }) - - watch(isLoading, (val) => { if (!val) clearTimeout(timeout) }) - ``` - -## Priority Views (must have all 3 states) - -1. **Apps.vue** — app grid skeleton, "No apps installed" empty state -2. **AppDetails.vue** — detail card skeleton, loading indicator -3. **Marketplace.vue** — app card grid skeleton, "Loading apps..." with timeout -4. **Dashboard.vue** — metric card skeletons -5. **Cloud.vue** — file list skeleton, "No files" empty state -6. **Settings.vue** — settings section skeleton -7. **Server.vue** — server info skeleton - -## Verification - -For each view, confirm: -- [ ] `isLoading` ref exists and is set properly -- [ ] Template has `v-if="isLoading"` skeleton section -- [ ] Template has empty state for zero-data case -- [ ] Loading timeout warning after 15s -- [ ] Skeleton uses global classes, not inline Tailwind - -## Deploy After Fixes - -Always deploy and verify on live server: -```bash -./scripts/deploy-to-target.sh --live -``` - -Test by throttling network in browser DevTools to observe loading states. diff --git a/.claude/skills/polish-security/SKILL.md b/.claude/skills/polish-security/SKILL.md deleted file mode 100644 index b7a4c4ff..00000000 --- a/.claude/skills/polish-security/SKILL.md +++ /dev/null @@ -1,162 +0,0 @@ ---- -name: polish-security -description: Security hardening for Archipelago systemd services, nginx headers, secrets management, and rate limiting. Use when user says "polish security", "harden services", "security headers", "rate limiting", or "secrets management". ---- - -# Skill: Polish Security - -Security hardening pass for systemd, nginx, secrets management, and rate limiting. - -## 1. 
Systemd Service Hardening - -File: `image-recipe/configs/archipelago.service` - -Add these directives to the `[Service]` section: -```ini -PrivateTmp=yes -NoNewPrivileges=true -ProtectSystem=strict -ProtectHome=yes -ReadWritePaths=/var/lib/archipelago -SystemCallFilter=@system-service -SystemCallFilter=~@privileged @resources -``` - -After editing, sync to server and verify: -```bash -ssh archipelago@192.168.1.228 "sudo systemd-analyze security archipelago" -``` - -## 2. Nginx Security Headers - -File: `image-recipe/configs/nginx-archipelago.conf` - -### Add HSTS (HTTPS block only): -```nginx -add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; -``` - -### Fix CSP (remove unsafe-inline): -Replace: -```nginx -add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; connect-src 'self' ws: wss:; frame-src 'self' http://localhost:* http://192.168.*:*;" always; -``` - -With CSP that uses nonces or hashes for inline scripts/styles. If inline scripts can't be removed yet, document which ones and plan their removal. - -### Add rate limiting zones: -```nginx -# In http block: -limit_req_zone $binary_remote_addr zone=api:10m rate=30r/s; -limit_req_zone $binary_remote_addr zone=auth:10m rate=5r/m; - -# On login/auth endpoints: -limit_req zone=auth burst=3 nodelay; - -# On API endpoints: -limit_req zone=api burst=50 nodelay; -``` - -### Custom log format (strip tokens): -```nginx -log_format no_tokens '$remote_addr - $remote_user [$time_local] "$request_method $uri $server_protocol" $status $body_bytes_sent "$http_referer"'; -access_log /var/log/nginx/archipelago_access.log no_tokens; -``` - -## 3. 
Secrets Management - -### Remove hardcoded RPC credentials from scripts -File: `scripts/deploy-to-target.sh` - -Replace: -```bash --e CORE_RPC_USERNAME=archipelago -e CORE_RPC_PASSWORD=archipelago123 -``` - -With: -```bash --e CORE_RPC_USERNAME=archipelago -e CORE_RPC_PASSWORD=$(cat /var/lib/archipelago/secrets/bitcoin-rpc-pass) -``` - -### Generate secrets on first boot -File: `scripts/first-boot-containers.sh` - -Add at the top: -```bash -SECRETS_DIR="/var/lib/archipelago/secrets" -mkdir -p "$SECRETS_DIR" -chmod 700 "$SECRETS_DIR" - -# Generate Bitcoin RPC password if not exists -if [ ! -f "$SECRETS_DIR/bitcoin-rpc-pass" ]; then - openssl rand -base64 24 > "$SECRETS_DIR/bitcoin-rpc-pass" - chmod 600 "$SECRETS_DIR/bitcoin-rpc-pass" -fi -``` - -### Remove hardcoded credentials from Rust backend -File: `core/archipelago/src/api/rpc/bitcoin.rs` - -Replace: -```rust -.basic_auth("archipelago", Some("archipelago123")) -``` - -With: -```rust -let rpc_user = std::env::var("ARCHIPELAGO_BITCOIN_RPC_USER").unwrap_or_else(|_| "archipelago".into()); -let rpc_pass = std::env::var("ARCHIPELAGO_BITCOIN_RPC_PASS").unwrap_or_else(|_| "archipelago123".into()); -.basic_auth(&rpc_user, Some(&rpc_pass)) -``` - -## 4. Rate Limiting on Backend - -File: `core/archipelago/src/api/handler.rs` - -Add rate limiting to unauthenticated endpoints: -- `/archipelago/node-message` — 10 req/min per IP -- `/electrs-status` — 30 req/min per IP - -Use an in-memory `HashMap` with cleanup on access. - -## 5. SSH Hardening - -File: `scripts/deploy-to-target.sh` - -Replace: -```bash -SSH_OPTS="-o StrictHostKeyChecking=no" -``` - -With: -```bash -SSH_OPTS="-o StrictHostKeyChecking=accept-new" -``` - -And add SSH key validation: -```bash -if [ ! 
-f "$SSH_KEY" ]; then - echo "ERROR: SSH key not found at $SSH_KEY" - exit 1 -fi -``` - -## Verification Checklist - -- [ ] `systemd-analyze security archipelago` score < 5.0 (lower is better) -- [ ] Nginx headers pass: `curl -I http://192.168.1.228 | grep -i 'strict-transport\|content-security\|x-frame'` -- [ ] No hardcoded passwords in scripts: `grep -rn 'archipelago123' scripts/ core/` -- [ ] Rate limiting works: rapid-fire requests get 429 -- [ ] SSH key required (no password fallback) - -## Deploy - -After changes, sync configs and deploy: -```bash -./scripts/deploy-to-target.sh --live -``` - -Then sync to ISO recipe: -```bash -# Run /sync-configs skill -``` diff --git a/.claude/skills/polish-websocket/SKILL.md b/.claude/skills/polish-websocket/SKILL.md deleted file mode 100644 index 424b7b7b..00000000 --- a/.claude/skills/polish-websocket/SKILL.md +++ /dev/null @@ -1,172 +0,0 @@ ---- -name: polish-websocket -description: Improve Archipelago WebSocket reliability, reconnection UX, heartbeat monitoring, session timeout detection, and connection status indicators. Use when user says "polish websocket", "fix reconnection", "websocket reliability", or "connection status". ---- - -# Skill: Polish WebSocket & Real-Time - -Improve WebSocket reliability, reconnection UX, heartbeat, session timeout detection, and connection status indicators. - -## 1. Connection Status Indicator - -### Create or update connection status display -- **Location**: App.vue header or create ConnectionStatus.vue component -- **States**: Connected (green), Reconnecting (amber pulse), Disconnected (red) -- **Data source**: `wsClient.isConnected()` from websocket.ts -- **Style**: Use existing design tokens, small dot + text in header area - -```vue -
-
- - {{ isConnected ? '' : isReconnecting ? 'Reconnecting...' : 'Offline' }} - -
-``` - -### Fix OnlineStatusPill.vue -- Connect to actual WebSocket state instead of hardcoded "Online" -- Use the app store's connection state - -## 2. Reconnection UX - -### Don't silently give up -File: `api/websocket.ts` - -After max reconnect attempts (currently 10), instead of silently stopping: -- Set a `permanentlyDisconnected` flag -- Emit event that App.vue listens to -- Show persistent banner: "Connection lost. Click to retry." or "Refresh page to reconnect." - -```typescript -if (this.reconnectAttempts >= this.maxReconnectAttempts) { - this.shouldReconnect = false - this.notifyConnectionState(false) - // Emit permanent disconnect event - this.onPermanentDisconnect?.() -} -``` - -### Allow manual reconnect -Add a `forceReconnect()` method that resets attempt counter and tries again: -```typescript -forceReconnect() { - this.reconnectAttempts = 0 - this.shouldReconnect = true - this.connect() -} -``` - -## 3. Heartbeat Improvement - -File: `api/websocket.ts` - -Current: 60-second stale detection (passive). -Target: 30-second active ping with 5-second pong timeout. - -```typescript -private startHeartbeat() { - this.heartbeatInterval = setInterval(() => { - if (this.ws?.readyState === WebSocket.OPEN) { - this.ws.send(JSON.stringify({ type: 'ping' })) - this.pongTimeout = setTimeout(() => { - // No pong received — connection is dead - this.ws?.close() - this.handleReconnect() - }, 5000) - } - }, 30000) -} - -// In message handler: -if (data.type === 'pong') { - clearTimeout(this.pongTimeout) - return -} -``` - -Note: Backend must respond to `ping` with `pong`. Check handler.rs WebSocket handler. - -## 4. Session Timeout Detection - -File: `api/rpc-client.ts` - -When RPC returns 401 or 403: -```typescript -if (response.status === 401 || response.status === 403) { - // Session expired — redirect to login - window.location.href = '/login' - return -} -``` - -This should be in the base `call()` method so it applies to all RPC calls. - -## 5. 
Fix Race Condition on Reconnect - -File: `stores/app.ts` or `api/websocket.ts` - -Problem: `isWsSubscribed` flag doesn't prevent duplicate listeners on rapid reconnect. - -Fix: Use listener deduplication: -```typescript -private listeners = new Map>() - -subscribe(event: string, callback: Function) { - if (!this.listeners.has(event)) { - this.listeners.set(event, new Set()) - } - this.listeners.get(event)!.add(callback) -} -``` - -Or simpler: remove all listeners before reconnect, then re-add: -```typescript -onReconnect() { - // Clear old subscriptions - this.removeAllListeners() - // Re-subscribe - this.setupSubscriptions() -} -``` - -## 6. Message Queuing During Disconnect - -When WebSocket is down, queue subscription requests: -```typescript -private pendingSubscriptions: Array<() => void> = [] - -subscribe(event: string, callback: Function) { - if (this.ws?.readyState !== WebSocket.OPEN) { - this.pendingSubscriptions.push(() => this.subscribe(event, callback)) - return - } - // Normal subscribe logic -} - -onReconnected() { - // Replay pending subscriptions - const pending = [...this.pendingSubscriptions] - this.pendingSubscriptions = [] - pending.forEach(fn => fn()) -} -``` - -## Verification - -1. **Kill backend** → frontend shows "Disconnected" → restart backend → frontend reconnects and shows "Connected" -2. **Toggle wifi** → status indicator updates → wifi back → auto-reconnect -3. **Wait for session timeout** → next RPC call redirects to login -4. **Rapid reconnect** → no duplicate event handlers (check with DevTools) -5. **Leave tab in background** → come back → status is accurate - -## Deploy - -```bash -./scripts/deploy-to-target.sh --live -``` - -Test with browser DevTools Network tab to observe WebSocket frames. 
diff --git a/.claude/skills/polish/SKILL.md b/.claude/skills/polish/SKILL.md deleted file mode 100644 index 2102075c..00000000 --- a/.claude/skills/polish/SKILL.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -name: polish -description: Production polish orchestrator for Archipelago. Coordinates all polish sub-skills by reading plan.md and executing the current week's tasks. Use when user says "polish", "production polish", "overnight polish", or "run the polish plan". ---- - -# Skill: Production Polish (Overnight Orchestrator) - -Main entry point for the Archipelago production polish plan. Reads `plan.md` at the project root, determines the current week based on today's date, and executes the tasks for that week. - -## How It Works - -1. Read `plan.md` from the project root -2. Determine the current week from the schedule: - - Week 1: March 10–16 — Silent Failures & Error Handling - - Week 2: March 17–23 — Loading States & Visual Feedback - - Week 3: March 24–30 — Form Validation & Input Quality - - Week 4: March 31 – April 6 — Backend Robustness - - Week 5: April 7–13 — WebSocket & Real-Time Quality - - Week 6: April 14–20 — Deployment & Infrastructure Hardening - - Week 7: April 21–27 — Accessibility, Polish & Edge Cases - - Week 8: April 28 – May 4 — Integration Testing, Final Sweep & ISO -3. Execute tasks for the current week, in order -4. After completing tasks, run `/sweep` to verify -5. Deploy and verify with `/deploy` then `/check-server` - -## Execution Flow - -### Step 1: Read the plan -``` -Read plan.md and find the current week's section -``` - -### Step 2: Check what's already done -Run the verification checks for the current week's tasks. 
For example in Week 1: -- Count remaining `.catch(() => {})` patterns -- Count remaining `console.log` outside dev guards -- Count remaining `unwrap()` in backend production code -- Check if hardcoded credentials still exist - -### Step 3: Work on the next incomplete task -Pick the first task in the current week that still has violations (hasn't met its acceptance criteria). Fix violations one file at a time: -1. Read the file -2. Apply the fix following the pattern described in the task -3. Verify the fix compiles/type-checks -4. Move to the next violation - -### Step 4: Verify after each batch of fixes -After fixing all violations for a task: -- Frontend: `cd neode-ui && npx vue-tsc --noEmit` -- Backend: `ssh archipelago@192.168.1.228 "cd ~/archy && cargo check"` -- Run the task's specific acceptance grep/check - -### Step 5: Deploy when a task is complete -When all violations for a task are fixed and verified: -```bash -./scripts/deploy-to-target.sh --live -``` -Then verify: -```bash -ssh -i ~/.ssh/archipelago-deploy archipelago@192.168.1.228 "systemctl is-active archipelago && curl -s http://localhost:5678/health" -``` - -### Step 6: Move to the next task -Repeat Steps 3-5 for the next incomplete task in the current week. - -### Step 7: When all tasks are done -Run `/sweep` for a full quality report. If clean, the week is complete. 
- -## Rules - -- **Never change functionality** — only improve quality of existing code -- **Never change the design** — use existing glassmorphism classes, color tokens, and layout patterns -- **Always deploy after changes** — don't leave undeployed code -- **Always verify after deploy** — check server health -- **Build Rust on the dev server** — never compile Rust on macOS -- **Commit after each completed task** — atomic commits with `fix:` or `refactor:` prefix -- **If something breaks, revert** — don't push forward with broken code - -## Arguments - -If `$ARGUMENTS` is provided: -- `week N` — Force execution of week N regardless of date -- `task N.M` — Execute only task N.M (e.g., `task 1.3`) -- `status` — Show completion status for all weeks without executing -- `sweep` — Run sweep only, no fixes - -## Example Usage - -``` -/polish # Auto-detect week, work on next incomplete task -/polish week 1 # Force Week 1 tasks -/polish task 1.3 # Work on just task 1.3 -/polish status # Show what's done and what's left -/polish sweep # Just run the quality sweep -``` - -## For Overnight TUI - -Launch with: -``` -/loop 30m /polish -``` - -Each 30-minute cycle: -1. Checks current week -2. Finds next incomplete task -3. Fixes as many violations as possible in the time available -4. Deploys and verifies -5. Reports progress diff --git a/.claude/skills/polish/references/backend.md b/.claude/skills/polish/references/backend.md deleted file mode 100644 index 2baf63b0..00000000 --- a/.claude/skills/polish/references/backend.md +++ /dev/null @@ -1,27 +0,0 @@ -# Polish: Backend Quality - -All changes built on dev server, not macOS: `./scripts/deploy-to-target.sh --live` - -## Priority 1: Eliminate panics -```bash -ssh archipelago@192.168.1.228 "grep -rn 'unwrap()\|\.expect(' ~/archy/core/archipelago/src/ --include='*.rs' | grep -v test | grep -v '_test.rs'" -``` -Replace with `?` + `.context()` or `.map_err()`. 
- -## Priority 2: Add timeouts -- Container ops: `tokio::time::timeout(Duration::from_secs(30), op).await` -- HTTP/RPC calls: `reqwest::Client::builder().timeout(Duration::from_secs(10))` - -## Priority 3: Connection pooling -Store reusable `reqwest::Client` in RpcHandler instead of creating per-request. - -## Priority 4: Clippy -```bash -ssh archipelago@192.168.1.228 "cd ~/archy && cargo clippy --all-targets --all-features 2>&1" -``` - -## Priority 5: Replace println with tracing -`println!` → `tracing::info!`, `eprintln!` → `tracing::warn!` - -## Verify -Zero clippy warnings, zero unwrap/expect in prod code, zero println. diff --git a/.claude/skills/polish/references/deploy.md b/.claude/skills/polish/references/deploy.md deleted file mode 100644 index 9ca6c1ce..00000000 --- a/.claude/skills/polish/references/deploy.md +++ /dev/null @@ -1,26 +0,0 @@ -# Polish: Deployment Pipeline - -## Pre-Deploy Checks -Add to deploy-to-target.sh: SSH key exists, target reachable, 2GB free disk space. - -## Backup Before Deploy -```bash -sudo cp /usr/local/bin/archipelago /usr/local/bin/archipelago.backup -sudo cp -a /opt/archipelago/web-ui /opt/archipelago/web-ui.backup -sudo cp /etc/nginx/sites-available/archipelago /etc/nginx/sites-available/archipelago.backup -``` - -## Health Check After Deploy -Loop up to 15 attempts, 2s apart, checking `curl http://localhost:5678/health` returns 200. - -## Rollback on Failure -If health check fails: restore binary, frontend, nginx from .backup files, restart services. - -## Deployment Lock -Use `flock` on `/tmp/archipelago-deploy.lock` to prevent concurrent deploys. - -## Nginx Validation -Always `sudo nginx -t` before reload. If invalid, restore backup config. - -## Integration Flow -1. acquire_lock → 2. pre_deploy_checks → 3. backup_current → 4. build + deploy → 5. validate_nginx → 6. restart services → 7. 
health_check || rollback diff --git a/.claude/skills/polish/references/errors.md b/.claude/skills/polish/references/errors.md deleted file mode 100644 index d1a9269f..00000000 --- a/.claude/skills/polish/references/errors.md +++ /dev/null @@ -1,23 +0,0 @@ -# Polish: Error Handling - -## Find -- Silent catches: `grep -rn "catch.*=>.*{}" --include="*.vue" --include="*.ts" src/` -- Empty try/catch: `grep -rn "catch.*{$" -A1` looking for immediate `}` -- Missing error states in views: check each view has `errorMessage` ref - -## Fix Pattern -```typescript -.catch((err) => { - console.error('[ComponentName] operation failed:', err) - errorMessage.value = err instanceof Error ? err.message : 'Operation failed' -}) -``` - -Template: `

{{ errorMessage }}

` - -## Backend -- Replace `unwrap_or_default()` on serialization with proper error propagation -- Consistent RPC error structure: `{ error: { code: string, message: string } }` - -## Verify -Both should return zero: silent catches and empty catch blocks. diff --git a/.claude/skills/polish/references/forms.md b/.claude/skills/polish/references/forms.md deleted file mode 100644 index e2d1d9fe..00000000 --- a/.claude/skills/polish/references/forms.md +++ /dev/null @@ -1,30 +0,0 @@ -# Polish: Form Validation - -## Pattern -```typescript -const isSubmitting = ref(false) -const passwordErrors = computed(() => { - const errors: string[] = [] - if (password.value.length > 0 && password.value.length < 8) - errors.push('Must be at least 8 characters') - return errors -}) - -async function submit() { - if (isSubmitting.value) return - isSubmitting.value = true - try { await rpcClient.call(...) } - catch (err) { errorMessage.value = formatError(err) } - finally { isSubmitting.value = false } -} -``` - -## Checklist per form -- Real-time validation as user types (debounced 300ms) -- Submit button disabled during operation and when validation fails -- All text inputs trimmed before submission -- Error messages are user-friendly (no raw error strings) -- TOTP: `inputmode="numeric"`, auto-submit at 6 digits - -## Forms to polish -Login.vue (password setup, TOTP), Settings.vue (password change), any other form inputs. diff --git a/.claude/skills/polish/references/loading.md b/.claude/skills/polish/references/loading.md deleted file mode 100644 index 39f3c0a1..00000000 --- a/.claude/skills/polish/references/loading.md +++ /dev/null @@ -1,26 +0,0 @@ -# Polish: Loading States - -Every async view needs 3 states: loading skeleton, empty state, timeout warning. - -## Skeleton Pattern -```vue -
-
-

No items yet

-
-
-``` - -## Timeout Warning -After 15s show "Taking longer than expected...", after 30s show troubleshooting. -```typescript -const loadingTooLong = ref(false) -const timeout = setTimeout(() => { loadingTooLong.value = true }, 15000) -watch(isLoading, (val) => { if (!val) clearTimeout(timeout) }) -``` - -## Priority Views -Apps.vue, AppDetails.vue, Marketplace.vue, Dashboard.vue, Cloud.vue, Settings.vue, Server.vue - -## Verify -Each view has: `isLoading` ref, skeleton section, empty state, timeout warning. Use global classes only. diff --git a/.claude/skills/polish/references/security.md b/.claude/skills/polish/references/security.md deleted file mode 100644 index 71a05300..00000000 --- a/.claude/skills/polish/references/security.md +++ /dev/null @@ -1,22 +0,0 @@ -# Polish: Security Hardening - -## 1. Systemd Service -Add to `image-recipe/configs/archipelago.service`: -`NoNewPrivileges=true`, `ProtectSystem=strict`, `ReadWritePaths=/var/lib/archipelago` -Verify: `ssh ... "sudo systemd-analyze security archipelago"` — score < 5.0 - -## 2. Nginx Headers -- HSTS (HTTPS only): `add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;` -- Rate limiting zones: `limit_req_zone $binary_remote_addr zone=auth:10m rate=5r/m;` -- Custom log format stripping tokens - -## 3. Secrets Management -Replace hardcoded `archipelago123` with generated secrets: -- Generate on first boot: `openssl rand -base64 24 > /var/lib/archipelago/secrets/bitcoin-rpc-pass` -- Backend reads from env var: `std::env::var("ARCHIPELAGO_BITCOIN_RPC_PASS")` - -## 4. SSH Hardening -Replace `StrictHostKeyChecking=no` with `StrictHostKeyChecking=accept-new` in deploy script. - -## Verify -`grep -rn 'archipelago123' scripts/ core/` should return zero. Nginx headers pass curl check. Rate limiting returns 429 on rapid auth requests. 
diff --git a/.claude/skills/polish/references/websocket.md b/.claude/skills/polish/references/websocket.md deleted file mode 100644 index ad8aeb7e..00000000 --- a/.claude/skills/polish/references/websocket.md +++ /dev/null @@ -1,25 +0,0 @@ -# Polish: WebSocket & Real-Time - -## 1. Connection Status Indicator -Add to App.vue header: green dot (connected), amber pulse (reconnecting), red (disconnected). -Connect to actual WebSocket state from websocket.ts. - -## 2. Reconnection UX -After max reconnect attempts, show persistent banner "Connection lost. Click to retry." -Add `forceReconnect()` method that resets attempt counter. - -## 3. Heartbeat -Active ping every 30s with 5s pong timeout (replace passive 60s stale detection). -Backend must respond to `ping` with `pong` — check handler.rs. - -## 4. Session Timeout -In rpc-client.ts base `call()`: on 401/403 response, redirect to `/login`. - -## 5. Race Condition Fix -Use listener deduplication (Set) or remove-all-then-resubscribe on reconnect. - -## 6. Message Queuing -Queue subscription requests while disconnected, replay on reconnect. - -## Verify -Kill backend → shows "Disconnected" → restart → auto-reconnects. Toggle wifi → status updates. Session timeout → redirects to login. diff --git a/.claude/skills/refactor/SKILL.md b/.claude/skills/refactor/SKILL.md deleted file mode 100644 index 8c0e036e..00000000 --- a/.claude/skills/refactor/SKILL.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -name: refactor -description: Refactor code for quality, maintainability, and adherence to project standards -disable-model-invocation: true -allowed-tools: Read, Edit, Write, Glob, Grep, Bash -argument-hint: "[file-or-area]" ---- - -Refactor the specified code ($ARGUMENTS) following Archipelago coding standards. 
- -## Checklist - -### Rust Backend -- [ ] No `unwrap()` or `expect()` — use `?` operator with context -- [ ] Replace `#[allow(dead_code)]` — either use it or remove it -- [ ] Functions under 50 lines, single responsibility -- [ ] Custom error types per module with `thiserror` -- [ ] `tracing` for logging — no `println!` or secrets in logs -- [ ] Split files over 500 lines into focused modules -- [ ] Run `cargo clippy --all-targets --all-features` mentally and fix issues - -### Vue Frontend -- [ ] Extract ALL inline Tailwind to global classes in `neode-ui/src/style.css` -- [ ] Use semantic class names: `.glass-card`, `.info-card`, `.glass-button`, `.path-option-card` -- [ ] Replace ALL `.gradient-button` with `.glass-button` (gradient buttons are BANNED) -- [ ] Replace ALL `.gradient-card` / `.gradient-card-dark` with `.glass-card` or `.path-option-card` -- [ ] Settings.vue is the gold standard — all screens should match its patterns -- [ ] Replace `any` types with proper interfaces or `unknown` -- [ ] Ensure `