diff --git a/core/archipelago/src/api/handler/mod.rs b/core/archipelago/src/api/handler/mod.rs index bd8175dc..4c85d3c4 100644 --- a/core/archipelago/src/api/handler/mod.rs +++ b/core/archipelago/src/api/handler/mod.rs @@ -202,6 +202,27 @@ impl ApiHandler { .unwrap() } + /// A 401 that still carries CORS headers, for endpoints fetched + /// cross-origin by same-node app UIs (e.g. the LND wallet UI on its own + /// port). Without `Access-Control-Allow-Origin` (ACAO) the browser surfaces an + /// opaque CORS error instead of the 401, so the app can't tell it just needs + /// auth. `origin` is the already-validated reflect value from `app_cors_origin` + /// (empty string when the origin isn't allowed → no ACAO/credentials headers added; `Vary: Origin` is set either way). + fn unauthorized_cors(origin: &str) -> Response { + let body = serde_json::json!({ "error": "Unauthorized" }); + let body_bytes = serde_json::to_vec(&body).unwrap_or_default(); + let mut builder = Response::builder() + .status(StatusCode::UNAUTHORIZED) + .header("Content-Type", "application/json") + .header("Vary", "Origin"); + if !origin.is_empty() { + builder = builder + .header("Access-Control-Allow-Origin", origin) + .header("Access-Control-Allow-Credentials", "true"); + } + builder.body(hyper::Body::from(body_bytes)).unwrap() + } + /// Allowed CORS origins derived from the config host IP. fn allowed_origins(&self) -> Vec { let mut origins = vec![ @@ -501,12 +522,16 @@ impl ApiHandler { Self::handle_container_logs_http(self.rpc_handler.clone(), path, &origin).await } - // LND proxy — requires session + // LND proxy — requires session. The LND wallet UI calls this + // cross-origin from its own app port, so even the 401 must carry + // CORS headers; otherwise the browser reports a bare CORS failure + // ("No 'Access-Control-Allow-Origin' header") instead of a + // readable 401 the UI can act on.
(Method::GET, path) if path.starts_with("/proxy/lnd/") => { - if !self.is_authenticated(&headers).await { - return Ok(Self::unauthorized()); - } let origin = self.app_cors_origin(&headers); + if !self.is_authenticated(&headers).await { + return Ok(Self::unauthorized_cors(&origin)); + } Self::handle_lnd_proxy(self.rpc_handler.clone(), path, &origin).await } diff --git a/core/archipelago/src/bootstrap.rs b/core/archipelago/src/bootstrap.rs index 0701f9ca..2288711f 100644 --- a/core/archipelago/src/bootstrap.rs +++ b/core/archipelago/src/bootstrap.rs @@ -521,6 +521,14 @@ async fn run_nginx() -> Result { Ok(changed) } +/// Reflective CORS add_headers that older configs placed inside the +/// `/lnd-connect-info` location. The backend now sets a validated +/// `Access-Control-Allow-Origin` for that endpoint (api/handler/proxy.rs), so +/// leaving these in nginx emits a DUPLICATE header ("contains multiple values +/// … but only one is allowed") and the LND wallet UI's cross-origin fetch is +/// rejected. Stripped during nginx bootstrap so the backend solely owns CORS. 
+const NGINX_LND_DUP_CORS: &str = " add_header Access-Control-Allow-Origin $http_origin always;\n add_header Access-Control-Allow-Credentials \"true\" always;\n"; + async fn patch_nginx_conf(path: &str) -> Result { let content = fs::read_to_string(path) .await @@ -528,12 +536,19 @@ async fn patch_nginx_conf(path: &str) -> Result { let missing_app_catalog = !content.contains("location /api/app-catalog"); let missing_bitcoin_status = !content.contains("location /bitcoin-status"); let missing_lnd_proxy = !content.contains("location /proxy/lnd/"); - if !missing_app_catalog && !missing_bitcoin_status && !missing_lnd_proxy { + let has_lnd_dup_cors = content.contains(NGINX_LND_DUP_CORS); + if !missing_app_catalog && !missing_bitcoin_status && !missing_lnd_proxy && !has_lnd_dup_cors { return Ok(false); } let mut patched = content.clone(); + if has_lnd_dup_cors { + // Drop the redundant nginx-side CORS headers so the backend's single + // validated Access-Control-Allow-Origin is the only one returned. + patched = patched.replace(NGINX_LND_DUP_CORS, ""); + } + if missing_lnd_proxy { // Prefer the `/lnd-connect-info` anchor (present since 2026-03-17); fall // back to `/electrs-status` (since 2026-03-08) for even older configs. diff --git a/tests/production-quality/TRACKER.md b/tests/production-quality/TRACKER.md new file mode 100644 index 00000000..7c0ee33a --- /dev/null +++ b/tests/production-quality/TRACKER.md @@ -0,0 +1,126 @@ +# Production-Quality Bug Tracker + +Living tracker for the post-v1.7.96 "no new features until production quality" push. +Updated continuously as we investigate → fix → test → pass. Kept in-repo so progress +survives a session cutoff. + +## Rules (from user, 2026-06-15) +- **No new features** until the OS is production / no-bugs quality. +- **Test-harness-first**: build/extend a harness for each bug before fixing. +- **Validate every fix on `.116` + `.198`** (both 192.168.1.x; shared pw redacted — keep credentials out of the repo) **+ the harness** BEFORE it goes into any release.
(.198 still carries the LND CORS nginx duplicate → good for fix-(a) validation; .116 does not.) +- **Priority order**: cloud/federated-nodes + mesh FIRST, then app-specific, then low-pri. + +## Status legend +`TODO` · `INVESTIGATING` · `ROOT-CAUSED` · `FIXING` · `TESTING` (on .116+harness) · `PASSED` · `SHIPPED` + +## Release status +- **v1.7.96-alpha — SHIPPED** (2026-06-15). Live on vps2 (primary OTA): manifest v1.7.96-alpha, assets HTTP 200, `main@8c3c7954` + tag present. Contents: kiosk grid removal + FIPS TCP/UDP anchor selector. NOTE: gitea-local (localhost) mirror push failed (token rejected → /login); non-blocking, needs refreshed token. +- **v1.7.97-alpha — IN PROGRESS** (this push). Will bundle the verified fixes below. + +--- + +## 🔴🔴 TOP PRIORITY + +### B5 — LND "connect your wallet" details/QR broken fleet-wide — PASSED +Origin: user escalation. Symptom: LND connect screen (served on app port :18083) can't load details/QR. +Two distinct root causes (confirmed live): +- **(a) Duplicate ACAO** on `/lnd-connect-info` (seen on .103): backend sets `Access-Control-Allow-Origin` (proxy.rs:108) AND nginx `add_header` adds a second → browser rejects "multiple values". nginx config drift. Fix: bootstrap.rs nginx patch must strip the redundant `add_header` from the `/lnd-connect-info` location (backend owns CORS). +- **(b) No ACAO on `/proxy/lnd/v1/*` 401** (fleet-wide): the unauth/auth-layer 401 is produced before the CORS-adding proxy handler (proxy.rs:135 `handle_lnd_proxy`). Browser → "No 'Access-Control-Allow-Origin' header". Fix: ensure auth-layer/early-return responses for `/proxy/lnd` + `/lnd-connect-info` carry CORS headers. +- `.116` `/lnd-connect-info` returns a single correct ACAO → symptom varies by node's nginx state. +- Backend CORS helper: handler/mod.rs `app_cors_origin()` (:270) — reflects Origin when its host == request host. +- Backend change → ships in .97. **Status: ✅ PASSED — verified on .116, .198, .103 (harness 4/4 each).
Ready to bundle into .97.** +- Caveat: bootstrap's nginx dup-strip runs a few seconds AFTER /health goes green (async patch+reload) — converges within ~1 min of restart; not instant. Acceptable. +- **CODE CHANGES MADE (uncommitted):** + - `core/archipelago/src/bootstrap.rs`: added `NGINX_LND_DUP_CORS` const + strip in `patch_nginx_conf()` (removes the duplicate nginx `add_header` ACAO from `/lnd-connect-info` so the backend's single header wins). Idempotent; runs on startup nginx bootstrap. → fixes (a) + - `core/archipelago/src/api/handler/mod.rs`: new `unauthorized_cors(origin)` helper (:~205) + `/proxy/lnd/` route (:~505) computes origin first and returns `unauthorized_cors` so the 401 carries ACAO. → fixes (b) + - Test on **.116** for (b); test on **.103** for (a) [.116 has no dup to strip]. + - **2026-06-15 RESULT — .116 (fix b): harness 4/4 PASS** (sideloaded built binary, restarted). `/proxy/lnd/v1/*` now returns CORS on the 401. ✅ + - (Correction: an earlier "LND container MISSING" reading was a FALSE alarm — `docker` isn't in the non-interactive PATH; runtime is **podman**. Verified `lnd Up 9h` — containers SURVIVED the restart cleanly.) + - Next: deploy to .103 + run harness to confirm fix (a) (nginx dup strip). +- **Harness:** `tests/production-quality/lnd-cors-test.sh <host>` — asserts single correct ACAO on /lnd-connect-info + ACAO present on /proxy/lnd/v1/{getinfo,channels}. Baseline (2026-06-15): .116 = 2 pass/2 fail (proxy missing ACAO); .103 = 1 pass/3 fail (connect-info dup + proxy missing). +- **FIX PLAN (precise):** + 1. (b) handler/mod.rs:504-508 `/proxy/lnd/` returns `Self::unauthorized()` (401, NO CORS) when session check fails → browser CORS wall. Add CORS (app_cors_origin) to that 401. Same pattern for any other app-origin early-return. + 2. (a) nginx `/lnd-connect-info` location double-adds ACAO (backend + nginx `add_header`). Strip the nginx `add_header Access-Control-Allow-Origin` there; backend owns CORS.
Update bootstrap.rs nginx patch to remove it on existing nodes (idempotent). + - Verify: rebuild backend, deploy to .116, run harness → expect 3/3 (or 4 assertions) PASS on .116 AND .103. + +--- + +## 🔴 PRIORITY — cloud / federation / mesh + +### B1 — Trusted-node list not clean — TODO +Dupes, erroneous names, and non-convergent group membership across nodes. Expected: trusted nodes form a transitive group (every node connects to any newly-added trusted node; all nodes show the same set). `.103` has a long/dirty list. + +### B2 — Duplicate chat contact for one node — TODO +Federated peer "sapien" shows TWO chats: one "sapien" WITHOUT archy logo (looks non-federated) + one named by raw DID `did:key:z6MkoSbN5CM7fBaQg2nWbCymEkFXsHnuXvec9Mjo5RtJf9dQ`. Same node keyed by both federated identity and raw DID → merge to one. Code: core/archipelago/src/mesh + mesh/typed_messages.rs (note :233 — meshcore adverts don't carry archy pubkey). + +### B3 — Cloud peer media won't preview/play — TODO +Music/video preview files on peer nodes' cloud don't play (streaming/range/content-type over mesh+Tor peer fetch). + +### B4 — Cloud "my folders" fails (JSON parse / 502) — TODO +`Unexpected token '<', "/` instead of SPA shell. Handle BOTH absent + down. + +### B14 — Trusted/peer cloud browse uses Tor not FIPS — TODO (priority) +Browsing trusted/peer nodes in the Cloud tab connects over Tor instead of FIPS (should prefer FIPS like the rest of mesh; same for peer browsing). cf project_fips_integration, project_tor_node_to_node_works (last_transport should be fips/mesh). + +--- + +## 🟠 APP-SPECIFIC + +### B6 — ElectrumX install button missing "Requires Archival Node" gate — TODO +Show the yellow requirement badge when no full node / only a pruned node is present (reuse existing yellow badge pattern). + +### B7 — ElectrumX UI stuck loader on top — TODO +UI renders but a loader sits on top; possibly stale pre-sync screen not clearing. 
+ +### B9 — IndeedHub keeps stopping on nodes — TODO +Container won't stay running (crash-loop / reconcile stop). Check logs + restart policy + health. + +### B10 — Immich still crashes — TODO +Recurring crash ("still" → prior attempts). Check container logs + resource limits + DB/ML deps. + +### B11 — Companion app: "open in external browser" apps don't work — TODO +Apps meant to open in a new/external browser don't launch from the companion app; need the phone-default-browser request-modal pattern mobile apps use. Relates to v1.7.90 "open in new tab from companion app". + +### B12 — Mempool not connecting to Bitcoin on some nodes — TODO +mempool can't reach the Bitcoin backend on some nodes. Investigate on .116. Check mempool→electrs→bitcoind wiring + deps. + +### B13 — Fedimint UI not applying CSS — TODO +Actual Fedimint UI (not pre-sync) renders unstyled. Likely asset path / proxy base-href (assets rooted at `/` vs `/app/fedimint/`). + +### B15 — Bitcoin UI sync progress lags — TODO +Bitcoin UI doesn't update its sync progress fast enough even though the console clearly already has the block-height data. Likely a polling-interval / reactive-update gap between the status source and the UI. + +### B16 — Bitcoin sync status on Home > System container vanishes — TODO +The bitcoin sync status in the Home > System container disappears when it should persist/cache and show an "updating" state. Related to B15 (Bitcoin UI sync lag). Likely the status component clears on empty/transitional poll instead of retaining last-known + showing updating. + +### B17 — archipelago.service flaps on boot before starting — TODO +On some boots, `[FAILED] Failed to start archipelago.service - Archipelago Backend` prints ~20 times over ~5 min before it finally starts properly. Likely a startup dependency/timing race (DB lock, port bind, crash-recovery, or a dependency not ready) causing systemd restart loop until a precondition is met. 
Check service Restart=/RestartSec, ExecStartPre gates, and what the early failures log. May tie to B16/crash-recovery. + +### B18 — Apps stop right after install (or become unstartable) — TODO +Many apps install but immediately stop, requiring a manual Start — or become unstartable entirely. Likely the install→start handoff / reconciler doesn't bring them up (or starts then they exit). Related to B9 (IndeedHub stopping), B10 (Immich). Possibly linked to the cgroup-SIGKILL-on-archipelago.service-restart issue (feedback_no_systemctl_deploy_until_quadlet) — but NOTE: on .116 (Quadlet) containers survived a service restart cleanly, so the reconciler may be fine there; reproduce on the affected nodes. Check post-install start sequencing + boot_reconciler + container restart policy + cgroup placement. + +### B19 — Failed download-update lands on Install button (should be Download) — TODO +When an update download fails, the UI sometimes shows the Install button instead of returning to the Download button — a big UX issue (user can't retry the download cleanly). Check the SystemUpdate state machine's error/failure transition. + +### B8 — netbird app doesn't work — TODO (LOW / much later) + +(RETRACTED: CryptPad placeholder-icon — user says cryptpad is fine.) 
+ +--- + +## 📋 vps2 Gitea issues (lfg2025/archy) — imported 2026-06-15 +- G#1 [Bug] Strange peer request behaviour — TODO (likely related to B1/federation) +- G#2 [Bug] Fix flashing USB from kiosk — TODO +- G#3 [Feature] VPN Configuration — DEFERRED (feature; no new features until production quality) +- G#4 [Bug] Bitcoind is slow — TODO +- G#5 [Feature] OpenWRT and TollGate integration — DEFERRED (feature) +- G#6 [Feature] Move dashboard/monitoring link to home screen — DEFERRED (feature) +- G#7 [Bug] Scrolling with Companion app — TODO + +--- + +## Gitea issue mapping (vps2 lfg2025/archy) +All backlog bugs now mirrored as Gitea issues: B1→#8, B2→#9, B3→#10, B4→#11, B5→#12, B6→#13, B7→#14, B8→#15, B9→#16, B10→#17, B11→#18, B12→#19, B13→#20, B14→#21, B15→#22, B16→#23, B17→#24, B18→#25, B19→#26. (Pre-existing G#1–7 remain; some overlap, e.g. G#1 strange-peer ≈ B1.) Close the Gitea issue when a bug is verified+shipped. + +## Progress log +- 2026-06-15: tracker created. v1.7.96-alpha shipped. B5 (LND CORS) root-caused → fixed in code → fix (b) verified on .116 (harness 4/4). All 19 bugs filed as Gitea issues #8–#26. vps2 feature issues (G#3/5/6) deferred (no new features). Next: deploy to .103 to verify fix (a) (nginx dup strip). diff --git a/tests/production-quality/lnd-cors-test.sh b/tests/production-quality/lnd-cors-test.sh new file mode 100755 index 00000000..950e872c --- /dev/null +++ b/tests/production-quality/lnd-cors-test.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# lnd-cors-test.sh — assert the LND "connect your wallet" endpoints return +# correct CORS headers for the cross-origin call from the LND UI app (:18083). +# +# Bug B5: /lnd-connect-info duplicated ACAO on some nodes; /proxy/lnd/v1/* 401 +# carries no ACAO fleet-wide. Browser blocks both. +# +# Usage: ./lnd-cors-test.sh <host> (e.g. 192.168.1.116 or 100.102.169.103) +# Exit 0 = all assertions pass.
+
+set -uo pipefail  # no `-e`: checks keep running after a failure so the final tally is complete
+HOST="${1:?usage: lnd-cors-test.sh <host>}"
+ORIGIN="http://${HOST}:18083"
+BASE="http://${HOST}"
+PASS=0; FAIL=0
+say() { printf '%s\n' "$*"; }
+ok() { PASS=$((PASS+1)); say " PASS: $1"; }
+bad() { FAIL=$((FAIL+1)); say " FAIL: $1"; }
+
+# Count ACAO header lines (case-insensitive) in a header dump; acao_value prints the first one's value.
+acao_count() { grep -ci '^access-control-allow-origin:' <<<"$1"; }
+acao_value() { grep -i '^access-control-allow-origin:' <<<"$1" | head -1 | sed 's/^[^:]*:[[:space:]]*//' | tr -d '\r'; }
+
+say "== B5 LND CORS — node ${HOST} (origin ${ORIGIN}) =="
+
+# 1) /lnd-connect-info — exactly ONE ACAO, value == origin
+H=$(curl -s -m 8 -D - -o /dev/null -H "Origin: ${ORIGIN}" "${BASE}/lnd-connect-info" 2>/dev/null)
+N=$(acao_count "$H"); V=$(acao_value "$H")
+[ "$N" = "1" ] && ok "/lnd-connect-info has exactly 1 ACAO header" || bad "/lnd-connect-info ACAO count=$N (want 1)"
+[ "$V" = "$ORIGIN" ] && ok "/lnd-connect-info ACAO value == origin" || bad "/lnd-connect-info ACAO='$V' (want '$ORIGIN')"
+
+# 2) /proxy/lnd/v1/getinfo — exactly ONE ACAO even on the 401 (request carries no session)
+H=$(curl -s -m 8 -D - -o /dev/null -H "Origin: ${ORIGIN}" "${BASE}/proxy/lnd/v1/getinfo" 2>/dev/null)
+N=$(acao_count "$H")
+[ "$N" = "1" ] && ok "/proxy/lnd/v1/getinfo has exactly 1 ACAO (even unauth)" \
+  || bad "/proxy/lnd/v1/getinfo ACAO count=$N (want 1)"
+
+# 3) /proxy/lnd/v1/channels — same
+H=$(curl -s -m 8 -D - -o /dev/null -H "Origin: ${ORIGIN}" "${BASE}/proxy/lnd/v1/channels" 2>/dev/null)
+N=$(acao_count "$H")
+[ "$N" = "1" ] && ok "/proxy/lnd/v1/channels has exactly 1 ACAO" || bad "/proxy/lnd/v1/channels ACAO count=$N (want 1)"
+
+say ""
+say "== ${HOST}: ${PASS} passed, ${FAIL} failed =="
+[ "$FAIL" -eq 0 ]