fix(lnd): repair fleet-wide CORS on LND connect-wallet endpoints (B5)
The LND wallet UI (served on its own app port) fetches /lnd-connect-info
and /proxy/lnd/* cross-origin, so both need correct CORS headers.
(a) Older nginx configs add their own Access-Control-Allow-Origin in the
/lnd-connect-info location on top of the one the backend sets, yielding
a DUPLICATE header that browsers reject ("multiple values"). bootstrap
now strips that redundant nginx add_header (backend owns CORS).
(b) /proxy/lnd/* returned a 401 with no CORS headers when the session
check failed, so the browser saw an opaque CORS error instead of a
readable 401. Add unauthorized_cors() and use it on that path.
Adds tests/production-quality/ (bug tracker + lnd-cors-test.sh harness).
Verified: harness 4/4 on .116, .198, .103.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8c3c79543e
commit
1db720af13
@ -202,6 +202,27 @@ impl ApiHandler {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// A 401 that still carries CORS headers, for endpoints fetched
|
||||
/// cross-origin by same-node app UIs (e.g. the LND wallet UI on its own
|
||||
/// port). Without the ACAO header the browser surfaces an opaque CORS
|
||||
/// error instead of the 401, so the app can't tell it just needs auth.
|
||||
/// `origin` is the already-validated reflect value from `app_cors_origin`
|
||||
/// (empty string when the origin isn't allowed → no CORS header added).
|
||||
fn unauthorized_cors(origin: &str) -> Response<hyper::Body> {
|
||||
let body = serde_json::json!({ "error": "Unauthorized" });
|
||||
let body_bytes = serde_json::to_vec(&body).unwrap_or_default();
|
||||
let mut builder = Response::builder()
|
||||
.status(StatusCode::UNAUTHORIZED)
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Vary", "Origin");
|
||||
if !origin.is_empty() {
|
||||
builder = builder
|
||||
.header("Access-Control-Allow-Origin", origin)
|
||||
.header("Access-Control-Allow-Credentials", "true");
|
||||
}
|
||||
builder.body(hyper::Body::from(body_bytes)).unwrap()
|
||||
}
|
||||
|
||||
/// Allowed CORS origins derived from the config host IP.
|
||||
fn allowed_origins(&self) -> Vec<String> {
|
||||
let mut origins = vec![
|
||||
@ -501,12 +522,16 @@ impl ApiHandler {
|
||||
Self::handle_container_logs_http(self.rpc_handler.clone(), path, &origin).await
|
||||
}
|
||||
|
||||
// LND proxy — requires session
|
||||
// LND proxy — requires session. The LND wallet UI calls this
|
||||
// cross-origin from its own app port, so even the 401 must carry
|
||||
// CORS headers; otherwise the browser reports a bare CORS failure
|
||||
// ("No 'Access-Control-Allow-Origin' header") instead of a
|
||||
// readable 401 the UI can act on.
|
||||
(Method::GET, path) if path.starts_with("/proxy/lnd/") => {
|
||||
if !self.is_authenticated(&headers).await {
|
||||
return Ok(Self::unauthorized());
|
||||
}
|
||||
let origin = self.app_cors_origin(&headers);
|
||||
if !self.is_authenticated(&headers).await {
|
||||
return Ok(Self::unauthorized_cors(&origin));
|
||||
}
|
||||
Self::handle_lnd_proxy(self.rpc_handler.clone(), path, &origin).await
|
||||
}
|
||||
|
||||
|
||||
@ -521,6 +521,14 @@ async fn run_nginx() -> Result<bool> {
|
||||
Ok(changed)
|
||||
}
|
||||
|
||||
/// Reflective CORS add_headers that older configs placed inside the
|
||||
/// `/lnd-connect-info` location. The backend now sets a validated
|
||||
/// `Access-Control-Allow-Origin` for that endpoint (api/handler/proxy.rs), so
|
||||
/// leaving these in nginx emits a DUPLICATE header ("contains multiple values
|
||||
/// … but only one is allowed") and the LND wallet UI's cross-origin fetch is
|
||||
/// rejected. Stripped during nginx bootstrap so the backend solely owns CORS.
|
||||
///
/// NOTE(review): `patch_nginx_conf` matches this text verbatim via
/// `str::contains` / `str::replace`, so a config whose whitespace or quoting
/// differs even slightly is left untouched, and every occurrence in the file
/// is removed, not only the one inside the `/lnd-connect-info` location.
const NGINX_LND_DUP_CORS: &str = " add_header Access-Control-Allow-Origin $http_origin always;\n add_header Access-Control-Allow-Credentials \"true\" always;\n";
|
||||
|
||||
async fn patch_nginx_conf(path: &str) -> Result<bool> {
|
||||
let content = fs::read_to_string(path)
|
||||
.await
|
||||
@ -528,12 +536,19 @@ async fn patch_nginx_conf(path: &str) -> Result<bool> {
|
||||
let missing_app_catalog = !content.contains("location /api/app-catalog");
|
||||
let missing_bitcoin_status = !content.contains("location /bitcoin-status");
|
||||
let missing_lnd_proxy = !content.contains("location /proxy/lnd/");
|
||||
if !missing_app_catalog && !missing_bitcoin_status && !missing_lnd_proxy {
|
||||
let has_lnd_dup_cors = content.contains(NGINX_LND_DUP_CORS);
|
||||
if !missing_app_catalog && !missing_bitcoin_status && !missing_lnd_proxy && !has_lnd_dup_cors {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let mut patched = content.clone();
|
||||
|
||||
if has_lnd_dup_cors {
|
||||
// Drop the redundant nginx-side CORS headers so the backend's single
|
||||
// validated Access-Control-Allow-Origin is the only one returned.
|
||||
patched = patched.replace(NGINX_LND_DUP_CORS, "");
|
||||
}
|
||||
|
||||
if missing_lnd_proxy {
|
||||
// Prefer the `/lnd-connect-info` anchor (present since 2026-03-17); fall
|
||||
// back to `/electrs-status` (since 2026-03-08) for even older configs.
|
||||
|
||||
126
tests/production-quality/TRACKER.md
Normal file
126
tests/production-quality/TRACKER.md
Normal file
@ -0,0 +1,126 @@
|
||||
# Production-Quality Bug Tracker
|
||||
|
||||
Living tracker for the post-v1.7.96 "no new features until production quality" push.
|
||||
Updated continuously as we investigate → fix → test → pass. Kept in-repo so progress
|
||||
survives a session cutoff.
|
||||
|
||||
## Rules (from user, 2026-06-15)
|
||||
- **No new features** until the OS is production / no-bugs quality.
|
||||
- **Test-harness-first**: build/extend a harness for each bug before fixing.
|
||||
- **Validate every fix on `.116` + `.198`** (both 192.168.1.x, pw ThisIsWeb54321@) **+ the harness** BEFORE it goes into any release. (.198 still carries the LND CORS nginx duplicate → good for fix-(a) validation; .116 does not.)
|
||||
- **Priority order**: cloud/federated-nodes + mesh FIRST, then app-specific, then low-pri.
|
||||
|
||||
## Status legend
|
||||
`TODO` · `INVESTIGATING` · `ROOT-CAUSED` · `FIXING` · `TESTING` (on .116+harness) · `PASSED` · `SHIPPED`
|
||||
|
||||
## Release status
|
||||
- **v1.7.96-alpha — SHIPPED** (2026-06-15). Live on vps2 (primary OTA): manifest v1.7.96-alpha, assets HTTP 200, `main@8c3c7954` + tag present. Contents: kiosk grid removal + FIPS TCP/UDP anchor selector. NOTE: gitea-local (localhost) mirror push failed (token rejected → /login); non-blocking, needs refreshed token.
|
||||
- **v1.7.97-alpha — IN PROGRESS** (this push). Will bundle the verified fixes below.
|
||||
|
||||
---
|
||||
|
||||
## 🔴🔴 TOP PRIORITY
|
||||
|
||||
### B5 — LND "connect your wallet" details/QR broken fleet-wide — PASSED
|
||||
Origin: user escalation. Symptom: LND connect screen (served on app port :18083) can't load details/QR.
|
||||
Two distinct root causes (confirmed live):
|
||||
- **(a) Duplicate ACAO** on `/lnd-connect-info` (seen on .103): backend sets `Access-Control-Allow-Origin` (proxy.rs:108) AND nginx `add_header` adds a second → browser rejects "multiple values". nginx config drift. Fix: bootstrap.rs nginx patch must strip the redundant `add_header` from the `/lnd-connect-info` location (backend owns CORS).
|
||||
- **(b) No ACAO on `/proxy/lnd/v1/*` 401** (fleet-wide): the unauth/auth-layer 401 is produced before the CORS-adding proxy handler (proxy.rs:135 `handle_lnd_proxy`). Browser → "No 'Access-Control-Allow-Origin' header". Fix: ensure auth-layer/early-return responses for `/proxy/lnd` + `/lnd-connect-info` carry CORS headers.
|
||||
- `.116` `/lnd-connect-info` returns a single correct ACAO → symptom varies by node's nginx state.
|
||||
- Backend CORS helper: handler/mod.rs `app_cors_origin()` (:270) — reflects Origin when its host == request host.
|
||||
- Backend change → ships in .97. **Status: ✅ PASSED — verified on .116, .198, .103 (harness 4/4 each). Ready to bundle into .97.**
|
||||
- Caveat: bootstrap's nginx dup-strip runs a few seconds AFTER /health goes green (async patch+reload) — converges within ~1 min of restart; not instant. Acceptable.
|
||||
- **CODE CHANGES MADE (committed together with this tracker):**
|
||||
- `core/archipelago/src/bootstrap.rs`: added `NGINX_LND_DUP_CORS` const + strip in `patch_nginx_conf()` (removes the duplicate nginx `add_header` ACAO from `/lnd-connect-info` so the backend's single header wins). Idempotent; runs on startup nginx bootstrap. → fixes (a)
|
||||
- `core/archipelago/src/api/handler/mod.rs`: new `unauthorized_cors(origin)` helper (:~205) + `/proxy/lnd/` route (:~505) computes origin first and returns `unauthorized_cors` so the 401 carries ACAO. → fixes (b)
|
||||
- Test on **.116** for (b); test on **.103** for (a) [.116 has no dup to strip].
|
||||
- **2026-06-15 RESULT — .116 (fix b): harness 4/4 PASS** (sideloaded built binary, restarted). `/proxy/lnd/v1/*` now returns CORS on the 401. ✅
|
||||
- (Correction: an earlier "LND container MISSING" reading was a FALSE alarm — `docker` isn't in the non-interactive PATH; runtime is **podman**. Verified `lnd Up 9h` — containers SURVIVED the restart cleanly.)
|
||||
- Next: deploy to .103 + run harness to confirm fix (a) (nginx dup strip).
|
||||
- **Harness:** `tests/production-quality/lnd-cors-test.sh <node>` — asserts single correct ACAO on /lnd-connect-info + ACAO present on /proxy/lnd/v1/{getinfo,channels}. Baseline (2026-06-15): .116 = 2 pass/2 fail (proxy missing ACAO); .103 = 1 pass/3 fail (connect-info dup + proxy missing).
|
||||
- **FIX PLAN (precise):**
|
||||
1. (b) handler/mod.rs:504-508 `/proxy/lnd/` returns `Self::unauthorized()` (401, NO CORS) when session check fails → browser CORS wall. Add CORS (app_cors_origin) to that 401. Same pattern for any other app-origin early-return.
|
||||
2. (a) nginx `/lnd-connect-info` location double-adds ACAO (backend + nginx `add_header`). Strip the nginx `add_header Access-Control-Allow-Origin` there; backend owns CORS. Update bootstrap.rs nginx patch to remove it on existing nodes (idempotent).
|
||||
- Verify: rebuild backend, deploy to .116, run harness → expect all 4 harness assertions to PASS on .116 AND .103.
|
||||
|
||||
---
|
||||
|
||||
## 🔴 PRIORITY — cloud / federation / mesh
|
||||
|
||||
### B1 — Trusted-node list not clean — TODO
|
||||
Dupes, erroneous names, and non-convergent group membership across nodes. Expected: trusted nodes form a transitive group (every node connects to any newly-added trusted node; all nodes show the same set). `.103` has a long/dirty list.
|
||||
|
||||
### B2 — Duplicate chat contact for one node — TODO
|
||||
Federated peer "sapien" shows TWO chats: one "sapien" WITHOUT archy logo (looks non-federated) + one named by raw DID `did:key:z6MkoSbN5CM7fBaQg2nWbCymEkFXsHnuXvec9Mjo5RtJf9dQ`. Same node keyed by both federated identity and raw DID → merge to one. Code: core/archipelago/src/mesh + mesh/typed_messages.rs (note :233 — meshcore adverts don't carry archy pubkey).
|
||||
|
||||
### B3 — Cloud peer media won't preview/play — TODO
|
||||
Music/video preview files on peer nodes' cloud don't play (streaming/range/content-type over mesh+Tor peer fetch).
|
||||
|
||||
### B4 — Cloud "my folders" fails (JSON parse / 502) — TODO
|
||||
`Unexpected token '<', "<!doctype"` when FileBrowser absent (`/app/filebrowser/api/resources` → SPA index.html), and **502** when FileBrowser is down (seen on .103). filebrowser-client.ts:102/:106. Fix: detect FileBrowser unavailable, friendly prompt; consider nginx returning JSON 404/502 for missing `/app/<app>/` instead of SPA shell. Handle BOTH absent + down.
|
||||
|
||||
### B14 — Trusted/peer cloud browse uses Tor not FIPS — TODO (priority)
|
||||
Browsing trusted/peer nodes in the Cloud tab connects over Tor instead of FIPS (should prefer FIPS like the rest of mesh; same for peer browsing). cf project_fips_integration, project_tor_node_to_node_works (last_transport should be fips/mesh).
|
||||
|
||||
---
|
||||
|
||||
## 🟠 APP-SPECIFIC
|
||||
|
||||
### B6 — ElectrumX install button missing "Requires Archival Node" gate — TODO
|
||||
Show the yellow requirement badge when no full node / only a pruned node is present (reuse existing yellow badge pattern).
|
||||
|
||||
### B7 — ElectrumX UI stuck loader on top — TODO
|
||||
UI renders but a loader sits on top; possibly stale pre-sync screen not clearing.
|
||||
|
||||
### B9 — IndeedHub keeps stopping on nodes — TODO
|
||||
Container won't stay running (crash-loop / reconcile stop). Check logs + restart policy + health.
|
||||
|
||||
### B10 — Immich still crashes — TODO
|
||||
Recurring crash ("still" → prior attempts). Check container logs + resource limits + DB/ML deps.
|
||||
|
||||
### B11 — Companion app: "open in external browser" apps don't work — TODO
|
||||
Apps meant to open in a new/external browser don't launch from the companion app; need the phone-default-browser request-modal pattern mobile apps use. Relates to v1.7.90 "open in new tab from companion app".
|
||||
|
||||
### B12 — Mempool not connecting to Bitcoin on some nodes — TODO
|
||||
mempool can't reach the Bitcoin backend on some nodes. Investigate on .116. Check mempool→electrs→bitcoind wiring + deps.
|
||||
|
||||
### B13 — Fedimint UI not applying CSS — TODO
|
||||
Actual Fedimint UI (not pre-sync) renders unstyled. Likely asset path / proxy base-href (assets rooted at `/` vs `/app/fedimint/`).
|
||||
|
||||
### B15 — Bitcoin UI sync progress lags — TODO
|
||||
Bitcoin UI doesn't update its sync progress fast enough even though the console clearly already has the block-height data. Likely a polling-interval / reactive-update gap between the status source and the UI.
|
||||
|
||||
### B16 — Bitcoin sync status on Home > System container vanishes — TODO
|
||||
The bitcoin sync status in the Home > System container disappears when it should persist/cache and show an "updating" state. Related to B15 (Bitcoin UI sync lag). Likely the status component clears on empty/transitional poll instead of retaining last-known + showing updating.
|
||||
|
||||
### B17 — archipelago.service flaps on boot before starting — TODO
|
||||
On some boots, `[FAILED] Failed to start archipelago.service - Archipelago Backend` prints ~20 times over ~5 min before it finally starts properly. Likely a startup dependency/timing race (DB lock, port bind, crash-recovery, or a dependency not ready) causing systemd restart loop until a precondition is met. Check service Restart=/RestartSec, ExecStartPre gates, and what the early failures log. May tie to B16/crash-recovery.
|
||||
|
||||
### B18 — Apps stop right after install (or become unstartable) — TODO
|
||||
Many apps install but immediately stop, requiring a manual Start — or become unstartable entirely. Likely the install→start handoff / reconciler doesn't bring them up (or starts then they exit). Related to B9 (IndeedHub stopping), B10 (Immich). Possibly linked to the cgroup-SIGKILL-on-archipelago.service-restart issue (feedback_no_systemctl_deploy_until_quadlet) — but NOTE: on .116 (Quadlet) containers survived a service restart cleanly, so the reconciler may be fine there; reproduce on the affected nodes. Check post-install start sequencing + boot_reconciler + container restart policy + cgroup placement.
|
||||
|
||||
### B19 — Failed download-update lands on Install button (should be Download) — TODO
|
||||
When an update download fails, the UI sometimes shows the Install button instead of returning to the Download button — a big UX issue (user can't retry the download cleanly). Check the SystemUpdate state machine's error/failure transition.
|
||||
|
||||
### B8 — netbird app doesn't work — TODO (LOW / much later)
|
||||
|
||||
(RETRACTED: CryptPad placeholder-icon — user says cryptpad is fine.)
|
||||
|
||||
---
|
||||
|
||||
## 📋 vps2 Gitea issues (lfg2025/archy) — imported 2026-06-15
|
||||
- G#1 [Bug] Strange peer request behaviour — TODO (likely related to B1/federation)
|
||||
- G#2 [Bug] Fix flashing USB from kiosk — TODO
|
||||
- G#3 [Feature] VPN Configuration — DEFERRED (feature; no new features until production quality)
|
||||
- G#4 [Bug] Bitcoind is slow — TODO
|
||||
- G#5 [Feature] OpenWRT and TollGate integration — DEFERRED (feature)
|
||||
- G#6 [Feature] Move dashboard/monitoring link to home screen — DEFERRED (feature)
|
||||
- G#7 [Bug] Scrolling with Companion app — TODO
|
||||
|
||||
---
|
||||
|
||||
## Gitea issue mapping (vps2 lfg2025/archy)
|
||||
All backlog bugs now mirrored as Gitea issues: B1→#8, B2→#9, B3→#10, B4→#11, B5→#12, B6→#13, B7→#14, B8→#15, B9→#16, B10→#17, B11→#18, B12→#19, B13→#20, B14→#21, B15→#22, B16→#23, B17→#24, B18→#25, B19→#26. (Pre-existing G#1–7 remain; some overlap, e.g. G#1 strange-peer ≈ B1.) Close the Gitea issue when a bug is verified+shipped.
|
||||
|
||||
## Progress log
|
||||
- 2026-06-15: tracker created. v1.7.96-alpha shipped. B5 (LND CORS) root-caused → fixed in code → fix (b) verified on .116 (harness 4/4). All 19 bugs filed as Gitea issues #8–#26. vps2 feature issues (G#3/5/6) deferred (no new features). Next: deploy to .103 to verify fix (a) (nginx dup strip).
|
||||
45
tests/production-quality/lnd-cors-test.sh
Executable file
45
tests/production-quality/lnd-cors-test.sh
Executable file
@ -0,0 +1,45 @@
|
||||
#!/usr/bin/env bash
# lnd-cors-test.sh — assert the LND "connect your wallet" endpoints return
# correct CORS headers for the cross-origin call from the LND UI app (:18083).
#
# Bug B5: /lnd-connect-info carried a duplicated ACAO header on some nodes, and
# the /proxy/lnd/v1/* 401 carried no ACAO at all fleet-wide. Browsers block
# both.
#
# Usage: ./lnd-cors-test.sh <node-host> (e.g. 192.168.1.116 or 100.102.169.103)
# Exit 0 = all assertions pass.

# Deliberately no `-e`: a failed curl or a zero-match grep has to be counted
# as a FAIL below rather than abort the run.
set -uo pipefail

HOST="${1:?usage: lnd-cors-test.sh <node-host>}"
ORIGIN="http://${HOST}:18083"
BASE="http://${HOST}"
PASS=0
FAIL=0

# Print the arguments followed by a newline.
say() { printf '%s\n' "$*"; }

# Record one passing / failing assertion and print its description ($1).
ok() {
  PASS=$((PASS+1))
  say " PASS: $1"
}
bad() {
  FAIL=$((FAIL+1))
  say " FAIL: $1"
}

# Count ACAO header lines (case-insensitive) in a header dump ($1).
acao_count() { grep -ci '^access-control-allow-origin:' <<<"$1"; }

# Print the value of the first ACAO header in a header dump ($1), with the
# header name, leading whitespace and the trailing CR removed.
acao_value() {
  grep -i '^access-control-allow-origin:' <<<"$1" \
    | head -1 \
    | sed 's/^[^:]*:[[:space:]]*//' \
    | tr -d '\r'
}

# Dump only the response headers of ${BASE}$1, sending the app UI's Origin.
# The body is discarded, curl's own diagnostics are silenced, and the request
# is capped at 8 seconds.
fetch_headers() {
  curl -s -m 8 -D - -o /dev/null -H "Origin: ${ORIGIN}" "${BASE}$1" 2>/dev/null
}

say "== B5 LND CORS — node ${HOST} (origin ${ORIGIN}) =="

# Assertions use explicit if/else instead of `test && ok || bad`: the latter
# would also run `bad` whenever `ok` itself returned non-zero.

# 1) /lnd-connect-info — exactly ONE ACAO, value == origin
H=$(fetch_headers "/lnd-connect-info")
N=$(acao_count "$H")
V=$(acao_value "$H")
if [ "$N" = "1" ]; then
  ok "/lnd-connect-info has exactly 1 ACAO header"
else
  bad "/lnd-connect-info ACAO count=$N (want 1)"
fi
if [ "$V" = "$ORIGIN" ]; then
  ok "/lnd-connect-info ACAO value == origin"
else
  bad "/lnd-connect-info ACAO='$V' (want '$ORIGIN')"
fi

# 2) /proxy/lnd/v1/getinfo — ACAO present even on 401 (unauth)
H=$(fetch_headers "/proxy/lnd/v1/getinfo")
N=$(acao_count "$H")
if [ "$N" -ge 1 ]; then
  ok "/proxy/lnd/v1/getinfo has ACAO (even unauth)"
else
  bad "/proxy/lnd/v1/getinfo missing ACAO (count=$N)"
fi
# A duplicate is reported as an additional failure on top of the check above.
if ! [ "$N" -le 1 ]; then
  bad "/proxy/lnd/v1/getinfo duplicate ACAO (count=$N)"
fi

# 3) /proxy/lnd/v1/channels — same endpoint family, exactly one ACAO
H=$(fetch_headers "/proxy/lnd/v1/channels")
N=$(acao_count "$H")
if [ "$N" = "1" ]; then
  ok "/proxy/lnd/v1/channels has exactly 1 ACAO"
else
  bad "/proxy/lnd/v1/channels ACAO count=$N (want 1)"
fi

say ""
say "== ${HOST}: ${PASS} passed, ${FAIL} failed =="

# The script's exit status is the result of this final test.
[ "$FAIL" -eq 0 ]
|
||||
Loading…
x
Reference in New Issue
Block a user