Compare commits
3 Commits
a721532f55
...
43e700498b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
43e700498b | ||
|
|
89d397bb74 | ||
|
|
41e7f500f8 |
@ -323,6 +323,26 @@ fun WebViewScreen(
|
||||
}
|
||||
}
|
||||
|
||||
// Node apps (e.g. NetBird) terminate TLS with a
|
||||
// self-signed cert — the dashboard needs a secure
|
||||
// context for OIDC/window.crypto.subtle (#15). The
|
||||
// WebView default is to CANCEL untrusted certs, so
|
||||
// those apps render blank. The user explicitly trusts
|
||||
// their own node, so proceed for same-host certs only;
|
||||
// reject anything else (don't blanket-trust the web).
|
||||
override fun onReceivedSslError(
|
||||
view: WebView?,
|
||||
handler: android.webkit.SslErrorHandler?,
|
||||
error: android.net.http.SslError?,
|
||||
) {
|
||||
val u = error?.url
|
||||
if (u != null && isSameHost(u, serverUrl)) {
|
||||
handler?.proceed()
|
||||
} else {
|
||||
handler?.cancel()
|
||||
}
|
||||
}
|
||||
|
||||
override fun shouldOverrideUrlLoading(
|
||||
view: WebView?,
|
||||
request: WebResourceRequest?,
|
||||
@ -553,6 +573,23 @@ private fun InAppBrowser(
|
||||
canGoForward = view?.canGoForward() == true
|
||||
}
|
||||
|
||||
// Self-signed TLS on the node's apps (e.g. NetBird on
|
||||
// :8087) would otherwise be cancelled by the WebView
|
||||
// and render blank. Proceed for the user's own node
|
||||
// (same host); reject any other untrusted cert.
|
||||
override fun onReceivedSslError(
|
||||
view: WebView?,
|
||||
handler: android.webkit.SslErrorHandler?,
|
||||
error: android.net.http.SslError?,
|
||||
) {
|
||||
val u = error?.url
|
||||
if (u != null && isSameHost(u, serverUrl)) {
|
||||
handler?.proceed()
|
||||
} else {
|
||||
handler?.cancel()
|
||||
}
|
||||
}
|
||||
|
||||
override fun shouldOverrideUrlLoading(
|
||||
view: WebView?,
|
||||
request: WebResourceRequest?,
|
||||
|
||||
@ -6,7 +6,6 @@
|
||||
use crate::api::rpc::RpcHandler;
|
||||
use crate::data_model::InstallPhase;
|
||||
use anyhow::{Context, Result};
|
||||
use base64::Engine;
|
||||
use std::process::Output;
|
||||
use std::time::Duration;
|
||||
use tracing::info;
|
||||
@ -725,10 +724,6 @@ fn indeedhub_stack_app_ids() -> &'static [&'static str] {
|
||||
|
||||
const REGISTRY: &str = "146.59.87.168:3000/lfg2025";
|
||||
|
||||
const NETBIRD_DASHBOARD_IMAGE: &str = "docker.io/netbirdio/dashboard:v2.38.0";
|
||||
const NETBIRD_SERVER_IMAGE: &str = "docker.io/netbirdio/netbird-server:0.71.2";
|
||||
const NETBIRD_PROXY_IMAGE: &str = "docker.io/library/nginx:1.27-alpine";
|
||||
|
||||
/// Pull an image with retry and exponential backoff (3 attempts).
|
||||
async fn pull_image_with_retry(image: &str) -> Result<()> {
|
||||
let exists = podman_stack_status(&["image", "exists", image], PODMAN_STACK_PROBE_TIMEOUT).await;
|
||||
@ -1846,9 +1841,13 @@ impl RpcHandler {
|
||||
// host facts + the netbird-net gateway. The manifests use the exact live
|
||||
// container names, so on an existing node this ADOPTS the running stack
|
||||
// rather than recreating it (the sqlite store + base64 keys are
|
||||
// preserved — ensure_generated_secrets no-ops on existing files). Falls
|
||||
// back to the legacy installer below only when the orchestrator doesn't
|
||||
// know these app_ids (manifests not yet deployed to the node).
|
||||
// preserved — ensure_generated_secrets no-ops on existing files).
|
||||
//
|
||||
// #20 ph4: the legacy hardcoded `podman run` installer was DELETED — the
|
||||
// signed catalog always ships apps/netbird-*/manifest.yml, so there is no
|
||||
// in-Rust fallback. If the orchestrator doesn't know these app_ids and no
|
||||
// running stack exists to adopt, install errors rather than silently
|
||||
// diverging from the manifest contract.
|
||||
if let Some(orchestrated) =
|
||||
install_stack_via_orchestrator(self, "netbird", netbird_stack_app_ids()).await?
|
||||
{
|
||||
@ -1865,491 +1864,12 @@ impl RpcHandler {
|
||||
return Ok(adopted);
|
||||
}
|
||||
|
||||
install_log("INSTALL START: netbird stack (dashboard + server)").await;
|
||||
info!("Installing self-hosted NetBird stack");
|
||||
|
||||
self.set_install_phase("netbird", InstallPhase::PullingImage)
|
||||
.await;
|
||||
for (i, image) in [
|
||||
NETBIRD_DASHBOARD_IMAGE,
|
||||
NETBIRD_SERVER_IMAGE,
|
||||
NETBIRD_PROXY_IMAGE,
|
||||
]
|
||||
.iter()
|
||||
.enumerate()
|
||||
{
|
||||
self.set_install_progress("netbird", i as u64, 3).await;
|
||||
pull_image_with_retry(image)
|
||||
.await
|
||||
.with_context(|| format!("Failed to pull NetBird image: {}", image))?;
|
||||
}
|
||||
self.set_install_progress("netbird", 3, 3).await;
|
||||
|
||||
for name in ["netbird", "netbird-dashboard", "netbird-server"] {
|
||||
let _ = podman_stack_status(&["rm", "-f", name], PODMAN_STACK_PROBE_TIMEOUT).await;
|
||||
}
|
||||
let _ = podman_stack_status(
|
||||
&["network", "rm", "-f", "netbird-net"],
|
||||
PODMAN_STACK_PROBE_TIMEOUT,
|
||||
anyhow::bail!(
|
||||
"netbird manifests not available on this node — the signed catalog must provide apps/netbird-*/manifest.yml (legacy hardcoded installer removed in #20 ph4)"
|
||||
)
|
||||
.await;
|
||||
|
||||
self.set_install_phase("netbird", InstallPhase::CreatingContainer)
|
||||
.await;
|
||||
|
||||
tokio::fs::create_dir_all("/var/lib/archipelago/netbird/data")
|
||||
.await
|
||||
.context("Failed to create NetBird data directory")?;
|
||||
|
||||
let host_ip = detect_netbird_public_host_ip()
|
||||
.await
|
||||
.unwrap_or_else(|| self.config.host_ip.clone());
|
||||
|
||||
// Create the network FIRST so we can read back the gateway it was
|
||||
// assigned — that gateway is Podman's aardvark DNS, which the proxy's
|
||||
// nginx needs as an explicit `resolver` to re-resolve container names
|
||||
// (issue #15: without it nginx caches a container IP and 502s forever
|
||||
// once that IP changes on restart/reboot).
|
||||
let _ = podman_stack_status(
|
||||
&["network", "create", "netbird-net"],
|
||||
PODMAN_STACK_PROBE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let resolver_ip = netbird_net_resolver_ip().await;
|
||||
write_netbird_config_files(&host_ip, &self.config.host_ip, &resolver_ip).await?;
|
||||
ensure_netbird_tls_cert(&host_ip).await?;
|
||||
|
||||
let mut server_cmd = tokio::process::Command::new("podman");
|
||||
server_cmd.args([
|
||||
"run",
|
||||
"-d",
|
||||
"--name",
|
||||
"netbird-server",
|
||||
"--network",
|
||||
"netbird-net",
|
||||
"--network-alias",
|
||||
"netbird-server",
|
||||
"--restart=unless-stopped",
|
||||
"-p",
|
||||
"8086:80",
|
||||
"-p",
|
||||
"3478:3478/udp",
|
||||
"-v",
|
||||
"/var/lib/archipelago/netbird/data:/var/lib/netbird",
|
||||
"-v",
|
||||
"/var/lib/archipelago/netbird/config.yaml:/etc/netbird/config.yaml:ro",
|
||||
NETBIRD_SERVER_IMAGE,
|
||||
"--config",
|
||||
"/etc/netbird/config.yaml",
|
||||
]);
|
||||
run_required_stack_command("netbird", "create server", &mut server_cmd).await?;
|
||||
|
||||
self.set_install_phase("netbird", InstallPhase::StartingContainer)
|
||||
.await;
|
||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
||||
|
||||
let mut dashboard_cmd = tokio::process::Command::new("podman");
|
||||
dashboard_cmd.args([
|
||||
"run",
|
||||
"-d",
|
||||
"--name",
|
||||
"netbird-dashboard",
|
||||
"--network",
|
||||
"netbird-net",
|
||||
// Explicit alias so the proxy can always resolve `netbird-dashboard`
|
||||
// via Podman DNS — don't rely on implicit container-name aliasing.
|
||||
"--network-alias",
|
||||
"netbird-dashboard",
|
||||
"--restart=unless-stopped",
|
||||
"--env-file",
|
||||
"/var/lib/archipelago/netbird/dashboard.env",
|
||||
NETBIRD_DASHBOARD_IMAGE,
|
||||
]);
|
||||
run_required_stack_command("netbird", "create dashboard", &mut dashboard_cmd).await?;
|
||||
|
||||
let mut proxy_cmd = tokio::process::Command::new("podman");
|
||||
proxy_cmd.args([
|
||||
"run",
|
||||
"-d",
|
||||
"--name",
|
||||
"netbird",
|
||||
"--network",
|
||||
"netbird-net",
|
||||
"--restart=unless-stopped",
|
||||
// 8087 publishes the TLS listener — netbird's dashboard requires a
|
||||
// secure context (window.crypto.subtle / OIDC PKCE), issue #15.
|
||||
"-p",
|
||||
"8087:443",
|
||||
"-v",
|
||||
"/var/lib/archipelago/netbird/nginx.conf:/etc/nginx/conf.d/default.conf:ro",
|
||||
"-v",
|
||||
"/var/lib/archipelago/netbird/tls.crt:/etc/nginx/tls.crt:ro",
|
||||
"-v",
|
||||
"/var/lib/archipelago/netbird/tls.key:/etc/nginx/tls.key:ro",
|
||||
NETBIRD_PROXY_IMAGE,
|
||||
]);
|
||||
run_required_stack_command("netbird", "create unified proxy", &mut proxy_cmd).await?;
|
||||
|
||||
wait_for_stack_containers(
|
||||
"netbird",
|
||||
&["netbird-server", "netbird-dashboard", "netbird"],
|
||||
60,
|
||||
)
|
||||
.await?;
|
||||
|
||||
self.set_install_phase("netbird", InstallPhase::WaitingHealthy)
|
||||
.await;
|
||||
// Containers being "running" is NOT the same as the embedded OIDC
|
||||
// provider being ready (#10). The dashboard SPA opens right after install
|
||||
// and, if it loads before /oauth2/.well-known is served, caches a bad
|
||||
// auth state — the user appears logged-in but can't log out until it
|
||||
// self-corrects. Wait (best-effort) for OIDC discovery to answer before
|
||||
// we report Done, so the first dashboard load sees a ready provider.
|
||||
wait_for_netbird_oidc_ready(Duration::from_secs(60)).await;
|
||||
|
||||
self.set_install_phase("netbird", InstallPhase::PostInstall)
|
||||
.await;
|
||||
self.set_install_phase("netbird", InstallPhase::Done).await;
|
||||
self.clear_install_progress("netbird").await;
|
||||
|
||||
install_log("INSTALL OK: netbird stack").await;
|
||||
info!("NetBird stack installed");
|
||||
Ok(serde_json::json!({
|
||||
"success": true,
|
||||
"package_id": "netbird",
|
||||
"message": "NetBird self-hosted stack installed",
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
/// Best-effort wait for NetBird's embedded OIDC provider to start serving its
|
||||
/// discovery document. The management server publishes 8086:80 on the host and
|
||||
/// is the issuer at `/oauth2`, so its `.well-known/openid-configuration` is the
|
||||
/// signal that the dashboard's login/logout flow will work. Polls until a 2xx
|
||||
/// or the timeout — NEVER fails the install (the stack is already running; this
|
||||
/// only narrows the post-install race window in #10).
|
||||
async fn wait_for_netbird_oidc_ready(timeout: Duration) {
|
||||
let url = "http://127.0.0.1:8086/oauth2/.well-known/openid-configuration";
|
||||
let client = match reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(5))
|
||||
.build()
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(_) => return,
|
||||
};
|
||||
let deadline = tokio::time::Instant::now() + timeout;
|
||||
loop {
|
||||
if let Ok(resp) = client.get(url).send().await {
|
||||
if resp.status().is_success() {
|
||||
info!("NetBird OIDC discovery is ready");
|
||||
return;
|
||||
}
|
||||
}
|
||||
if tokio::time::Instant::now() >= deadline {
|
||||
info!("NetBird OIDC discovery not ready within timeout — proceeding anyway");
|
||||
return;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_or_generate_b64_secret(name: &str) -> String {
|
||||
let path = format!("/var/lib/archipelago/secrets/{}", name);
|
||||
if let Ok(val) = tokio::fs::read_to_string(&path).await {
|
||||
let trimmed = val.trim().to_string();
|
||||
if !trimmed.is_empty() {
|
||||
return trimmed;
|
||||
}
|
||||
}
|
||||
let mut buf = [0u8; 32];
|
||||
rand::RngCore::fill_bytes(&mut rand::rngs::OsRng, &mut buf);
|
||||
let secret = base64::engine::general_purpose::STANDARD.encode(buf);
|
||||
let _ = tokio::fs::create_dir_all("/var/lib/archipelago/secrets").await;
|
||||
let _ = tokio::fs::write(&path, &secret).await;
|
||||
secret
|
||||
}
|
||||
|
||||
/// Read the gateway of the `netbird-net` bridge. Podman runs its aardvark DNS
|
||||
/// resolver on this address, so nginx can use it as an explicit `resolver` to
|
||||
/// re-resolve container names at request time. Falls back to Podman's usual
|
||||
/// first-pool gateway if the inspect fails (best effort — config is rewritten
|
||||
/// on every (re)install).
|
||||
async fn netbird_net_resolver_ip() -> String {
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args([
|
||||
"network",
|
||||
"inspect",
|
||||
"netbird-net",
|
||||
"--format",
|
||||
"{{range .Subnets}}{{.Gateway}}{{end}}",
|
||||
])
|
||||
.output()
|
||||
.await;
|
||||
if let Ok(o) = out {
|
||||
let gw = String::from_utf8_lossy(&o.stdout).trim().to_string();
|
||||
if !gw.is_empty() && gw.parse::<std::net::IpAddr>().is_ok() {
|
||||
return gw;
|
||||
}
|
||||
}
|
||||
"10.89.0.1".to_string()
|
||||
}
|
||||
|
||||
/// Generate a self-signed TLS cert for the netbird proxy if absent. The
|
||||
/// dashboard needs a secure context (window.crypto.subtle / OIDC PKCE), so the
|
||||
/// proxy serves HTTPS; a self-signed cert is sufficient (the user accepts it
|
||||
/// once when opening netbird in a tab). SAN covers the LAN IP plus
|
||||
/// localhost/127.0.0.1 so it's valid however the box is reached locally.
|
||||
async fn ensure_netbird_tls_cert(host_ip: &str) -> Result<()> {
|
||||
let dir = "/var/lib/archipelago/netbird";
|
||||
let crt = format!("{dir}/tls.crt");
|
||||
let key = format!("{dir}/tls.key");
|
||||
if tokio::fs::metadata(&crt).await.is_ok() && tokio::fs::metadata(&key).await.is_ok() {
|
||||
return Ok(());
|
||||
}
|
||||
let _ = tokio::fs::create_dir_all(dir).await;
|
||||
let san = format!("subjectAltName=IP:{host_ip},IP:127.0.0.1,DNS:localhost");
|
||||
let status = tokio::process::Command::new("openssl")
|
||||
.args([
|
||||
"req",
|
||||
"-x509",
|
||||
"-newkey",
|
||||
"rsa:2048",
|
||||
"-nodes",
|
||||
"-keyout",
|
||||
&key,
|
||||
"-out",
|
||||
&crt,
|
||||
"-days",
|
||||
"3650",
|
||||
"-subj",
|
||||
&format!("/CN={host_ip}"),
|
||||
"-addext",
|
||||
&san,
|
||||
])
|
||||
.status()
|
||||
.await
|
||||
.context("failed to run openssl for netbird TLS cert")?;
|
||||
if !status.success() {
|
||||
anyhow::bail!("openssl failed to generate netbird TLS cert");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn write_netbird_config_files(host_ip: &str, lan_ip: &str, resolver_ip: &str) -> Result<()> {
|
||||
// netbird's dashboard uses window.crypto.subtle (OIDC PKCE), which browsers
|
||||
// only expose in a SECURE context — so the proxy serves HTTPS and every
|
||||
// origin here is https (issue #15: over plain http the dashboard threw
|
||||
// "window.crypto.subtle is unavailable" and never reached login).
|
||||
let public_origin = format!("https://{}:8087", host_ip);
|
||||
let server_origin = format!("http://{}:8086", host_ip);
|
||||
// A single box is reached via several addresses. Allow the OIDC login flow
|
||||
// to redirect back to whichever origin the user actually used, otherwise
|
||||
// post-login lands on the wrong host and the dashboard shows
|
||||
// "Unauthenticated" (issue #15). The browser-side CORS is handled in the
|
||||
// nginx proxy; this covers the redirect-URI allow-list.
|
||||
let lan_origin = format!("https://{}:8087", lan_ip);
|
||||
let mut redirect_origins = vec![public_origin.clone()];
|
||||
if lan_origin != public_origin {
|
||||
redirect_origins.push(lan_origin);
|
||||
}
|
||||
let dashboard_redirect_uris = redirect_origins
|
||||
.iter()
|
||||
.flat_map(|o| {
|
||||
[
|
||||
format!(" - \"{o}/nb-auth\""),
|
||||
format!(" - \"{o}/nb-silent-auth\""),
|
||||
]
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
let dashboard_logout_uris = redirect_origins
|
||||
.iter()
|
||||
.map(|o| format!(" - \"{o}/\""))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
let relay_secret = read_or_generate_b64_secret("netbird-relay-auth-secret").await;
|
||||
let encryption_key = read_or_generate_b64_secret("netbird-store-encryption-key").await;
|
||||
let config = format!(
|
||||
r#"server:
|
||||
listenAddress: ":80"
|
||||
exposedAddress: "{public_origin}"
|
||||
stunPorts:
|
||||
- 3478
|
||||
metricsPort: 9090
|
||||
healthcheckAddress: ":9000"
|
||||
logLevel: "info"
|
||||
logFile: "console"
|
||||
authSecret: "{relay_secret}"
|
||||
dataDir: "/var/lib/netbird"
|
||||
auth:
|
||||
issuer: "{public_origin}/oauth2"
|
||||
localAuthDisabled: false
|
||||
signKeyRefreshEnabled: false
|
||||
dashboardRedirectURIs:
|
||||
{dashboard_redirect_uris}
|
||||
dashboardPostLogoutRedirectURIs:
|
||||
{dashboard_logout_uris}
|
||||
cliRedirectURIs:
|
||||
- "http://localhost:53000/"
|
||||
store:
|
||||
engine: "sqlite"
|
||||
encryptionKey: "{encryption_key}"
|
||||
"#
|
||||
);
|
||||
tokio::fs::write("/var/lib/archipelago/netbird/config.yaml", config)
|
||||
.await
|
||||
.context("Failed to write NetBird config.yaml")?;
|
||||
|
||||
let dashboard_env = format!(
|
||||
r#"NETBIRD_MGMT_API_ENDPOINT={public_origin}
|
||||
NETBIRD_MGMT_GRPC_API_ENDPOINT={public_origin}
|
||||
AUTH_AUDIENCE=netbird-dashboard
|
||||
AUTH_CLIENT_ID=netbird-dashboard
|
||||
AUTH_CLIENT_SECRET=
|
||||
AUTH_AUTHORITY={public_origin}/oauth2
|
||||
USE_AUTH0=false
|
||||
AUTH_SUPPORTED_SCOPES=openid profile email groups
|
||||
AUTH_REDIRECT_URI=/nb-auth
|
||||
AUTH_SILENT_REDIRECT_URI=/nb-silent-auth
|
||||
NETBIRD_TOKEN_SOURCE=idToken
|
||||
NGINX_SSL_PORT=443
|
||||
LETSENCRYPT_DOMAIN=none
|
||||
"#
|
||||
);
|
||||
tokio::fs::write("/var/lib/archipelago/netbird/dashboard.env", dashboard_env)
|
||||
.await
|
||||
.context("Failed to write NetBird dashboard.env")?;
|
||||
|
||||
let nginx_conf = format!(
|
||||
r#"server {{
|
||||
listen 443 ssl;
|
||||
server_name _;
|
||||
|
||||
# netbird's dashboard needs a secure context (window.crypto.subtle for OIDC
|
||||
# PKCE), so the proxy terminates TLS with a self-signed cert (issue #15).
|
||||
ssl_certificate /etc/nginx/tls.crt;
|
||||
ssl_certificate_key /etc/nginx/tls.key;
|
||||
|
||||
# Rootless Podman can hand a container a new IP across restarts/reboots.
|
||||
# nginx resolves a literal upstream name ONCE at startup and caches it, so
|
||||
# after the IP moves every request 502s with "host unreachable" (issue #15,
|
||||
# observed live on .198: nginx pinned to a dead netbird-dashboard IP). Fix:
|
||||
# point `resolver` at the netbird-net gateway (Podman's aardvark DNS) and
|
||||
# use VARIABLE upstreams, which forces nginx to re-resolve the container
|
||||
# names at request time. Everything is reached container-to-container by
|
||||
# name so nothing depends on host-published ports either.
|
||||
resolver {resolver_ip} valid=10s ipv6=off;
|
||||
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
location ~ ^/(relay|ws-proxy/) {{
|
||||
set $nb_server netbird-server;
|
||||
proxy_pass http://$nb_server:80;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
proxy_read_timeout 1d;
|
||||
}}
|
||||
|
||||
location ~ ^/(api|oauth2)(/|$) {{
|
||||
# The dashboard is a SPA whose API/OIDC base URL is baked at build time
|
||||
# to one host:port. A single box is reached via several addresses (LAN
|
||||
# IP, Tailscale 100.x, hostname), so those fetches are cross-origin and
|
||||
# the browser blocks them with no Access-Control-Allow-Origin (issue
|
||||
# #15, observed live on .198). Reflect the caller's Origin so the
|
||||
# self-hosted management/OIDC API is reachable from any of them, and
|
||||
# answer the CORS preflight here.
|
||||
if ($request_method = OPTIONS) {{
|
||||
add_header Access-Control-Allow-Origin $http_origin always;
|
||||
add_header Access-Control-Allow-Credentials true always;
|
||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS" always;
|
||||
add_header Access-Control-Allow-Headers "Authorization, Content-Type, Accept" always;
|
||||
add_header Access-Control-Max-Age 86400 always;
|
||||
add_header Content-Length 0;
|
||||
return 204;
|
||||
}}
|
||||
add_header Access-Control-Allow-Origin $http_origin always;
|
||||
add_header Access-Control-Allow-Credentials true always;
|
||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS" always;
|
||||
add_header Access-Control-Allow-Headers "Authorization, Content-Type, Accept" always;
|
||||
set $nb_server netbird-server;
|
||||
proxy_pass http://$nb_server:80;
|
||||
}}
|
||||
|
||||
location ~ ^/(signalexchange\.SignalExchange|management\.ManagementService|management\.ProxyService)/ {{
|
||||
set $nb_server netbird-server;
|
||||
grpc_pass grpc://$nb_server:80;
|
||||
grpc_read_timeout 1d;
|
||||
grpc_send_timeout 1d;
|
||||
}}
|
||||
|
||||
# OIDC callback routes are client-side SPA routes with NO prebuilt page in
|
||||
# the dashboard bundle, so proxying them straight through 404s — which
|
||||
# crashes the dashboard's auth init and shows "Unauthenticated" with dead
|
||||
# buttons (issue #15, confirmed live on .198: /nb-auth + /nb-silent-auth
|
||||
# returned 404). Serve the dashboard's index.html at these paths (URL
|
||||
# unchanged) so react-oidc boots and completes the login / silent-SSO.
|
||||
location ~ ^/(nb-auth|nb-silent-auth) {{
|
||||
set $nb_dashboard netbird-dashboard;
|
||||
rewrite ^.*$ /index.html break;
|
||||
proxy_pass http://$nb_dashboard:80;
|
||||
}}
|
||||
|
||||
location / {{
|
||||
set $nb_dashboard netbird-dashboard;
|
||||
proxy_pass http://$nb_dashboard:80;
|
||||
}}
|
||||
}}
|
||||
|
||||
# Direct server remains available for diagnostics at {server_origin}.
|
||||
"#
|
||||
);
|
||||
tokio::fs::write("/var/lib/archipelago/netbird/nginx.conf", nginx_conf)
|
||||
.await
|
||||
.context("Failed to write NetBird nginx.conf")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn detect_netbird_public_host_ip() -> Option<String> {
|
||||
let output = tokio::process::Command::new("hostname")
|
||||
.args(["-I"])
|
||||
.output()
|
||||
.await
|
||||
.ok()?;
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let ips: Vec<&str> = stdout
|
||||
.split_whitespace()
|
||||
.filter(|s| s.contains('.'))
|
||||
.collect();
|
||||
|
||||
// Prefer the LAN address as the canonical origin — that's what users browse
|
||||
// to on the local network. Baking the Tailscale 100.x address here broke
|
||||
// LAN access with cross-origin/redirect mismatches (issue #15). Tailscale
|
||||
// (100.64.0.0/10 CGNAT) is only a fallback for nodes with no LAN IP.
|
||||
let is_private_lan = |ip: &str| {
|
||||
ip.starts_with("192.168.")
|
||||
|| ip.starts_with("10.")
|
||||
|| (ip.starts_with("172.")
|
||||
&& ip
|
||||
.split('.')
|
||||
.nth(1)
|
||||
.and_then(|o| o.parse::<u8>().ok())
|
||||
.map(|o| (16..=31).contains(&o))
|
||||
.unwrap_or(false))
|
||||
};
|
||||
if let Some(lan) = ips.iter().find(|ip| is_private_lan(ip)) {
|
||||
return Some(lan.to_string());
|
||||
}
|
||||
ips.iter()
|
||||
.find(|ip| ip.starts_with("100."))
|
||||
.map(|s| s.to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{btcpay_stack_app_ids, mempool_stack_app_ids};
|
||||
|
||||
@ -2964,7 +2964,8 @@ impl ProdContainerOrchestrator {
|
||||
}
|
||||
|
||||
/// The gateway IP of the app's podman network — aardvark's DNS resolver
|
||||
/// address. Mirrors the legacy `netbird_net_resolver_ip`; falls back to
|
||||
/// address. (Generalised from the old per-app netbird resolver helper,
|
||||
/// deleted in #20 ph4.) Falls back to
|
||||
/// podman's usual first-pool gateway if the inspect can't be parsed (the
|
||||
/// network was just ensured to exist, so this is a belt-and-braces default).
|
||||
async fn network_gateway(&self, manifest: &AppManifest) -> Result<String> {
|
||||
@ -3004,8 +3005,8 @@ impl ProdContainerOrchestrator {
|
||||
/// entry whose crt+key already exist (idempotent / data-preserving). CN and
|
||||
/// SAN templates are rendered against host facts; when omitted they default
|
||||
/// to the node's host IP plus `127.0.0.1`/`localhost` so the cert is valid
|
||||
/// however the box is reached locally. Mirrors the legacy
|
||||
/// `ensure_netbird_tls_cert` (rsa:2048, 10-year, no per-app Rust).
|
||||
/// however the box is reached locally. (Generalised from the old per-app
|
||||
/// netbird TLS helper, deleted in #20 ph4: rsa:2048, 10-year, no per-app Rust.)
|
||||
async fn ensure_manifest_certs(&self, manifest: &AppManifest) -> Result<()> {
|
||||
let facts = self.detect_host_facts();
|
||||
let render = |s: &str| {
|
||||
|
||||
@ -52,7 +52,12 @@ teardown_file() {
|
||||
# health-monitor bounce during the read-only tier). A genuinely unexposed
|
||||
# immich never publishes 2283, so this still catches real port drift; it only
|
||||
# absorbs the transient null seen under churn.
|
||||
local deadline=$(( $(date +%s) + 30 ))
|
||||
# 90s (not 30s): the immich stack (postgres→redis→server with DB migrations on
|
||||
# boot) can take >30s to publish its host port after a churn-induced recreate,
|
||||
# and the destructive-tier immich tests already allow 180–240s for the same
|
||||
# stack. A genuinely unexposed immich still never publishes 2283, so this keeps
|
||||
# catching real port drift while tolerating slow-but-healthy boots.
|
||||
local deadline=$(( $(date +%s) + 90 ))
|
||||
while (( $(date +%s) < deadline )); do
|
||||
run rpc_result container-list
|
||||
[ "$status" -eq 0 ]
|
||||
@ -62,7 +67,7 @@ teardown_file() {
|
||||
fi
|
||||
sleep 3
|
||||
done
|
||||
echo "immich never reported a lan_address containing 2283 within 30s" >&2
|
||||
echo "immich never reported a lan_address containing 2283 within 90s" >&2
|
||||
return 1
|
||||
}
|
||||
|
||||
|
||||
@ -75,12 +75,24 @@ mempool_skip_if_absent() {
|
||||
}
|
||||
|
||||
@test "no orphan mempool-related containers beyond the known set" {
|
||||
local total known
|
||||
total=$(podman ps -a --format '{{.Names}}' \
|
||||
| grep -Ec '^(mempool|archy-mempool)' || true)
|
||||
known=$(podman ps -a --format '{{.Names}}' \
|
||||
| grep -Ec '^(mempool|mempool-api|archy-mempool-db|archy-mempool-web)$' || true)
|
||||
[ "$total" -eq "$known" ]
|
||||
# Poll for steady state (don't single-shot): a stack restart in a prior tier
|
||||
# briefly leaves a recreated member visible alongside its replacement, so a
|
||||
# one-shot count can momentarily see total>known even though the reconciler
|
||||
# converges within seconds. A genuine orphan never clears, so this still
|
||||
# catches it — it just tolerates the transient recreate window.
|
||||
local total known deadline=$(( $(date +%s) + 30 ))
|
||||
while (( $(date +%s) < deadline )); do
|
||||
total=$(podman ps -a --format '{{.Names}}' \
|
||||
| grep -Ec '^(mempool|archy-mempool)' || true)
|
||||
known=$(podman ps -a --format '{{.Names}}' \
|
||||
| grep -Ec '^(mempool|mempool-api|archy-mempool-db|archy-mempool-web)$' || true)
|
||||
[ "$total" -eq "$known" ] && return 0
|
||||
sleep 3
|
||||
done
|
||||
echo "orphan mempool container persisted >30s (total=$total known=$known):" >&2
|
||||
podman ps -a --format '{{.Names}}' | grep -E '^(mempool|archy-mempool)' \
|
||||
| grep -vE '^(mempool|mempool-api|archy-mempool-db|archy-mempool-web)$' >&2 || true
|
||||
return 1
|
||||
}
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@ -44,7 +44,12 @@ start=$(date +%s)
|
||||
# run — just delays up to the deadline. Disable with ARCHY_SETTLE=0.
|
||||
settle_stack() {
|
||||
[[ "${ARCHY_SETTLE:-1}" == "1" && "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || return 0
|
||||
local deadline=$(( $(date +%s) + ${ARCHY_SETTLE_SECS:-180} ))
|
||||
# 300s (not 180s): on heavy nodes the immich stack's recovery after the prior
|
||||
# iteration's archipelago-restart test (crash_recovery retries on a ~120s
|
||||
# cadence) can take several minutes, and the next iteration's read-only
|
||||
# lan_address probe false-fails if immich is still mid-boot. The settle is a
|
||||
# cap, not a fixed wait — it returns the instant every probe is green.
|
||||
local deadline=$(( $(date +%s) + ${ARCHY_SETTLE_SECS:-300} ))
|
||||
while (( $(date +%s) < deadline )); do
|
||||
local ok=1
|
||||
# mempool-api + frontend + bitcoin-ui = good proxies for "stack reconnected"
|
||||
@ -53,6 +58,12 @@ settle_stack() {
|
||||
podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert \
|
||||
--macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon \
|
||||
--rpcserver localhost:10009 getinfo >/dev/null 2>&1 || ok=0
|
||||
# Only gate on immich where it's actually installed (heavy nodes). Its web
|
||||
# port is the same signal test 64 checks, so settling here keeps the next
|
||||
# iteration's read-only immich probe from racing a still-recovering stack.
|
||||
if podman container exists immich_server 2>/dev/null; then
|
||||
curl -fsS -m 4 -o /dev/null "http://127.0.0.1:2283/" 2>/dev/null || ok=0
|
||||
fi
|
||||
(( ok == 1 )) && { echo " (stack settled)"; return 0; }
|
||||
sleep 4
|
||||
done
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user