Compare commits
3 Commits
a721532f55
...
43e700498b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
43e700498b | ||
|
|
89d397bb74 | ||
|
|
41e7f500f8 |
@ -323,6 +323,26 @@ fun WebViewScreen(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Node apps (e.g. NetBird) terminate TLS with a
|
||||||
|
// self-signed cert — the dashboard needs a secure
|
||||||
|
// context for OIDC/window.crypto.subtle (#15). The
|
||||||
|
// WebView default is to CANCEL untrusted certs, so
|
||||||
|
// those apps render blank. The user explicitly trusts
|
||||||
|
// their own node, so proceed for same-host certs only;
|
||||||
|
// reject anything else (don't blanket-trust the web).
|
||||||
|
override fun onReceivedSslError(
|
||||||
|
view: WebView?,
|
||||||
|
handler: android.webkit.SslErrorHandler?,
|
||||||
|
error: android.net.http.SslError?,
|
||||||
|
) {
|
||||||
|
val u = error?.url
|
||||||
|
if (u != null && isSameHost(u, serverUrl)) {
|
||||||
|
handler?.proceed()
|
||||||
|
} else {
|
||||||
|
handler?.cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
override fun shouldOverrideUrlLoading(
|
override fun shouldOverrideUrlLoading(
|
||||||
view: WebView?,
|
view: WebView?,
|
||||||
request: WebResourceRequest?,
|
request: WebResourceRequest?,
|
||||||
@ -553,6 +573,23 @@ private fun InAppBrowser(
|
|||||||
canGoForward = view?.canGoForward() == true
|
canGoForward = view?.canGoForward() == true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Self-signed TLS on the node's apps (e.g. NetBird on
|
||||||
|
// :8087) would otherwise be cancelled by the WebView
|
||||||
|
// and render blank. Proceed for the user's own node
|
||||||
|
// (same host); reject any other untrusted cert.
|
||||||
|
override fun onReceivedSslError(
|
||||||
|
view: WebView?,
|
||||||
|
handler: android.webkit.SslErrorHandler?,
|
||||||
|
error: android.net.http.SslError?,
|
||||||
|
) {
|
||||||
|
val u = error?.url
|
||||||
|
if (u != null && isSameHost(u, serverUrl)) {
|
||||||
|
handler?.proceed()
|
||||||
|
} else {
|
||||||
|
handler?.cancel()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
override fun shouldOverrideUrlLoading(
|
override fun shouldOverrideUrlLoading(
|
||||||
view: WebView?,
|
view: WebView?,
|
||||||
request: WebResourceRequest?,
|
request: WebResourceRequest?,
|
||||||
|
|||||||
@ -6,7 +6,6 @@
|
|||||||
use crate::api::rpc::RpcHandler;
|
use crate::api::rpc::RpcHandler;
|
||||||
use crate::data_model::InstallPhase;
|
use crate::data_model::InstallPhase;
|
||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result};
|
||||||
use base64::Engine;
|
|
||||||
use std::process::Output;
|
use std::process::Output;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
@ -725,10 +724,6 @@ fn indeedhub_stack_app_ids() -> &'static [&'static str] {
|
|||||||
|
|
||||||
const REGISTRY: &str = "146.59.87.168:3000/lfg2025";
|
const REGISTRY: &str = "146.59.87.168:3000/lfg2025";
|
||||||
|
|
||||||
const NETBIRD_DASHBOARD_IMAGE: &str = "docker.io/netbirdio/dashboard:v2.38.0";
|
|
||||||
const NETBIRD_SERVER_IMAGE: &str = "docker.io/netbirdio/netbird-server:0.71.2";
|
|
||||||
const NETBIRD_PROXY_IMAGE: &str = "docker.io/library/nginx:1.27-alpine";
|
|
||||||
|
|
||||||
/// Pull an image with retry and exponential backoff (3 attempts).
|
/// Pull an image with retry and exponential backoff (3 attempts).
|
||||||
async fn pull_image_with_retry(image: &str) -> Result<()> {
|
async fn pull_image_with_retry(image: &str) -> Result<()> {
|
||||||
let exists = podman_stack_status(&["image", "exists", image], PODMAN_STACK_PROBE_TIMEOUT).await;
|
let exists = podman_stack_status(&["image", "exists", image], PODMAN_STACK_PROBE_TIMEOUT).await;
|
||||||
@ -1846,9 +1841,13 @@ impl RpcHandler {
|
|||||||
// host facts + the netbird-net gateway. The manifests use the exact live
|
// host facts + the netbird-net gateway. The manifests use the exact live
|
||||||
// container names, so on an existing node this ADOPTS the running stack
|
// container names, so on an existing node this ADOPTS the running stack
|
||||||
// rather than recreating it (the sqlite store + base64 keys are
|
// rather than recreating it (the sqlite store + base64 keys are
|
||||||
// preserved — ensure_generated_secrets no-ops on existing files). Falls
|
// preserved — ensure_generated_secrets no-ops on existing files).
|
||||||
// back to the legacy installer below only when the orchestrator doesn't
|
//
|
||||||
// know these app_ids (manifests not yet deployed to the node).
|
// #20 ph4: the legacy hardcoded `podman run` installer was DELETED — the
|
||||||
|
// signed catalog always ships apps/netbird-*/manifest.yml, so there is no
|
||||||
|
// in-Rust fallback. If the orchestrator doesn't know these app_ids and no
|
||||||
|
// running stack exists to adopt, install errors rather than silently
|
||||||
|
// diverging from the manifest contract.
|
||||||
if let Some(orchestrated) =
|
if let Some(orchestrated) =
|
||||||
install_stack_via_orchestrator(self, "netbird", netbird_stack_app_ids()).await?
|
install_stack_via_orchestrator(self, "netbird", netbird_stack_app_ids()).await?
|
||||||
{
|
{
|
||||||
@ -1865,491 +1864,12 @@ impl RpcHandler {
|
|||||||
return Ok(adopted);
|
return Ok(adopted);
|
||||||
}
|
}
|
||||||
|
|
||||||
install_log("INSTALL START: netbird stack (dashboard + server)").await;
|
anyhow::bail!(
|
||||||
info!("Installing self-hosted NetBird stack");
|
"netbird manifests not available on this node — the signed catalog must provide apps/netbird-*/manifest.yml (legacy hardcoded installer removed in #20 ph4)"
|
||||||
|
|
||||||
self.set_install_phase("netbird", InstallPhase::PullingImage)
|
|
||||||
.await;
|
|
||||||
for (i, image) in [
|
|
||||||
NETBIRD_DASHBOARD_IMAGE,
|
|
||||||
NETBIRD_SERVER_IMAGE,
|
|
||||||
NETBIRD_PROXY_IMAGE,
|
|
||||||
]
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
{
|
|
||||||
self.set_install_progress("netbird", i as u64, 3).await;
|
|
||||||
pull_image_with_retry(image)
|
|
||||||
.await
|
|
||||||
.with_context(|| format!("Failed to pull NetBird image: {}", image))?;
|
|
||||||
}
|
|
||||||
self.set_install_progress("netbird", 3, 3).await;
|
|
||||||
|
|
||||||
for name in ["netbird", "netbird-dashboard", "netbird-server"] {
|
|
||||||
let _ = podman_stack_status(&["rm", "-f", name], PODMAN_STACK_PROBE_TIMEOUT).await;
|
|
||||||
}
|
|
||||||
let _ = podman_stack_status(
|
|
||||||
&["network", "rm", "-f", "netbird-net"],
|
|
||||||
PODMAN_STACK_PROBE_TIMEOUT,
|
|
||||||
)
|
)
|
||||||
.await;
|
|
||||||
|
|
||||||
self.set_install_phase("netbird", InstallPhase::CreatingContainer)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
tokio::fs::create_dir_all("/var/lib/archipelago/netbird/data")
|
|
||||||
.await
|
|
||||||
.context("Failed to create NetBird data directory")?;
|
|
||||||
|
|
||||||
let host_ip = detect_netbird_public_host_ip()
|
|
||||||
.await
|
|
||||||
.unwrap_or_else(|| self.config.host_ip.clone());
|
|
||||||
|
|
||||||
// Create the network FIRST so we can read back the gateway it was
|
|
||||||
// assigned — that gateway is Podman's aardvark DNS, which the proxy's
|
|
||||||
// nginx needs as an explicit `resolver` to re-resolve container names
|
|
||||||
// (issue #15: without it nginx caches a container IP and 502s forever
|
|
||||||
// once that IP changes on restart/reboot).
|
|
||||||
let _ = podman_stack_status(
|
|
||||||
&["network", "create", "netbird-net"],
|
|
||||||
PODMAN_STACK_PROBE_TIMEOUT,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
let resolver_ip = netbird_net_resolver_ip().await;
|
|
||||||
write_netbird_config_files(&host_ip, &self.config.host_ip, &resolver_ip).await?;
|
|
||||||
ensure_netbird_tls_cert(&host_ip).await?;
|
|
||||||
|
|
||||||
let mut server_cmd = tokio::process::Command::new("podman");
|
|
||||||
server_cmd.args([
|
|
||||||
"run",
|
|
||||||
"-d",
|
|
||||||
"--name",
|
|
||||||
"netbird-server",
|
|
||||||
"--network",
|
|
||||||
"netbird-net",
|
|
||||||
"--network-alias",
|
|
||||||
"netbird-server",
|
|
||||||
"--restart=unless-stopped",
|
|
||||||
"-p",
|
|
||||||
"8086:80",
|
|
||||||
"-p",
|
|
||||||
"3478:3478/udp",
|
|
||||||
"-v",
|
|
||||||
"/var/lib/archipelago/netbird/data:/var/lib/netbird",
|
|
||||||
"-v",
|
|
||||||
"/var/lib/archipelago/netbird/config.yaml:/etc/netbird/config.yaml:ro",
|
|
||||||
NETBIRD_SERVER_IMAGE,
|
|
||||||
"--config",
|
|
||||||
"/etc/netbird/config.yaml",
|
|
||||||
]);
|
|
||||||
run_required_stack_command("netbird", "create server", &mut server_cmd).await?;
|
|
||||||
|
|
||||||
self.set_install_phase("netbird", InstallPhase::StartingContainer)
|
|
||||||
.await;
|
|
||||||
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
|
||||||
|
|
||||||
let mut dashboard_cmd = tokio::process::Command::new("podman");
|
|
||||||
dashboard_cmd.args([
|
|
||||||
"run",
|
|
||||||
"-d",
|
|
||||||
"--name",
|
|
||||||
"netbird-dashboard",
|
|
||||||
"--network",
|
|
||||||
"netbird-net",
|
|
||||||
// Explicit alias so the proxy can always resolve `netbird-dashboard`
|
|
||||||
// via Podman DNS — don't rely on implicit container-name aliasing.
|
|
||||||
"--network-alias",
|
|
||||||
"netbird-dashboard",
|
|
||||||
"--restart=unless-stopped",
|
|
||||||
"--env-file",
|
|
||||||
"/var/lib/archipelago/netbird/dashboard.env",
|
|
||||||
NETBIRD_DASHBOARD_IMAGE,
|
|
||||||
]);
|
|
||||||
run_required_stack_command("netbird", "create dashboard", &mut dashboard_cmd).await?;
|
|
||||||
|
|
||||||
let mut proxy_cmd = tokio::process::Command::new("podman");
|
|
||||||
proxy_cmd.args([
|
|
||||||
"run",
|
|
||||||
"-d",
|
|
||||||
"--name",
|
|
||||||
"netbird",
|
|
||||||
"--network",
|
|
||||||
"netbird-net",
|
|
||||||
"--restart=unless-stopped",
|
|
||||||
// 8087 publishes the TLS listener — netbird's dashboard requires a
|
|
||||||
// secure context (window.crypto.subtle / OIDC PKCE), issue #15.
|
|
||||||
"-p",
|
|
||||||
"8087:443",
|
|
||||||
"-v",
|
|
||||||
"/var/lib/archipelago/netbird/nginx.conf:/etc/nginx/conf.d/default.conf:ro",
|
|
||||||
"-v",
|
|
||||||
"/var/lib/archipelago/netbird/tls.crt:/etc/nginx/tls.crt:ro",
|
|
||||||
"-v",
|
|
||||||
"/var/lib/archipelago/netbird/tls.key:/etc/nginx/tls.key:ro",
|
|
||||||
NETBIRD_PROXY_IMAGE,
|
|
||||||
]);
|
|
||||||
run_required_stack_command("netbird", "create unified proxy", &mut proxy_cmd).await?;
|
|
||||||
|
|
||||||
wait_for_stack_containers(
|
|
||||||
"netbird",
|
|
||||||
&["netbird-server", "netbird-dashboard", "netbird"],
|
|
||||||
60,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
self.set_install_phase("netbird", InstallPhase::WaitingHealthy)
|
|
||||||
.await;
|
|
||||||
// Containers being "running" is NOT the same as the embedded OIDC
|
|
||||||
// provider being ready (#10). The dashboard SPA opens right after install
|
|
||||||
// and, if it loads before /oauth2/.well-known is served, caches a bad
|
|
||||||
// auth state — the user appears logged-in but can't log out until it
|
|
||||||
// self-corrects. Wait (best-effort) for OIDC discovery to answer before
|
|
||||||
// we report Done, so the first dashboard load sees a ready provider.
|
|
||||||
wait_for_netbird_oidc_ready(Duration::from_secs(60)).await;
|
|
||||||
|
|
||||||
self.set_install_phase("netbird", InstallPhase::PostInstall)
|
|
||||||
.await;
|
|
||||||
self.set_install_phase("netbird", InstallPhase::Done).await;
|
|
||||||
self.clear_install_progress("netbird").await;
|
|
||||||
|
|
||||||
install_log("INSTALL OK: netbird stack").await;
|
|
||||||
info!("NetBird stack installed");
|
|
||||||
Ok(serde_json::json!({
|
|
||||||
"success": true,
|
|
||||||
"package_id": "netbird",
|
|
||||||
"message": "NetBird self-hosted stack installed",
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Best-effort wait for NetBird's embedded OIDC provider to start serving its
|
|
||||||
/// discovery document. The management server publishes 8086:80 on the host and
|
|
||||||
/// is the issuer at `/oauth2`, so its `.well-known/openid-configuration` is the
|
|
||||||
/// signal that the dashboard's login/logout flow will work. Polls until a 2xx
|
|
||||||
/// or the timeout — NEVER fails the install (the stack is already running; this
|
|
||||||
/// only narrows the post-install race window in #10).
|
|
||||||
async fn wait_for_netbird_oidc_ready(timeout: Duration) {
|
|
||||||
let url = "http://127.0.0.1:8086/oauth2/.well-known/openid-configuration";
|
|
||||||
let client = match reqwest::Client::builder()
|
|
||||||
.timeout(Duration::from_secs(5))
|
|
||||||
.build()
|
|
||||||
{
|
|
||||||
Ok(c) => c,
|
|
||||||
Err(_) => return,
|
|
||||||
};
|
|
||||||
let deadline = tokio::time::Instant::now() + timeout;
|
|
||||||
loop {
|
|
||||||
if let Ok(resp) = client.get(url).send().await {
|
|
||||||
if resp.status().is_success() {
|
|
||||||
info!("NetBird OIDC discovery is ready");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if tokio::time::Instant::now() >= deadline {
|
|
||||||
info!("NetBird OIDC discovery not ready within timeout — proceeding anyway");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn read_or_generate_b64_secret(name: &str) -> String {
|
|
||||||
let path = format!("/var/lib/archipelago/secrets/{}", name);
|
|
||||||
if let Ok(val) = tokio::fs::read_to_string(&path).await {
|
|
||||||
let trimmed = val.trim().to_string();
|
|
||||||
if !trimmed.is_empty() {
|
|
||||||
return trimmed;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let mut buf = [0u8; 32];
|
|
||||||
rand::RngCore::fill_bytes(&mut rand::rngs::OsRng, &mut buf);
|
|
||||||
let secret = base64::engine::general_purpose::STANDARD.encode(buf);
|
|
||||||
let _ = tokio::fs::create_dir_all("/var/lib/archipelago/secrets").await;
|
|
||||||
let _ = tokio::fs::write(&path, &secret).await;
|
|
||||||
secret
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Read the gateway of the `netbird-net` bridge. Podman runs its aardvark DNS
|
|
||||||
/// resolver on this address, so nginx can use it as an explicit `resolver` to
|
|
||||||
/// re-resolve container names at request time. Falls back to Podman's usual
|
|
||||||
/// first-pool gateway if the inspect fails (best effort — config is rewritten
|
|
||||||
/// on every (re)install).
|
|
||||||
async fn netbird_net_resolver_ip() -> String {
|
|
||||||
let out = tokio::process::Command::new("podman")
|
|
||||||
.args([
|
|
||||||
"network",
|
|
||||||
"inspect",
|
|
||||||
"netbird-net",
|
|
||||||
"--format",
|
|
||||||
"{{range .Subnets}}{{.Gateway}}{{end}}",
|
|
||||||
])
|
|
||||||
.output()
|
|
||||||
.await;
|
|
||||||
if let Ok(o) = out {
|
|
||||||
let gw = String::from_utf8_lossy(&o.stdout).trim().to_string();
|
|
||||||
if !gw.is_empty() && gw.parse::<std::net::IpAddr>().is_ok() {
|
|
||||||
return gw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"10.89.0.1".to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate a self-signed TLS cert for the netbird proxy if absent. The
|
|
||||||
/// dashboard needs a secure context (window.crypto.subtle / OIDC PKCE), so the
|
|
||||||
/// proxy serves HTTPS; a self-signed cert is sufficient (the user accepts it
|
|
||||||
/// once when opening netbird in a tab). SAN covers the LAN IP plus
|
|
||||||
/// localhost/127.0.0.1 so it's valid however the box is reached locally.
|
|
||||||
async fn ensure_netbird_tls_cert(host_ip: &str) -> Result<()> {
|
|
||||||
let dir = "/var/lib/archipelago/netbird";
|
|
||||||
let crt = format!("{dir}/tls.crt");
|
|
||||||
let key = format!("{dir}/tls.key");
|
|
||||||
if tokio::fs::metadata(&crt).await.is_ok() && tokio::fs::metadata(&key).await.is_ok() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
let _ = tokio::fs::create_dir_all(dir).await;
|
|
||||||
let san = format!("subjectAltName=IP:{host_ip},IP:127.0.0.1,DNS:localhost");
|
|
||||||
let status = tokio::process::Command::new("openssl")
|
|
||||||
.args([
|
|
||||||
"req",
|
|
||||||
"-x509",
|
|
||||||
"-newkey",
|
|
||||||
"rsa:2048",
|
|
||||||
"-nodes",
|
|
||||||
"-keyout",
|
|
||||||
&key,
|
|
||||||
"-out",
|
|
||||||
&crt,
|
|
||||||
"-days",
|
|
||||||
"3650",
|
|
||||||
"-subj",
|
|
||||||
&format!("/CN={host_ip}"),
|
|
||||||
"-addext",
|
|
||||||
&san,
|
|
||||||
])
|
|
||||||
.status()
|
|
||||||
.await
|
|
||||||
.context("failed to run openssl for netbird TLS cert")?;
|
|
||||||
if !status.success() {
|
|
||||||
anyhow::bail!("openssl failed to generate netbird TLS cert");
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn write_netbird_config_files(host_ip: &str, lan_ip: &str, resolver_ip: &str) -> Result<()> {
|
|
||||||
// netbird's dashboard uses window.crypto.subtle (OIDC PKCE), which browsers
|
|
||||||
// only expose in a SECURE context — so the proxy serves HTTPS and every
|
|
||||||
// origin here is https (issue #15: over plain http the dashboard threw
|
|
||||||
// "window.crypto.subtle is unavailable" and never reached login).
|
|
||||||
let public_origin = format!("https://{}:8087", host_ip);
|
|
||||||
let server_origin = format!("http://{}:8086", host_ip);
|
|
||||||
// A single box is reached via several addresses. Allow the OIDC login flow
|
|
||||||
// to redirect back to whichever origin the user actually used, otherwise
|
|
||||||
// post-login lands on the wrong host and the dashboard shows
|
|
||||||
// "Unauthenticated" (issue #15). The browser-side CORS is handled in the
|
|
||||||
// nginx proxy; this covers the redirect-URI allow-list.
|
|
||||||
let lan_origin = format!("https://{}:8087", lan_ip);
|
|
||||||
let mut redirect_origins = vec![public_origin.clone()];
|
|
||||||
if lan_origin != public_origin {
|
|
||||||
redirect_origins.push(lan_origin);
|
|
||||||
}
|
|
||||||
let dashboard_redirect_uris = redirect_origins
|
|
||||||
.iter()
|
|
||||||
.flat_map(|o| {
|
|
||||||
[
|
|
||||||
format!(" - \"{o}/nb-auth\""),
|
|
||||||
format!(" - \"{o}/nb-silent-auth\""),
|
|
||||||
]
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join("\n");
|
|
||||||
let dashboard_logout_uris = redirect_origins
|
|
||||||
.iter()
|
|
||||||
.map(|o| format!(" - \"{o}/\""))
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join("\n");
|
|
||||||
let relay_secret = read_or_generate_b64_secret("netbird-relay-auth-secret").await;
|
|
||||||
let encryption_key = read_or_generate_b64_secret("netbird-store-encryption-key").await;
|
|
||||||
let config = format!(
|
|
||||||
r#"server:
|
|
||||||
listenAddress: ":80"
|
|
||||||
exposedAddress: "{public_origin}"
|
|
||||||
stunPorts:
|
|
||||||
- 3478
|
|
||||||
metricsPort: 9090
|
|
||||||
healthcheckAddress: ":9000"
|
|
||||||
logLevel: "info"
|
|
||||||
logFile: "console"
|
|
||||||
authSecret: "{relay_secret}"
|
|
||||||
dataDir: "/var/lib/netbird"
|
|
||||||
auth:
|
|
||||||
issuer: "{public_origin}/oauth2"
|
|
||||||
localAuthDisabled: false
|
|
||||||
signKeyRefreshEnabled: false
|
|
||||||
dashboardRedirectURIs:
|
|
||||||
{dashboard_redirect_uris}
|
|
||||||
dashboardPostLogoutRedirectURIs:
|
|
||||||
{dashboard_logout_uris}
|
|
||||||
cliRedirectURIs:
|
|
||||||
- "http://localhost:53000/"
|
|
||||||
store:
|
|
||||||
engine: "sqlite"
|
|
||||||
encryptionKey: "{encryption_key}"
|
|
||||||
"#
|
|
||||||
);
|
|
||||||
tokio::fs::write("/var/lib/archipelago/netbird/config.yaml", config)
|
|
||||||
.await
|
|
||||||
.context("Failed to write NetBird config.yaml")?;
|
|
||||||
|
|
||||||
let dashboard_env = format!(
|
|
||||||
r#"NETBIRD_MGMT_API_ENDPOINT={public_origin}
|
|
||||||
NETBIRD_MGMT_GRPC_API_ENDPOINT={public_origin}
|
|
||||||
AUTH_AUDIENCE=netbird-dashboard
|
|
||||||
AUTH_CLIENT_ID=netbird-dashboard
|
|
||||||
AUTH_CLIENT_SECRET=
|
|
||||||
AUTH_AUTHORITY={public_origin}/oauth2
|
|
||||||
USE_AUTH0=false
|
|
||||||
AUTH_SUPPORTED_SCOPES=openid profile email groups
|
|
||||||
AUTH_REDIRECT_URI=/nb-auth
|
|
||||||
AUTH_SILENT_REDIRECT_URI=/nb-silent-auth
|
|
||||||
NETBIRD_TOKEN_SOURCE=idToken
|
|
||||||
NGINX_SSL_PORT=443
|
|
||||||
LETSENCRYPT_DOMAIN=none
|
|
||||||
"#
|
|
||||||
);
|
|
||||||
tokio::fs::write("/var/lib/archipelago/netbird/dashboard.env", dashboard_env)
|
|
||||||
.await
|
|
||||||
.context("Failed to write NetBird dashboard.env")?;
|
|
||||||
|
|
||||||
let nginx_conf = format!(
|
|
||||||
r#"server {{
|
|
||||||
listen 443 ssl;
|
|
||||||
server_name _;
|
|
||||||
|
|
||||||
# netbird's dashboard needs a secure context (window.crypto.subtle for OIDC
|
|
||||||
# PKCE), so the proxy terminates TLS with a self-signed cert (issue #15).
|
|
||||||
ssl_certificate /etc/nginx/tls.crt;
|
|
||||||
ssl_certificate_key /etc/nginx/tls.key;
|
|
||||||
|
|
||||||
# Rootless Podman can hand a container a new IP across restarts/reboots.
|
|
||||||
# nginx resolves a literal upstream name ONCE at startup and caches it, so
|
|
||||||
# after the IP moves every request 502s with "host unreachable" (issue #15,
|
|
||||||
# observed live on .198: nginx pinned to a dead netbird-dashboard IP). Fix:
|
|
||||||
# point `resolver` at the netbird-net gateway (Podman's aardvark DNS) and
|
|
||||||
# use VARIABLE upstreams, which forces nginx to re-resolve the container
|
|
||||||
# names at request time. Everything is reached container-to-container by
|
|
||||||
# name so nothing depends on host-published ports either.
|
|
||||||
resolver {resolver_ip} valid=10s ipv6=off;
|
|
||||||
|
|
||||||
proxy_set_header Host $host;
|
|
||||||
proxy_set_header X-Real-IP $remote_addr;
|
|
||||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
|
||||||
proxy_set_header X-Forwarded-Proto $scheme;
|
|
||||||
proxy_http_version 1.1;
|
|
||||||
|
|
||||||
location ~ ^/(relay|ws-proxy/) {{
|
|
||||||
set $nb_server netbird-server;
|
|
||||||
proxy_pass http://$nb_server:80;
|
|
||||||
proxy_set_header Upgrade $http_upgrade;
|
|
||||||
proxy_set_header Connection "upgrade";
|
|
||||||
proxy_read_timeout 1d;
|
|
||||||
}}
|
|
||||||
|
|
||||||
location ~ ^/(api|oauth2)(/|$) {{
|
|
||||||
# The dashboard is a SPA whose API/OIDC base URL is baked at build time
|
|
||||||
# to one host:port. A single box is reached via several addresses (LAN
|
|
||||||
# IP, Tailscale 100.x, hostname), so those fetches are cross-origin and
|
|
||||||
# the browser blocks them with no Access-Control-Allow-Origin (issue
|
|
||||||
# #15, observed live on .198). Reflect the caller's Origin so the
|
|
||||||
# self-hosted management/OIDC API is reachable from any of them, and
|
|
||||||
# answer the CORS preflight here.
|
|
||||||
if ($request_method = OPTIONS) {{
|
|
||||||
add_header Access-Control-Allow-Origin $http_origin always;
|
|
||||||
add_header Access-Control-Allow-Credentials true always;
|
|
||||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS" always;
|
|
||||||
add_header Access-Control-Allow-Headers "Authorization, Content-Type, Accept" always;
|
|
||||||
add_header Access-Control-Max-Age 86400 always;
|
|
||||||
add_header Content-Length 0;
|
|
||||||
return 204;
|
|
||||||
}}
|
|
||||||
add_header Access-Control-Allow-Origin $http_origin always;
|
|
||||||
add_header Access-Control-Allow-Credentials true always;
|
|
||||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS" always;
|
|
||||||
add_header Access-Control-Allow-Headers "Authorization, Content-Type, Accept" always;
|
|
||||||
set $nb_server netbird-server;
|
|
||||||
proxy_pass http://$nb_server:80;
|
|
||||||
}}
|
|
||||||
|
|
||||||
location ~ ^/(signalexchange\.SignalExchange|management\.ManagementService|management\.ProxyService)/ {{
|
|
||||||
set $nb_server netbird-server;
|
|
||||||
grpc_pass grpc://$nb_server:80;
|
|
||||||
grpc_read_timeout 1d;
|
|
||||||
grpc_send_timeout 1d;
|
|
||||||
}}
|
|
||||||
|
|
||||||
# OIDC callback routes are client-side SPA routes with NO prebuilt page in
|
|
||||||
# the dashboard bundle, so proxying them straight through 404s — which
|
|
||||||
# crashes the dashboard's auth init and shows "Unauthenticated" with dead
|
|
||||||
# buttons (issue #15, confirmed live on .198: /nb-auth + /nb-silent-auth
|
|
||||||
# returned 404). Serve the dashboard's index.html at these paths (URL
|
|
||||||
# unchanged) so react-oidc boots and completes the login / silent-SSO.
|
|
||||||
location ~ ^/(nb-auth|nb-silent-auth) {{
|
|
||||||
set $nb_dashboard netbird-dashboard;
|
|
||||||
rewrite ^.*$ /index.html break;
|
|
||||||
proxy_pass http://$nb_dashboard:80;
|
|
||||||
}}
|
|
||||||
|
|
||||||
location / {{
|
|
||||||
set $nb_dashboard netbird-dashboard;
|
|
||||||
proxy_pass http://$nb_dashboard:80;
|
|
||||||
}}
|
|
||||||
}}
|
|
||||||
|
|
||||||
# Direct server remains available for diagnostics at {server_origin}.
|
|
||||||
"#
|
|
||||||
);
|
|
||||||
tokio::fs::write("/var/lib/archipelago/netbird/nginx.conf", nginx_conf)
|
|
||||||
.await
|
|
||||||
.context("Failed to write NetBird nginx.conf")?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn detect_netbird_public_host_ip() -> Option<String> {
|
|
||||||
let output = tokio::process::Command::new("hostname")
|
|
||||||
.args(["-I"])
|
|
||||||
.output()
|
|
||||||
.await
|
|
||||||
.ok()?;
|
|
||||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
||||||
let ips: Vec<&str> = stdout
|
|
||||||
.split_whitespace()
|
|
||||||
.filter(|s| s.contains('.'))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// Prefer the LAN address as the canonical origin — that's what users browse
|
|
||||||
// to on the local network. Baking the Tailscale 100.x address here broke
|
|
||||||
// LAN access with cross-origin/redirect mismatches (issue #15). Tailscale
|
|
||||||
// (100.64.0.0/10 CGNAT) is only a fallback for nodes with no LAN IP.
|
|
||||||
let is_private_lan = |ip: &str| {
|
|
||||||
ip.starts_with("192.168.")
|
|
||||||
|| ip.starts_with("10.")
|
|
||||||
|| (ip.starts_with("172.")
|
|
||||||
&& ip
|
|
||||||
.split('.')
|
|
||||||
.nth(1)
|
|
||||||
.and_then(|o| o.parse::<u8>().ok())
|
|
||||||
.map(|o| (16..=31).contains(&o))
|
|
||||||
.unwrap_or(false))
|
|
||||||
};
|
|
||||||
if let Some(lan) = ips.iter().find(|ip| is_private_lan(ip)) {
|
|
||||||
return Some(lan.to_string());
|
|
||||||
}
|
|
||||||
ips.iter()
|
|
||||||
.find(|ip| ip.starts_with("100."))
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{btcpay_stack_app_ids, mempool_stack_app_ids};
|
use super::{btcpay_stack_app_ids, mempool_stack_app_ids};
|
||||||
|
|||||||
@ -2964,7 +2964,8 @@ impl ProdContainerOrchestrator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The gateway IP of the app's podman network — aardvark's DNS resolver
|
/// The gateway IP of the app's podman network — aardvark's DNS resolver
|
||||||
/// address. Mirrors the legacy `netbird_net_resolver_ip`; falls back to
|
/// address. (Generalised from the old per-app netbird resolver helper,
|
||||||
|
/// deleted in #20 ph4.) Falls back to
|
||||||
/// podman's usual first-pool gateway if the inspect can't be parsed (the
|
/// podman's usual first-pool gateway if the inspect can't be parsed (the
|
||||||
/// network was just ensured to exist, so this is a belt-and-braces default).
|
/// network was just ensured to exist, so this is a belt-and-braces default).
|
||||||
async fn network_gateway(&self, manifest: &AppManifest) -> Result<String> {
|
async fn network_gateway(&self, manifest: &AppManifest) -> Result<String> {
|
||||||
@ -3004,8 +3005,8 @@ impl ProdContainerOrchestrator {
|
|||||||
/// entry whose crt+key already exist (idempotent / data-preserving). CN and
|
/// entry whose crt+key already exist (idempotent / data-preserving). CN and
|
||||||
/// SAN templates are rendered against host facts; when omitted they default
|
/// SAN templates are rendered against host facts; when omitted they default
|
||||||
/// to the node's host IP plus `127.0.0.1`/`localhost` so the cert is valid
|
/// to the node's host IP plus `127.0.0.1`/`localhost` so the cert is valid
|
||||||
/// however the box is reached locally. Mirrors the legacy
|
/// however the box is reached locally. (Generalised from the old per-app
|
||||||
/// `ensure_netbird_tls_cert` (rsa:2048, 10-year, no per-app Rust).
|
/// netbird TLS helper, deleted in #20 ph4: rsa:2048, 10-year, no per-app Rust.)
|
||||||
async fn ensure_manifest_certs(&self, manifest: &AppManifest) -> Result<()> {
|
async fn ensure_manifest_certs(&self, manifest: &AppManifest) -> Result<()> {
|
||||||
let facts = self.detect_host_facts();
|
let facts = self.detect_host_facts();
|
||||||
let render = |s: &str| {
|
let render = |s: &str| {
|
||||||
|
|||||||
@ -52,7 +52,12 @@ teardown_file() {
|
|||||||
# health-monitor bounce during the read-only tier). A genuinely unexposed
|
# health-monitor bounce during the read-only tier). A genuinely unexposed
|
||||||
# immich never publishes 2283, so this still catches real port drift; it only
|
# immich never publishes 2283, so this still catches real port drift; it only
|
||||||
# absorbs the transient null seen under churn.
|
# absorbs the transient null seen under churn.
|
||||||
local deadline=$(( $(date +%s) + 30 ))
|
# 90s (not 30s): the immich stack (postgres→redis→server with DB migrations on
|
||||||
|
# boot) can take >30s to publish its host port after a churn-induced recreate,
|
||||||
|
# and the destructive-tier immich tests already allow 180–240s for the same
|
||||||
|
# stack. A genuinely unexposed immich still never publishes 2283, so this keeps
|
||||||
|
# catching real port drift while tolerating slow-but-healthy boots.
|
||||||
|
local deadline=$(( $(date +%s) + 90 ))
|
||||||
while (( $(date +%s) < deadline )); do
|
while (( $(date +%s) < deadline )); do
|
||||||
run rpc_result container-list
|
run rpc_result container-list
|
||||||
[ "$status" -eq 0 ]
|
[ "$status" -eq 0 ]
|
||||||
@ -62,7 +67,7 @@ teardown_file() {
|
|||||||
fi
|
fi
|
||||||
sleep 3
|
sleep 3
|
||||||
done
|
done
|
||||||
echo "immich never reported a lan_address containing 2283 within 30s" >&2
|
echo "immich never reported a lan_address containing 2283 within 90s" >&2
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -75,12 +75,24 @@ mempool_skip_if_absent() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@test "no orphan mempool-related containers beyond the known set" {
|
@test "no orphan mempool-related containers beyond the known set" {
|
||||||
local total known
|
# Poll for steady state (don't single-shot): a stack restart in a prior tier
|
||||||
total=$(podman ps -a --format '{{.Names}}' \
|
# briefly leaves a recreated member visible alongside its replacement, so a
|
||||||
| grep -Ec '^(mempool|archy-mempool)' || true)
|
# one-shot count can momentarily see total>known even though the reconciler
|
||||||
known=$(podman ps -a --format '{{.Names}}' \
|
# converges within seconds. A genuine orphan never clears, so this still
|
||||||
| grep -Ec '^(mempool|mempool-api|archy-mempool-db|archy-mempool-web)$' || true)
|
# catches it — it just tolerates the transient recreate window.
|
||||||
[ "$total" -eq "$known" ]
|
local total known deadline=$(( $(date +%s) + 30 ))
|
||||||
|
while (( $(date +%s) < deadline )); do
|
||||||
|
total=$(podman ps -a --format '{{.Names}}' \
|
||||||
|
| grep -Ec '^(mempool|archy-mempool)' || true)
|
||||||
|
known=$(podman ps -a --format '{{.Names}}' \
|
||||||
|
| grep -Ec '^(mempool|mempool-api|archy-mempool-db|archy-mempool-web)$' || true)
|
||||||
|
[ "$total" -eq "$known" ] && return 0
|
||||||
|
sleep 3
|
||||||
|
done
|
||||||
|
echo "orphan mempool container persisted >30s (total=$total known=$known):" >&2
|
||||||
|
podman ps -a --format '{{.Names}}' | grep -E '^(mempool|archy-mempool)' \
|
||||||
|
| grep -vE '^(mempool|mempool-api|archy-mempool-db|archy-mempool-web)$' >&2 || true
|
||||||
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
# ────────────────────────────────────────────────────────────────────
|
# ────────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
@ -44,7 +44,12 @@ start=$(date +%s)
|
|||||||
# run — just delays up to the deadline. Disable with ARCHY_SETTLE=0.
|
# run — just delays up to the deadline. Disable with ARCHY_SETTLE=0.
|
||||||
settle_stack() {
|
settle_stack() {
|
||||||
[[ "${ARCHY_SETTLE:-1}" == "1" && "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || return 0
|
[[ "${ARCHY_SETTLE:-1}" == "1" && "${ARCHY_ALLOW_DESTRUCTIVE:-0}" == "1" ]] || return 0
|
||||||
local deadline=$(( $(date +%s) + ${ARCHY_SETTLE_SECS:-180} ))
|
# 300s (not 180s): on heavy nodes the immich stack's recovery after the prior
|
||||||
|
# iteration's archipelago-restart test (crash_recovery retries on a ~120s
|
||||||
|
# cadence) can take several minutes, and the next iteration's read-only
|
||||||
|
# lan_address probe false-fails if immich is still mid-boot. The settle is a
|
||||||
|
# cap, not a fixed wait — it returns the instant every probe is green.
|
||||||
|
local deadline=$(( $(date +%s) + ${ARCHY_SETTLE_SECS:-300} ))
|
||||||
while (( $(date +%s) < deadline )); do
|
while (( $(date +%s) < deadline )); do
|
||||||
local ok=1
|
local ok=1
|
||||||
# mempool-api + frontend + bitcoin-ui = good proxies for "stack reconnected"
|
# mempool-api + frontend + bitcoin-ui = good proxies for "stack reconnected"
|
||||||
@ -53,6 +58,12 @@ settle_stack() {
|
|||||||
podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert \
|
podman exec lnd lncli --tlscertpath /root/.lnd/tls.cert \
|
||||||
--macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon \
|
--macaroonpath /root/.lnd/data/chain/bitcoin/mainnet/readonly.macaroon \
|
||||||
--rpcserver localhost:10009 getinfo >/dev/null 2>&1 || ok=0
|
--rpcserver localhost:10009 getinfo >/dev/null 2>&1 || ok=0
|
||||||
|
# Only gate on immich where it's actually installed (heavy nodes). Its web
|
||||||
|
# port is the same signal test 64 checks, so settling here keeps the next
|
||||||
|
# iteration's read-only immich probe from racing a still-recovering stack.
|
||||||
|
if podman container exists immich_server 2>/dev/null; then
|
||||||
|
curl -fsS -m 4 -o /dev/null "http://127.0.0.1:2283/" 2>/dev/null || ok=0
|
||||||
|
fi
|
||||||
(( ok == 1 )) && { echo " (stack settled)"; return 0; }
|
(( ok == 1 )) && { echo " (stack settled)"; return 0; }
|
||||||
sleep 4
|
sleep 4
|
||||||
done
|
done
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user