From a8b9b0f5e8bc7e67dbc2d6c2bb9700d2718dedf5 Mon Sep 17 00:00:00 2001 From: archipelago Date: Tue, 23 Jun 2026 13:39:53 -0400 Subject: [PATCH] feat(netbird): manifest-driven migration via reusable orchestrator primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate the netbird stack (server/dashboard/proxy) off ~500 lines of per-app Rust to 3 declarative manifests, adding 4 reusable primitives: - SecretGenKind::Base64 (netbird relay authSecret + sqlite store encryptionKey) - GeneratedCert schema + ensure_manifest_certs (self-signed TLS so the dashboard gets a secure context for OIDC PKCE — issue #15; https proxy on 8087 preserved) - templated GeneratedFile render: {{HOST_IP}}/{{HOST_MDNS}}/{{NETWORK_GATEWAY}} (aardvark resolver for the #15 stale-IP fix) /{{secret:NAME}} (never logged) - legacy create_container now honours port.protocol (3478/udp STUN) install_netbird_stack routes via the orchestrator first (legacy kept as fallback, mirroring indeedhub); launch URL derives https://{host_ip}:8087 from host facts. Legacy Rust deletion deferred to post-live-verify. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/netbird-dashboard/manifest.yml | 77 +++++++ apps/netbird-server/manifest.yml | 122 +++++++++++ apps/netbird/manifest.yml | 182 +++++++++++++++++ .../archipelago/src/api/rpc/package/stacks.rs | 27 +++ .../src/container/docker_packages.rs | 33 ++- .../src/container/prod_orchestrator.rs | 192 +++++++++++++++++- core/archipelago/src/container/secrets.rs | 10 + core/container/src/lib.rs | 5 +- core/container/src/manifest.rs | 54 ++++- core/container/src/podman_client.rs | 14 +- 10 files changed, 695 insertions(+), 21 deletions(-) create mode 100644 apps/netbird-dashboard/manifest.yml create mode 100644 apps/netbird-server/manifest.yml create mode 100644 apps/netbird/manifest.yml diff --git a/apps/netbird-dashboard/manifest.yml b/apps/netbird-dashboard/manifest.yml new file mode 100644 index 00000000..dbbb2e67 --- /dev/null +++ b/apps/netbird-dashboard/manifest.yml @@ -0,0 +1,77 @@ +app: + id: netbird-dashboard + name: NetBird Dashboard + version: "2.38.0" + description: NetBird management dashboard (SPA). Internal stack member served through the netbird proxy. + category: networking + + # Hyphen name matches runtime references + the live container (adoption). + # Alias `netbird-dashboard` is the short hostname the proxy's nginx proxies to. + container_name: netbird-dashboard + + container: + image: docker.io/netbirdio/dashboard:v2.38.0 + pull_policy: if-not-present + network: netbird-net + network_aliases: [netbird-dashboard] + # The dashboard SPA bakes its API/OIDC base URL from these at container + # start. They must point at the proxy's public HTTPS origin (8087) so the + # browser uses a secure context (window.crypto.subtle / OIDC PKCE, #15). + # {{HOST_IP}} is the node's primary host IP, resolved at apply time. 
+ derived_env: + - key: NETBIRD_MGMT_API_ENDPOINT + template: "https://{{HOST_IP}}:8087" + - key: NETBIRD_MGMT_GRPC_API_ENDPOINT + template: "https://{{HOST_IP}}:8087" + - key: AUTH_AUTHORITY + template: "https://{{HOST_IP}}:8087/oauth2" + + dependencies: + - app_id: netbird-server + + resources: + memory_limit: 256Mi + + security: + # cap-drop=ALL is applied by the orchestrator. The dashboard image runs + # nginx (master as root, drops workers) binding :80 — needs the worker-drop + # caps + NET_BIND_SERVICE for the privileged port. + capabilities: [CHOWN, DAC_OVERRIDE, SETGID, SETUID, NET_BIND_SERVICE] + readonly_root: false + network_policy: isolated + + # Internal only — reached container-to-container by the proxy via netbird-net. + ports: [] + + volumes: [] + + environment: + - AUTH_AUDIENCE=netbird-dashboard + - AUTH_CLIENT_ID=netbird-dashboard + - AUTH_CLIENT_SECRET= + - USE_AUTH0=false + - AUTH_SUPPORTED_SCOPES=openid profile email groups + - AUTH_REDIRECT_URI=/nb-auth + - AUTH_SILENT_REDIRECT_URI=/nb-silent-auth + - NETBIRD_TOKEN_SOURCE=idToken + - NGINX_SSL_PORT=443 + - LETSENCRYPT_DOMAIN=none + + health_check: + type: tcp + endpoint: localhost:80 + interval: 30s + timeout: 5s + retries: 5 + start_period: 20s + + metadata: + author: NetBird + icon: /assets/img/app-icons/netbird.svg + website: https://netbird.io + repo: https://github.com/netbirdio/dashboard + license: BSD-3-Clause + tags: + - networking + - vpn + - dashboard diff --git a/apps/netbird-server/manifest.yml b/apps/netbird-server/manifest.yml new file mode 100644 index 00000000..cda51af9 --- /dev/null +++ b/apps/netbird-server/manifest.yml @@ -0,0 +1,122 @@ +app: + id: netbird-server + name: NetBird Server + version: "0.71.2" + description: NetBird combined management / signal / relay server with an embedded identity provider and STUN. Backend for the self-hosted NetBird mesh VPN. 
+ category: networking + + # Hyphen name matches the runtime references (crash_recovery / dependencies / + # config startup order) + the live container, so on an existing node the + # orchestrator ADOPTS the running server rather than recreating it (data + + # the sqlite store under /var/lib/netbird preserved). Alias `netbird-server` + # is the short hostname the proxy's nginx proxies/grpc-passes to. + container_name: netbird-server + + container: + image: docker.io/netbirdio/netbird-server:0.71.2 + pull_policy: if-not-present + network: netbird-net + network_aliases: [netbird-server] + # The relay authSecret and the sqlite store encryptionKey are base64 keys + # (the server base64-decodes them to recover raw bytes — hex would decode to + # the wrong value). Generated once and reused: ensure_generated_secrets + # no-ops when the file already exists, so a re-render of config.yaml on an + # adopted node keeps the same keys (regenerating would orphan the store). + generated_secrets: + - name: netbird-relay-auth-secret + kind: base64 + - name: netbird-store-encryption-key + kind: base64 + # Pass the rendered config explicitly, mirroring the legacy `--config` arg. + custom_args: ["--config", "/etc/netbird/config.yaml"] + + dependencies: + - storage: 1Gi + + resources: + memory_limit: 1Gi + + security: + # cap-drop=ALL is applied by the orchestrator. The server binds :80 + # (management/signal/relay HTTP + gRPC) inside the container — a privileged + # port — so it needs NET_BIND_SERVICE. STUN is 3478/udp (unprivileged). + capabilities: [NET_BIND_SERVICE] + readonly_root: false + network_policy: isolated + + ports: + - host: 8086 + container: 80 + protocol: tcp # management API + embedded OIDC issuer (/oauth2) + - host: 3478 + container: 3478 + protocol: udp # STUN — must be UDP; tcp here breaks relay discovery + + volumes: + - type: bind + source: /var/lib/archipelago/netbird/data + target: /var/lib/netbird + options: [rw] + # The rendered config.yaml, read-only. 
Re-rendered on every reconcile from + # host facts + the base64 secrets; idempotent (stable bytes → no restart). + - type: bind + source: /var/lib/archipelago/netbird/config.yaml + target: /etc/netbird/config.yaml + options: [ro] + + environment: [] + + # The server's config. {{HOST_IP}} is the node's primary host IP (the proxy's + # public origin is https on 8087 — the dashboard needs a secure context for + # OIDC PKCE, issue #15). {{secret:...}} are read 0600 from the secrets dir. + files: + - path: /var/lib/archipelago/netbird/config.yaml + overwrite: true + content: | + server: + listenAddress: ":80" + exposedAddress: "https://{{HOST_IP}}:8087" + stunPorts: + - 3478 + metricsPort: 9090 + healthcheckAddress: ":9000" + logLevel: "info" + logFile: "console" + authSecret: "{{secret:netbird-relay-auth-secret}}" + dataDir: "/var/lib/netbird" + auth: + issuer: "https://{{HOST_IP}}:8087/oauth2" + localAuthDisabled: false + signKeyRefreshEnabled: false + dashboardRedirectURIs: + - "https://{{HOST_IP}}:8087/nb-auth" + - "https://{{HOST_IP}}:8087/nb-silent-auth" + dashboardPostLogoutRedirectURIs: + - "https://{{HOST_IP}}:8087/" + cliRedirectURIs: + - "http://localhost:53000/" + store: + engine: "sqlite" + encryptionKey: "{{secret:netbird-store-encryption-key}}" + + # TCP liveness on the management port. Binds at startup, stays green; an http + # check of /oauth2 would false-fail while the issuer warms up. 
+ health_check: + type: tcp + endpoint: localhost:80 + interval: 30s + timeout: 5s + retries: 10 + start_period: 30s + + metadata: + author: NetBird + icon: /assets/img/app-icons/netbird.svg + website: https://netbird.io + repo: https://github.com/netbirdio/netbird + license: BSD-3-Clause + tags: + - networking + - vpn + - wireguard + - mesh diff --git a/apps/netbird/manifest.yml b/apps/netbird/manifest.yml new file mode 100644 index 00000000..6464335a --- /dev/null +++ b/apps/netbird/manifest.yml @@ -0,0 +1,182 @@ +app: + id: netbird + name: NetBird + version: "2.38.0" + description: Self-hosted WireGuard mesh VPN control plane with dashboard, embedded identity provider, management API, signal, relay, and STUN. The user-facing entry point — a TLS proxy in front of the dashboard + server. + category: networking + + # The user-facing launcher (app_id + container both "netbird", matching the + # runtime references + the live container so the orchestrator adopts it). This + # is the nginx that terminates TLS on 8087 and fans out to the dashboard + + # server by their short aliases on netbird-net. + container_name: netbird + + container: + image: docker.io/library/nginx:1.27-alpine + pull_policy: if-not-present + network: netbird-net + # Self-signed TLS cert materialised before create — the dashboard needs a + # secure context (window.crypto.subtle / OIDC PKCE, issue #15), so the proxy + # serves HTTPS. Idempotent: kept as-is when crt+key already exist (a user + # accepts it once). SAN defaults to the host IP + 127.0.0.1 + localhost. + generated_certs: + - crt: /var/lib/archipelago/netbird/tls.crt + key: /var/lib/archipelago/netbird/tls.key + + dependencies: + - app_id: netbird-server + - app_id: netbird-dashboard + - storage: 1Gi + + resources: + memory_limit: 256Mi + + security: + # cap-drop=ALL is applied by the orchestrator. nginx (master as root, drops + # workers) binds :443 — needs the worker-drop caps + NET_BIND_SERVICE. 
+ capabilities: [CHOWN, DAC_OVERRIDE, SETGID, SETUID, NET_BIND_SERVICE] + readonly_root: false + network_policy: isolated + + ports: + # 8087 publishes the TLS listener (container :443). HTTPS is required for the + # dashboard's secure context (issue #15). + - host: 8087 + container: 443 + protocol: tcp + + volumes: + - type: bind + source: /var/lib/archipelago/netbird/nginx.conf + target: /etc/nginx/conf.d/default.conf + options: [ro] + - type: bind + source: /var/lib/archipelago/netbird/tls.crt + target: /etc/nginx/tls.crt + options: [ro] + - type: bind + source: /var/lib/archipelago/netbird/tls.key + target: /etc/nginx/tls.key + options: [ro] + + environment: [] + + # The proxy config. {{NETWORK_GATEWAY}} is the netbird-net bridge gateway = + # Podman's aardvark DNS. nginx uses it as an explicit `resolver` with VARIABLE + # upstreams so it re-resolves container names per request — without it nginx + # pins a container IP at startup and 502s forever once that IP moves on a + # restart/reboot (issue #15, observed live on .198). Every #15 fix below + # (CORS $http_origin reflect, grpc pass, nb-auth/nb-silent-auth rewrite to + # index.html, /relay websocket) is preserved verbatim from the legacy config. + files: + - path: /var/lib/archipelago/netbird/nginx.conf + overwrite: true + content: | + server { + listen 443 ssl; + server_name _; + + # netbird's dashboard needs a secure context (window.crypto.subtle for + # OIDC PKCE), so the proxy terminates TLS with a self-signed cert (#15). + ssl_certificate /etc/nginx/tls.crt; + ssl_certificate_key /etc/nginx/tls.key; + + # Rootless Podman can hand a container a new IP across restarts/reboots. + # nginx resolves a literal upstream name ONCE at startup and caches it, + # so after the IP moves every request 502s with "host unreachable" + # (issue #15, observed live on .198: nginx pinned to a dead + # netbird-dashboard IP). 
Fix: point `resolver` at the netbird-net + # gateway (Podman's aardvark DNS) and use VARIABLE upstreams, which + # forces nginx to re-resolve the container names at request time. + resolver {{NETWORK_GATEWAY}} valid=10s ipv6=off; + + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_http_version 1.1; + + location ~ ^/(relay|ws-proxy/) { + set $nb_server netbird-server; + proxy_pass http://$nb_server:80; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_read_timeout 1d; + } + + location ~ ^/(api|oauth2)(/|$) { + # The dashboard is a SPA whose API/OIDC base URL is baked at build + # time to one host:port. A single box is reached via several + # addresses, so those fetches are cross-origin and the browser + # blocks them with no Access-Control-Allow-Origin (#15, live on + # .198). Reflect the caller's Origin and answer the CORS preflight. 
+ if ($request_method = OPTIONS) { + add_header Access-Control-Allow-Origin $http_origin always; + add_header Access-Control-Allow-Credentials true always; + add_header Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS" always; + add_header Access-Control-Allow-Headers "Authorization, Content-Type, Accept" always; + add_header Access-Control-Max-Age 86400 always; + add_header Content-Length 0; + return 204; + } + add_header Access-Control-Allow-Origin $http_origin always; + add_header Access-Control-Allow-Credentials true always; + add_header Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS" always; + add_header Access-Control-Allow-Headers "Authorization, Content-Type, Accept" always; + set $nb_server netbird-server; + proxy_pass http://$nb_server:80; + } + + location ~ ^/(signalexchange\.SignalExchange|management\.ManagementService|management\.ProxyService)/ { + set $nb_server netbird-server; + grpc_pass grpc://$nb_server:80; + grpc_read_timeout 1d; + grpc_send_timeout 1d; + } + + # OIDC callback routes are client-side SPA routes with NO prebuilt page + # in the dashboard bundle, so proxying them straight through 404s — + # which crashes the dashboard's auth init and shows "Unauthenticated" + # with dead buttons (#15, live on .198: /nb-auth + /nb-silent-auth + # returned 404). Serve index.html at these paths (URL unchanged) so + # react-oidc boots and completes the login / silent-SSO. 
+ location ~ ^/(nb-auth|nb-silent-auth) { + set $nb_dashboard netbird-dashboard; + rewrite ^.*$ /index.html break; + proxy_pass http://$nb_dashboard:80; + } + + location / { + set $nb_dashboard netbird-dashboard; + proxy_pass http://$nb_dashboard:80; + } + } + + health_check: + type: tcp + endpoint: localhost:443 + interval: 30s + timeout: 5s + retries: 5 + start_period: 20s + + interfaces: + main: + name: Dashboard + description: Manage your self-hosted NetBird mesh VPN + type: ui + port: 8087 + protocol: https + path: / + + metadata: + author: NetBird + icon: /assets/img/app-icons/netbird.svg + website: https://netbird.io + repo: https://github.com/netbirdio/netbird + license: BSD-3-Clause + tags: + - networking + - vpn + - wireguard + - mesh diff --git a/core/archipelago/src/api/rpc/package/stacks.rs b/core/archipelago/src/api/rpc/package/stacks.rs index ed088964..0c010098 100644 --- a/core/archipelago/src/api/rpc/package/stacks.rs +++ b/core/archipelago/src/api/rpc/package/stacks.rs @@ -696,6 +696,16 @@ fn immich_stack_app_ids() -> &'static [&'static str] { &["immich-postgres", "immich-redis", "immich"] } +fn netbird_stack_app_ids() -> &'static [&'static str] { + // Dependency/startup order: the combined management/signal/relay server + // first (it owns the base64 relay/store secrets + the sqlite store, and is + // the OIDC issuer the others point at), then the dashboard SPA, then the + // user-facing TLS proxy ("netbird", which carries the self-signed cert + + // the templated nginx.conf and is the launcher). Mirrors the netbird + // startup_order in dependencies.rs. + &["netbird-server", "netbird-dashboard", "netbird"] +} + fn indeedhub_stack_app_ids() -> &'static [&'static str] { // Dependency order: backends + their generated secrets first, then the api // (owns indeedhub-jwt; reads the db/minio secrets the backends materialised), @@ -1828,6 +1838,23 @@ impl RpcHandler { /// Install self-hosted NetBird (dashboard + combined management/signal/relay server). 
pub(super) async fn install_netbird_stack(&self) -> Result { + // Manifest-driven path (#20 phase 4): render the 3-member stack from + // apps/netbird-*/manifest.yml via the orchestrator — dedicated + // netbird-net + network_aliases, base64 generated_secrets, a self-signed + // TLS cert (generated_certs) so the dashboard gets a secure context for + // OIDC PKCE (#15), and templated config.yaml/nginx.conf rendered from + // host facts + the netbird-net gateway. The manifests use the exact live + // container names, so on an existing node this ADOPTS the running stack + // rather than recreating it (the sqlite store + base64 keys are + // preserved — ensure_generated_secrets no-ops on existing files). Falls + // back to the legacy installer below only when the orchestrator doesn't + // know these app_ids (manifests not yet deployed to the node). + if let Some(orchestrated) = + install_stack_via_orchestrator(self, "netbird", netbird_stack_app_ids()).await? + { + return Ok(orchestrated); + } + if let Some(adopted) = adopt_stack_if_exists( "netbird", "netbird", diff --git a/core/archipelago/src/container/docker_packages.rs b/core/archipelago/src/container/docker_packages.rs index ef927217..e6b06c65 100644 --- a/core/archipelago/src/container/docker_packages.rs +++ b/core/archipelago/src/container/docker_packages.rs @@ -691,16 +691,37 @@ fn extract_lan_address(ports: &[String]) -> Option { None } +/// netbird's dashboard launch URL: HTTPS on 8087 (the proxy terminates TLS — +/// the dashboard needs a secure context for OIDC PKCE, issue #15) at the node's +/// primary host IP so it's reachable from the LAN. Manifest-driven netbird no +/// longer writes `dashboard.env`, so this is derived from host facts (the same +/// `{{HOST_IP}}` the orchestrator bakes into the cert/config); it falls back to +/// the static localhost mapping when the host IP can't be read. 
URL shape is +/// identical to the legacy installer's, so the existing https reachability +/// wrapper still applies. async fn netbird_configured_launch_url() -> Option { - let env = tokio::fs::read_to_string("/var/lib/archipelago/netbird/dashboard.env") + if let Some(ip) = first_host_ip().await { + return Some(format!("https://{ip}:8087")); + } + PodmanClient::lan_address_for("netbird") +} + +/// First address from `hostname -I` — the node's primary host IP. Mirrors the +/// orchestrator's `detect_host_ip` so launch URLs match the cert/config the +/// orchestrator renders for `{{HOST_IP}}`. +async fn first_host_ip() -> Option { + let out = tokio::process::Command::new("hostname") + .arg("-I") + .output() .await .ok()?; - env.lines() - .find_map(|line| line.strip_prefix("NETBIRD_MGMT_API_ENDPOINT=")) - .map(str::trim) - .filter(|s| !s.is_empty()) + if !out.status.success() { + return None; + } + String::from_utf8_lossy(&out.stdout) + .split_whitespace() + .next() .map(ToOwned::to_owned) - .or_else(|| PodmanClient::lan_address_for("netbird")) } async fn reachable_lan_address(app_id: &str, candidate: Option) -> Option { diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs index 2223a020..26d9852f 100644 --- a/core/archipelago/src/container/prod_orchestrator.rs +++ b/core/archipelago/src/container/prod_orchestrator.rs @@ -26,7 +26,7 @@ use anyhow::{Context, Result}; use archipelago_container::{ AppManifest, ContainerRuntime as ContainerRuntimeTrait, ContainerState, ContainerStatus, - Dependency, GeneratedFile, HostFacts, ManifestError, ResolvedSource, SecretsProvider, + Dependency, HostFacts, ManifestError, ResolvedSource, SecretsProvider, }; use async_trait::async_trait; use std::collections::{HashMap, HashSet}; @@ -1809,6 +1809,9 @@ impl ProdContainerOrchestrator { self.run_pre_start_hooks(&manifest.app.id).await?; self.ensure_bind_mount_sockets(manifest).await?; 
self.ensure_bind_mount_dirs(manifest).await?; + // Certs before files: a templated file may not need the cert, but the + // container's bind-mounts expect both present before create_container. + self.ensure_manifest_certs(manifest).await?; self.ensure_manifest_files(manifest).await?; self.apply_data_uid(manifest).await?; self.run_post_data_uid_hooks(&manifest.app.id).await?; @@ -2750,7 +2753,14 @@ impl ProdContainerOrchestrator { async fn ensure_manifest_files(&self, manifest: &AppManifest) -> Result { let mut outcome = HookOutcome::Unchanged; for file in &manifest.app.files { - if ensure_generated_file(file) + // Render templated placeholders before comparing/writing so the + // idempotency check is against the FINAL bytes (not the template), + // otherwise a rendered file would be rewritten every reconcile. + let rendered = self + .render_file_placeholders(manifest, &file.content) + .await + .with_context(|| format!("rendering manifest file {}", file.path))?; + if ensure_rendered_file(&file.path, &rendered, file.overwrite) .await .with_context(|| format!("ensure manifest file {}", file.path))? == HookOutcome::Rewritten @@ -2760,23 +2770,185 @@ impl ProdContainerOrchestrator { } Ok(outcome) } + + /// Substitute the allow-listed placeholders a manifest `GeneratedFile` may + /// carry. Keeps runtime-derived config (netbird's `config.yaml`/`nginx.conf`) + /// declarative instead of generated by per-app Rust: + /// - `{{HOST_IP}}` / `{{HOST_MDNS}}` — host facts (`hostname -I` / `.local`). + /// - `{{NETWORK_GATEWAY}}` — the gateway of the app's podman network, i.e. + /// aardvark's DNS address. nginx uses it as an explicit `resolver` so it + /// re-resolves container names per request instead of pinning a stale IP + /// and 502-ing after a restart/reboot (issue #15). The network is ensured + /// to exist first so the gateway is readable on a fresh install (this runs + /// before `install_fresh`'s own `ensure_container_network`; both idempotent). 
+ /// - `{{secret:NAME}}` — a `0600` secret read from the service-owned secrets + /// dir (e.g. netbird's base64 relay/store keys). NEVER logged. + async fn render_file_placeholders( + &self, + manifest: &AppManifest, + content: &str, + ) -> Result { + let mut out = content.to_string(); + if out.contains("{{HOST_IP}}") || out.contains("{{HOST_MDNS}}") { + let facts = self.detect_host_facts(); + out = out + .replace("{{HOST_IP}}", &facts.host_ip) + .replace("{{HOST_MDNS}}", &facts.host_mdns); + } + if out.contains("{{NETWORK_GATEWAY}}") { + self.ensure_container_network(manifest).await?; + let gw = self.network_gateway(manifest).await?; + out = out.replace("{{NETWORK_GATEWAY}}", &gw); + } + out = self.render_secret_placeholders(&out).await?; + Ok(out) + } + + /// Replace every `{{secret:NAME}}` with the trimmed contents of + /// `/NAME`. `NAME` must be a bare filename (the same safety bar + /// as `secret_env`). The secret value is never placed in an error or log. + async fn render_secret_placeholders(&self, content: &str) -> Result { + const OPEN: &str = "{{secret:"; + let mut out = String::with_capacity(content.len()); + let mut rest = content; + while let Some(start) = rest.find(OPEN) { + out.push_str(&rest[..start]); + let after = &rest[start + OPEN.len()..]; + let end = after + .find("}}") + .ok_or_else(|| anyhow::anyhow!("unterminated {{{{secret:...}}}} placeholder"))?; + let name = &after[..end]; + if name.is_empty() || name.contains('/') || name.contains("..") { + anyhow::bail!("invalid secret placeholder name '{name}' (must be a bare filename)"); + } + let value = tokio::fs::read_to_string(self.secrets_dir.join(name)) + .await + .map_err(|_| { + // Do not surface the path-with-value or io detail beyond the name.
+ anyhow::anyhow!("secret '{name}' referenced by a manifest file is missing") + })?; + out.push_str(value.trim()); + rest = &after[end + 2..]; + } + out.push_str(rest); + Ok(out) + } + + /// The gateway IP of the app's podman network — aardvark's DNS resolver + /// address. Mirrors the legacy `netbird_net_resolver_ip`; takes the first IPv4 + /// gateway (a dual-stack network lists several) and falls back to podman's + /// usual first-pool gateway if none can be read (a belt-and-braces default). + async fn network_gateway(&self, manifest: &AppManifest) -> Result { + let network = manifest + .app + .container + .network + .as_deref() + .filter(|n| !n.is_empty() && !is_builtin_network_mode(n)) + .ok_or_else(|| { + anyhow::anyhow!("{{{{NETWORK_GATEWAY}}}} used but app has no dedicated network") + })?; + let out = tokio::process::Command::new("podman") + .args([ + "network", + "inspect", + network, + "--format", + "{{range .Subnets}}{{.Gateway}} {{end}}", + ]) + .output() + .await + .with_context(|| format!("inspecting podman network {network} for gateway"))?; + let gw = String::from_utf8_lossy(&out.stdout).split_whitespace().find(|g| g.parse::<std::net::Ipv4Addr>().is_ok()).unwrap_or_default().to_string(); + if !gw.is_empty() { + return Ok(gw); + } + tracing::warn!( + network, + "could not read network gateway; falling back to 10.89.0.1" + ); + Ok("10.89.0.1".to_string()) + } + + /// Materialise manifest-declared self-signed TLS certs before the container + /// is created (so a bind-mounted cert path resolves to a real file). Skips an + /// entry whose crt+key already exist (idempotent / data-preserving). CN and + /// SAN templates are rendered against host facts; when omitted they default + /// to the node's host IP plus `127.0.0.1`/`localhost` so the cert is valid + /// however the box is reached locally. Mirrors the legacy + /// `ensure_netbird_tls_cert` (rsa:2048, 10-year, no per-app Rust).
+ async fn ensure_manifest_certs(&self, manifest: &AppManifest) -> Result<()> { + let facts = self.detect_host_facts(); + let render = |s: &str| { + s.replace("{{HOST_IP}}", &facts.host_ip) + .replace("{{HOST_MDNS}}", &facts.host_mdns) + }; + for cert in &manifest.app.container.generated_certs { + if tokio::fs::metadata(&cert.crt).await.is_ok() + && tokio::fs::metadata(&cert.key).await.is_ok() + { + continue; + } + if let Some(parent) = Path::new(&cert.crt).parent() { + create_dir_all_or_sudo(parent).await?; + } + if let Some(parent) = Path::new(&cert.key).parent() { + create_dir_all_or_sudo(parent).await?; + } + let cn = render(cert.common_name.as_deref().unwrap_or("{{HOST_IP}}")); + let san = if cert.sans.is_empty() { + format!("IP:{},IP:127.0.0.1,DNS:localhost", facts.host_ip) + } else { + cert.sans + .iter() + .map(|s| render(s)) + .collect::>() + .join(",") + }; + let status = tokio::process::Command::new("openssl") + .args([ + "req", + "-x509", + "-newkey", + "rsa:2048", + "-nodes", + "-keyout", + &cert.key, + "-out", + &cert.crt, + "-days", + "3650", + "-subj", + &format!("/CN={cn}"), + "-addext", + &format!("subjectAltName={san}"), + ]) + .status() + .await + .with_context(|| format!("running openssl for manifest cert {}", cert.crt))?; + if !status.success() { + anyhow::bail!("openssl failed to generate manifest cert {}", cert.crt); + } + } + Ok(()) + } } -async fn ensure_generated_file(file: &GeneratedFile) -> Result { - let path = Path::new(&file.path); - if let Ok(existing) = tokio::fs::read_to_string(path).await { - if existing == file.content || !file.overwrite { +async fn ensure_rendered_file(path: &str, content: &str, overwrite: bool) -> Result { + let p = Path::new(path); + if let Ok(existing) = tokio::fs::read_to_string(p).await { + if existing == content || !overwrite { return Ok(HookOutcome::Unchanged); } - } else if path.exists() && !file.overwrite { + } else if p.exists() && !overwrite { return Ok(HookOutcome::Unchanged); } - let parent = path 
+ let parent = p .parent() - .ok_or_else(|| anyhow::anyhow!("generated file path has no parent: {}", file.path))?; + .ok_or_else(|| anyhow::anyhow!("generated file path has no parent: {}", path))?; create_dir_all_or_sudo(parent).await?; - write_generated_file_atomically(path, &file.content).await?; + write_generated_file_atomically(p, content).await?; Ok(HookOutcome::Rewritten) } diff --git a/core/archipelago/src/container/secrets.rs b/core/archipelago/src/container/secrets.rs index 5328ff29..099897cf 100644 --- a/core/archipelago/src/container/secrets.rs +++ b/core/archipelago/src/container/secrets.rs @@ -66,6 +66,7 @@ fn ensure_one(dir: &Path, gs: &GeneratedSecret) -> Result<()> { match gs.kind { SecretGenKind::Hex16 => write_secret(&dir.join(&gs.name), &random_hex(16))?, SecretGenKind::Hex32 => write_secret(&dir.join(&gs.name), &random_hex(32))?, + SecretGenKind::Base64 => write_secret(&dir.join(&gs.name), &random_base64(32))?, SecretGenKind::Bcrypt => { let password = random_hex(BCRYPT_PASSWORD_BYTES); let hash = bcrypt::hash(&password, bcrypt::DEFAULT_COST) @@ -92,6 +93,15 @@ fn random_hex(bytes: usize) -> String { hex::encode(buf) } +/// `bytes` of entropy, standard base64 (with padding). For keys that a service +/// base64-decodes to recover the raw bytes (e.g. netbird's store encryptionKey). +fn random_base64(bytes: usize) -> String { + use base64::Engine as _; + let mut buf = vec![0u8; bytes]; + rand::thread_rng().fill_bytes(&mut buf); + base64::engine::general_purpose::STANDARD.encode(buf) +} + /// Atomically write a `0600` secret: a temp file in the same dir (so the rename /// is atomic), fsynced, then renamed over the target. 
fn write_secret(path: &Path, value: &str) -> Result<()> { diff --git a/core/container/src/lib.rs b/core/container/src/lib.rs index 45afa02c..bf7fdadc 100644 --- a/core/container/src/lib.rs +++ b/core/container/src/lib.rs @@ -8,8 +8,9 @@ pub mod runtime; pub use bitcoin_simulator::{BitcoinSimulationMode, BitcoinSimulator}; pub use health_monitor::HealthMonitor; pub use manifest::{ - AppInterface, AppManifest, BuildConfig, ContainerConfig, Dependency, DerivedEnv, GeneratedFile, - GeneratedSecret, HealthCheck, HookStep, HostCopy, HostFacts, LifecycleHooks, ManifestError, + AppInterface, AppManifest, BuildConfig, ContainerConfig, Dependency, DerivedEnv, GeneratedCert, + GeneratedFile, GeneratedSecret, HealthCheck, HookStep, HostCopy, HostFacts, LifecycleHooks, + ManifestError, ResolvedSource, ResourceLimits, SecretEnv, SecretGenKind, SecretsProvider, SecurityPolicy, Volume, }; diff --git a/core/container/src/manifest.rs b/core/container/src/manifest.rs index fa6d4a11..627efab1 100644 --- a/core/container/src/manifest.rs +++ b/core/container/src/manifest.rs @@ -223,6 +223,19 @@ pub struct ContainerConfig { #[serde(default)] pub generated_secrets: Vec<GeneratedSecret>, + /// Self-signed TLS certificates the orchestrator materialises before the + /// container is created (so a bind-mounted cert path resolves to a real + /// file, not a stale/missing path). Like `generated_secrets`, this keeps an + /// app data-driven: a service that needs a secure context (e.g. netbird's + /// dashboard — OIDC PKCE / `window.crypto.subtle` only works over HTTPS, + /// issue #15) declares the cert here instead of relying on per-app Rust. + /// Idempotent: an entry whose `crt` and `key` already exist is left + /// untouched. SAN/CN templates are rendered against host facts at apply time. 
+ /// + /// Example: `- { crt: /var/lib/archipelago/netbird/tls.crt, key: /var/lib/archipelago/netbird/tls.key }` + #[serde(default)] + pub generated_certs: Vec<GeneratedCert>, + /// Rootless-mapped UID:GID applied to the container's data directory /// (the `bind`-mounted host path with `target` inside the container's /// data root) before creation. Mirrors `SPEC_DATA_UID`. @@ -261,6 +274,11 @@ pub enum SecretGenKind { Hex16, /// 32 random bytes, lowercase hex (64 chars). Longer keys/cookies. Hex32, + /// 32 random bytes, standard base64 (44 chars incl. padding). For services + /// that require a base64-encoded key rather than hex — e.g. netbird's relay + /// `authSecret` and the SQLite store `encryptionKey`, which base64-decode + /// their configured value (hex would decode to the wrong bytes). + Base64, /// A random password and its bcrypt hash. `<name>` holds the bcrypt hash /// (what a server is configured with); the plaintext is stored alongside as /// `<name>.pw` for any client that must authenticate. `secret_env` injects @@ -282,12 +300,31 @@ impl GeneratedSecret { /// (primary first). A consumer references one of these via `secret_env`. pub fn target_files(&self) -> Vec<String> { match self.kind { - SecretGenKind::Hex16 | SecretGenKind::Hex32 => vec![self.name.clone()], + SecretGenKind::Hex16 | SecretGenKind::Hex32 | SecretGenKind::Base64 => { + vec![self.name.clone()] + } SecretGenKind::Bcrypt => vec![self.name.clone(), format!("{}.pw", self.name)], } } } +/// A self-signed TLS certificate materialised by the orchestrator. See +/// [`ContainerConfig::generated_certs`]. `crt`/`key` are absolute host paths +/// (typically under `/var/lib/archipelago/<app>/`) that the container +/// bind-mounts read-only. `common_name` and `sans` are rendered against host +/// facts (`{{HOST_IP}}`) at apply time; when omitted they default to the +/// node's host IP plus `IP:127.0.0.1,DNS:localhost` so the cert is valid for +/// however the box is reached locally. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct GeneratedCert { + pub crt: String, + pub key: String, + #[serde(default)] + pub common_name: Option<String>, + #[serde(default)] + pub sans: Vec<String>, +} + fn default_pull_policy() -> String { "if-not-present".to_string() } @@ -665,6 +702,18 @@ impl AppManifest { } } + // generated_certs: crt/key must be non-empty absolute paths with no + // traversal (they become bind-mount sources, same safety bar as files). + for (i, c) in self.app.container.generated_certs.iter().enumerate() { + for (field, val) in [("crt", &c.crt), ("key", &c.key)] { + if val.is_empty() || !val.starts_with('/') || val.contains("..") { + return Err(ManifestError::Invalid(format!( + "container.generated_certs[{i}].{field} must be an absolute path with no '..', got '{val}'" + ))); + } + } + } + // data_uid: if set, must look like "NNNNN:NNNNN". if let Some(u) = &self.app.container.data_uid { let parts: Vec<&str> = u.split(':').collect(); @@ -1711,6 +1760,7 @@ app: ], secret_env: vec![], generated_secrets: vec![], + generated_certs: vec![], data_uid: None, }; let facts = HostFacts { @@ -1762,6 +1812,7 @@ app: }, ], generated_secrets: vec![], + generated_certs: vec![], data_uid: None, }; let p = MapSecretsProvider { @@ -1799,6 +1850,7 @@ app: secret_file: "bitcoin-rpc-password".to_string(), }], generated_secrets: vec![], + generated_certs: vec![], data_uid: None, }; let p = MapSecretsProvider { diff --git a/core/container/src/podman_client.rs b/core/container/src/podman_client.rs index fb208e71..f5cfa103 100644 --- a/core/container/src/podman_client.rs +++ b/core/container/src/podman_client.rs @@ -124,7 +124,9 @@ impl PodmanClient { "nginx-proxy-manager" => "http://localhost:8081", "fedimint-gateway" => "http://localhost:8176", "endurain" => "http://localhost:8080", - "netbird" => "http://localhost:8087", + // HTTPS: netbird's dashboard needs a secure context for OIDC PKCE + // (window.crypto.subtle), so the proxy serves TLS on 8087 
(issue #15). + "netbird" => "https://localhost:8087", "electrs" | "archy-electrs-ui" => "http://localhost:50002", _ => return None, }; @@ -275,10 +277,18 @@ impl PodmanClient { // Build the container spec for the API let mut port_mappings = Vec::new(); for port in &manifest.app.ports { + // Honour the manifest's protocol (default tcp). netbird's STUN port + // is 3478/udp; forcing tcp here would publish the wrong protocol and + // silently break relay discovery. + let protocol = match port.protocol.to_ascii_lowercase().as_str() { + "udp" => "udp", + "sctp" => "sctp", + _ => "tcp", + }; port_mappings.push(serde_json::json!({ "container_port": port.container, "host_port": port.host, - "protocol": "tcp", + "protocol": protocol, })); }