feat(netbird): manifest-driven migration via reusable orchestrator primitives
Migrate the netbird stack (server/dashboard/proxy) off ~500 lines of per-app Rust to 3 declarative manifests, adding 4 reusable primitives:
- SecretGenKind::Base64 (netbird relay authSecret + sqlite store encryptionKey)
- GeneratedCert schema + ensure_manifest_certs (self-signed TLS so the dashboard gets a secure context for OIDC PKCE — issue #15; https proxy on 8087 preserved)
- templated GeneratedFile render: {{HOST_IP}} / {{HOST_MDNS}} / {{NETWORK_GATEWAY}} (aardvark resolver for the #15 stale-IP fix) / {{secret:NAME}} (never logged)
- legacy create_container now honours port.protocol (3478/udp STUN)

install_netbird_stack routes via the orchestrator first (legacy kept as fallback, mirroring indeedhub); launch URL derives https://{host_ip}:8087 from host facts. Legacy Rust deletion deferred to post-live-verify.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3c36cf1c40
commit
a8b9b0f5e8
77
apps/netbird-dashboard/manifest.yml
Normal file
77
apps/netbird-dashboard/manifest.yml
Normal file
@ -0,0 +1,77 @@
|
||||
app:
|
||||
id: netbird-dashboard
|
||||
name: NetBird Dashboard
|
||||
version: "2.38.0"
|
||||
description: NetBird management dashboard (SPA). Internal stack member served through the netbird proxy.
|
||||
category: networking
|
||||
|
||||
# Hyphen name matches runtime references + the live container (adoption).
|
||||
# Alias `netbird-dashboard` is the short hostname the proxy's nginx proxies to.
|
||||
container_name: netbird-dashboard
|
||||
|
||||
container:
|
||||
image: docker.io/netbirdio/dashboard:v2.38.0
|
||||
pull_policy: if-not-present
|
||||
network: netbird-net
|
||||
network_aliases: [netbird-dashboard]
|
||||
# The dashboard SPA bakes its API/OIDC base URL from these at container
|
||||
# start. They must point at the proxy's public HTTPS origin (8087) so the
|
||||
# browser uses a secure context (window.crypto.subtle / OIDC PKCE, #15).
|
||||
# {{HOST_IP}} is the node's primary host IP, resolved at apply time.
|
||||
derived_env:
|
||||
- key: NETBIRD_MGMT_API_ENDPOINT
|
||||
template: "https://{{HOST_IP}}:8087"
|
||||
- key: NETBIRD_MGMT_GRPC_API_ENDPOINT
|
||||
template: "https://{{HOST_IP}}:8087"
|
||||
- key: AUTH_AUTHORITY
|
||||
template: "https://{{HOST_IP}}:8087/oauth2"
|
||||
|
||||
dependencies:
|
||||
- app_id: netbird-server
|
||||
|
||||
resources:
|
||||
memory_limit: 256Mi
|
||||
|
||||
security:
|
||||
# cap-drop=ALL is applied by the orchestrator. The dashboard image runs
|
||||
# nginx (master as root, drops workers) binding :80 — needs the worker-drop
|
||||
# caps + NET_BIND_SERVICE for the privileged port.
|
||||
capabilities: [CHOWN, DAC_OVERRIDE, SETGID, SETUID, NET_BIND_SERVICE]
|
||||
readonly_root: false
|
||||
network_policy: isolated
|
||||
|
||||
# Internal only — reached container-to-container by the proxy via netbird-net.
|
||||
ports: []
|
||||
|
||||
volumes: []
|
||||
|
||||
environment:
|
||||
- AUTH_AUDIENCE=netbird-dashboard
|
||||
- AUTH_CLIENT_ID=netbird-dashboard
|
||||
- AUTH_CLIENT_SECRET=
|
||||
- USE_AUTH0=false
|
||||
- AUTH_SUPPORTED_SCOPES=openid profile email groups
|
||||
- AUTH_REDIRECT_URI=/nb-auth
|
||||
- AUTH_SILENT_REDIRECT_URI=/nb-silent-auth
|
||||
- NETBIRD_TOKEN_SOURCE=idToken
|
||||
- NGINX_SSL_PORT=443
|
||||
- LETSENCRYPT_DOMAIN=none
|
||||
|
||||
health_check:
|
||||
type: tcp
|
||||
endpoint: localhost:80
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 20s
|
||||
|
||||
metadata:
|
||||
author: NetBird
|
||||
icon: /assets/img/app-icons/netbird.svg
|
||||
website: https://netbird.io
|
||||
repo: https://github.com/netbirdio/dashboard
|
||||
license: BSD-3-Clause
|
||||
tags:
|
||||
- networking
|
||||
- vpn
|
||||
- dashboard
|
||||
122
apps/netbird-server/manifest.yml
Normal file
122
apps/netbird-server/manifest.yml
Normal file
@ -0,0 +1,122 @@
|
||||
app:
|
||||
id: netbird-server
|
||||
name: NetBird Server
|
||||
version: "0.71.2"
|
||||
description: NetBird combined management / signal / relay server with an embedded identity provider and STUN. Backend for the self-hosted NetBird mesh VPN.
|
||||
category: networking
|
||||
|
||||
# Hyphen name matches the runtime references (crash_recovery / dependencies /
|
||||
# config startup order) + the live container, so on an existing node the
|
||||
# orchestrator ADOPTS the running server rather than recreating it (data +
|
||||
# the sqlite store under /var/lib/netbird preserved). Alias `netbird-server`
|
||||
# is the short hostname the proxy's nginx proxies/grpc-passes to.
|
||||
container_name: netbird-server
|
||||
|
||||
container:
|
||||
image: docker.io/netbirdio/netbird-server:0.71.2
|
||||
pull_policy: if-not-present
|
||||
network: netbird-net
|
||||
network_aliases: [netbird-server]
|
||||
# The relay authSecret and the sqlite store encryptionKey are base64 keys
|
||||
# (the server base64-decodes them to recover raw bytes — hex would decode to
|
||||
# the wrong value). Generated once and reused: ensure_generated_secrets
|
||||
# no-ops when the file already exists, so a re-render of config.yaml on an
|
||||
# adopted node keeps the same keys (regenerating would orphan the store).
|
||||
generated_secrets:
|
||||
- name: netbird-relay-auth-secret
|
||||
kind: base64
|
||||
- name: netbird-store-encryption-key
|
||||
kind: base64
|
||||
# Pass the rendered config explicitly, mirroring the legacy `--config` arg.
|
||||
custom_args: ["--config", "/etc/netbird/config.yaml"]
|
||||
|
||||
dependencies:
|
||||
- storage: 1Gi
|
||||
|
||||
resources:
|
||||
memory_limit: 1Gi
|
||||
|
||||
security:
|
||||
# cap-drop=ALL is applied by the orchestrator. The server binds :80
|
||||
# (management/signal/relay HTTP + gRPC) inside the container — a privileged
|
||||
# port — so it needs NET_BIND_SERVICE. STUN is 3478/udp (unprivileged).
|
||||
capabilities: [NET_BIND_SERVICE]
|
||||
readonly_root: false
|
||||
network_policy: isolated
|
||||
|
||||
ports:
|
||||
- host: 8086
|
||||
container: 80
|
||||
protocol: tcp # management API + embedded OIDC issuer (/oauth2)
|
||||
- host: 3478
|
||||
container: 3478
|
||||
protocol: udp # STUN — must be UDP; tcp here breaks relay discovery
|
||||
|
||||
volumes:
|
||||
- type: bind
|
||||
source: /var/lib/archipelago/netbird/data
|
||||
target: /var/lib/netbird
|
||||
options: [rw]
|
||||
# The rendered config.yaml, read-only. Re-rendered on every reconcile from
|
||||
# host facts + the base64 secrets; idempotent (stable bytes → no restart).
|
||||
- type: bind
|
||||
source: /var/lib/archipelago/netbird/config.yaml
|
||||
target: /etc/netbird/config.yaml
|
||||
options: [ro]
|
||||
|
||||
environment: []
|
||||
|
||||
# The server's config. {{HOST_IP}} is the node's primary host IP (the proxy's
|
||||
# public origin is https on 8087 — the dashboard needs a secure context for
|
||||
# OIDC PKCE, issue #15). {{secret:...}} are read 0600 from the secrets dir.
|
||||
files:
|
||||
- path: /var/lib/archipelago/netbird/config.yaml
|
||||
overwrite: true
|
||||
content: |
|
||||
server:
|
||||
listenAddress: ":80"
|
||||
exposedAddress: "https://{{HOST_IP}}:8087"
|
||||
stunPorts:
|
||||
- 3478
|
||||
metricsPort: 9090
|
||||
healthcheckAddress: ":9000"
|
||||
logLevel: "info"
|
||||
logFile: "console"
|
||||
authSecret: "{{secret:netbird-relay-auth-secret}}"
|
||||
dataDir: "/var/lib/netbird"
|
||||
auth:
|
||||
issuer: "https://{{HOST_IP}}:8087/oauth2"
|
||||
localAuthDisabled: false
|
||||
signKeyRefreshEnabled: false
|
||||
dashboardRedirectURIs:
|
||||
- "https://{{HOST_IP}}:8087/nb-auth"
|
||||
- "https://{{HOST_IP}}:8087/nb-silent-auth"
|
||||
dashboardPostLogoutRedirectURIs:
|
||||
- "https://{{HOST_IP}}:8087/"
|
||||
cliRedirectURIs:
|
||||
- "http://localhost:53000/"
|
||||
store:
|
||||
engine: "sqlite"
|
||||
encryptionKey: "{{secret:netbird-store-encryption-key}}"
|
||||
|
||||
# TCP liveness on the management port. Binds at startup, stays green; an http
|
||||
# check of /oauth2 would false-fail while the issuer warms up.
|
||||
health_check:
|
||||
type: tcp
|
||||
endpoint: localhost:80
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
start_period: 30s
|
||||
|
||||
metadata:
|
||||
author: NetBird
|
||||
icon: /assets/img/app-icons/netbird.svg
|
||||
website: https://netbird.io
|
||||
repo: https://github.com/netbirdio/netbird
|
||||
license: BSD-3-Clause
|
||||
tags:
|
||||
- networking
|
||||
- vpn
|
||||
- wireguard
|
||||
- mesh
|
||||
182
apps/netbird/manifest.yml
Normal file
182
apps/netbird/manifest.yml
Normal file
@ -0,0 +1,182 @@
|
||||
app:
|
||||
id: netbird
|
||||
name: NetBird
|
||||
version: "2.38.0"
|
||||
description: Self-hosted WireGuard mesh VPN control plane with dashboard, embedded identity provider, management API, signal, relay, and STUN. The user-facing entry point — a TLS proxy in front of the dashboard + server.
|
||||
category: networking
|
||||
|
||||
# The user-facing launcher (app_id + container both "netbird", matching the
|
||||
# runtime references + the live container so the orchestrator adopts it). This
|
||||
# is the nginx that terminates TLS on 8087 and fans out to the dashboard +
|
||||
# server by their short aliases on netbird-net.
|
||||
container_name: netbird
|
||||
|
||||
container:
|
||||
image: docker.io/library/nginx:1.27-alpine
|
||||
pull_policy: if-not-present
|
||||
network: netbird-net
|
||||
# Self-signed TLS cert materialised before create — the dashboard needs a
|
||||
# secure context (window.crypto.subtle / OIDC PKCE, issue #15), so the proxy
|
||||
# serves HTTPS. Idempotent: kept as-is when crt+key already exist (a user
|
||||
# accepts it once). SAN defaults to the host IP + 127.0.0.1 + localhost.
|
||||
generated_certs:
|
||||
- crt: /var/lib/archipelago/netbird/tls.crt
|
||||
key: /var/lib/archipelago/netbird/tls.key
|
||||
|
||||
dependencies:
|
||||
- app_id: netbird-server
|
||||
- app_id: netbird-dashboard
|
||||
- storage: 1Gi
|
||||
|
||||
resources:
|
||||
memory_limit: 256Mi
|
||||
|
||||
security:
|
||||
# cap-drop=ALL is applied by the orchestrator. nginx (master as root, drops
|
||||
# workers) binds :443 — needs the worker-drop caps + NET_BIND_SERVICE.
|
||||
capabilities: [CHOWN, DAC_OVERRIDE, SETGID, SETUID, NET_BIND_SERVICE]
|
||||
readonly_root: false
|
||||
network_policy: isolated
|
||||
|
||||
ports:
|
||||
# 8087 publishes the TLS listener (container :443). HTTPS is required for the
|
||||
# dashboard's secure context (issue #15).
|
||||
- host: 8087
|
||||
container: 443
|
||||
protocol: tcp
|
||||
|
||||
volumes:
|
||||
- type: bind
|
||||
source: /var/lib/archipelago/netbird/nginx.conf
|
||||
target: /etc/nginx/conf.d/default.conf
|
||||
options: [ro]
|
||||
- type: bind
|
||||
source: /var/lib/archipelago/netbird/tls.crt
|
||||
target: /etc/nginx/tls.crt
|
||||
options: [ro]
|
||||
- type: bind
|
||||
source: /var/lib/archipelago/netbird/tls.key
|
||||
target: /etc/nginx/tls.key
|
||||
options: [ro]
|
||||
|
||||
environment: []
|
||||
|
||||
# The proxy config. {{NETWORK_GATEWAY}} is the netbird-net bridge gateway =
|
||||
# Podman's aardvark DNS. nginx uses it as an explicit `resolver` with VARIABLE
|
||||
# upstreams so it re-resolves container names per request — without it nginx
|
||||
# pins a container IP at startup and 502s forever once that IP moves on a
|
||||
# restart/reboot (issue #15, observed live on .198). Every #15 fix below
|
||||
# (CORS $http_origin reflect, grpc pass, nb-auth/nb-silent-auth rewrite to
|
||||
# index.html, /relay websocket) is preserved verbatim from the legacy config.
|
||||
files:
|
||||
- path: /var/lib/archipelago/netbird/nginx.conf
|
||||
overwrite: true
|
||||
content: |
|
||||
server {
|
||||
listen 443 ssl;
|
||||
server_name _;
|
||||
|
||||
# netbird's dashboard needs a secure context (window.crypto.subtle for
|
||||
# OIDC PKCE), so the proxy terminates TLS with a self-signed cert (#15).
|
||||
ssl_certificate /etc/nginx/tls.crt;
|
||||
ssl_certificate_key /etc/nginx/tls.key;
|
||||
|
||||
# Rootless Podman can hand a container a new IP across restarts/reboots.
|
||||
# nginx resolves a literal upstream name ONCE at startup and caches it,
|
||||
# so after the IP moves every request 502s with "host unreachable"
|
||||
# (issue #15, observed live on .198: nginx pinned to a dead
|
||||
# netbird-dashboard IP). Fix: point `resolver` at the netbird-net
|
||||
# gateway (Podman's aardvark DNS) and use VARIABLE upstreams, which
|
||||
# forces nginx to re-resolve the container names at request time.
|
||||
resolver {{NETWORK_GATEWAY}} valid=10s ipv6=off;
|
||||
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_http_version 1.1;
|
||||
|
||||
location ~ ^/(relay|ws-proxy/) {
|
||||
set $nb_server netbird-server;
|
||||
proxy_pass http://$nb_server:80;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
proxy_read_timeout 1d;
|
||||
}
|
||||
|
||||
location ~ ^/(api|oauth2)(/|$) {
|
||||
# The dashboard is a SPA whose API/OIDC base URL is baked at build
|
||||
# time to one host:port. A single box is reached via several
|
||||
# addresses, so those fetches are cross-origin and the browser
|
||||
# blocks them with no Access-Control-Allow-Origin (#15, live on
|
||||
# .198). Reflect the caller's Origin and answer the CORS preflight.
|
||||
if ($request_method = OPTIONS) {
|
||||
add_header Access-Control-Allow-Origin $http_origin always;
|
||||
add_header Access-Control-Allow-Credentials true always;
|
||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS" always;
|
||||
add_header Access-Control-Allow-Headers "Authorization, Content-Type, Accept" always;
|
||||
add_header Access-Control-Max-Age 86400 always;
|
||||
add_header Content-Length 0;
|
||||
return 204;
|
||||
}
|
||||
add_header Access-Control-Allow-Origin $http_origin always;
|
||||
add_header Access-Control-Allow-Credentials true always;
|
||||
add_header Access-Control-Allow-Methods "GET, POST, PUT, PATCH, DELETE, OPTIONS" always;
|
||||
add_header Access-Control-Allow-Headers "Authorization, Content-Type, Accept" always;
|
||||
set $nb_server netbird-server;
|
||||
proxy_pass http://$nb_server:80;
|
||||
}
|
||||
|
||||
location ~ ^/(signalexchange\.SignalExchange|management\.ManagementService|management\.ProxyService)/ {
|
||||
set $nb_server netbird-server;
|
||||
grpc_pass grpc://$nb_server:80;
|
||||
grpc_read_timeout 1d;
|
||||
grpc_send_timeout 1d;
|
||||
}
|
||||
|
||||
# OIDC callback routes are client-side SPA routes with NO prebuilt page
|
||||
# in the dashboard bundle, so proxying them straight through 404s —
|
||||
# which crashes the dashboard's auth init and shows "Unauthenticated"
|
||||
# with dead buttons (#15, live on .198: /nb-auth + /nb-silent-auth
|
||||
# returned 404). Serve index.html at these paths (URL unchanged) so
|
||||
# react-oidc boots and completes the login / silent-SSO.
|
||||
location ~ ^/(nb-auth|nb-silent-auth) {
|
||||
set $nb_dashboard netbird-dashboard;
|
||||
rewrite ^.*$ /index.html break;
|
||||
proxy_pass http://$nb_dashboard:80;
|
||||
}
|
||||
|
||||
location / {
|
||||
set $nb_dashboard netbird-dashboard;
|
||||
proxy_pass http://$nb_dashboard:80;
|
||||
}
|
||||
}
|
||||
|
||||
health_check:
|
||||
type: tcp
|
||||
endpoint: localhost:443
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 20s
|
||||
|
||||
interfaces:
|
||||
main:
|
||||
name: Dashboard
|
||||
description: Manage your self-hosted NetBird mesh VPN
|
||||
type: ui
|
||||
port: 8087
|
||||
protocol: https
|
||||
path: /
|
||||
|
||||
metadata:
|
||||
author: NetBird
|
||||
icon: /assets/img/app-icons/netbird.svg
|
||||
website: https://netbird.io
|
||||
repo: https://github.com/netbirdio/netbird
|
||||
license: BSD-3-Clause
|
||||
tags:
|
||||
- networking
|
||||
- vpn
|
||||
- wireguard
|
||||
- mesh
|
||||
@ -696,6 +696,16 @@ fn immich_stack_app_ids() -> &'static [&'static str] {
|
||||
&["immich-postgres", "immich-redis", "immich"]
|
||||
}
|
||||
|
||||
/// App ids of the NetBird stack, listed in dependency/startup order.
///
/// 1. `netbird-server` — the combined management/signal/relay server. It owns
///    the base64 relay/store secrets and the sqlite store, and is the OIDC
///    issuer the other members point at.
/// 2. `netbird-dashboard` — the dashboard SPA.
/// 3. `netbird` — the user-facing TLS proxy and launcher, which carries the
///    self-signed cert and the templated nginx.conf.
///
/// Mirrors the netbird startup_order in dependencies.rs.
fn netbird_stack_app_ids() -> &'static [&'static str] {
    static STARTUP_ORDER: [&str; 3] = ["netbird-server", "netbird-dashboard", "netbird"];
    &STARTUP_ORDER
}
|
||||
|
||||
fn indeedhub_stack_app_ids() -> &'static [&'static str] {
|
||||
// Dependency order: backends + their generated secrets first, then the api
|
||||
// (owns indeedhub-jwt; reads the db/minio secrets the backends materialised),
|
||||
@ -1828,6 +1838,23 @@ impl RpcHandler {
|
||||
|
||||
/// Install the self-hosted NetBird stack (TLS proxy + dashboard + combined management/signal/relay server).
|
||||
pub(super) async fn install_netbird_stack(&self) -> Result<serde_json::Value> {
|
||||
// Manifest-driven path (#20 phase 4): render the 3-member stack from
|
||||
// apps/netbird-*/manifest.yml via the orchestrator — dedicated
|
||||
// netbird-net + network_aliases, base64 generated_secrets, a self-signed
|
||||
// TLS cert (generated_certs) so the dashboard gets a secure context for
|
||||
// OIDC PKCE (#15), and templated config.yaml/nginx.conf rendered from
|
||||
// host facts + the netbird-net gateway. The manifests use the exact live
|
||||
// container names, so on an existing node this ADOPTS the running stack
|
||||
// rather than recreating it (the sqlite store + base64 keys are
|
||||
// preserved — ensure_generated_secrets no-ops on existing files). Falls
|
||||
// back to the legacy installer below only when the orchestrator doesn't
|
||||
// know these app_ids (manifests not yet deployed to the node).
|
||||
if let Some(orchestrated) =
|
||||
install_stack_via_orchestrator(self, "netbird", netbird_stack_app_ids()).await?
|
||||
{
|
||||
return Ok(orchestrated);
|
||||
}
|
||||
|
||||
if let Some(adopted) = adopt_stack_if_exists(
|
||||
"netbird",
|
||||
"netbird",
|
||||
|
||||
@ -691,16 +691,37 @@ fn extract_lan_address(ports: &[String]) -> Option<String> {
|
||||
None
|
||||
}
|
||||
|
||||
/// netbird's dashboard launch URL: HTTPS on 8087 (the proxy terminates TLS —
|
||||
/// the dashboard needs a secure context for OIDC PKCE, issue #15) at the node's
|
||||
/// primary host IP so it's reachable from the LAN. Manifest-driven netbird no
|
||||
/// longer writes `dashboard.env`, so this is derived from host facts (the same
|
||||
/// `{{HOST_IP}}` the orchestrator bakes into the cert/config); it falls back to
|
||||
/// the static localhost mapping when the host IP can't be read. URL shape is
|
||||
/// identical to the legacy installer's, so the existing https reachability
|
||||
/// wrapper still applies.
|
||||
async fn netbird_configured_launch_url() -> Option<String> {
|
||||
let env = tokio::fs::read_to_string("/var/lib/archipelago/netbird/dashboard.env")
|
||||
if let Some(ip) = first_host_ip().await {
|
||||
return Some(format!("https://{ip}:8087"));
|
||||
}
|
||||
PodmanClient::lan_address_for("netbird")
|
||||
}
|
||||
|
||||
/// First address from `hostname -I` — the node's primary host IP. Mirrors the
|
||||
/// orchestrator's `detect_host_ip` so launch URLs match the cert/config the
|
||||
/// orchestrator renders for `{{HOST_IP}}`.
|
||||
async fn first_host_ip() -> Option<String> {
|
||||
let out = tokio::process::Command::new("hostname")
|
||||
.arg("-I")
|
||||
.output()
|
||||
.await
|
||||
.ok()?;
|
||||
env.lines()
|
||||
.find_map(|line| line.strip_prefix("NETBIRD_MGMT_API_ENDPOINT="))
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
if !out.status.success() {
|
||||
return None;
|
||||
}
|
||||
String::from_utf8_lossy(&out.stdout)
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.map(ToOwned::to_owned)
|
||||
.or_else(|| PodmanClient::lan_address_for("netbird"))
|
||||
}
|
||||
|
||||
async fn reachable_lan_address(app_id: &str, candidate: Option<String>) -> Option<String> {
|
||||
|
||||
@ -26,7 +26,7 @@
|
||||
use anyhow::{Context, Result};
|
||||
use archipelago_container::{
|
||||
AppManifest, ContainerRuntime as ContainerRuntimeTrait, ContainerState, ContainerStatus,
|
||||
Dependency, GeneratedFile, HostFacts, ManifestError, ResolvedSource, SecretsProvider,
|
||||
Dependency, HostFacts, ManifestError, ResolvedSource, SecretsProvider,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
@ -1809,6 +1809,9 @@ impl ProdContainerOrchestrator {
|
||||
self.run_pre_start_hooks(&manifest.app.id).await?;
|
||||
self.ensure_bind_mount_sockets(manifest).await?;
|
||||
self.ensure_bind_mount_dirs(manifest).await?;
|
||||
// Certs before files: a templated file may not need the cert, but the
|
||||
// container's bind-mounts expect both present before create_container.
|
||||
self.ensure_manifest_certs(manifest).await?;
|
||||
self.ensure_manifest_files(manifest).await?;
|
||||
self.apply_data_uid(manifest).await?;
|
||||
self.run_post_data_uid_hooks(&manifest.app.id).await?;
|
||||
@ -2750,7 +2753,14 @@ impl ProdContainerOrchestrator {
|
||||
async fn ensure_manifest_files(&self, manifest: &AppManifest) -> Result<HookOutcome> {
|
||||
let mut outcome = HookOutcome::Unchanged;
|
||||
for file in &manifest.app.files {
|
||||
if ensure_generated_file(file)
|
||||
// Render templated placeholders before comparing/writing so the
|
||||
// idempotency check is against the FINAL bytes (not the template),
|
||||
// otherwise a rendered file would be rewritten every reconcile.
|
||||
let rendered = self
|
||||
.render_file_placeholders(manifest, &file.content)
|
||||
.await
|
||||
.with_context(|| format!("rendering manifest file {}", file.path))?;
|
||||
if ensure_rendered_file(&file.path, &rendered, file.overwrite)
|
||||
.await
|
||||
.with_context(|| format!("ensure manifest file {}", file.path))?
|
||||
== HookOutcome::Rewritten
|
||||
@ -2760,23 +2770,185 @@ impl ProdContainerOrchestrator {
|
||||
}
|
||||
Ok(outcome)
|
||||
}
|
||||
|
||||
/// Substitute the allow-listed placeholders a manifest `GeneratedFile` may
|
||||
/// carry. Keeps runtime-derived config (netbird's `config.yaml`/`nginx.conf`)
|
||||
/// declarative instead of generated by per-app Rust:
|
||||
/// - `{{HOST_IP}}` / `{{HOST_MDNS}}` — host facts (`hostname -I` / `.local`).
|
||||
/// - `{{NETWORK_GATEWAY}}` — the gateway of the app's podman network, i.e.
|
||||
/// aardvark's DNS address. nginx uses it as an explicit `resolver` so it
|
||||
/// re-resolves container names per request instead of pinning a stale IP
|
||||
/// and 502-ing after a restart/reboot (issue #15). The network is ensured
|
||||
/// to exist first so the gateway is readable on a fresh install (this runs
|
||||
/// before `install_fresh`'s own `ensure_container_network`; both idempotent).
|
||||
/// - `{{secret:NAME}}` — a `0600` secret read from the service-owned secrets
|
||||
/// dir (e.g. netbird's base64 relay/store keys). NEVER logged.
|
||||
async fn render_file_placeholders(
|
||||
&self,
|
||||
manifest: &AppManifest,
|
||||
content: &str,
|
||||
) -> Result<String> {
|
||||
let mut out = content.to_string();
|
||||
if out.contains("{{HOST_IP}}") || out.contains("{{HOST_MDNS}}") {
|
||||
let facts = self.detect_host_facts();
|
||||
out = out
|
||||
.replace("{{HOST_IP}}", &facts.host_ip)
|
||||
.replace("{{HOST_MDNS}}", &facts.host_mdns);
|
||||
}
|
||||
if out.contains("{{NETWORK_GATEWAY}}") {
|
||||
self.ensure_container_network(manifest).await?;
|
||||
let gw = self.network_gateway(manifest).await?;
|
||||
out = out.replace("{{NETWORK_GATEWAY}}", &gw);
|
||||
}
|
||||
out = self.render_secret_placeholders(&out).await?;
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
async fn ensure_generated_file(file: &GeneratedFile) -> Result<HookOutcome> {
|
||||
let path = Path::new(&file.path);
|
||||
if let Ok(existing) = tokio::fs::read_to_string(path).await {
|
||||
if existing == file.content || !file.overwrite {
|
||||
return Ok(HookOutcome::Unchanged);
|
||||
/// Replace every `{{secret:NAME}}` with the trimmed contents of
|
||||
/// `<secrets_dir>/NAME`. `NAME` must be a bare filename (the same safety bar
|
||||
/// as `secret_env`). The secret value is never placed in an error or log.
|
||||
async fn render_secret_placeholders(&self, content: &str) -> Result<String> {
|
||||
const OPEN: &str = "{{secret:";
|
||||
let mut out = String::with_capacity(content.len());
|
||||
let mut rest = content;
|
||||
while let Some(start) = rest.find(OPEN) {
|
||||
out.push_str(&rest[..start]);
|
||||
let after = &rest[start + OPEN.len()..];
|
||||
let end = after
|
||||
.find("}}")
|
||||
.ok_or_else(|| anyhow::anyhow!("unterminated {{secret:...}} placeholder"))?;
|
||||
let name = &after[..end];
|
||||
if name.is_empty() || name.contains('/') || name.contains("..") {
|
||||
anyhow::bail!("invalid secret placeholder name '{name}' (must be a bare filename)");
|
||||
}
|
||||
} else if path.exists() && !file.overwrite {
|
||||
return Ok(HookOutcome::Unchanged);
|
||||
let value = tokio::fs::read_to_string(self.secrets_dir.join(name))
|
||||
.await
|
||||
.map_err(|_| {
|
||||
// Do not surface the path-with-value or io detail beyond the name.
|
||||
anyhow::anyhow!("secret '{name}' referenced by a manifest file is missing")
|
||||
})?;
|
||||
out.push_str(value.trim());
|
||||
rest = &after[end + 2..];
|
||||
}
|
||||
out.push_str(rest);
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
let parent = path
|
||||
.parent()
|
||||
.ok_or_else(|| anyhow::anyhow!("generated file path has no parent: {}", file.path))?;
|
||||
/// The gateway IP of the app's podman network — aardvark's DNS resolver
|
||||
/// address. Mirrors the legacy `netbird_net_resolver_ip`; falls back to
|
||||
/// podman's usual first-pool gateway if the inspect can't be parsed (the
|
||||
/// network was just ensured to exist, so this is a belt-and-braces default).
|
||||
async fn network_gateway(&self, manifest: &AppManifest) -> Result<String> {
|
||||
let network = manifest
|
||||
.app
|
||||
.container
|
||||
.network
|
||||
.as_deref()
|
||||
.filter(|n| !n.is_empty() && !is_builtin_network_mode(n))
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!("{{NETWORK_GATEWAY}} used but app has no dedicated network")
|
||||
})?;
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args([
|
||||
"network",
|
||||
"inspect",
|
||||
network,
|
||||
"--format",
|
||||
"{{range .Subnets}}{{.Gateway}}{{end}}",
|
||||
])
|
||||
.output()
|
||||
.await
|
||||
.with_context(|| format!("inspecting podman network {network} for gateway"))?;
|
||||
let gw = String::from_utf8_lossy(&out.stdout).trim().to_string();
|
||||
if !gw.is_empty() && gw.parse::<std::net::IpAddr>().is_ok() {
|
||||
return Ok(gw);
|
||||
}
|
||||
tracing::warn!(
|
||||
network,
|
||||
"could not read network gateway; falling back to 10.89.0.1"
|
||||
);
|
||||
Ok("10.89.0.1".to_string())
|
||||
}
|
||||
|
||||
/// Materialise manifest-declared self-signed TLS certs before the container
|
||||
/// is created (so a bind-mounted cert path resolves to a real file). Skips an
|
||||
/// entry whose crt+key already exist (idempotent / data-preserving). CN and
|
||||
/// SAN templates are rendered against host facts; when omitted they default
|
||||
/// to the node's host IP plus `127.0.0.1`/`localhost` so the cert is valid
|
||||
/// however the box is reached locally. Mirrors the legacy
|
||||
/// `ensure_netbird_tls_cert` (rsa:2048, 10-year, no per-app Rust).
|
||||
async fn ensure_manifest_certs(&self, manifest: &AppManifest) -> Result<()> {
|
||||
let facts = self.detect_host_facts();
|
||||
let render = |s: &str| {
|
||||
s.replace("{{HOST_IP}}", &facts.host_ip)
|
||||
.replace("{{HOST_MDNS}}", &facts.host_mdns)
|
||||
};
|
||||
for cert in &manifest.app.container.generated_certs {
|
||||
if tokio::fs::metadata(&cert.crt).await.is_ok()
|
||||
&& tokio::fs::metadata(&cert.key).await.is_ok()
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if let Some(parent) = Path::new(&cert.crt).parent() {
|
||||
create_dir_all_or_sudo(parent).await?;
|
||||
write_generated_file_atomically(path, &file.content).await?;
|
||||
}
|
||||
if let Some(parent) = Path::new(&cert.key).parent() {
|
||||
create_dir_all_or_sudo(parent).await?;
|
||||
}
|
||||
let cn = render(cert.common_name.as_deref().unwrap_or("{{HOST_IP}}"));
|
||||
let san = if cert.sans.is_empty() {
|
||||
format!("IP:{},IP:127.0.0.1,DNS:localhost", facts.host_ip)
|
||||
} else {
|
||||
cert.sans
|
||||
.iter()
|
||||
.map(|s| render(s))
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
};
|
||||
let status = tokio::process::Command::new("openssl")
|
||||
.args([
|
||||
"req",
|
||||
"-x509",
|
||||
"-newkey",
|
||||
"rsa:2048",
|
||||
"-nodes",
|
||||
"-keyout",
|
||||
&cert.key,
|
||||
"-out",
|
||||
&cert.crt,
|
||||
"-days",
|
||||
"3650",
|
||||
"-subj",
|
||||
&format!("/CN={cn}"),
|
||||
"-addext",
|
||||
&format!("subjectAltName={san}"),
|
||||
])
|
||||
.status()
|
||||
.await
|
||||
.with_context(|| format!("running openssl for manifest cert {}", cert.crt))?;
|
||||
if !status.success() {
|
||||
anyhow::bail!("openssl failed to generate manifest cert {}", cert.crt);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
async fn ensure_rendered_file(path: &str, content: &str, overwrite: bool) -> Result<HookOutcome> {
|
||||
let p = Path::new(path);
|
||||
if let Ok(existing) = tokio::fs::read_to_string(p).await {
|
||||
if existing == content || !overwrite {
|
||||
return Ok(HookOutcome::Unchanged);
|
||||
}
|
||||
} else if p.exists() && !overwrite {
|
||||
return Ok(HookOutcome::Unchanged);
|
||||
}
|
||||
|
||||
let parent = p
|
||||
.parent()
|
||||
.ok_or_else(|| anyhow::anyhow!("generated file path has no parent: {}", path))?;
|
||||
create_dir_all_or_sudo(parent).await?;
|
||||
write_generated_file_atomically(p, content).await?;
|
||||
Ok(HookOutcome::Rewritten)
|
||||
}
|
||||
|
||||
|
||||
@ -66,6 +66,7 @@ fn ensure_one(dir: &Path, gs: &GeneratedSecret) -> Result<()> {
|
||||
match gs.kind {
|
||||
SecretGenKind::Hex16 => write_secret(&dir.join(&gs.name), &random_hex(16))?,
|
||||
SecretGenKind::Hex32 => write_secret(&dir.join(&gs.name), &random_hex(32))?,
|
||||
SecretGenKind::Base64 => write_secret(&dir.join(&gs.name), &random_base64(32))?,
|
||||
SecretGenKind::Bcrypt => {
|
||||
let password = random_hex(BCRYPT_PASSWORD_BYTES);
|
||||
let hash = bcrypt::hash(&password, bcrypt::DEFAULT_COST)
|
||||
@ -92,6 +93,15 @@ fn random_hex(bytes: usize) -> String {
|
||||
hex::encode(buf)
|
||||
}
|
||||
|
||||
/// `bytes` of entropy, standard base64 (with padding). For keys that a service
|
||||
/// base64-decodes to recover the raw bytes (e.g. netbird's store encryptionKey).
|
||||
fn random_base64(bytes: usize) -> String {
|
||||
use base64::Engine as _;
|
||||
let mut buf = vec![0u8; bytes];
|
||||
rand::thread_rng().fill_bytes(&mut buf);
|
||||
base64::engine::general_purpose::STANDARD.encode(buf)
|
||||
}
|
||||
|
||||
/// Atomically write a `0600` secret: a temp file in the same dir (so the rename
|
||||
/// is atomic), fsynced, then renamed over the target.
|
||||
fn write_secret(path: &Path, value: &str) -> Result<()> {
|
||||
|
||||
@ -8,8 +8,9 @@ pub mod runtime;
|
||||
pub use bitcoin_simulator::{BitcoinSimulationMode, BitcoinSimulator};
|
||||
pub use health_monitor::HealthMonitor;
|
||||
pub use manifest::{
|
||||
AppInterface, AppManifest, BuildConfig, ContainerConfig, Dependency, DerivedEnv, GeneratedFile,
|
||||
GeneratedSecret, HealthCheck, HookStep, HostCopy, HostFacts, LifecycleHooks, ManifestError,
|
||||
AppInterface, AppManifest, BuildConfig, ContainerConfig, Dependency, DerivedEnv, GeneratedCert,
|
||||
GeneratedFile, GeneratedSecret, HealthCheck, HookStep, HostCopy, HostFacts, LifecycleHooks,
|
||||
ManifestError,
|
||||
ResolvedSource, ResourceLimits, SecretEnv, SecretGenKind, SecretsProvider, SecurityPolicy,
|
||||
Volume,
|
||||
};
|
||||
|
||||
@ -223,6 +223,19 @@ pub struct ContainerConfig {
|
||||
#[serde(default)]
|
||||
pub generated_secrets: Vec<GeneratedSecret>,
|
||||
|
||||
/// Self-signed TLS certificates the orchestrator materialises before the
|
||||
/// container is created (so a bind-mounted cert path resolves to a real
|
||||
/// file, not a stale/missing path). Like `generated_secrets`, this keeps an
|
||||
/// app data-driven: a service that needs a secure context (e.g. netbird's
|
||||
/// dashboard — OIDC PKCE / `window.crypto.subtle` only works over HTTPS,
|
||||
/// issue #15) declares the cert here instead of relying on per-app Rust.
|
||||
/// Idempotent: an entry whose `crt` and `key` already exist is left
|
||||
/// untouched. SAN/CN templates are rendered against host facts at apply time.
|
||||
///
|
||||
/// Example: `- { crt: /var/lib/archipelago/netbird/tls.crt, key: /var/lib/archipelago/netbird/tls.key }`
|
||||
#[serde(default)]
|
||||
pub generated_certs: Vec<GeneratedCert>,
|
||||
|
||||
/// Rootless-mapped UID:GID applied to the container's data directory
|
||||
/// (the `bind`-mounted host path with `target` inside the container's
|
||||
/// data root) before creation. Mirrors `SPEC_DATA_UID`.
|
||||
@ -261,6 +274,11 @@ pub enum SecretGenKind {
|
||||
Hex16,
|
||||
/// 32 random bytes, lowercase hex (64 chars). Longer keys/cookies.
|
||||
Hex32,
|
||||
/// 32 random bytes, standard base64 (44 chars incl. padding). For services
|
||||
/// that require a base64-encoded key rather than hex — e.g. netbird's relay
|
||||
/// `authSecret` and the SQLite store `encryptionKey`, which base64-decode
|
||||
/// their configured value (hex would decode to the wrong bytes).
|
||||
Base64,
|
||||
/// A random password and its bcrypt hash. `<name>` holds the bcrypt hash
|
||||
/// (what a server is configured with); the plaintext is stored alongside as
|
||||
/// `<name>.pw` for any client that must authenticate. `secret_env` injects
|
||||
@ -282,12 +300,31 @@ impl GeneratedSecret {
|
||||
/// (primary first). A consumer references one of these via `secret_env`.
|
||||
pub fn target_files(&self) -> Vec<String> {
|
||||
match self.kind {
|
||||
SecretGenKind::Hex16 | SecretGenKind::Hex32 => vec![self.name.clone()],
|
||||
SecretGenKind::Hex16 | SecretGenKind::Hex32 | SecretGenKind::Base64 => {
|
||||
vec![self.name.clone()]
|
||||
}
|
||||
SecretGenKind::Bcrypt => vec![self.name.clone(), format!("{}.pw", self.name)],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A self-signed TLS certificate materialised by the orchestrator. See
|
||||
/// [`ContainerConfig::generated_certs`]. `crt`/`key` are absolute host paths
|
||||
/// (typically under `/var/lib/archipelago/<app>/`) that the container
|
||||
/// bind-mounts read-only. `common_name` and `sans` are rendered against host
|
||||
/// facts (`{{HOST_IP}}`) at apply time; when omitted they default to the
|
||||
/// node's host IP plus `IP:127.0.0.1,DNS:localhost` so the cert is valid for
|
||||
/// however the box is reached locally.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct GeneratedCert {
|
||||
pub crt: String,
|
||||
pub key: String,
|
||||
#[serde(default)]
|
||||
pub common_name: Option<String>,
|
||||
#[serde(default)]
|
||||
pub sans: Vec<String>,
|
||||
}
|
||||
|
||||
/// Returns the fallback image pull policy string, `"if-not-present"`.
fn default_pull_policy() -> String {
    String::from("if-not-present")
}
|
||||
@ -665,6 +702,18 @@ impl AppManifest {
|
||||
}
|
||||
}
|
||||
|
||||
// generated_certs: crt/key must be non-empty absolute paths with no
|
||||
// traversal (they become bind-mount sources, same safety bar as files).
|
||||
for (i, c) in self.app.container.generated_certs.iter().enumerate() {
|
||||
for (field, val) in [("crt", &c.crt), ("key", &c.key)] {
|
||||
if val.is_empty() || !val.starts_with('/') || val.contains("..") {
|
||||
return Err(ManifestError::Invalid(format!(
|
||||
"container.generated_certs[{i}].{field} must be an absolute path with no '..', got '{val}'"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// data_uid: if set, must look like "NNNNN:NNNNN".
|
||||
if let Some(u) = &self.app.container.data_uid {
|
||||
let parts: Vec<&str> = u.split(':').collect();
|
||||
@ -1711,6 +1760,7 @@ app:
|
||||
],
|
||||
secret_env: vec![],
|
||||
generated_secrets: vec![],
|
||||
generated_certs: vec![],
|
||||
data_uid: None,
|
||||
};
|
||||
let facts = HostFacts {
|
||||
@ -1762,6 +1812,7 @@ app:
|
||||
},
|
||||
],
|
||||
generated_secrets: vec![],
|
||||
generated_certs: vec![],
|
||||
data_uid: None,
|
||||
};
|
||||
let p = MapSecretsProvider {
|
||||
@ -1799,6 +1850,7 @@ app:
|
||||
secret_file: "bitcoin-rpc-password".to_string(),
|
||||
}],
|
||||
generated_secrets: vec![],
|
||||
generated_certs: vec![],
|
||||
data_uid: None,
|
||||
};
|
||||
let p = MapSecretsProvider {
|
||||
|
||||
@ -124,7 +124,9 @@ impl PodmanClient {
|
||||
"nginx-proxy-manager" => "http://localhost:8081",
|
||||
"fedimint-gateway" => "http://localhost:8176",
|
||||
"endurain" => "http://localhost:8080",
|
||||
"netbird" => "http://localhost:8087",
|
||||
// HTTPS: netbird's dashboard needs a secure context for OIDC PKCE
|
||||
// (window.crypto.subtle), so the proxy serves TLS on 8087 (issue #15).
|
||||
"netbird" => "https://localhost:8087",
|
||||
"electrs" | "archy-electrs-ui" => "http://localhost:50002",
|
||||
_ => return None,
|
||||
};
|
||||
@ -275,10 +277,18 @@ impl PodmanClient {
|
||||
// Build the container spec for the API
|
||||
let mut port_mappings = Vec::new();
|
||||
for port in &manifest.app.ports {
|
||||
// Honour the manifest's protocol (default tcp). netbird's STUN port
|
||||
// is 3478/udp; forcing tcp here would publish the wrong protocol and
|
||||
// silently break relay discovery.
|
||||
let protocol = match port.protocol.to_ascii_lowercase().as_str() {
|
||||
"udp" => "udp",
|
||||
"sctp" => "sctp",
|
||||
_ => "tcp",
|
||||
};
|
||||
port_mappings.push(serde_json::json!({
|
||||
"container_port": port.container,
|
||||
"host_port": port.host,
|
||||
"protocol": "tcp",
|
||||
"protocol": protocol,
|
||||
}));
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user