fix(vpn,reconcile): restore WG peers on boot + filebrowser spec drift
Follow-up to 8b7cb002 (no version bump — same v1.7.0-alpha manifest): * WireGuard peer persistence. Kernel peer state is ephemeral; the add-peer RPC wrote each peer to data_dir/nostr-vpn/peers/*.json but nothing re-pushed them on reboot. Result on .198: wg0 came up listening with zero peers after last night's reboot. Added vpn::restore_wg_peers() — reads the peers dir, waits up to 30s for wg0 to exist, then replays each via `archipelago-wg add-peer`. Spawned from main.rs alongside the other startup tasks. * Reconcile + filebrowser drift. scripts/container-specs.sh load_spec_filebrowser now declares SPEC_NETWORK="archy-net" (to match what first-boot-containers.sh creates) and pins the filebrowser-data volume + wget-style healthcheck so the reconciler stops reporting network drift. Without this, reconcile would kill the healthy first-boot filebrowser container and recreate it on bridge, breaking the archy-net DNS name the backend proxies to. Manifest binary sha/size refreshed: 6c178a76…3582cc, 40361912 bytes. Rebuilt ISO at image-recipe/results/archipelago-installer-unbundled-x86_64.iso (Apr 20 07:10) carries both fixes baked in. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
8b7cb0029f
commit
56d4875b35
@ -176,6 +176,14 @@ async fn main() -> Result<()> {
|
||||
// Spawn disk space monitor (warns at 85%, auto-cleans at 90%)
|
||||
disk_monitor::spawn_disk_monitor(config.data_dir.clone());
|
||||
|
||||
// Restore WireGuard peers into wg0 (kernel loses them on every reboot).
|
||||
{
|
||||
let data_dir = config.data_dir.clone();
|
||||
tokio::spawn(async move {
|
||||
vpn::restore_wg_peers(&data_dir).await;
|
||||
});
|
||||
}
|
||||
|
||||
// Spawn ElectrumX status cache (refreshes every 15s, serves cached data to avoid race conditions)
|
||||
electrs_status::spawn_status_cache();
|
||||
|
||||
|
||||
@ -708,6 +708,89 @@ pub async fn configure_wireguard(
|
||||
Ok(wg_config)
|
||||
}
|
||||
|
||||
/// Restore WireGuard peers from `data_dir/nostr-vpn/peers/*.json` into the
|
||||
/// kernel after a reboot.
|
||||
///
|
||||
/// Kernel peer state is ephemeral. The add-peer RPC persists each peer to
|
||||
/// a JSON file but only pushes it into wg0 at creation time — without this
|
||||
/// restore step, every reboot drops all peers and the user has to re-add
|
||||
/// them via QR.
|
||||
pub async fn restore_wg_peers(data_dir: &Path) {
|
||||
let peers_dir = data_dir.join("nostr-vpn/peers");
|
||||
let mut entries = match fs::read_dir(&peers_dir).await {
|
||||
Ok(e) => e,
|
||||
Err(_) => return,
|
||||
};
|
||||
|
||||
// archipelago-wg.service may race us on boot; wait up to 30s for wg0.
|
||||
for _ in 0..30 {
|
||||
let up = tokio::process::Command::new("ip")
|
||||
.args(["link", "show", "wg0"])
|
||||
.output()
|
||||
.await
|
||||
.map(|o| o.status.success())
|
||||
.unwrap_or(false);
|
||||
if up {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
}
|
||||
|
||||
let mut restored = 0usize;
|
||||
while let Ok(Some(entry)) = entries.next_entry().await {
|
||||
let path = entry.path();
|
||||
if path.extension().and_then(|s| s.to_str()) != Some("json") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let content = match fs::read_to_string(&path).await {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::warn!("VPN restore: read {:?} failed: {}", path, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let v: serde_json::Value = match serde_json::from_str(&content) {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
tracing::warn!("VPN restore: parse {:?} failed: {}", path, e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let pubkey = v.get("public_key").and_then(|s| s.as_str());
|
||||
let ip = v.get("ip").and_then(|s| s.as_str());
|
||||
let (pubkey, ip) = match (pubkey, ip) {
|
||||
(Some(p), Some(i)) => (p, i),
|
||||
_ => {
|
||||
tracing::warn!("VPN restore: {:?} missing public_key or ip", path);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let out = tokio::process::Command::new("sudo")
|
||||
.args(["archipelago-wg", "add-peer", pubkey, ip])
|
||||
.output()
|
||||
.await;
|
||||
match out {
|
||||
Ok(o) if o.status.success() => restored += 1,
|
||||
Ok(o) => tracing::warn!(
|
||||
"VPN restore: add-peer failed for {}: {}",
|
||||
pubkey,
|
||||
String::from_utf8_lossy(&o.stderr).trim()
|
||||
),
|
||||
Err(e) => tracing::warn!("VPN restore: add-peer spawn failed: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
if restored > 0 {
|
||||
tracing::info!(
|
||||
"🔐 VPN: restored {} WireGuard peer(s) from {}",
|
||||
restored,
|
||||
peers_dir.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@ -5,7 +5,9 @@
|
||||
"Fixes update.download hard-fail on nodes that have ~/archy checked out (the git-path fleet class: .228, .116). Root cause: handle_update_check's git path returned update_available=true + update_method=\"git\" but never populated state.available_update, so update.download / update.apply RPC calls errored with 'No update available to download' even though the UI advertised one.",
|
||||
"Frontend: SystemUpdate.vue now branches on update_method. When method==\"git\", renders a single 'Pull & Rebuild' action that calls update.git-apply (which runs ~/archy/scripts/self-update.sh: git pull → cargo build --release → frontend rebuild → systemctl restart archipelago). Manifest-path nodes continue to use the existing Download → Apply pipeline. Confirm modal and i18n strings (en + es) added for the git path.",
|
||||
"Forces OTA trigger for nodes already on 1.6.0-alpha (.198, .253) that otherwise saw 'I'm at manifest.version, nothing to do' and skipped the refreshed 1.6 artifacts.",
|
||||
"Container reconciler: scripts/reconcile-containers.sh no longer creates missing containers from the canonical tier spec. SPEC_OPTIONAL now defaults to true in container-specs.sh, so reconcile is strictly a REPAIR tool (fix ownership, restart crashed, recreate on drift). Containers come from exactly two sources: first-boot-containers.sh (baseline filebrowser on unbundled installs) and the package install RPC (every other app). Fixes the bug where fresh unbundled installs woke up 10 minutes after first boot with archy-mempool-db and archy-btcpay-db silently created by the reconcile timer."
|
||||
"Container reconciler: scripts/reconcile-containers.sh no longer creates missing containers from the canonical tier spec. SPEC_OPTIONAL now defaults to true in container-specs.sh, so reconcile is strictly a REPAIR tool (fix ownership, restart crashed, recreate on drift). Containers come from exactly two sources: first-boot-containers.sh (baseline filebrowser on unbundled installs) and the package install RPC (every other app). Fixes the bug where fresh unbundled installs woke up 10 minutes after first boot with archy-mempool-db and archy-btcpay-db silently created by the reconcile timer.",
|
||||
"filebrowser spec: now declares SPEC_NETWORK=archy-net (matching what first-boot-containers.sh creates) and the filebrowser-data volume. Without this the reconciler would see network drift (bridge≠archy-net) and churn the container on every cycle, dropping the archy-net DNS registration the backend uses to reach it.",
|
||||
"VPN: added vpn::restore_wg_peers() that reads data_dir/nostr-vpn/peers/*.json on startup and re-pushes each peer into the wg0 kernel state via `archipelago-wg add-peer`. Fixes the silent-drop-on-reboot bug: kernel peer state is ephemeral, the add-peer RPC only persisted to JSON, and nothing replayed them at boot. Nodes like .198 came back up after reboot with wg0 listening but zero peers."
|
||||
],
|
||||
"components": [
|
||||
{
|
||||
@ -13,8 +15,8 @@
|
||||
"current_version": "1.6.0-alpha",
|
||||
"new_version": "1.7.0-alpha",
|
||||
"download_url": "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.0-alpha/archipelago",
|
||||
"sha256": "87b945bd6d57825d95b7595409580df5d881cfdbb0f944e00c7c050ecce2a6f3",
|
||||
"size_bytes": 40300680
|
||||
"sha256": "6c178a76bf69853b00250f89e9f0c4974f0ecad9d3c10f328cb9661f5f3582cc",
|
||||
"size_bytes": 40361912
|
||||
},
|
||||
{
|
||||
"name": "archipelago-frontend-1.7.0-alpha.tar.gz",
|
||||
|
||||
Binary file not shown.
@ -453,10 +453,11 @@ load_spec_filebrowser() {
|
||||
reset_spec
|
||||
SPEC_NAME="filebrowser"
|
||||
SPEC_IMAGE="${FILEBROWSER_IMAGE}"
|
||||
SPEC_NETWORK="archy-net"
|
||||
SPEC_PORTS="8083:80"
|
||||
SPEC_VOLUMES="/var/lib/archipelago/filebrowser:/srv"
|
||||
SPEC_VOLUMES="/var/lib/archipelago/filebrowser:/srv /var/lib/archipelago/filebrowser-data:/data"
|
||||
SPEC_MEMORY="$(mem_limit filebrowser)"
|
||||
SPEC_HEALTH_CMD="curl -sf http://localhost:80/ || exit 1"
|
||||
SPEC_HEALTH_CMD="wget -q --spider http://localhost:80/health || exit 1"
|
||||
SPEC_TIER="3"
|
||||
SPEC_DATA_DIR="/var/lib/archipelago/filebrowser"
|
||||
SPEC_CAPS=""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user