Dorian 84943aaa04 feat(server): lazy-bind FIPS peer listener so fips.install doesn't
need an archipelago restart

Previously the server checked `fips0` once at startup; if the
interface wasn't up (pre-onboarding, or post-onboarding before the
user clicked Activate FIPS), the peer listener never bound and stayed
unreachable until the next archipelago restart.

Replaced with a `peer_late_bind_loop` background task: polls every
30s for an fd00::/8 address on `fips0` and binds the listener the
moment one appears. First tick fires immediately so the hot path —
fips0 already up at startup — is still zero-cost. Cancellation
cascades through the same `tokio::sync::watch` channel the main
listener uses.

Side effects:
- main.rs no longer computes peer_addr eagerly; dropped the unused
  param from serve_with_shutdown.
- FipsTransport::is_available already caches the service probe so
  the 30s poll doesn't thrash systemctl.

Covers task #21. Unblocks the first-boot + onboarding flow for
fresh ISO installs on .253.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 04:21:20 -04:00

225 lines
7.5 KiB
Rust

// Archipelago Bitcoin Node OS - Native Backend
// Pure Archipelago implementation, no StartOS dependencies
// Crate-level clippy allowances. These are stylistic lints that fire on
// large legacy surfaces and offer no correctness benefit to chase on every
// PR — suppressing them crate-wide keeps CI gating on correctness issues
// without drowning in cleanup noise every time a new toolchain tightens.
#![allow(
clippy::too_many_arguments,
clippy::doc_lazy_continuation,
clippy::type_complexity,
clippy::enum_variant_names,
clippy::wildcard_in_or_patterns,
clippy::assertions_on_constants,
clippy::drop_non_drop,
clippy::unused_io_amount,
clippy::ptr_arg
)]
use anyhow::{Context, Result};
use std::net::SocketAddr;
use tokio::signal;
use tracing::info;
mod api;
mod auth;
mod backup;
mod bitcoin_rpc;
mod blobs;
mod config;
mod constants;
mod container;
mod content_server;
mod crash_recovery;
mod credentials;
mod data_model;
mod disk_monitor;
mod electrs_status;
mod federation;
mod fips;
mod health_monitor;
mod identity;
mod identity_manager;
mod marketplace;
mod mesh;
mod monitoring;
mod names;
mod network;
mod node_message;
mod nostr_discovery;
mod nostr_handshake;
mod nostr_relays;
mod peers;
mod port_allocator;
mod rate_limit;
pub mod seed;
mod server;
mod session;
mod settings;
mod state;
mod streaming;
mod totp;
mod transport;
mod update;
mod vpn;
mod wallet;
mod webhooks;
use config::Config;
use server::Server;
#[tokio::main]
async fn main() -> Result<()> {
let startup_start = std::time::Instant::now();
crash_recovery::init_start_time();
// Initialize tracing
tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| "archipelago=debug,info".into()),
)
.init();
info!("Starting Archipelago Bitcoin Node OS");
// Load configuration
let config = Config::load().await?;
info!("📁 Data directory: {}", config.data_dir.display());
// Load user transport preferences so peer-to-peer call sites can
// consult them from any module without threading a handle through
// deep async chains. Missing/corrupt file → default (Auto everywhere).
if let Err(e) = settings::transport::init(&config.data_dir).await {
tracing::warn!(
"Failed to initialise transport preferences: {} — using defaults",
e
);
}
// Write PID marker early so we can detect crashes on next startup
crash_recovery::write_pid_marker(&config.data_dir).await?;
// Crash recovery runs in background so health endpoint is available immediately
{
let data_dir = config.data_dir.clone();
tokio::spawn(async move {
// Check if previous instance shut down cleanly
match crash_recovery::check_for_crash(&data_dir).await {
Ok(Some(containers)) => {
info!(
"🔧 Recovering {} containers from previous crash...",
containers.len()
);
let report = crash_recovery::recover_containers(&containers).await;
info!(
"🔧 Recovery complete: {}/{} containers restarted (failed: {:?})",
report.recovered, report.total, report.failed
);
}
Ok(None) => {}
Err(e) => {
tracing::warn!("Crash recovery check failed: {}", e);
}
}
// Start any stopped containers (handles clean reboot)
// Skips user-stopped containers, uses tier ordering
let boot_report = crash_recovery::start_stopped_containers(&data_dir).await;
if boot_report.total > 0 {
info!(
"🔄 Boot startup: {}/{} containers started (failed: {:?})",
boot_report.recovered, boot_report.total, boot_report.failed
);
}
// Signal to health monitor that boot recovery is done
crash_recovery::mark_recovery_complete();
// Boot reconciliation disabled — the reconciler creates ALL containers
// from specs, which is wrong on unbundled installs where only user-chosen
// apps should exist. The health monitor handles restarting existing
// containers. Run reconcile-containers.sh manually when needed.
// crash_recovery::run_boot_reconciliation().await;
});
}
// Ensure a default user exists so login works after install/onboarding.
// In production, the default password is "password123" (shown during install).
// In dev mode, the dev default password is used.
// Don't auto-create default user — let onboarding flow handle password setup
// via auth.setup RPC. The Login page detects is_setup=false and shows
// "Create Password" form instead of login form.
// Create server
let server = Server::new(config.clone()).await?;
// Start server
let addr: SocketAddr = format!("{}:{}", config.bind_host, config.bind_port)
.parse()
.context("Invalid bind address")?;
// The FIPS peer listener is bound lazily by server::serve_with_shutdown
// on a 30s poll of fips0 — so a post-onboarding fips.install brings it
// online without needing an archipelago restart.
// Spawn background update scheduler
let update_data_dir = config.data_dir.clone();
tokio::spawn(async move {
update::run_update_scheduler(update_data_dir).await;
});
// Spawn periodic container snapshot (for crash recovery)
crash_recovery::spawn_snapshot_task(config.data_dir.clone());
// Spawn disk space monitor (warns at 85%, auto-cleans at 90%)
disk_monitor::spawn_disk_monitor(config.data_dir.clone());
// Spawn ElectrumX status cache (refreshes every 15s, serves cached data to avoid race conditions)
electrs_status::spawn_status_cache();
let startup_ms = startup_start.elapsed().as_millis();
info!(
"Server listening on http://{} (startup: {}ms)",
addr, startup_ms
);
info!("RPC API: http://{}/rpc/v1", addr);
info!("WebSocket: ws://{}/ws", addr);
// Notify systemd that we're ready (Type=notify)
// Note: first param `false` keeps NOTIFY_SOCKET so watchdog pings work
let _ = sd_notify::notify(false, &[sd_notify::NotifyState::Ready]);
// Spawn systemd watchdog ping (WatchdogSec=300, ping every 120s)
tokio::spawn(async {
let mut interval = tokio::time::interval(std::time::Duration::from_secs(120));
loop {
interval.tick().await;
let _ = sd_notify::notify(false, &[sd_notify::NotifyState::Watchdog]);
}
});
// Graceful shutdown: wait for SIGTERM or SIGINT
let mut sigterm = signal::unix::signal(signal::unix::SignalKind::terminate())
.context("Failed to register SIGTERM handler")?;
let shutdown = async move {
tokio::select! {
_ = signal::ctrl_c() => {
info!("Received SIGINT (Ctrl+C), initiating graceful shutdown...");
}
_ = sigterm.recv() => {
info!("Received SIGTERM, initiating graceful shutdown...");
}
}
};
server.serve_with_shutdown(addr, shutdown).await?;
// Clean shutdown: remove PID marker so next startup doesn't trigger recovery
crash_recovery::remove_pid_marker(&config.data_dir).await;
info!("Archipelago shut down cleanly");
Ok(())
}