diff --git a/core/archipelago/src/fips/dial.rs b/core/archipelago/src/fips/dial.rs
index d48503d8..3749e457 100644
--- a/core/archipelago/src/fips/dial.rs
+++ b/core/archipelago/src/fips/dial.rs
@@ -93,17 +93,74 @@ pub async fn peer_base_url(npub: &str) -> Result<String> {
     Ok(format!("http://[{}]:{}", ip, PEER_PORT))
 }
 
-/// Build an HTTP client tuned for FIPS peer-to-peer dialing. No proxy,
-/// short timeout — fall back to Tor on failure.
+/// Build an HTTP client tuned for FIPS peer-to-peer dialing. No proxy.
+/// `connect_timeout` is generous enough to let NAT hole-punching complete on
+/// the first dial (FIPS is UDP hole-punched; the path often isn't established
+/// until the first packets flow), so a reachable-but-cold peer isn't abandoned
+/// to Tor prematurely. Reliability over latency — FIPS is the preferred path.
 pub fn client() -> reqwest::Client {
     reqwest::Client::builder()
         .timeout(Duration::from_secs(20))
-        .connect_timeout(Duration::from_secs(5))
+        .connect_timeout(Duration::from_secs(8))
         .user_agent("archipelago-fips/1")
         .build()
         .expect("static reqwest client config")
 }
 
+/// Send a FIPS request with ONE retry on a connect-phase error.
+///
+/// The first dial to a peer typically triggers NAT hole-punching and can time
+/// out before the overlay path is established; a quick retry then lands on the
+/// now-warm path. Without this, a single cold-path failure drops the call to
+/// Tor even though the peer is FIPS-reachable — the main reason FIPS "isn't
+/// robust".
+///
+/// Only connect-phase errors are retried (reqwest reports an expired
+/// `connect_timeout` as a connect error too): nothing reached the peer, so
+/// replaying the request is safe even for a POST. A timeout after the
+/// connection was established is NOT retried — the peer may already have
+/// processed the request, and re-sending could duplicate a non-idempotent
+/// POST while delaying the Tor fallback by another full request timeout. A
+/// real HTTP response, including 4xx/5xx, is returned as-is for the caller
+/// to interpret.
+async fn send_with_retry(rb: reqwest::RequestBuilder) -> Result<reqwest::Response, reqwest::Error> {
+    // `try_clone` returns `None` for streaming bodies, which cannot be replayed;
+    // in that case the first error is final.
+    let retry = rb.try_clone();
+    let first_err = match rb.send().await {
+        Ok(resp) => return Ok(resp),
+        Err(e) => e,
+    };
+    match retry {
+        Some(again) if first_err.is_connect() => {
+            // Brief pause so the hole-punch packets from the first attempt can
+            // traverse before we re-dial onto the warmed path.
+            tokio::time::sleep(Duration::from_millis(600)).await;
+            again.send().await
+        }
+        _ => Err(first_err),
+    }
+}
+
+/// Proactively warm the hole-punched FIPS path to a peer: resolve its overlay
+/// address and open a short connection to its peer listener. Hole-punched
+/// paths and NAT mappings go cold after ~30-60s of no traffic, after which the
+/// next real dial pays the full re-punch cost and often falls back to Tor.
+/// Keeping the path warm is what makes FIPS the transport that actually gets
+/// used. Best-effort: any error (peer offline, UDP blocked) is ignored — the
+/// connection attempt itself is what re-punches and refreshes the path.
+pub async fn warm_path(npub: &str) {
+    if !is_service_active().await {
+        return;
+    }
+    let Ok(base) = peer_base_url(npub).await else {
+        return;
+    };
+    let c = client();
+    // The response status is irrelevant; establishing the connection warms it.
+    let _ = tokio::time::timeout(Duration::from_secs(8), c.get(&base).send()).await;
+}
+
 // ── DNS wire-format helpers ─────────────────────────────────────────────
 
 fn encode_query(id: u16, npub: &str) -> Result<Vec<u8>> {
@@ -374,10 +431,14 @@ impl<'a> PeerRequest<'a> {
         for (k, v) in &self.headers {
             rb = rb.header(*k, v);
         }
-        match rb.send().await {
+        match send_with_retry(rb).await {
             Ok(r) => Ok(Some(r)),
             Err(e) => {
-                tracing::debug!("FIPS POST {} failed: {}, falling back to Tor", url, e);
+                tracing::debug!(
+                    "FIPS POST {} failed after retry: {}, falling back to Tor",
+                    url,
+                    e
+                );
                 Ok(None)
             }
         }
@@ -403,10 +464,14 @@ impl<'a> PeerRequest<'a> {
         for (k, v) in &self.headers {
             rb = rb.header(*k, v);
         }
-        match rb.send().await {
+        match send_with_retry(rb).await {
             Ok(r) => Ok(Some(r)),
             Err(e) => {
-                tracing::debug!("FIPS GET {} failed: {}, falling back to Tor", url, e);
+                tracing::debug!(
+                    "FIPS GET {} failed after retry: {}, falling back to Tor",
+                    url,
+                    e
+                );
                 Ok(None)
             }
         }
diff --git a/core/archipelago/src/fips/mod.rs b/core/archipelago/src/fips/mod.rs
index b28da0f2..90e4ee4e 100644
--- a/core/archipelago/src/fips/mod.rs
+++ b/core/archipelago/src/fips/mod.rs
@@ -33,6 +33,67 @@ pub mod service;
 pub mod update;
 
 use serde::{Deserialize, Serialize};
 use std::path::{Path, PathBuf};
+
+/// Auto-activate FIPS with no user interaction. Once seed onboarding has
+/// materialised the fips key, install the daemon config + start the service if
+/// it isn't already up. Idempotent and best-effort: FIPS is the preferred
+/// transport and should come up on its own — the UI "Activate" button is now a
+/// manual fallback, not a requirement. No-op pre-onboarding (no key yet) or
+/// when the service is already active.
+pub async fn ensure_activated(data_dir: &Path) {
+    let identity_dir = identity_dir_from(data_dir);
+    if !identity_dir.join("fips_key").exists() {
+        return; // pre-onboarding: nothing to activate yet
+    }
+    if dial::is_service_active().await {
+        return; // already up
+    }
+    tracing::info!("FIPS inactive — auto-activating (no user interaction needed)");
+    if let Err(e) = config::install(&identity_dir).await {
+        tracing::warn!("FIPS auto-activate: config install failed: {:#}", e);
+        return;
+    }
+    if let Err(e) = service::activate(SERVICE_UNIT).await {
+        tracing::warn!("FIPS auto-activate: service activate failed: {:#}", e);
+        return;
+    }
+    tracing::info!("FIPS auto-activated");
+}
+
+/// Spawn the FIPS supervisor: every 45s it (1) auto-activates FIPS if onboarding
+/// is done but the service is down — so it comes up with zero user interaction,
+/// and (2) keeps hole-punched paths to known federation peers warm, so on-demand
+/// dials land on FIPS instead of falling back to Tor. Warms peers concurrently
+/// so one slow/offline peer doesn't delay the rest.
+pub fn spawn_fips_supervisor(data_dir: PathBuf) {
+    tokio::spawn(async move {
+        let mut tick = tokio::time::interval(std::time::Duration::from_secs(45));
+        // A cycle can overrun the period (service activation, slow peers). The
+        // default `Burst` policy would then fire the missed ticks back-to-back;
+        // `Delay` keeps cycles at least 45s apart instead.
+        tick.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
+        loop {
+            tick.tick().await;
+            // Bring FIPS up on its own once onboarding has materialised the key.
+            ensure_activated(&data_dir).await;
+            if !dial::is_service_active().await {
+                continue;
+            }
+            let nodes = crate::federation::load_nodes(&data_dir)
+                .await
+                .unwrap_or_default();
+            // One task per peer so a slow/offline peer doesn't delay the rest.
+            let handles: Vec<_> = nodes
+                .into_iter()
+                .filter_map(|node| node.fips_npub)
+                .map(|npub| tokio::spawn(async move { dial::warm_path(&npub).await }))
+                .collect();
+            for h in handles {
+                let _ = h.await;
+            }
+        }
+    });
+}
 
 /// Systemd unit name supervised by archipelago.
diff --git a/core/archipelago/src/main.rs b/core/archipelago/src/main.rs
index b78720fe..a18db1cb 100644
--- a/core/archipelago/src/main.rs
+++ b/core/archipelago/src/main.rs
@@ -311,6 +311,11 @@ async fn main() -> Result<()> {
     electrs_status::spawn_status_cache();
     bitcoin_status::spawn_status_cache();
 
+    // FIPS supervisor: auto-activate FIPS after onboarding (no Activate button
+    // needed) and keep hole-punched paths to federation peers warm so peer dials
+    // land on FIPS (the preferred transport) instead of falling back to Tor.
+    fips::spawn_fips_supervisor(config.data_dir.clone());
+
     let startup_ms = startup_start.elapsed().as_millis();
     info!(
         "Server listening on http://{} (startup: {}ms)",