diff --git a/core/archipelago/src/api/rpc/bitcoin_relay.rs b/core/archipelago/src/api/rpc/bitcoin_relay.rs index b7eaf421..57a4b1cd 100644 --- a/core/archipelago/src/api/rpc/bitcoin_relay.rs +++ b/core/archipelago/src/api/rpc/bitcoin_relay.rs @@ -862,7 +862,9 @@ async fn hydrate_tor_endpoint(data_dir: &Path, state: &mut BitcoinRelayState) { let onion = onion.trim().trim_end_matches('/').to_string(); if !onion.is_empty() { state.settings.tor_endpoint = Some(format!("http://{onion}/")); - let _ = save_relay_state(data_dir, state).await; + if let Err(e) = save_relay_state(data_dir, state).await { + tracing::warn!("Failed to persist relay tor endpoint: {e:#}"); + } } } } diff --git a/core/archipelago/src/api/rpc/content.rs b/core/archipelago/src/api/rpc/content.rs index d918b9b3..37366154 100644 --- a/core/archipelago/src/api/rpc/content.rs +++ b/core/archipelago/src/api/rpc/content.rs @@ -267,13 +267,16 @@ impl RpcHandler { .context("Failed to connect to peer")?; // Record which transport actually reached the peer (B14) so the UI // reflects FIPS vs Tor truthfully instead of always showing Tor/none. - let _ = crate::federation::record_peer_transport( + if let Err(e) = crate::federation::record_peer_transport( &self.config.data_dir, None, Some(onion), &transport.to_string(), ) - .await; + .await + { + tracing::warn!("Failed to persist peer transport badge: {e:#}"); + } if response.status() == reqwest::StatusCode::PAYMENT_REQUIRED { let body: serde_json::Value = response.json().await.unwrap_or_default(); @@ -348,13 +351,16 @@ impl RpcHandler { .await .context("Failed to connect to peer")?; // Record which transport actually reached the peer (B14). - let _ = crate::federation::record_peer_transport( + if let Err(e) = crate::federation::record_peer_transport( &self.config.data_dir, None, Some(onion), &transport.to_string(), ) - .await; + .await + { + tracing::warn!("Failed to persist peer transport badge: {e:#}"); + } if !response.status().is_success() { return Err(anyhow::anyhow!( @@ -497,13 +503,16 @@ impl RpcHandler { } }; // Record which transport actually reached the peer (B14). - let _ = crate::federation::record_peer_transport( + if let Err(e) = crate::federation::record_peer_transport( &self.config.data_dir, None, Some(onion), &transport.to_string(), ) - .await; + .await + { + tracing::warn!("Failed to persist peer transport badge: {e:#}"); + } if response.status() == reqwest::StatusCode::PAYMENT_REQUIRED { // Payment was rejected by the seller. Surface the most likely cause @@ -763,13 +772,16 @@ impl RpcHandler { })); } }; - let _ = crate::federation::record_peer_transport( + if let Err(e) = crate::federation::record_peer_transport( &self.config.data_dir, None, Some(onion), &transport.to_string(), ) - .await; + .await + { + tracing::warn!("Failed to persist peer transport badge: {e:#}"); + } if response.status() == reqwest::StatusCode::PAYMENT_REQUIRED { return Ok(serde_json::json!({ @@ -951,13 +963,16 @@ impl RpcHandler { })); } }; - let _ = crate::federation::record_peer_transport( + if let Err(e) = crate::federation::record_peer_transport( &self.config.data_dir, None, Some(onion), &transport.to_string(), ) - .await; + .await + { + tracing::warn!("Failed to persist peer transport badge: {e:#}"); + } if response.status() == reqwest::StatusCode::PAYMENT_REQUIRED { return Ok(serde_json::json!({ @@ -1019,13 +1034,16 @@ impl RpcHandler { .await .context("Failed to connect to peer for preview")?; // Record which transport actually reached the peer (B14). - let _ = crate::federation::record_peer_transport( + if let Err(e) = crate::federation::record_peer_transport( &self.config.data_dir, None, Some(onion), &transport.to_string(), ) - .await; + .await + { + tracing::warn!("Failed to persist peer transport badge: {e:#}"); + } if !response.status().is_success() { return Err(anyhow::anyhow!( diff --git a/core/archipelago/src/api/rpc/mesh/status.rs b/core/archipelago/src/api/rpc/mesh/status.rs index 9f1e91e5..7118c4a9 100644 --- a/core/archipelago/src/api/rpc/mesh/status.rs +++ b/core/archipelago/src/api/rpc/mesh/status.rs @@ -1,6 +1,7 @@ use super::super::RpcHandler; use crate::mesh; use anyhow::Result; +use tracing::warn; impl RpcHandler { /// mesh.status โ€” Get mesh radio status, device info, and peer count. @@ -283,7 +284,9 @@ impl RpcHandler { let mut set = state.radio_contact_blocklist.write().await; set.clear(); } - let _ = crate::mesh::save_ignored_radio_contacts(&data_dir, &[]).await; + if let Err(e) = crate::mesh::save_ignored_radio_contacts(&data_dir, &[]).await { + warn!("Failed to persist cleared radio-contact blocklist: {e:#}"); + } // Actually DELETE each radio contact from the firmware table (via // CMD_REMOVE_CONTACT) so wiped peers don't just reappear on the next diff --git a/core/archipelago/src/api/rpc/package/install.rs b/core/archipelago/src/api/rpc/package/install.rs index b8b9dc1c..e01598ab 100644 --- a/core/archipelago/src/api/rpc/package/install.rs +++ b/core/archipelago/src/api/rpc/package/install.rs @@ -32,6 +32,8 @@ pub(in crate::api::rpc) async fn install_log(msg: &str) { .open(INSTALL_LOG) .await { + // Fire-and-forget by design: install-log lines are advisory, and a + // per-line warn would spam the very log stream that's failing. let _ = f.write_all(line.as_bytes()).await; } } diff --git a/core/archipelago/src/container/registry.rs b/core/archipelago/src/container/registry.rs index 621b09ed..c413e25f 100644 --- a/core/archipelago/src/container/registry.rs +++ b/core/archipelago/src/container/registry.rs @@ -155,7 +155,9 @@ pub async fn load_registries(data_dir: &Path) -> Result { .collect::>(); if changed { // Persist so the next load doesn't have to re-merge. - let _ = save_registries(data_dir, &config).await; + if let Err(e) = save_registries(data_dir, &config).await { + tracing::warn!("Failed to persist migrated registry config: {e:#}"); + } } Ok(config) } diff --git a/core/archipelago/src/federation/storage.rs b/core/archipelago/src/federation/storage.rs index 6b23bfd0..8ff3a525 100644 --- a/core/archipelago/src/federation/storage.rs +++ b/core/archipelago/src/federation/storage.rs @@ -166,7 +166,12 @@ pub async fn add_node(data_dir: &Path, node: FederatedNode) -> Result Result, state: &Arc) { } q.retain(|m| !to_remove.contains(&m.id)); } - let _ = scheduler.save().await; + if let Err(e) = scheduler.save().await { + warn!("Failed to persist mesh outbox after sweep: {e:#}"); + } } /// Hand a due message to the radio. Returns true if it was sent (or should be diff --git a/core/archipelago/src/server.rs b/core/archipelago/src/server.rs index 093557c4..807a27f7 100644 --- a/core/archipelago/src/server.rs +++ b/core/archipelago/src/server.rs @@ -267,7 +267,10 @@ impl Server { info!("๐Ÿ“ก Auto-detected mesh radio: {:?} โ€” enabling mesh", devices); mesh_config.enabled = true; mesh_config.device_path = Some(devices[0].clone()); - let _ = crate::mesh::save_config(&data_dir, &mesh_config).await; + if let Err(e) = crate::mesh::save_config(&data_dir, &mesh_config).await + { + warn!("Failed to persist auto-detected mesh config: {e:#}"); + } } } diff --git a/core/archipelago/src/update.rs b/core/archipelago/src/update.rs index 2eda9c1a..5a0744d4 100644 --- a/core/archipelago/src/update.rs +++ b/core/archipelago/src/update.rs @@ -160,7 +160,9 @@ pub async fn load_mirrors(data_dir: &Path) -> Result> { force_ovh_update_primary(&mut list); changed = changed || before_order != list.iter().map(|m| m.url.clone()).collect::>(); if changed { - let _ = save_mirrors(data_dir, &list).await; + if let Err(e) = save_mirrors(data_dir, &list).await { + warn!("Failed to persist migrated update-mirrors list: {e:#}"); + } } Ok(list) } @@ -1175,14 +1177,22 @@ async fn download_component_resumable( } } if canceled { + // Best-effort: the partial file is re-validated (size + sha) + // when the download resumes, so a lost tail here is harmless. let _ = file.flush().await; drop(file); DOWNLOAD_TOTAL.store(0, Ordering::Relaxed); DOWNLOAD_BYTES.store(0, Ordering::Relaxed); anyhow::bail!("Download canceled"); } - let _ = file.flush().await; - let _ = file.sync_all().await; + // A failed flush/sync surfaces later as a size/sha mismatch and a + // resume, but log it โ€” this is where ENOSPC first shows itself. + if let Err(e) = file.flush().await { + warn!("Staging file flush failed: {e:#}"); + } + if let Err(e) = file.sync_all().await { + warn!("Staging file sync failed: {e:#}"); + } drop(file); if stream_err { continue; @@ -1265,7 +1275,9 @@ pub async fn cancel_download(data_dir: &Path) -> Result<()> { if let Ok(mut state) = load_state(data_dir).await { if state.update_in_progress { state.update_in_progress = false; - let _ = save_state(data_dir, &state).await; + if let Err(e) = save_state(data_dir, &state).await { + warn!("Failed to persist cleared update_in_progress: {e:#}"); + } cleared_marker = true; } } diff --git a/docs/1.8.0-RELEASE-HARDENING-PLAN.md b/docs/1.8.0-RELEASE-HARDENING-PLAN.md index 115753da..68bdc07f 100644 --- a/docs/1.8.0-RELEASE-HARDENING-PLAN.md +++ b/docs/1.8.0-RELEASE-HARDENING-PLAN.md @@ -107,11 +107,13 @@ end-to-end on real hardware. Note: the `.unwrap()`/`panic!` worry is a **non-issue** โ€” nearly all are in test modules; production request/boot paths are essentially panic-free. The real risks: -- [ ] ๐ŸŸ  **Log swallowed persistence writes.** ~30-40 dangerous `let _ = save_*().await` - sites discard durability failures with zero diagnostics: `server.rs:270` (mesh config), - `bitcoin_relay.rs:865` (relay state), `update.rs:163/1223` (mirrors/update state), - `registry.rs:158`, `mesh/status.rs:286`, `scheduler.rs:179`, `install.rs:34`. Convert to - `if let Err(e) = โ€ฆ { warn!(โ€ฆ) }`; leave genuinely fire-and-forget ones commented. +- [x] ๐ŸŸ  **Log swallowed persistence writes.** DONE 2026-07-02 (full-workspace re-inventory + found 19 production sites): 16 converted to `if let Err(e) = โ€ฆ { warn!(โ€ฆ) }` โ€” mesh + config (`server.rs`), relay tor endpoint (`bitcoin_relay.rs`), update mirrors/state + + staging flush/sync (`update.rs`), registry config, radio-contact blocklist, mesh outbox + sweep (`scheduler.rs`), block-header cache (`mesh/mod.rs`), 7ร— peer-transport badge + (`sync.rs` + `content.rs`). Federation tombstone/untombstone upgraded to hard errors + (see ยงI). Install-log line write left fire-and-forget with an explanatory comment. - [ ] ๐ŸŸ  **Remove blocking `std::process::Command` from async handlers.** `install.rs:2222` `published_host_port` (sync podman on the install path), `dependencies.rs:316` (`df`), `system/handlers.rs:578` (`sudo`), `transport/fips.rs:50` @@ -271,10 +273,10 @@ media (latest artifact only one minor behind). - [~] ๐ŸŸ  **Multinode gate pass** โ€” 5ร— destructive gate was launched on node `.5`; bring the rest of the fleet to precondition, then run the existing (undocumented-but-present) `tests/multinode/{smoke,meshtastic}.sh` cross-node suites. -- [ ] ๐ŸŸ  **Federation `remove-node` tombstone regression.** - `federation/storage.rs:187` does `let _ = tombstone_did(...)` โ€” swallows the write error, - so a removed peer reappears after the next sync. (This is a specific, confirmed instance - of the ยงC swallowed-writes class.) Needs a careful fix + `smoke.sh` re-verify. +- [~] ๐ŸŸ  **Federation `remove-node` tombstone regression.** Code fix DONE 2026-07-02: + `remove_node` now tombstones BEFORE trimming the node list and propagates the write + error (idempotent, so retries are clean); `add_node`'s untombstone likewise propagates + before mutating. **Still open: `tests/multinode/smoke.sh` re-verify on real nodes.** - [ ] ๐ŸŸ  **Phase-3 Quadlet default-flip** โ€” validated + opt-in on .228/.198; flip `config.rs:256` once the .5 gate reports clean. - [ ] ๐ŸŸ  **Developer CLI suite** (`archy app validate/render/install/test`) โ€” gates external @@ -300,5 +302,7 @@ media (latest artifact only one minor behind). 6. **Before public GA only (NOT alpha/beta):** remove + rotate the Anthropic key (ยงF) โ€” intentionally left in for frictionless AI during alpha/beta. -*Last updated: 2026-07-02 (initial deep-audit synthesis). Update this line + tick -boxes with commit shas as items land.* +*Last updated: 2026-07-02 PM (hardening session 1: ยงA anchor+catalog `1977bdef`, ยงA +manifest signature `51647b21`, ยงD origin checks `206d5fe8`, ยงC swallowed writes + +ยงI tombstone fix โ€” this commit). Update this line + tick boxes with commit shas as +items land.*