2026-01-24 22:59:20 +00:00
|
|
|
|
use crate::api::ApiHandler;
|
2026-01-27 23:21:26 +00:00
|
|
|
|
use crate::config::{Config, ContainerRuntime};
|
2026-04-22 19:20:13 -04:00
|
|
|
|
use crate::container::{
|
|
|
|
|
|
docker_packages, ContainerOrchestrator, DevContainerOrchestrator, DockerPackageScanner,
|
|
|
|
|
|
};
|
2026-02-17 15:03:34 +00:00
|
|
|
|
use crate::identity::{self, NodeIdentity};
|
2026-03-11 11:11:02 +00:00
|
|
|
|
use crate::monitoring::MetricsStore;
|
2026-03-09 07:43:12 +00:00
|
|
|
|
use crate::node_message;
|
2026-02-17 15:03:34 +00:00
|
|
|
|
use crate::nostr_discovery;
|
2026-03-12 12:56:59 +00:00
|
|
|
|
use crate::nostr_handshake;
|
2026-03-09 07:43:12 +00:00
|
|
|
|
use crate::peers;
|
2026-01-27 23:06:18 +00:00
|
|
|
|
use crate::state::StateManager;
|
2026-01-24 22:59:20 +00:00
|
|
|
|
use anyhow::Result;
|
Update archipelago: API, auth, container, parmanode, performance, security
- API handler, RPC, and server updates
- Auth and coding rules
- Container data manager, dev orchestrator, health monitor, podman client
- Parmanode script runner
- Performance resource manager
- Security container policies and secrets manager
- Add build scripts and documentation
2026-01-27 22:27:17 +00:00
|
|
|
|
use hyper::server::conn::Http;
|
|
|
|
|
|
use hyper::service::service_fn;
|
2026-04-02 01:28:11 +01:00
|
|
|
|
use std::collections::HashMap;
|
2026-01-24 22:59:20 +00:00
|
|
|
|
use std::net::SocketAddr;
|
2026-06-11 04:44:58 -04:00
|
|
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
2026-01-24 22:59:20 +00:00
|
|
|
|
use std::sync::Arc;
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
use std::time::{Duration, Instant};
|
2026-06-11 00:24:32 -04:00
|
|
|
|
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
2026-01-24 22:59:20 +00:00
|
|
|
|
use tokio::net::TcpListener;
|
2026-03-11 11:11:02 +00:00
|
|
|
|
use tracing::{debug, error, info, warn};
|
2026-01-24 22:59:20 +00:00
|
|
|
|
|
|
|
|
|
|
pub struct Server {
|
Update archipelago: API, auth, container, parmanode, performance, security
- API handler, RPC, and server updates
- Auth and coding rules
- Container data manager, dev orchestrator, health monitor, podman client
- Parmanode script runner
- Performance resource manager
- Security container policies and secrets manager
- Add build scripts and documentation
2026-01-27 22:27:17 +00:00
|
|
|
|
_config: Config,
|
2026-02-17 15:03:34 +00:00
|
|
|
|
_identity: Arc<NodeIdentity>,
|
2026-01-24 22:59:20 +00:00
|
|
|
|
api_handler: Arc<ApiHandler>,
|
2026-02-01 05:42:05 +00:00
|
|
|
|
_state_manager: Arc<StateManager>,
|
2026-01-24 22:59:20 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-11 04:44:58 -04:00
|
|
|
|
struct ContainerScanGuard<'a> {
|
|
|
|
|
|
scanning: &'a AtomicBool,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl<'a> ContainerScanGuard<'a> {
|
|
|
|
|
|
fn try_acquire(scanning: &'a AtomicBool) -> Option<Self> {
|
|
|
|
|
|
scanning
|
|
|
|
|
|
.compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
|
|
|
|
|
|
.ok()
|
|
|
|
|
|
.map(|_| Self { scanning })
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl Drop for ContainerScanGuard<'_> {
|
|
|
|
|
|
fn drop(&mut self) {
|
|
|
|
|
|
self.scanning.store(false, Ordering::Release);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-24 22:59:20 +00:00
|
|
|
|
impl Server {
|
2026-04-22 19:20:13 -04:00
|
|
|
|
pub async fn new(
|
|
|
|
|
|
config: Config,
|
|
|
|
|
|
orchestrator: Option<Arc<dyn ContainerOrchestrator>>,
|
|
|
|
|
|
dev_orchestrator: Option<Arc<DevContainerOrchestrator>>,
|
|
|
|
|
|
) -> Result<Self> {
|
2026-01-27 23:06:18 +00:00
|
|
|
|
let state_manager = Arc::new(StateManager::new());
|
2026-02-17 15:03:34 +00:00
|
|
|
|
|
2026-03-31 00:50:43 +01:00
|
|
|
|
// Load node identity and set stable server_info.
|
|
|
|
|
|
// Detect seed-backed vs legacy vs fresh install.
|
2026-02-17 15:03:34 +00:00
|
|
|
|
let identity_dir = config.data_dir.join("identity");
|
2026-03-31 00:50:43 +01:00
|
|
|
|
let has_seed = crate::seed::seed_exists(&config.data_dir);
|
|
|
|
|
|
let has_node_key = NodeIdentity::key_exists(&identity_dir);
|
|
|
|
|
|
|
|
|
|
|
|
let identity = if has_node_key {
|
|
|
|
|
|
// Existing keys on disk (seed-derived or legacy random) — load them.
|
|
|
|
|
|
NodeIdentity::load_or_create(&identity_dir).await?
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// Fresh install — create a temporary identity.
|
|
|
|
|
|
// Onboarding will overwrite this with seed-derived keys.
|
|
|
|
|
|
NodeIdentity::load_or_create(&identity_dir).await?
|
|
|
|
|
|
};
|
|
|
|
|
|
|
2026-02-17 15:03:34 +00:00
|
|
|
|
let (mut data, _) = state_manager.get_snapshot().await;
|
|
|
|
|
|
data.server_info.id = identity.node_id();
|
|
|
|
|
|
data.server_info.pubkey = identity.pubkey_hex();
|
2026-03-31 00:50:43 +01:00
|
|
|
|
data.server_info.seed_backed = has_seed;
|
2026-03-14 03:00:29 +00:00
|
|
|
|
// Load persisted server name
|
|
|
|
|
|
let name_file = config.data_dir.join("server-name");
|
|
|
|
|
|
if let Ok(name) = tokio::fs::read_to_string(&name_file).await {
|
|
|
|
|
|
let name = name.trim().to_string();
|
|
|
|
|
|
if !name.is_empty() {
|
|
|
|
|
|
data.server_info.name = Some(name);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-03-21 01:21:08 +00:00
|
|
|
|
data.server_info.tor_address = docker_packages::read_tor_address("archipelago").await;
|
2026-02-17 15:03:34 +00:00
|
|
|
|
if let Some(ref tor) = data.server_info.tor_address {
|
|
|
|
|
|
data.server_info.node_address = Some(identity.node_address(tor));
|
|
|
|
|
|
}
|
|
|
|
|
|
state_manager.update_data(data.clone()).await;
|
|
|
|
|
|
|
2026-03-31 05:11:55 +01:00
|
|
|
|
// Retry Tor address in background — Tor may not be ready at startup
|
|
|
|
|
|
if data.server_info.tor_address.is_none() {
|
|
|
|
|
|
let sm = state_manager.clone();
|
|
|
|
|
|
let pubkey = identity.pubkey_hex();
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
for delay in [5, 10, 20, 30, 60] {
|
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_secs(delay)).await;
|
|
|
|
|
|
if let Some(tor) = docker_packages::read_tor_address("archipelago").await {
|
|
|
|
|
|
let (mut d, _) = sm.get_snapshot().await;
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
let addr =
|
|
|
|
|
|
format!("archipelago://{}#{}", tor.trim_end_matches('/'), pubkey);
|
2026-03-31 05:11:55 +01:00
|
|
|
|
d.server_info.tor_address = Some(tor.clone());
|
|
|
|
|
|
d.server_info.node_address = Some(addr);
|
|
|
|
|
|
sm.update_data(d).await;
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
tracing::info!(
|
|
|
|
|
|
"Tor address discovered after startup: {}",
|
|
|
|
|
|
&tor[..20.min(tor.len())]
|
|
|
|
|
|
);
|
2026-03-31 05:11:55 +01:00
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-20 08:26:40 +00:00
|
|
|
|
// Load persisted messages (Archipelago channel)
|
|
|
|
|
|
node_message::init(&config.data_dir).await;
|
|
|
|
|
|
|
2026-04-22 08:29:56 -04:00
|
|
|
|
// Auto-create the Node identity on fresh boot, mirroring the node's
|
|
|
|
|
|
// own signing key (seed-derived when onboarded, random otherwise).
|
|
|
|
|
|
// This keeps the DID shown on the Identities page, the DID Status
|
|
|
|
|
|
// card, and the DID used for peer-to-peer connects all aligned on
|
|
|
|
|
|
// one value — the seed-derived node DID. Idempotent: if the entry
|
|
|
|
|
|
// already exists from a prior boot, create_from_signing_key returns
|
|
|
|
|
|
// the existing record unchanged.
|
2026-03-15 05:18:12 +00:00
|
|
|
|
{
|
|
|
|
|
|
let im = crate::identity_manager::IdentityManager::new(&config.data_dir).await;
|
|
|
|
|
|
if let Ok(mgr) = im {
|
|
|
|
|
|
if let Ok((list, _)) = mgr.list().await {
|
|
|
|
|
|
if list.is_empty() {
|
2026-04-22 08:29:56 -04:00
|
|
|
|
let signing_key = ed25519_dalek::SigningKey::from_bytes(
|
|
|
|
|
|
&identity.signing_key().to_bytes(),
|
|
|
|
|
|
);
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
match mgr
|
2026-04-22 08:29:56 -04:00
|
|
|
|
.create_from_signing_key(
|
|
|
|
|
|
"Node".to_string(),
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
crate::identity_manager::IdentityPurpose::Personal,
|
2026-04-22 08:29:56 -04:00
|
|
|
|
signing_key,
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
)
|
|
|
|
|
|
.await
|
|
|
|
|
|
{
|
2026-03-15 05:18:12 +00:00
|
|
|
|
Ok(record) => {
|
|
|
|
|
|
let _ = mgr.create_nostr_key(&record.id).await;
|
2026-04-22 08:29:56 -04:00
|
|
|
|
tracing::info!(did = %record.did, "Auto-created Node identity mirroring node key");
|
2026-03-15 05:18:12 +00:00
|
|
|
|
}
|
|
|
|
|
|
Err(e) => tracing::debug!("Auto-identity creation (non-fatal): {}", e),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-17 04:47:18 -04:00
|
|
|
|
// DHT swarm-assist (Phase 3): build the iroh provider once at startup so
|
|
|
|
|
|
// release downloads can fetch from peers (origin always wins) and seed
|
|
|
|
|
|
// what they hold. Inert unless built with `iroh-swarm` AND swarm_enabled.
|
|
|
|
|
|
if let Err(e) = crate::swarm::init(
|
|
|
|
|
|
&config.data_dir,
|
|
|
|
|
|
&config.nostr_relays,
|
|
|
|
|
|
config.nostr_tor_proxy.as_deref(),
|
|
|
|
|
|
config.swarm_enabled,
|
|
|
|
|
|
)
|
|
|
|
|
|
.await
|
|
|
|
|
|
{
|
|
|
|
|
|
tracing::warn!("Swarm init (non-fatal, falling back to origin-only): {}", e);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-17 15:03:34 +00:00
|
|
|
|
// Revoke any previously published Nostr data (runs before publish so revocation is not overwritten)
|
|
|
|
|
|
let identity_dir = config.data_dir.join("identity");
|
|
|
|
|
|
let tor_proxy_revoke = config.nostr_tor_proxy.clone();
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
if let Err(e) =
|
|
|
|
|
|
nostr_discovery::revoke_if_needed(&identity_dir, tor_proxy_revoke.as_deref()).await
|
|
|
|
|
|
{
|
2026-02-17 15:03:34 +00:00
|
|
|
|
tracing::debug!("Nostr revoke (non-fatal): {}", e);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-12 00:19:30 +00:00
|
|
|
|
// Publish presence-only to Nostr (DID + Nostr pubkey, NO onion address).
|
|
|
|
|
|
// Onion addresses are exchanged privately via NIP-44 encrypted DMs.
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
if config.nostr_discovery_enabled && !config.nostr_relays.is_empty() {
|
2026-02-17 15:03:34 +00:00
|
|
|
|
let identity_dir = config.data_dir.join("identity");
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
let did =
|
|
|
|
|
|
identity::did_key_from_pubkey_hex(&data.server_info.pubkey).unwrap_or_default();
|
2026-02-17 15:03:34 +00:00
|
|
|
|
let version = data.server_info.version.clone();
|
|
|
|
|
|
let relays = config.nostr_relays.clone();
|
|
|
|
|
|
let tor_proxy = config.nostr_tor_proxy.clone();
|
|
|
|
|
|
tokio::spawn(async move {
|
2026-03-12 00:19:30 +00:00
|
|
|
|
if let Err(e) = nostr_handshake::publish_presence(
|
2026-02-17 15:03:34 +00:00
|
|
|
|
&identity_dir,
|
|
|
|
|
|
&did,
|
|
|
|
|
|
&version,
|
|
|
|
|
|
&relays,
|
|
|
|
|
|
tor_proxy.as_deref(),
|
|
|
|
|
|
)
|
|
|
|
|
|
.await
|
|
|
|
|
|
{
|
2026-03-12 00:19:30 +00:00
|
|
|
|
tracing::debug!("Nostr presence publish (non-fatal): {}", e);
|
2026-02-17 15:03:34 +00:00
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
info!(
|
|
|
|
|
|
"🔑 Node identity: {} (pubkey: {}...)",
|
|
|
|
|
|
identity.node_id(),
|
|
|
|
|
|
&identity.pubkey_hex()[..16.min(identity.pubkey_hex().len())]
|
|
|
|
|
|
);
|
2026-02-17 15:03:34 +00:00
|
|
|
|
|
|
|
|
|
|
let identity = Arc::new(identity);
|
2026-03-11 11:11:02 +00:00
|
|
|
|
|
|
|
|
|
|
// Create metrics store and spawn background collector
|
2026-03-22 03:30:21 +00:00
|
|
|
|
let metrics_store = Arc::new(MetricsStore::with_data_dir(config.data_dir.clone()).await);
|
feat(TASK-12): periodic telemetry reporter — 15min interval, collector POST
Background task spawned on server startup: every 15 minutes, checks opt-in
status, builds anonymous health report (node ID hash, version, uptime,
CPU/RAM/disk %, container states, recent alerts), saves to disk, and POSTs
to TELEMETRY_COLLECTOR_URL env var if configured. Non-fatal on failure.
Fixed FiredAlert field references (kind not rule_type, timestamp not
fired_at) in both monitoring and analytics modules.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 23:36:57 +00:00
|
|
|
|
let metrics_for_telemetry = metrics_store.clone();
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
crate::monitoring::spawn_metrics_collector(
|
|
|
|
|
|
metrics_store.clone(),
|
|
|
|
|
|
Some(state_manager.clone()),
|
|
|
|
|
|
Some(config.data_dir.clone()),
|
2026-03-11 11:11:02 +00:00
|
|
|
|
);
|
2026-01-27 23:21:26 +00:00
|
|
|
|
|
2026-04-22 19:20:13 -04:00
|
|
|
|
let api_handler = Arc::new(
|
|
|
|
|
|
ApiHandler::new(
|
|
|
|
|
|
config.clone(),
|
|
|
|
|
|
state_manager.clone(),
|
|
|
|
|
|
metrics_store,
|
|
|
|
|
|
orchestrator,
|
|
|
|
|
|
dev_orchestrator,
|
|
|
|
|
|
)
|
|
|
|
|
|
.await?,
|
|
|
|
|
|
);
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
|
2026-03-17 00:03:08 +00:00
|
|
|
|
// Initialize mesh networking service (if config has enabled: true)
|
|
|
|
|
|
{
|
|
|
|
|
|
let data_dir = config.data_dir.clone();
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
let did =
|
|
|
|
|
|
identity::did_key_from_pubkey_hex(&data.server_info.pubkey).unwrap_or_default();
|
2026-03-17 00:03:08 +00:00
|
|
|
|
let pubkey_hex = identity.pubkey_hex();
|
|
|
|
|
|
let signing_key = identity.signing_key();
|
|
|
|
|
|
match crate::mesh::MeshService::new(&data_dir, signing_key, &did, &pubkey_hex).await {
|
|
|
|
|
|
Ok(mut mesh_service) => {
|
2026-04-18 11:53:06 -04:00
|
|
|
|
// Pass the human-readable server name for mesh adverts
|
|
|
|
|
|
mesh_service.set_server_name(data.server_info.name.clone());
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
let mut mesh_config = crate::mesh::load_config(&data_dir)
|
|
|
|
|
|
.await
|
|
|
|
|
|
.unwrap_or_default();
|
2026-03-31 00:50:43 +01:00
|
|
|
|
|
|
|
|
|
|
// Auto-enable mesh if a radio is detected and no config exists yet
|
|
|
|
|
|
if !mesh_config.enabled {
|
|
|
|
|
|
let devices = crate::mesh::detect_devices().await;
|
|
|
|
|
|
if !devices.is_empty() {
|
|
|
|
|
|
info!("📡 Auto-detected mesh radio: {:?} — enabling mesh", devices);
|
|
|
|
|
|
mesh_config.enabled = true;
|
|
|
|
|
|
mesh_config.device_path = Some(devices[0].clone());
|
|
|
|
|
|
let _ = crate::mesh::save_config(&data_dir, &mesh_config).await;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-17 00:03:08 +00:00
|
|
|
|
if mesh_config.enabled {
|
|
|
|
|
|
if let Err(e) = mesh_service.start() {
|
|
|
|
|
|
warn!("Mesh service start failed (non-fatal): {}", e);
|
|
|
|
|
|
} else {
|
|
|
|
|
|
info!("📡 Mesh networking started");
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
api_handler
|
|
|
|
|
|
.rpc_handler()
|
|
|
|
|
|
.set_mesh_service(mesh_service)
|
|
|
|
|
|
.await;
|
2026-03-17 00:03:08 +00:00
|
|
|
|
info!("📡 Mesh service initialized");
|
|
|
|
|
|
}
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
warn!("Mesh service init failed (non-fatal): {}", e);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Initialize transport router (unified routing: mesh > lan > tor)
|
|
|
|
|
|
{
|
|
|
|
|
|
let data_dir = config.data_dir.clone();
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
let did =
|
|
|
|
|
|
identity::did_key_from_pubkey_hex(&data.server_info.pubkey).unwrap_or_default();
|
2026-03-17 00:03:08 +00:00
|
|
|
|
let pubkey_hex = identity.pubkey_hex();
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
let mesh_config = crate::mesh::load_config(&data_dir)
|
|
|
|
|
|
.await
|
|
|
|
|
|
.unwrap_or_default();
|
2026-03-17 00:03:08 +00:00
|
|
|
|
let mesh_only = mesh_config.mesh_only_mode.unwrap_or(false);
|
|
|
|
|
|
|
|
|
|
|
|
match crate::transport::PeerRegistry::load(&data_dir).await {
|
|
|
|
|
|
Ok(registry) => {
|
|
|
|
|
|
let registry = std::sync::Arc::new(registry);
|
|
|
|
|
|
let mut transports: Vec<Box<dyn crate::transport::NodeTransport>> = Vec::new();
|
|
|
|
|
|
|
|
|
|
|
|
// Tor transport (always register — availability checked dynamically)
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
transports.push(Box::new(crate::transport::tor::TorTransport::new(
|
|
|
|
|
|
&pubkey_hex,
|
|
|
|
|
|
)));
|
2026-03-17 00:03:08 +00:00
|
|
|
|
|
|
|
|
|
|
// Mesh transport (wraps the mesh service)
|
|
|
|
|
|
transports.push(Box::new(
|
|
|
|
|
|
crate::transport::mesh_transport::MeshTransport::new(
|
|
|
|
|
|
api_handler.rpc_handler().mesh_service_arc(),
|
|
|
|
|
|
),
|
|
|
|
|
|
));
|
|
|
|
|
|
|
|
|
|
|
|
// LAN transport (mDNS discovery)
|
|
|
|
|
|
let mut lan = crate::transport::lan::LanTransport::new(&did, &pubkey_hex, 5678);
|
|
|
|
|
|
match lan.start(registry.clone()) {
|
|
|
|
|
|
Ok(()) => info!("📡 LAN transport (mDNS) started"),
|
|
|
|
|
|
Err(e) => debug!("LAN transport init (non-fatal): {}", e),
|
|
|
|
|
|
}
|
|
|
|
|
|
transports.push(Box::new(lan));
|
|
|
|
|
|
|
|
|
|
|
|
let router = std::sync::Arc::new(crate::transport::TransportRouter::new(
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
transports, registry, mesh_only,
|
2026-03-17 00:03:08 +00:00
|
|
|
|
));
|
|
|
|
|
|
api_handler.rpc_handler().set_transport_router(router).await;
|
|
|
|
|
|
info!("📡 Transport router initialized (mesh_only={})", mesh_only);
|
|
|
|
|
|
}
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
warn!("Transport router init failed (non-fatal): {}", e);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-14 03:00:29 +00:00
|
|
|
|
// Register Archipelago DWN protocols (background, non-blocking)
|
|
|
|
|
|
{
|
|
|
|
|
|
let data_dir = config.data_dir.clone();
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
if let Err(e) = register_dwn_protocols(&data_dir).await {
|
|
|
|
|
|
debug!("DWN protocol registration (non-fatal): {}", e);
|
|
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-17 15:03:34 +00:00
|
|
|
|
// Periodic Tor address refresh (runs regardless of dev_mode)
|
|
|
|
|
|
// Picks up hostname when Tor creates it after startup/rotation (30-60s delay)
|
|
|
|
|
|
{
|
|
|
|
|
|
let state = state_manager.clone();
|
|
|
|
|
|
let identity_clone = identity.clone();
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
let mut interval = tokio::time::interval(Duration::from_secs(30));
|
|
|
|
|
|
loop {
|
|
|
|
|
|
interval.tick().await;
|
|
|
|
|
|
if let Err(e) = refresh_tor_address(&state, identity_clone.as_ref()).await {
|
|
|
|
|
|
debug!("Tor address refresh (non-fatal): {}", e);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-12 00:19:30 +00:00
|
|
|
|
// Initialize container scanner — discovers installed apps from Podman/Docker
|
|
|
|
|
|
{
|
2026-01-27 23:21:26 +00:00
|
|
|
|
let scanner = create_docker_scanner(&config).await?;
|
|
|
|
|
|
let state = state_manager.clone();
|
2026-02-17 15:03:34 +00:00
|
|
|
|
let identity_clone = identity.clone();
|
2026-05-05 11:29:18 -04:00
|
|
|
|
let data_dir = config.data_dir.clone();
|
2026-04-23 07:59:03 -04:00
|
|
|
|
let scan_kick = api_handler.rpc_handler().scan_kick();
|
|
|
|
|
|
let scan_tick = api_handler.rpc_handler().scan_tick();
|
2026-03-12 00:19:30 +00:00
|
|
|
|
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
|
// Initial scan (delayed to let crash recovery finish first)
|
2026-01-27 23:21:26 +00:00
|
|
|
|
tokio::spawn(async move {
|
2026-03-18 14:22:00 +00:00
|
|
|
|
// Brief delay for containers to stabilize after boot
|
|
|
|
|
|
tokio::time::sleep(Duration::from_secs(3)).await;
|
2026-03-12 00:19:30 +00:00
|
|
|
|
info!("🐳 Scanning containers...");
|
2026-04-02 01:28:11 +01:00
|
|
|
|
// Tracks how many consecutive scans each container has been absent from.
|
|
|
|
|
|
// Prevents UI flapping when podman intermittently returns incomplete results.
|
|
|
|
|
|
let mut absence_tracker: HashMap<String, u32> = HashMap::new();
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
// Tracks when each container first entered a transitional state
|
|
|
|
|
|
// (Stopping / Starting / Restarting / ...). Used by the merge
|
|
|
|
|
|
// loop below to ignore podman's live state during a pending
|
|
|
|
|
|
// lifecycle op, and to break out if the spawned task dies
|
|
|
|
|
|
// without ever writing a final state.
|
|
|
|
|
|
let mut transitional_since: HashMap<String, Instant> = HashMap::new();
|
2026-06-11 00:24:32 -04:00
|
|
|
|
let mut scan_backoff_until: Option<Instant> = None;
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
if let Err(e) = scan_and_update_packages(
|
|
|
|
|
|
&scanner,
|
|
|
|
|
|
&state,
|
|
|
|
|
|
identity_clone.as_ref(),
|
2026-05-05 11:29:18 -04:00
|
|
|
|
&data_dir,
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
&mut absence_tracker,
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
&mut transitional_since,
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
)
|
|
|
|
|
|
.await
|
|
|
|
|
|
{
|
2026-03-12 00:19:30 +00:00
|
|
|
|
error!("Failed to scan containers: {}", e);
|
2026-06-11 00:24:32 -04:00
|
|
|
|
if is_podman_scan_timeout(&e) {
|
|
|
|
|
|
scan_backoff_until = Some(Instant::now() + Duration::from_secs(30));
|
|
|
|
|
|
warn!("Podman container scan timed out; backing off scans for 30s");
|
|
|
|
|
|
}
|
2026-01-27 23:21:26 +00:00
|
|
|
|
}
|
2026-04-23 07:59:03 -04:00
|
|
|
|
// Bump the scan-completion counter so any caller waiting on a
|
|
|
|
|
|
// kicked scan (install/update success path) can proceed.
|
|
|
|
|
|
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
|
2026-03-12 00:19:30 +00:00
|
|
|
|
|
2026-04-23 07:59:03 -04:00
|
|
|
|
// Periodic scan every 60 seconds (only broadcasts if state changed).
|
|
|
|
|
|
// Also wakes immediately when `scan_kick` fires — install/update
|
|
|
|
|
|
// success paths poke it so the fresh manifest (with populated
|
|
|
|
|
|
// interfaces) lands before they flip state to Running.
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
|
// Uses an in-flight guard to skip scans when a previous one is still running
|
2026-04-07 20:25:09 +01:00
|
|
|
|
let mut interval = tokio::time::interval(Duration::from_secs(60));
|
|
|
|
|
|
// Skip missed ticks instead of catching up — prevents burst of scans
|
|
|
|
|
|
// after a slow podman response (which causes DB lock storms)
|
|
|
|
|
|
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
2026-06-11 04:44:58 -04:00
|
|
|
|
let scanning = std::sync::Arc::new(AtomicBool::new(false));
|
2026-01-27 23:21:26 +00:00
|
|
|
|
loop {
|
2026-04-23 07:59:03 -04:00
|
|
|
|
tokio::select! {
|
|
|
|
|
|
_ = interval.tick() => {}
|
|
|
|
|
|
_ = scan_kick.notified() => {
|
|
|
|
|
|
debug!("Scan kicked by install/update success — running immediately");
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-06-11 00:24:32 -04:00
|
|
|
|
if let Some(until) = scan_backoff_until {
|
|
|
|
|
|
if Instant::now() < until {
|
|
|
|
|
|
debug!("Skipping container scan — Podman scan backoff active");
|
|
|
|
|
|
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-06-11 04:44:58 -04:00
|
|
|
|
let Some(_scan_guard) = ContainerScanGuard::try_acquire(&scanning) else {
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
|
debug!("Skipping container scan — previous scan still in progress");
|
2026-06-11 00:24:32 -04:00
|
|
|
|
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
|
feat: v1.2.0-alpha — E2E encrypted mesh relay, steganography, relay status polling
Phase 5 mesh networking:
- E2E encrypted TX relay (X25519 + ChaCha20-Poly1305) — non-Archy nodes
relay encrypted blobs transparently via Meshcore native routing
- Steganographic encoding modes (WeatherStation, SensorNetwork) — traffic
looks like sensor data on the wire, 0xAA marker, configurable per-node
- Pre-flight Bitcoin Core health check on relay node — specific error codes
(bitcoin_unreachable, bitcoin_syncing, tx_rejected) instead of generic fails
- mesh.relay-status RPC endpoint — frontend polls for relay result every 3s
- On-Chain / Lightning tabs in Off-Grid Bitcoin panel
- Archy Peers vs Mesh Broadcast relay mode selector
- Mesh view fills viewport (no page scroll), internal panel scrolling
- Version bump to 1.2.0-alpha
Also includes: deploy hardening, container fixes, IndeedHub updates,
boot screen, dashboard improvements, MASTER_PLAN task tracking
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 23:56:37 +00:00
|
|
|
|
continue;
|
2026-06-11 04:44:58 -04:00
|
|
|
|
};
|
|
|
|
|
|
let scan_result = scan_and_update_packages(
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
&scanner,
|
|
|
|
|
|
&state,
|
|
|
|
|
|
identity_clone.as_ref(),
|
2026-05-05 11:29:18 -04:00
|
|
|
|
&data_dir,
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
&mut absence_tracker,
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
&mut transitional_since,
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
)
|
2026-06-11 04:44:58 -04:00
|
|
|
|
.await;
|
|
|
|
|
|
if let Err(e) = scan_result {
|
2026-03-12 00:19:30 +00:00
|
|
|
|
error!("Failed to update containers: {}", e);
|
2026-06-11 00:24:32 -04:00
|
|
|
|
if is_podman_scan_timeout(&e) {
|
|
|
|
|
|
scan_backoff_until = Some(Instant::now() + Duration::from_secs(30));
|
|
|
|
|
|
warn!("Podman container scan timed out; backing off scans for 30s");
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
scan_backoff_until = None;
|
2026-01-27 23:21:26 +00:00
|
|
|
|
}
|
2026-04-23 07:59:03 -04:00
|
|
|
|
scan_tick.send_modify(|n| *n = n.wrapping_add(1));
|
2026-01-27 23:21:26 +00:00
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
2026-01-24 22:59:20 +00:00
|
|
|
|
|
2026-03-09 07:43:12 +00:00
|
|
|
|
// Peer health monitoring — check every 5 minutes
|
|
|
|
|
|
{
|
|
|
|
|
|
let state = state_manager.clone();
|
|
|
|
|
|
let data_dir = config.data_dir.clone();
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
let mut interval = tokio::time::interval(Duration::from_secs(300));
|
|
|
|
|
|
loop {
|
|
|
|
|
|
interval.tick().await;
|
|
|
|
|
|
if let Err(e) = check_peer_health(&state, &data_dir).await {
|
|
|
|
|
|
debug!("Peer health check (non-fatal): {}", e);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-21 06:21:37 -04:00
|
|
|
|
// FIPS seed-anchor apply loop — every 5 minutes we re-push the
|
|
|
|
|
|
// configured seed anchors into the running fips daemon via
|
|
|
|
|
|
// `fipsctl connect`. This keeps the mesh bootstrap resilient:
|
|
|
|
|
|
// operators add cluster-local anchors in the UI, and a daemon
|
|
|
|
|
|
// restart or a flaky public anchor can't strand the node.
|
|
|
|
|
|
// First run is delayed 30s so fips has time to come up after
|
|
|
|
|
|
// onboarding before we start dialing.
|
|
|
|
|
|
{
|
|
|
|
|
|
let data_dir = config.data_dir.clone();
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
tokio::time::sleep(Duration::from_secs(30)).await;
|
|
|
|
|
|
let mut interval = tokio::time::interval(Duration::from_secs(300));
|
|
|
|
|
|
loop {
|
|
|
|
|
|
interval.tick().await;
|
|
|
|
|
|
match crate::fips::anchors::load(&data_dir).await {
|
|
|
|
|
|
Ok(list) if !list.is_empty() => {
|
|
|
|
|
|
let _ = crate::fips::anchors::apply(&list).await;
|
|
|
|
|
|
}
|
|
|
|
|
|
Ok(_) => { /* no seed anchors configured yet */ }
|
2026-04-28 15:00:58 -04:00
|
|
|
|
Err(e) => {
|
|
|
|
|
|
tracing::debug!("Seed-anchor apply: load failed (non-fatal): {}", e)
|
|
|
|
|
|
}
|
2026-04-21 06:21:37 -04:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-15 04:59:20 +00:00
|
|
|
|
// did:dht auto-refresh — re-publish DHT records every 2 hours
|
|
|
|
|
|
if config.nostr_discovery_enabled {
|
|
|
|
|
|
let data_dir = config.data_dir.clone();
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
let mut interval = tokio::time::interval(Duration::from_secs(7200));
|
|
|
|
|
|
loop {
|
|
|
|
|
|
interval.tick().await;
|
|
|
|
|
|
let identity_dir = data_dir.join("identity");
|
|
|
|
|
|
let node_key_path = identity_dir.join("node_key");
|
|
|
|
|
|
if !node_key_path.exists() {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
match tokio::fs::read(&node_key_path).await {
|
|
|
|
|
|
Ok(key_bytes) if key_bytes.len() == 32 => {
|
|
|
|
|
|
let mut seed = [0u8; 32];
|
|
|
|
|
|
seed.copy_from_slice(&key_bytes);
|
|
|
|
|
|
let signing_key = ed25519_dalek::SigningKey::from_bytes(&seed);
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
match crate::network::did_dht::create_and_publish(&signing_key, &[])
|
|
|
|
|
|
.await
|
|
|
|
|
|
{
|
2026-03-15 04:59:20 +00:00
|
|
|
|
Ok(did) => tracing::info!(did = %did, "did:dht record refreshed"),
|
|
|
|
|
|
Err(e) => tracing::debug!("did:dht refresh (non-fatal): {}", e),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
_ => {
|
|
|
|
|
|
tracing::debug!("did:dht refresh skipped: no valid node key");
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-19 08:32:11 -04:00
|
|
|
|
// Periodic federation state sync — every 30 min we call
|
|
|
|
|
|
// federation::sync_with_peer on each Trusted peer. Without this
|
|
|
|
|
|
// users had to manually click Sync for `fips_npub`/transport
|
|
|
|
|
|
// badge/state updates to propagate; now it happens in the
|
|
|
|
|
|
// background. Staggers peers with a 5s delay so we don't thunder
|
|
|
|
|
|
// the Tor SOCKS proxy. Sync itself already prefers FIPS.
|
|
|
|
|
|
{
|
|
|
|
|
|
let data_dir = config.data_dir.clone();
|
|
|
|
|
|
let state = state_manager.clone();
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
// First run 60s after boot to let onboarding settle.
|
|
|
|
|
|
tokio::time::sleep(Duration::from_secs(60)).await;
|
|
|
|
|
|
let mut interval = tokio::time::interval(Duration::from_secs(1800));
|
|
|
|
|
|
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
|
|
|
|
|
|
loop {
|
|
|
|
|
|
interval.tick().await;
|
|
|
|
|
|
let Ok(nodes) = crate::federation::load_nodes(&data_dir).await else {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
};
|
|
|
|
|
|
if nodes.is_empty() {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
let (data, _) = state.get_snapshot().await;
|
|
|
|
|
|
let Ok(local_did) =
|
|
|
|
|
|
crate::identity::did_key_from_pubkey_hex(&data.server_info.pubkey)
|
|
|
|
|
|
else {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
};
|
|
|
|
|
|
let identity_dir = data_dir.join("identity");
|
|
|
|
|
|
let Ok(node_identity) =
|
|
|
|
|
|
crate::identity::NodeIdentity::load_or_create(&identity_dir).await
|
|
|
|
|
|
else {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
for node in &nodes {
|
|
|
|
|
|
if node.trust_level == crate::federation::TrustLevel::Untrusted {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
match crate::federation::sync_with_peer(
|
|
|
|
|
|
&data_dir,
|
|
|
|
|
|
node,
|
|
|
|
|
|
&local_did,
|
|
|
|
|
|
|bytes| node_identity.sign(bytes),
|
|
|
|
|
|
)
|
|
|
|
|
|
.await
|
|
|
|
|
|
{
|
|
|
|
|
|
Ok(_) => debug!(
|
|
|
|
|
|
"Periodic federation sync ok: {}",
|
|
|
|
|
|
node.did.chars().take(20).collect::<String>()
|
|
|
|
|
|
),
|
|
|
|
|
|
Err(e) => debug!(
|
|
|
|
|
|
"Periodic federation sync with {}: {}",
|
|
|
|
|
|
node.did.chars().take(20).collect::<String>(),
|
|
|
|
|
|
e
|
|
|
|
|
|
),
|
|
|
|
|
|
}
|
|
|
|
|
|
tokio::time::sleep(Duration::from_secs(5)).await;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-11 11:11:02 +00:00
|
|
|
|
// Container health monitoring — auto-restart unhealthy containers
|
2026-03-12 22:19:04 +00:00
|
|
|
|
// Respects webhook config: skips when disabled or ContainerCrash not subscribed
|
|
|
|
|
|
crate::health_monitor::spawn_health_monitor(state_manager.clone(), config.data_dir.clone());
|
2026-03-11 11:11:02 +00:00
|
|
|
|
|
feat(TASK-12): periodic telemetry reporter — 15min interval, collector POST
Background task spawned on server startup: every 15 minutes, checks opt-in
status, builds anonymous health report (node ID hash, version, uptime,
CPU/RAM/disk %, container states, recent alerts), saves to disk, and POSTs
to TELEMETRY_COLLECTOR_URL env var if configured. Non-fatal on failure.
Fixed FiredAlert field references (kind not rule_type, timestamp not
fired_at) in both monitoring and analytics modules.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 23:36:57 +00:00
|
|
|
|
// Periodic telemetry reporter (every 15 min when opted in)
|
|
|
|
|
|
crate::monitoring::spawn_telemetry_reporter(
|
|
|
|
|
|
metrics_for_telemetry,
|
|
|
|
|
|
Some(state_manager.clone()),
|
|
|
|
|
|
config.data_dir.clone(),
|
|
|
|
|
|
);
|
|
|
|
|
|
|
fix(fips,iso): bulletproof FIPS from install — no Activate button needed
Problems addressed (all observed on .198):
* fips_key was written as raw 32 bytes; upstream fips daemon reads it
with read_to_string() and bailed with "stream did not contain valid
UTF-8", crashlooping indefinitely.
* Activate button racy: user had to hit it, and it would keep failing
silently because the daemon couldn't parse its own config.
* FIPS schema drift (already fixed in 7d8a5864) put the config write
path behind the same broken "Activate" flow, so the fix alone
didn't help existing nodes.
* Journal was on tmpfs — every reboot wiped install/onboarding history,
making post-hoc debugging impossible.
Changes:
* identity.rs: write fips_key as bech32 nsec + newline. load_fips_keys
now auto-migrates legacy 32-byte files to bech32 the first time it
reads them, so OTA updates from v1.5.0-alpha self-heal without user
action.
* server.rs: post-onboarding auto-activate task runs on every
archipelago startup. If fips_key exists it ensures /etc/fips/fips.yaml
is schema-current and starts archipelago-fips.service. Pre-onboarding
nodes stay quiet (guarded on fips_key_exists).
* ISO build: un-mask archipelago-fips + archipelago-wg + wg-address —
all use ConditionPathExists on their key files, so systemd silently
skips them pre-onboarding (no MOTD [FAILED]). Only nostr-vpn stays
masked (legacy service, superseded by upstream fips).
* Journald made persistent via /var/log/journal + 500M cap, so
install and first-boot logs survive reboots for diagnosis.
After this, a fresh install + onboarding should bring FIPS up automatically
with no user interaction. The UI "Activate" button can stay as an escape
hatch (the RPC is still there) but is no longer on the critical path.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 16:33:21 -04:00
|
|
|
|
// Post-onboarding auto-activation for archipelago-fips. Runs once
|
|
|
|
|
|
// at startup: if fips_key is on disk, install /etc/fips/fips.yaml
|
|
|
|
|
|
// (schema-refreshed) and start the service. This removes the
|
|
|
|
|
|
// need for a user-facing "Activate" button — the node comes up
|
|
|
|
|
|
// with FIPS running whenever the seed has been onboarded. Also
|
|
|
|
|
|
// self-heals legacy raw-byte fips.key files (load_fips_keys
|
|
|
|
|
|
// rewrites them as bech32 nsec the first time they're read).
|
|
|
|
|
|
// Pre-onboarding nodes: ConditionPathExists on the service unit
|
|
|
|
|
|
// + the `fips_key_exists` guard here keep this quiet.
|
|
|
|
|
|
{
|
|
|
|
|
|
let data_dir = config.data_dir.clone();
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
let identity_dir = data_dir.join("identity");
|
|
|
|
|
|
if !crate::identity::fips_key_exists(&identity_dir) {
|
|
|
|
|
|
tracing::debug!("FIPS auto-activate skipped: fips_key not on disk");
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
// Trigger the migration path in load_fips_keys so old raw-byte
|
|
|
|
|
|
// key files are rewritten as bech32 before fips.yaml install.
|
|
|
|
|
|
if let Err(e) = crate::identity::load_fips_keys(&identity_dir).await {
|
|
|
|
|
|
tracing::warn!("FIPS key load/migrate failed: {}", e);
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
2026-04-21 09:25:53 -04:00
|
|
|
|
// Check if the installed fips.yaml matches what we'd
|
|
|
|
|
|
// render now. If not, we need to restart the daemon after
|
|
|
|
|
|
// reinstalling so it picks up schema changes (e.g. the
|
|
|
|
|
|
// v1.7.25 re-addition of the TCP transport). Without this,
|
|
|
|
|
|
// OTA'd nodes would be stuck on the old UDP-only config
|
|
|
|
|
|
// until someone manually clicked Reconnect.
|
|
|
|
|
|
let expected = crate::fips::config::render_config_yaml();
|
2026-04-28 15:00:58 -04:00
|
|
|
|
let installed = tokio::fs::read_to_string("/etc/fips/fips.yaml").await.ok();
|
2026-04-21 09:25:53 -04:00
|
|
|
|
let config_changed = installed.as_deref() != Some(expected.as_str());
|
|
|
|
|
|
|
fix(fips,iso): bulletproof FIPS from install — no Activate button needed
Problems addressed (all observed on .198):
* fips_key was written as raw 32 bytes; upstream fips daemon reads it
with read_to_string() and bailed with "stream did not contain valid
UTF-8", crashlooping indefinitely.
* Activate button racy: user had to hit it, and it would keep failing
silently because the daemon couldn't parse its own config.
* FIPS schema drift (already fixed in 7d8a5864) put the config write
path behind the same broken "Activate" flow, so the fix alone
didn't help existing nodes.
* Journal was on tmpfs — every reboot wiped install/onboarding history,
making post-hoc debugging impossible.
Changes:
* identity.rs: write fips_key as bech32 nsec + newline. load_fips_keys
now auto-migrates legacy 32-byte files to bech32 the first time it
reads them, so OTA updates from v1.5.0-alpha self-heal without user
action.
* server.rs: post-onboarding auto-activate task runs on every
archipelago startup. If fips_key exists it ensures /etc/fips/fips.yaml
is schema-current and starts archipelago-fips.service. Pre-onboarding
nodes stay quiet (guarded on fips_key_exists).
* ISO build: un-mask archipelago-fips + archipelago-wg + wg-address —
all use ConditionPathExists on their key files, so systemd silently
skips them pre-onboarding (no MOTD [FAILED]). Only nostr-vpn stays
masked (legacy service, superseded by upstream fips).
* Journald made persistent via /var/log/journal + 500M cap, so
install and first-boot logs survive reboots for diagnosis.
After this, a fresh install + onboarding should bring FIPS up automatically
with no user interaction. The UI "Activate" button can stay as an escape
hatch (the RPC is still there) but is no longer on the critical path.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 16:33:21 -04:00
|
|
|
|
if let Err(e) = crate::fips::config::install(&identity_dir).await {
|
|
|
|
|
|
tracing::warn!("FIPS config install failed on startup: {}", e);
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
2026-04-21 09:25:53 -04:00
|
|
|
|
if config_changed {
|
|
|
|
|
|
tracing::info!(
|
|
|
|
|
|
"FIPS config schema changed on disk — restarting daemon to pick up new transports"
|
|
|
|
|
|
);
|
|
|
|
|
|
// Restart whichever unit is actually supervising
|
|
|
|
|
|
// the daemon (archipelago-fips vs upstream fips).
|
|
|
|
|
|
let unit = crate::fips::service::active_unit().await;
|
|
|
|
|
|
if let Err(e) = crate::fips::service::restart(unit).await {
|
|
|
|
|
|
tracing::warn!(
|
|
|
|
|
|
"FIPS restart after config migration failed on {}: {} — user can retry via fips.reconnect",
|
|
|
|
|
|
unit,
|
|
|
|
|
|
e
|
|
|
|
|
|
);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
fix(fips,iso): bulletproof FIPS from install — no Activate button needed
Problems addressed (all observed on .198):
* fips_key was written as raw 32 bytes; upstream fips daemon reads it
with read_to_string() and bailed with "stream did not contain valid
UTF-8", crashlooping indefinitely.
* Activate button racy: user had to hit it, and it would keep failing
silently because the daemon couldn't parse its own config.
* FIPS schema drift (already fixed in 7d8a5864) put the config write
path behind the same broken "Activate" flow, so the fix alone
didn't help existing nodes.
* Journal was on tmpfs — every reboot wiped install/onboarding history,
making post-hoc debugging impossible.
Changes:
* identity.rs: write fips_key as bech32 nsec + newline. load_fips_keys
now auto-migrates legacy 32-byte files to bech32 the first time it
reads them, so OTA updates from v1.5.0-alpha self-heal without user
action.
* server.rs: post-onboarding auto-activate task runs on every
archipelago startup. If fips_key exists it ensures /etc/fips/fips.yaml
is schema-current and starts archipelago-fips.service. Pre-onboarding
nodes stay quiet (guarded on fips_key_exists).
* ISO build: un-mask archipelago-fips + archipelago-wg + wg-address —
all use ConditionPathExists on their key files, so systemd silently
skips them pre-onboarding (no MOTD [FAILED]). Only nostr-vpn stays
masked (legacy service, superseded by upstream fips).
* Journald made persistent via /var/log/journal + 500M cap, so
install and first-boot logs survive reboots for diagnosis.
After this, a fresh install + onboarding should bring FIPS up automatically
with no user interaction. The UI "Activate" button can stay as an escape
hatch (the RPC is still there) but is no longer on the critical path.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-19 16:33:21 -04:00
|
|
|
|
if let Err(e) = crate::fips::service::activate(crate::fips::SERVICE_UNIT).await {
|
|
|
|
|
|
tracing::warn!(
|
|
|
|
|
|
"archipelago-fips activate failed on startup: {} — user can retry via fips.install RPC",
|
|
|
|
|
|
e
|
|
|
|
|
|
);
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
tracing::info!("archipelago-fips auto-activated on startup");
|
|
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-24 22:59:20 +00:00
|
|
|
|
Ok(Self {
|
Update archipelago: API, auth, container, parmanode, performance, security
- API handler, RPC, and server updates
- Auth and coding rules
- Container data manager, dev orchestrator, health monitor, podman client
- Parmanode script runner
- Performance resource manager
- Security container policies and secrets manager
- Add build scripts and documentation
2026-01-27 22:27:17 +00:00
|
|
|
|
_config: config,
|
2026-02-17 15:03:34 +00:00
|
|
|
|
_identity: identity,
|
2026-01-24 22:59:20 +00:00
|
|
|
|
api_handler,
|
2026-02-01 05:42:05 +00:00
|
|
|
|
_state_manager: state_manager,
|
2026-01-24 22:59:20 +00:00
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-11 11:11:02 +00:00
|
|
|
|
/// Serve with a graceful shutdown signal.
|
2026-04-19 01:12:39 -04:00
|
|
|
|
///
|
|
|
|
|
|
/// `main_addr` is the primary listener (historically `127.0.0.1:5678`).
|
2026-04-19 04:21:20 -04:00
|
|
|
|
/// The main listener always comes up on `main_addr`. The FIPS peer
|
|
|
|
|
|
/// listener (path-filtered, bound to `fips0`'s ULA) is managed by a
|
|
|
|
|
|
/// late-binding task that polls every 30s: if fips0 isn't up at
|
|
|
|
|
|
/// startup (pre-onboarding install, legacy node pre-fips.install),
|
|
|
|
|
|
/// it keeps trying until the interface appears — no archipelago
|
|
|
|
|
|
/// restart required after the user activates FIPS.
|
2026-04-19 01:12:39 -04:00
|
|
|
|
///
|
|
|
|
|
|
/// When `shutdown` completes, both listeners stop accepting and drain
|
|
|
|
|
|
/// in-flight requests (bounded by `DRAIN_TIMEOUT`).
|
2026-03-11 11:11:02 +00:00
|
|
|
|
pub async fn serve_with_shutdown(
|
|
|
|
|
|
&self,
|
2026-04-19 01:12:39 -04:00
|
|
|
|
main_addr: SocketAddr,
|
2026-03-11 11:11:02 +00:00
|
|
|
|
shutdown: impl std::future::Future<Output = ()>,
|
|
|
|
|
|
) -> Result<()> {
|
|
|
|
|
|
let active_connections = Arc::new(tokio::sync::Semaphore::new(1024));
|
2026-04-19 01:12:39 -04:00
|
|
|
|
let (tx, rx_main) = tokio::sync::watch::channel(false);
|
|
|
|
|
|
|
|
|
|
|
|
let main_task = tokio::spawn(accept_loop(
|
|
|
|
|
|
self.api_handler.clone(),
|
|
|
|
|
|
TcpListener::bind(main_addr).await?,
|
|
|
|
|
|
active_connections.clone(),
|
|
|
|
|
|
false, // main listener: no path filter
|
|
|
|
|
|
rx_main,
|
|
|
|
|
|
main_addr,
|
|
|
|
|
|
));
|
|
|
|
|
|
|
2026-04-19 04:21:20 -04:00
|
|
|
|
// Peer listener: late-binding so we don't need an archipelago
|
|
|
|
|
|
// restart when fips0 comes up after onboarding.
|
|
|
|
|
|
let peer_task = tokio::spawn(peer_late_bind_loop(
|
|
|
|
|
|
self.api_handler.clone(),
|
|
|
|
|
|
active_connections.clone(),
|
|
|
|
|
|
tx.subscribe(),
|
|
|
|
|
|
));
|
2026-03-11 11:11:02 +00:00
|
|
|
|
|
2026-04-19 01:12:39 -04:00
|
|
|
|
shutdown.await;
|
|
|
|
|
|
info!("Shutdown signal received, draining connections...");
|
|
|
|
|
|
let _ = tx.send(true);
|
|
|
|
|
|
|
|
|
|
|
|
// Wait up to 5s for in-flight requests.
|
|
|
|
|
|
let drain_start = std::time::Instant::now();
|
|
|
|
|
|
let drain_timeout = std::time::Duration::from_secs(5);
|
|
|
|
|
|
while active_connections.available_permits() < 1024 {
|
|
|
|
|
|
if drain_start.elapsed() > drain_timeout {
|
|
|
|
|
|
warn!("Drain timeout reached, forcing shutdown");
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
|
|
|
|
|
}
|
2026-01-24 22:59:20 +00:00
|
|
|
|
|
2026-04-19 01:12:39 -04:00
|
|
|
|
let _ = main_task.await;
|
2026-04-19 04:21:20 -04:00
|
|
|
|
let _ = peer_task.await;
|
2026-03-11 11:11:02 +00:00
|
|
|
|
|
2026-04-19 01:12:39 -04:00
|
|
|
|
info!("Shutdown complete");
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-19 04:21:20 -04:00
|
|
|
|
/// Poll every 30s for `fips0`'s ULA; when it appears, bind the peer
|
|
|
|
|
|
/// listener and run the normal accept loop. If the bind fails (port
|
|
|
|
|
|
/// already taken, permissions), log and keep retrying. Returns on
|
|
|
|
|
|
/// shutdown. First tick fires immediately so the hot path for
|
|
|
|
|
|
/// already-up fips0 is still zero-cost.
|
|
|
|
|
|
async fn peer_late_bind_loop(
|
|
|
|
|
|
handler: Arc<ApiHandler>,
|
|
|
|
|
|
active_connections: Arc<tokio::sync::Semaphore>,
|
|
|
|
|
|
mut shutdown_rx: tokio::sync::watch::Receiver<bool>,
|
|
|
|
|
|
) {
|
|
|
|
|
|
let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
|
|
|
|
|
|
interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
|
|
|
|
|
|
loop {
|
|
|
|
|
|
tokio::select! {
|
|
|
|
|
|
_ = interval.tick() => {
|
|
|
|
|
|
let Some(ip) = crate::fips::iface::fips0_ula() else { continue };
|
|
|
|
|
|
let addr = SocketAddr::new(
|
|
|
|
|
|
std::net::IpAddr::V6(ip),
|
|
|
|
|
|
crate::fips::dial::PEER_PORT,
|
|
|
|
|
|
);
|
|
|
|
|
|
let listener = match TcpListener::bind(addr).await {
|
|
|
|
|
|
Ok(l) => l,
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
warn!("FIPS peer listener bind {} failed: {} — retrying in 30s", addr, e);
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
info!("FIPS peer listener bound {}", addr);
|
|
|
|
|
|
// Once bound, serve until shutdown fires. accept_loop
|
|
|
|
|
|
// returns on shutdown, which also ends this outer loop.
|
|
|
|
|
|
accept_loop(
|
|
|
|
|
|
handler,
|
|
|
|
|
|
listener,
|
|
|
|
|
|
active_connections,
|
|
|
|
|
|
true, // peer listener: apply path filter
|
|
|
|
|
|
shutdown_rx,
|
|
|
|
|
|
addr,
|
|
|
|
|
|
)
|
|
|
|
|
|
.await;
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
_ = shutdown_rx.changed() => {
|
|
|
|
|
|
if *shutdown_rx.borrow() { return; }
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-19 01:12:39 -04:00
|
|
|
|
/// Whitelist of HTTP paths reachable via the peer-facing (FIPS) listener.
|
|
|
|
|
|
/// Every entry is an endpoint already protected by cryptographic auth
|
|
|
|
|
|
/// (ed25519 signature verification inside the handler, federation DID
|
|
|
|
|
|
/// headers checked by the content server, or JSON-RPC methods whose
|
|
|
|
|
|
/// handlers verify per-message signatures).
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Anything not on this list returns 404 on the peer listener.
|
|
|
|
|
|
pub fn is_peer_allowed_path(path: &str) -> bool {
|
|
|
|
|
|
// Exact matches
|
|
|
|
|
|
matches!(
|
|
|
|
|
|
path,
|
|
|
|
|
|
"/health"
|
|
|
|
|
|
| "/rpc/v1"
|
|
|
|
|
|
| "/archipelago/node-message"
|
|
|
|
|
|
| "/archipelago/mesh-typed"
|
|
|
|
|
|
| "/dwn"
|
|
|
|
|
|
| "/transport/inbox"
|
fix(fips,federation,ui): mesh content browse, removed-node tombstones, modal sizing
FIPS peer content browse over the mesh was failing with "Peer returned
error: 404 Not Found" and never falling back to Tor. `is_peer_allowed_path`
only allowed `/content/<id>` (item fetches) — the catalog endpoint is
exactly `/content` (no trailing slash), so it 404'd over the FIPS peer
listener. A FIPS 404 was also treated as a successful response, so the dial
never retried Tor. Fixes: allow `/content` over the mesh; add
`fips_should_fall_back()` so a FIPS 404/5xx in Auto mode falls back to Tor
(handles version-skew peers reaching a different route). Also correct the
reconnect hint text — the public anchor is TCP/8443, not UDP/8668.
Federation: deleted nodes reappeared because transitive discovery
(`merge` of a peer's advertised trusted peers) re-added any unknown DID.
Add a tombstone store (`removed-nodes.json`): remove_node tombstones the
DID, transitive merge skips tombstoned DIDs, and a remote-triggered
peer-joined is ignored for a removed DID. Explicit local re-add (add_node)
clears the tombstone.
UI: the app credentials modal panel stretched edge-to-edge (height:100%,
max-width:none, items-stretch overlay). Constrain it to a centered card
(max-width 34rem, rounded, dimmed full-screen backdrop) matching the
AppIconGrid / wallet-receive modal.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 08:09:26 -04:00
|
|
|
|
// Content *catalog* — the peer-browse entry point. This is the
|
|
|
|
|
|
// exact path `/content` (no trailing slash); the prefix match
|
|
|
|
|
|
// below only covers `/content/<id>` item fetches, so without
|
|
|
|
|
|
// this the catalog 404s over the mesh and `content.browse-peer`
|
|
|
|
|
|
// fails with "Peer returned error: 404 Not Found" (and never
|
|
|
|
|
|
// falls back to Tor, since a 404 is a successful HTTP exchange).
|
|
|
|
|
|
| "/content"
|
2026-04-19 01:12:39 -04:00
|
|
|
|
)
|
|
|
|
|
|
// Prefix-matched content endpoints (peer file browse + fetch)
|
|
|
|
|
|
|| path.starts_with("/content/")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async fn accept_loop(
|
|
|
|
|
|
handler: Arc<ApiHandler>,
|
|
|
|
|
|
listener: TcpListener,
|
|
|
|
|
|
active_connections: Arc<tokio::sync::Semaphore>,
|
|
|
|
|
|
peer_only: bool,
|
|
|
|
|
|
mut shutdown_rx: tokio::sync::watch::Receiver<bool>,
|
|
|
|
|
|
local_addr: SocketAddr,
|
|
|
|
|
|
) {
|
|
|
|
|
|
loop {
|
|
|
|
|
|
tokio::select! {
|
|
|
|
|
|
result = listener.accept() => {
|
|
|
|
|
|
let (stream, peer_addr) = match result {
|
|
|
|
|
|
Ok(c) => c,
|
|
|
|
|
|
Err(e) => {
|
|
|
|
|
|
error!("{} accept error: {}", local_addr, e);
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
let handler = handler.clone();
|
|
|
|
|
|
let permit = active_connections.clone().acquire_owned().await;
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
|
|
let _permit = permit;
|
|
|
|
|
|
let service = service_fn(move |req: hyper::Request<hyper::Body>| {
|
|
|
|
|
|
let handler = handler.clone();
|
|
|
|
|
|
async move {
|
|
|
|
|
|
if peer_only && !is_peer_allowed_path(req.uri().path()) {
|
|
|
|
|
|
let resp = hyper::Response::builder()
|
|
|
|
|
|
.status(hyper::StatusCode::NOT_FOUND)
|
|
|
|
|
|
.body(hyper::Body::empty())
|
|
|
|
|
|
.expect("static response builds");
|
|
|
|
|
|
return Ok::<_, std::io::Error>(resp);
|
|
|
|
|
|
}
|
|
|
|
|
|
handler
|
|
|
|
|
|
.handle_request(req)
|
|
|
|
|
|
.await
|
|
|
|
|
|
.map_err(|e| std::io::Error::other(format!("{}", e)))
|
2026-03-11 11:11:02 +00:00
|
|
|
|
}
|
|
|
|
|
|
});
|
2026-04-19 01:12:39 -04:00
|
|
|
|
if let Err(e) = Http::new()
|
|
|
|
|
|
.http1_keep_alive(false)
|
|
|
|
|
|
.serve_connection(stream, service)
|
|
|
|
|
|
.with_upgrades()
|
|
|
|
|
|
.await
|
|
|
|
|
|
{
|
|
|
|
|
|
error!("Error serving connection from {}: {}", peer_addr, e);
|
2026-01-24 22:59:20 +00:00
|
|
|
|
}
|
2026-04-19 01:12:39 -04:00
|
|
|
|
});
|
|
|
|
|
|
}
|
|
|
|
|
|
_ = shutdown_rx.changed() => {
|
|
|
|
|
|
if *shutdown_rx.borrow() {
|
|
|
|
|
|
return;
|
2026-01-24 22:59:20 +00:00
|
|
|
|
}
|
2026-03-11 11:11:02 +00:00
|
|
|
|
}
|
2026-01-24 22:59:20 +00:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-01-27 23:21:26 +00:00
|
|
|
|
|
|
|
|
|
|
async fn create_docker_scanner(config: &Config) -> Result<DockerPackageScanner> {
|
|
|
|
|
|
let user = std::env::var("USER").unwrap_or_else(|_| "archipelago".to_string());
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
|
|
|
|
|
|
let runtime: Arc<dyn archipelago_container::ContainerRuntime> = match &config.container_runtime
|
|
|
|
|
|
{
|
2026-01-27 23:21:26 +00:00
|
|
|
|
ContainerRuntime::Podman => {
|
|
|
|
|
|
Arc::new(archipelago_container::PodmanRuntime::new(user.clone()))
|
|
|
|
|
|
}
|
|
|
|
|
|
ContainerRuntime::Docker => {
|
|
|
|
|
|
Arc::new(archipelago_container::DockerRuntime::new(user.clone()))
|
|
|
|
|
|
}
|
|
|
|
|
|
ContainerRuntime::Auto => {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
Arc::new(archipelago_container::AutoRuntime::new(user.clone()).await?)
|
2026-01-27 23:21:26 +00:00
|
|
|
|
}
|
|
|
|
|
|
};
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
|
2026-01-27 23:21:26 +00:00
|
|
|
|
Ok(DockerPackageScanner::new(runtime))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-02-17 15:03:34 +00:00
|
|
|
|
async fn refresh_tor_address(state: &StateManager, identity: &NodeIdentity) -> Result<()> {
|
2026-03-21 01:21:08 +00:00
|
|
|
|
let tor_addr = docker_packages::read_tor_address("archipelago").await;
|
2026-02-17 15:03:34 +00:00
|
|
|
|
let (current_data, _) = state.get_snapshot().await;
|
|
|
|
|
|
if tor_addr != current_data.server_info.tor_address {
|
|
|
|
|
|
let mut data = current_data;
|
|
|
|
|
|
data.server_info.tor_address = tor_addr.clone();
|
|
|
|
|
|
data.server_info.node_address = tor_addr.as_ref().map(|t| identity.node_address(t));
|
|
|
|
|
|
state.update_data(data).await;
|
|
|
|
|
|
if let Some(ref addr) = tor_addr {
|
|
|
|
|
|
info!("🔒 Tor address updated: {}", addr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-04-02 01:28:11 +01:00
|
|
|
|
/// Number of consecutive absent scans before removing a container from state.
|
|
|
|
|
|
/// 3 scans × 30s = 90 seconds of absence before removal.
|
|
|
|
|
|
const CONTAINER_ABSENCE_THRESHOLD: u32 = 3;
|
|
|
|
|
|
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
/// Maximum time a package entry may remain stuck in a transitional state
|
|
|
|
|
|
/// before the scan loop overrides it with podman's live state.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// Rationale: the longest single-container stop timeout is bitcoin-core at
|
|
|
|
|
|
/// 600s. 2× that gives the spawned task ample margin before we assume it
|
|
|
|
|
|
/// died (panic, OOM, process restart mid-stop) and fall back to the
|
|
|
|
|
|
/// scanner's authoritative view. Applies to all transitional variants.
|
2026-05-13 15:09:22 -04:00
|
|
|
|
const TRANSITIONAL_STUCK_TIMEOUT: Duration = Duration::from_secs(120);
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
|
2026-05-17 22:13:21 -04:00
|
|
|
|
/// Multi-container installs can legitimately spend several minutes before the
|
|
|
|
|
|
/// primary user-facing container exists. BTCPay, for example, pulls/starts
|
|
|
|
|
|
/// Postgres and NBXplorer before `btcpay-server`; do not erase its installing
|
|
|
|
|
|
/// card just because the primary container is absent during that setup window.
|
|
|
|
|
|
const INSTALLING_STUCK_TIMEOUT: Duration = Duration::from_secs(20 * 60);
|
|
|
|
|
|
|
|
|
|
|
|
fn transitional_stuck_timeout(state: &crate::data_model::PackageState) -> Duration {
|
2026-06-11 00:24:32 -04:00
|
|
|
|
use crate::data_model::PackageState::*;
|
|
|
|
|
|
match state {
|
|
|
|
|
|
Installing | Starting | Restarting => INSTALLING_STUCK_TIMEOUT,
|
|
|
|
|
|
_ => TRANSITIONAL_STUCK_TIMEOUT,
|
2026-05-17 22:13:21 -04:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
/// Returns true if `state` is one of the transitional variants that a
|
|
|
|
|
|
/// `spawn_transitional`-style background task owns. While such a state is
|
|
|
|
|
|
/// set, the package scanner must not overwrite it with whatever podman
|
|
|
|
|
|
/// reports (see `merge_preserving_transitional`).
|
|
|
|
|
|
fn is_transitional(state: &crate::data_model::PackageState) -> bool {
|
|
|
|
|
|
use crate::data_model::PackageState::*;
|
|
|
|
|
|
matches!(
|
|
|
|
|
|
state,
|
|
|
|
|
|
Installing
|
|
|
|
|
|
| Stopping
|
|
|
|
|
|
| Starting
|
|
|
|
|
|
| Restarting
|
|
|
|
|
|
| Updating
|
|
|
|
|
|
| Removing
|
|
|
|
|
|
| CreatingBackup
|
|
|
|
|
|
| RestoringBackup
|
|
|
|
|
|
| BackingUp
|
|
|
|
|
|
)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-11 00:24:32 -04:00
|
|
|
|
fn absent_transitional_replacement(
|
|
|
|
|
|
state: &crate::data_model::PackageState,
|
|
|
|
|
|
) -> Option<crate::data_model::PackageState> {
|
|
|
|
|
|
match state {
|
|
|
|
|
|
// A stop operation is complete once the container record disappears.
|
|
|
|
|
|
// Do not leave the app card wedged in "Stopping..." just because the
|
|
|
|
|
|
// background task died or the backend restarted before it wrote back.
|
|
|
|
|
|
crate::data_model::PackageState::Stopping => Some(crate::data_model::PackageState::Stopped),
|
|
|
|
|
|
_ => None,
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
/// Merge a fresh scan entry `fresh` into `existing` while preserving
|
|
|
|
|
|
/// `existing.state` (which is transitional — the RPC spawn task owns it).
|
|
|
|
|
|
/// Non-state observability fields are taken from `fresh` so the UI still
|
|
|
|
|
|
/// sees live health / exit_code / lan_address readings during a transition.
|
|
|
|
|
|
fn merge_preserving_transitional(
|
|
|
|
|
|
existing: &crate::data_model::PackageDataEntry,
|
|
|
|
|
|
fresh: &crate::data_model::PackageDataEntry,
|
2026-06-11 00:24:32 -04:00
|
|
|
|
user_stop_requested: bool,
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
) -> crate::data_model::PackageDataEntry {
|
2026-05-05 11:29:18 -04:00
|
|
|
|
let state = match (&existing.state, &fresh.state) {
|
2026-06-11 00:24:32 -04:00
|
|
|
|
// A user-initiated stop must keep showing Stopping while podman still
|
|
|
|
|
|
// reports Running. Repair/restart transitions do not have a user-stop
|
|
|
|
|
|
// marker, so a fresh Running scan means the app recovered.
|
|
|
|
|
|
(crate::data_model::PackageState::Stopping, crate::data_model::PackageState::Running)
|
|
|
|
|
|
if !user_stop_requested =>
|
|
|
|
|
|
{
|
|
|
|
|
|
fresh.state.clone()
|
|
|
|
|
|
}
|
2026-05-05 11:29:18 -04:00
|
|
|
|
// Removing with a live running container is stale: uninstall either
|
|
|
|
|
|
// failed or Archipelago restarted before the spawned task could revert
|
|
|
|
|
|
// state. Let the scanner recover the UI immediately instead of
|
|
|
|
|
|
// keeping the app wedged in Removing for 20 minutes.
|
|
|
|
|
|
(crate::data_model::PackageState::Removing, crate::data_model::PackageState::Running) => {
|
|
|
|
|
|
fresh.state.clone()
|
|
|
|
|
|
}
|
|
|
|
|
|
_ => existing.state.clone(),
|
|
|
|
|
|
};
|
|
|
|
|
|
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
crate::data_model::PackageDataEntry {
|
2026-05-05 11:29:18 -04:00
|
|
|
|
state,
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
// install_progress and uninstall_stage are also owned by the
|
|
|
|
|
|
// initiating op (same reason as state) — keep them.
|
|
|
|
|
|
install_progress: existing.install_progress.clone(),
|
|
|
|
|
|
uninstall_stage: existing.uninstall_stage.clone(),
|
|
|
|
|
|
// Everything else comes from the fresh scan.
|
|
|
|
|
|
health: fresh.health.clone(),
|
|
|
|
|
|
exit_code: fresh.exit_code,
|
|
|
|
|
|
static_files: fresh.static_files.clone(),
|
|
|
|
|
|
manifest: fresh.manifest.clone(),
|
|
|
|
|
|
installed: fresh.installed.clone(),
|
|
|
|
|
|
available_update: fresh.available_update.clone(),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-11 00:24:32 -04:00
|
|
|
|
fn is_podman_scan_timeout(error: &anyhow::Error) -> bool {
|
|
|
|
|
|
let msg = format!("{:#}", error);
|
|
|
|
|
|
msg.contains("podman ps") && msg.contains("timed out")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-27 23:21:26 +00:00
|
|
|
|
async fn scan_and_update_packages(
|
|
|
|
|
|
scanner: &DockerPackageScanner,
|
|
|
|
|
|
state: &StateManager,
|
2026-02-17 15:03:34 +00:00
|
|
|
|
identity: &NodeIdentity,
|
2026-05-05 11:29:18 -04:00
|
|
|
|
data_dir: &std::path::Path,
|
2026-04-02 01:28:11 +01:00
|
|
|
|
absence_tracker: &mut HashMap<String, u32>,
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
transitional_since: &mut HashMap<String, Instant>,
|
2026-01-27 23:21:26 +00:00
|
|
|
|
) -> Result<()> {
|
2026-05-05 11:29:18 -04:00
|
|
|
|
let mut packages = scanner.scan_containers().await?;
|
|
|
|
|
|
let user_stopped = crate::crash_recovery::load_user_stopped(data_dir).await;
|
|
|
|
|
|
for (id, pkg) in packages.iter_mut() {
|
|
|
|
|
|
if pkg.state == crate::data_model::PackageState::Exited && user_stopped.contains(id) {
|
|
|
|
|
|
pkg.state = crate::data_model::PackageState::Stopped;
|
|
|
|
|
|
pkg.exit_code = None;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-06-11 00:24:32 -04:00
|
|
|
|
normalize_reachable_package_health(&mut packages).await;
|
2026-04-02 01:28:11 +01:00
|
|
|
|
|
2026-02-17 15:03:34 +00:00
|
|
|
|
let (current_data, _) = state.get_snapshot().await;
|
2026-03-21 01:21:08 +00:00
|
|
|
|
let tor_addr = docker_packages::read_tor_address("archipelago").await;
|
2026-02-17 15:03:34 +00:00
|
|
|
|
let tor_changed = tor_addr != current_data.server_info.tor_address;
|
2026-03-18 11:46:38 +00:00
|
|
|
|
let first_scan = !current_data.server_info.status_info.containers_scanned;
|
2026-02-17 15:03:34 +00:00
|
|
|
|
|
2026-04-01 16:18:58 +01:00
|
|
|
|
// Check if update scheduler has found an available update
|
|
|
|
|
|
let update_available = crate::update::load_state(std::path::Path::new("/var/lib/archipelago"))
|
|
|
|
|
|
.await
|
|
|
|
|
|
.map(|s| s.available_update.is_some())
|
|
|
|
|
|
.unwrap_or(false);
|
|
|
|
|
|
let update_changed = update_available != current_data.server_info.status_info.updated;
|
|
|
|
|
|
|
2026-04-02 01:28:11 +01:00
|
|
|
|
// Empty scan result = podman failure or timeout, preserve existing state
|
|
|
|
|
|
if packages.is_empty() && !first_scan {
|
|
|
|
|
|
if tor_changed || update_changed {
|
|
|
|
|
|
let mut data = current_data;
|
|
|
|
|
|
data.server_info.tor_address = tor_addr.clone();
|
|
|
|
|
|
data.server_info.node_address = tor_addr.as_ref().map(|t| identity.node_address(t));
|
|
|
|
|
|
data.server_info.status_info.updated = update_available;
|
|
|
|
|
|
state.update_data(data).await;
|
|
|
|
|
|
}
|
|
|
|
|
|
return Ok(());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Merge scan results with current state instead of full replacement.
|
|
|
|
|
|
// This prevents containers from vanishing when podman intermittently
|
|
|
|
|
|
// returns incomplete results under heavy load.
|
|
|
|
|
|
let mut merged = current_data.package_data.clone();
|
|
|
|
|
|
let mut changed = false;
|
|
|
|
|
|
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
// Update/add containers found in this scan.
|
|
|
|
|
|
//
|
|
|
|
|
|
// Transitional states (Stopping, Starting, Restarting, Installing,
|
|
|
|
|
|
// Updating, Removing, backup variants) are owned by the RPC spawn_task
|
|
|
|
|
|
// that initiated the operation — podman's live state during the op is
|
|
|
|
|
|
// meaningless ("running" during a graceful stop, "exited" during a
|
|
|
|
|
|
// restart, etc.) and must not be written back. See
|
|
|
|
|
|
// `merge_preserving_transitional` for the exact rule.
|
|
|
|
|
|
//
|
|
|
|
|
|
// Escape hatch: if a package has been in a transitional state for
|
|
|
|
|
|
// longer than TRANSITIONAL_STUCK_TIMEOUT we assume the spawned task
|
|
|
|
|
|
// died without cleanup and let the scan override it.
|
|
|
|
|
|
let now = Instant::now();
|
2026-04-02 01:28:11 +01:00
|
|
|
|
for (id, pkg) in &packages {
|
|
|
|
|
|
absence_tracker.remove(id);
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
let existing = merged.get(id);
|
|
|
|
|
|
let overwrite = match existing {
|
|
|
|
|
|
Some(existing_entry) if is_transitional(&existing_entry.state) => {
|
|
|
|
|
|
let entered = *transitional_since.entry(id.clone()).or_insert(now);
|
2026-05-17 22:13:21 -04:00
|
|
|
|
let timeout = transitional_stuck_timeout(&existing_entry.state);
|
|
|
|
|
|
let stuck = now.duration_since(entered) > timeout;
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
if stuck {
|
|
|
|
|
|
warn!(
|
|
|
|
|
|
"Container {} stuck in {:?} for >{}s; overriding with scan state {:?}",
|
|
|
|
|
|
id,
|
|
|
|
|
|
existing_entry.state,
|
2026-05-17 22:13:21 -04:00
|
|
|
|
timeout.as_secs(),
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
pkg.state
|
|
|
|
|
|
);
|
|
|
|
|
|
transitional_since.remove(id);
|
|
|
|
|
|
true
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// Keep existing transitional state, but merge non-state
|
|
|
|
|
|
// observability fields (health, exit_code, lan_address
|
|
|
|
|
|
// via installed) from the fresh scan so the UI still
|
|
|
|
|
|
// sees live readings.
|
2026-06-11 00:24:32 -04:00
|
|
|
|
let merged_entry = merge_preserving_transitional(
|
|
|
|
|
|
existing_entry,
|
|
|
|
|
|
pkg,
|
|
|
|
|
|
user_stopped.contains(id),
|
|
|
|
|
|
);
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
if existing.cloned() != Some(merged_entry.clone()) {
|
|
|
|
|
|
merged.insert(id.clone(), merged_entry);
|
|
|
|
|
|
changed = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
false
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
Some(_) => {
|
|
|
|
|
|
// Not transitional: the side-table may hold a stale entry
|
|
|
|
|
|
// from a previous transition on this id; drop it.
|
|
|
|
|
|
transitional_since.remove(id);
|
|
|
|
|
|
existing != Some(pkg)
|
|
|
|
|
|
}
|
|
|
|
|
|
None => {
|
|
|
|
|
|
transitional_since.remove(id);
|
|
|
|
|
|
true
|
|
|
|
|
|
}
|
|
|
|
|
|
};
|
|
|
|
|
|
if overwrite && merged.get(id) != Some(pkg) {
|
2026-04-02 01:28:11 +01:00
|
|
|
|
merged.insert(id.clone(), pkg.clone());
|
|
|
|
|
|
changed = true;
|
2026-02-01 13:24:03 +00:00
|
|
|
|
}
|
2026-04-02 01:28:11 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Track containers in state but missing from this scan.
|
|
|
|
|
|
// Only remove after CONTAINER_ABSENCE_THRESHOLD consecutive absent scans.
|
|
|
|
|
|
let current_ids: Vec<String> = merged.keys().cloned().collect();
|
|
|
|
|
|
for id in current_ids {
|
|
|
|
|
|
if !packages.contains_key(&id) {
|
chore: release v1.7.45-alpha
Resilience-validated release. Three full sweeps of the new resilience
harness against .228 confirm no shipstoppers.
Big user-visible:
- Bitcoin RPC auth durably correct via host-rendered nginx.conf bind-mount,
replaces fragile post-start exec that failed under restricted-cap rootless
podman ("crun: write cgroup.procs: Permission denied")
- Multi-container stack installs (indeedhub, immich, btcpay, mempool) now
emit phase events at every boundary so the progress bar advances
- Apps no longer vanish from the dashboard mid-install (absent-scanner skips
packages in transitional states)
- Indeedhub fresh installs work end-to-end (was 8500+ restart loop): five
missing env vars (DATABASE_PORT, QUEUE_HOST, QUEUE_PORT,
S3_PRIVATE_BUCKET_NAME, AES_MASTER_SECRET) added to install code
- Tailscale install fixed: --entrypoint string was being passed as a single
shell-line arg; switched to custom_args array
- Catalog cleaned of broken entries (dwn, endurain, ollama removed; nextcloud
restored on docker.io)
- Bitcoin Core update path uses correct image (was looking for nonexistent
lfg2025/bitcoin:28.4)
- ISO installs now allocate swap on the encrypted data partition
Infra:
- New resilience harness (scripts/resilience/) — black-box state-machine
tester, every app × every transition. Run before each release.
Sweep #3 final: PASS 107 / FAIL 12 / SKIP 14. The 12 fails are 1 cosmetic
(homeassistant trusted_hosts), 8 harness/timing false-positives, and 3
non-shipstopper tracked items. Down from 23 in baseline sweep #1.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:31:45 -04:00
|
|
|
|
// Don't evict packages mid-transition: Installing/Updating/Removing
|
|
|
|
|
|
// legitimately have no live container yet (image still pulling) or
|
|
|
|
|
|
// briefly (during recreate). The absence-eviction here was racing
|
|
|
|
|
|
// installs and removing apps from the UI 14s in. The transitional
|
|
|
|
|
|
// owner (spawn_task) is responsible for clearing state, not us.
|
|
|
|
|
|
if let Some(entry) = merged.get(&id) {
|
|
|
|
|
|
if is_transitional(&entry.state) {
|
2026-06-11 00:24:32 -04:00
|
|
|
|
if let Some(replacement) = absent_transitional_replacement(&entry.state) {
|
|
|
|
|
|
let mut updated = entry.clone();
|
|
|
|
|
|
updated.state = replacement;
|
|
|
|
|
|
updated.health = None;
|
|
|
|
|
|
updated.exit_code = None;
|
|
|
|
|
|
updated.install_progress = None;
|
|
|
|
|
|
updated.uninstall_stage = None;
|
|
|
|
|
|
merged.insert(id.clone(), updated);
|
|
|
|
|
|
transitional_since.remove(&id);
|
|
|
|
|
|
absence_tracker.remove(&id);
|
|
|
|
|
|
changed = true;
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
2026-05-05 11:29:18 -04:00
|
|
|
|
let entered = *transitional_since.entry(id.clone()).or_insert(now);
|
2026-05-17 22:13:21 -04:00
|
|
|
|
let timeout = transitional_stuck_timeout(&entry.state);
|
|
|
|
|
|
if now.duration_since(entered) > timeout {
|
2026-05-05 11:29:18 -04:00
|
|
|
|
warn!(
|
|
|
|
|
|
"Container {} stuck in {:?} and absent for >{}s; removing stale transitional state",
|
|
|
|
|
|
id,
|
|
|
|
|
|
entry.state,
|
2026-05-17 22:13:21 -04:00
|
|
|
|
timeout.as_secs()
|
2026-05-05 11:29:18 -04:00
|
|
|
|
);
|
|
|
|
|
|
merged.remove(&id);
|
|
|
|
|
|
transitional_since.remove(&id);
|
|
|
|
|
|
changed = true;
|
|
|
|
|
|
}
|
chore: release v1.7.45-alpha
Resilience-validated release. Three full sweeps of the new resilience
harness against .228 confirm no shipstoppers.
Big user-visible:
- Bitcoin RPC auth durably correct via host-rendered nginx.conf bind-mount,
replaces fragile post-start exec that failed under restricted-cap rootless
podman ("crun: write cgroup.procs: Permission denied")
- Multi-container stack installs (indeedhub, immich, btcpay, mempool) now
emit phase events at every boundary so the progress bar advances
- Apps no longer vanish from the dashboard mid-install (absent-scanner skips
packages in transitional states)
- Indeedhub fresh installs work end-to-end (was 8500+ restart loop): five
missing env vars (DATABASE_PORT, QUEUE_HOST, QUEUE_PORT,
S3_PRIVATE_BUCKET_NAME, AES_MASTER_SECRET) added to install code
- Tailscale install fixed: --entrypoint string was being passed as a single
shell-line arg; switched to custom_args array
- Catalog cleaned of broken entries (dwn, endurain, ollama removed; nextcloud
restored on docker.io)
- Bitcoin Core update path uses correct image (was looking for nonexistent
lfg2025/bitcoin:28.4)
- ISO installs now allocate swap on the encrypted data partition
Infra:
- New resilience harness (scripts/resilience/) — black-box state-machine
tester, every app × every transition. Run before each release.
Sweep #3 final: PASS 107 / FAIL 12 / SKIP 14. The 12 fails are 1 cosmetic
(homeassistant trusted_hosts), 8 harness/timing false-positives, and 3
non-shipstopper tracked items. Down from 23 in baseline sweep #1.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:31:45 -04:00
|
|
|
|
absence_tracker.remove(&id);
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
2026-05-13 15:09:22 -04:00
|
|
|
|
// Quadlet-generated units run containers with `--rm`, so a
|
|
|
|
|
|
// clean user stop removes the Podman record. Keep the package
|
|
|
|
|
|
// visible as Stopped while the user-stopped marker exists so
|
|
|
|
|
|
// package.start can recreate it via systemd/Quadlet.
|
|
|
|
|
|
if entry.state == crate::data_model::PackageState::Stopped
|
|
|
|
|
|
&& user_stopped.contains(&id)
|
|
|
|
|
|
{
|
|
|
|
|
|
absence_tracker.remove(&id);
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
chore: release v1.7.45-alpha
Resilience-validated release. Three full sweeps of the new resilience
harness against .228 confirm no shipstoppers.
Big user-visible:
- Bitcoin RPC auth durably correct via host-rendered nginx.conf bind-mount,
replaces fragile post-start exec that failed under restricted-cap rootless
podman ("crun: write cgroup.procs: Permission denied")
- Multi-container stack installs (indeedhub, immich, btcpay, mempool) now
emit phase events at every boundary so the progress bar advances
- Apps no longer vanish from the dashboard mid-install (absent-scanner skips
packages in transitional states)
- Indeedhub fresh installs work end-to-end (was 8500+ restart loop): five
missing env vars (DATABASE_PORT, QUEUE_HOST, QUEUE_PORT,
S3_PRIVATE_BUCKET_NAME, AES_MASTER_SECRET) added to install code
- Tailscale install fixed: --entrypoint string was being passed as a single
shell-line arg; switched to custom_args array
- Catalog cleaned of broken entries (dwn, endurain, ollama removed; nextcloud
restored on docker.io)
- Bitcoin Core update path uses correct image (was looking for nonexistent
lfg2025/bitcoin:28.4)
- ISO installs now allocate swap on the encrypted data partition
Infra:
- New resilience harness (scripts/resilience/) — black-box state-machine
tester, every app × every transition. Run before each release.
Sweep #3 final: PASS 107 / FAIL 12 / SKIP 14. The 12 fails are 1 cosmetic
(homeassistant trusted_hosts), 8 harness/timing false-positives, and 3
non-shipstopper tracked items. Down from 23 in baseline sweep #1.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:31:45 -04:00
|
|
|
|
}
|
2026-04-02 01:28:11 +01:00
|
|
|
|
let count = absence_tracker.entry(id.clone()).or_insert(0);
|
|
|
|
|
|
*count += 1;
|
|
|
|
|
|
if *count >= CONTAINER_ABSENCE_THRESHOLD {
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
debug!(
|
|
|
|
|
|
"Removing {} from state after {} consecutive absent scans",
|
|
|
|
|
|
id, count
|
|
|
|
|
|
);
|
2026-04-02 01:28:11 +01:00
|
|
|
|
merged.remove(&id);
|
|
|
|
|
|
absence_tracker.remove(&id);
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
transitional_since.remove(&id);
|
2026-04-02 01:28:11 +01:00
|
|
|
|
changed = true;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if changed || tor_changed || first_scan || update_changed {
|
|
|
|
|
|
let mut data = current_data;
|
|
|
|
|
|
data.package_data = merged;
|
2026-02-17 15:03:34 +00:00
|
|
|
|
data.server_info.tor_address = tor_addr.clone();
|
|
|
|
|
|
data.server_info.node_address = tor_addr.as_ref().map(|t| identity.node_address(t));
|
2026-03-18 11:46:38 +00:00
|
|
|
|
data.server_info.status_info.containers_scanned = true;
|
2026-04-01 16:18:58 +01:00
|
|
|
|
data.server_info.status_info.updated = update_available;
|
2026-02-17 15:03:34 +00:00
|
|
|
|
state.update_data(data).await;
|
chore(ci): rustfmt + clippy clean-up to unblock the Rust CI job
The .github/workflows/ci.yml Rust job runs cargo fmt --check, clippy
with -D warnings, and tests. All three were failing. This commit:
- Applies rustfmt across the tree (the bulk of the diff — untouched
since the last toolchain bump, so a wide sweep was unavoidable).
- Fixes the correctness-level clippy errors:
container/bitcoin_simulator.rs wildcard-in-or-pattern
container/manifest.rs from_str rename to parse (reserved name)
container/podman_client.rs .get(0) -> .first()
container/runtime.rs manual += collapse
archipelago/src/constants.rs doc-comment → module-doc
api/rpc/package/install.rs stray /// comment above a non-item
container/docker_packages.rs redundant field init
streaming/advertisement.rs missing Metric import in tests
tests/orchestration_tests.rs `vec!` in non-Vec contexts
mesh/listener/dispatch.rs unused store_plain_message import
api/rpc/tor/mod.rs and mesh/steganography.rs: push-after-new → vec!
- Quiets wide legacy surfaces with crate-level allows in main.rs for
stylistic lints (too_many_arguments, type_complexity, doc indent,
enum variant prefix, wildcard-in-or, assertions-on-constants,
drop_non_drop, unused_io_amount, ptr_arg) — these fired in dozens
of places with no correctness payoff and have been churning every
toolchain bump.
- Tags intentional-dead-code helpers: wallet/ and streaming/ modules
are WIP, mesh::send_chunked_payload and DM_V1_MARKER are kept for
rollback compatibility, vpn::get_nostr_vpn_status is surface-area
for a not-yet-landed RPC.
cargo fmt --check, cargo clippy --all-targets --all-features
-- -D warnings, and cargo test --all-features now all pass locally.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 17:23:46 -04:00
|
|
|
|
debug!(
|
|
|
|
|
|
"📦 State changed (packages={}, tor={}, first_scan={}, update={}), broadcasting update",
|
|
|
|
|
|
changed, tor_changed, first_scan, update_changed
|
|
|
|
|
|
);
|
2026-01-27 23:21:26 +00:00
|
|
|
|
}
|
2026-03-09 07:43:12 +00:00
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-11 00:24:32 -04:00
|
|
|
|
async fn normalize_reachable_package_health(
|
|
|
|
|
|
packages: &mut HashMap<String, crate::data_model::PackageDataEntry>,
|
|
|
|
|
|
) {
|
|
|
|
|
|
for (id, pkg) in packages.iter_mut() {
|
|
|
|
|
|
if pkg.state != crate::data_model::PackageState::Running {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
if !matches!(pkg.health.as_deref(), Some("starting" | "unhealthy" | "1")) {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
let Some(port) = pkg
|
|
|
|
|
|
.installed
|
|
|
|
|
|
.as_ref()
|
|
|
|
|
|
.and_then(|i| i.interface_addresses.get("main"))
|
|
|
|
|
|
.and_then(|a| a.lan_address.as_deref())
|
|
|
|
|
|
.and_then(port_from_url)
|
|
|
|
|
|
.or_else(|| fallback_package_port(id))
|
|
|
|
|
|
else {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
};
|
|
|
|
|
|
if frontend_port_http_ready(port).await {
|
|
|
|
|
|
debug!(app_id = %id, port, "normalizing reachable package health to healthy");
|
|
|
|
|
|
pkg.health = Some("healthy".to_string());
|
|
|
|
|
|
ensure_main_lan_address(pkg, port);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
async fn frontend_port_http_ready(port: u16) -> bool {
|
|
|
|
|
|
let Ok(Ok(mut stream)) = tokio::time::timeout(
|
|
|
|
|
|
Duration::from_secs(2),
|
|
|
|
|
|
tokio::net::TcpStream::connect(("127.0.0.1", port)),
|
|
|
|
|
|
)
|
|
|
|
|
|
.await
|
|
|
|
|
|
else {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
let request = b"GET / HTTP/1.1\r\nHost: 127.0.0.1\r\nConnection: close\r\n\r\n";
|
|
|
|
|
|
if stream.write_all(request).await.is_err() {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let mut buf = [0u8; 64];
|
|
|
|
|
|
let Ok(Ok(n)) = tokio::time::timeout(Duration::from_secs(2), stream.read(&mut buf)).await
|
|
|
|
|
|
else {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
};
|
|
|
|
|
|
if n == 0 {
|
|
|
|
|
|
return false;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let head = String::from_utf8_lossy(&buf[..n]);
|
|
|
|
|
|
head.starts_with("HTTP/1.1 2")
|
|
|
|
|
|
|| head.starts_with("HTTP/1.1 3")
|
|
|
|
|
|
|| head.starts_with("HTTP/1.0 2")
|
|
|
|
|
|
|| head.starts_with("HTTP/1.0 3")
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn ensure_main_lan_address(pkg: &mut crate::data_model::PackageDataEntry, port: u16) {
|
|
|
|
|
|
let Some(installed) = pkg.installed.as_mut() else {
|
|
|
|
|
|
return;
|
|
|
|
|
|
};
|
|
|
|
|
|
let main = installed
|
|
|
|
|
|
.interface_addresses
|
|
|
|
|
|
.entry("main".to_string())
|
|
|
|
|
|
.or_insert_with(|| crate::data_model::InterfaceAddress {
|
|
|
|
|
|
tor_address: String::new(),
|
|
|
|
|
|
lan_address: None,
|
|
|
|
|
|
});
|
|
|
|
|
|
if main.lan_address.is_none() {
|
|
|
|
|
|
main.lan_address = Some(format!("http://localhost:{port}"));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn fallback_package_port(app_id: &str) -> Option<u16> {
|
|
|
|
|
|
match app_id {
|
|
|
|
|
|
"fedimint" | "fedimintd" => Some(8175),
|
|
|
|
|
|
"filebrowser" => Some(8083),
|
|
|
|
|
|
"indeedhub" => Some(7778),
|
|
|
|
|
|
"nginx-proxy-manager" => Some(8081),
|
|
|
|
|
|
"nostr-rs-relay" => Some(18081),
|
|
|
|
|
|
_ => None,
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn port_from_url(url: &str) -> Option<u16> {
|
|
|
|
|
|
let after_scheme = url.split_once("://").map(|(_, rest)| rest).unwrap_or(url);
|
|
|
|
|
|
let host_port = after_scheme.split('/').next().unwrap_or(after_scheme);
|
|
|
|
|
|
let port = host_port.rsplit_once(':')?.1;
|
|
|
|
|
|
port.parse::<u16>().ok()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-14 03:00:29 +00:00
|
|
|
|
/// Register Archipelago DWN protocols on startup.
|
|
|
|
|
|
async fn register_dwn_protocols(data_dir: &std::path::Path) -> Result<()> {
|
|
|
|
|
|
use crate::network::dwn_store::{DwnStore, ProtocolDefinition};
|
|
|
|
|
|
|
|
|
|
|
|
let protocols = [
|
|
|
|
|
|
("https://archipelago.dev/protocols/node-identity/v1", true),
|
|
|
|
|
|
("https://archipelago.dev/protocols/file-catalog/v1", true),
|
|
|
|
|
|
("https://archipelago.dev/protocols/federation/v1", false),
|
|
|
|
|
|
("https://archipelago.dev/protocols/app-deploy/v1", false),
|
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
|
|
let store = DwnStore::new(data_dir).await?;
|
|
|
|
|
|
let existing = store.list_protocols().await?;
|
|
|
|
|
|
let existing_uris: std::collections::HashSet<String> =
|
|
|
|
|
|
existing.iter().map(|p| p.protocol.clone()).collect();
|
|
|
|
|
|
|
|
|
|
|
|
let mut registered = 0;
|
|
|
|
|
|
for (uri, published) in &protocols {
|
|
|
|
|
|
if existing_uris.contains(*uri) {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
|
|
|
|
|
let def = ProtocolDefinition {
|
|
|
|
|
|
protocol: uri.to_string(),
|
|
|
|
|
|
published: *published,
|
|
|
|
|
|
types: std::collections::HashMap::new(),
|
|
|
|
|
|
structure: std::collections::HashMap::new(),
|
|
|
|
|
|
date_registered: chrono::Utc::now().to_rfc3339(),
|
|
|
|
|
|
};
|
|
|
|
|
|
store.register_protocol(&def).await?;
|
|
|
|
|
|
registered += 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if registered > 0 {
|
|
|
|
|
|
info!("📋 Registered {registered} DWN protocols");
|
|
|
|
|
|
}
|
|
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-09 07:43:12 +00:00
|
|
|
|
/// Periodically check peer reachability and broadcast status changes.
|
|
|
|
|
|
async fn check_peer_health(state: &StateManager, data_dir: &std::path::Path) -> Result<()> {
|
|
|
|
|
|
let known_peers = peers::load_peers(data_dir).await.unwrap_or_default();
|
|
|
|
|
|
if known_peers.is_empty() {
|
|
|
|
|
|
return Ok(());
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let mut new_health = std::collections::HashMap::new();
|
|
|
|
|
|
for peer in &known_peers {
|
2026-04-19 01:36:04 -04:00
|
|
|
|
let fips_npub = crate::federation::fips_npub_for_onion(data_dir, &peer.onion).await;
|
2026-04-28 15:00:58 -04:00
|
|
|
|
let reachable = node_message::check_peer_reachable(&peer.onion, fips_npub.as_deref())
|
|
|
|
|
|
.await
|
|
|
|
|
|
.unwrap_or(false);
|
2026-03-09 07:43:12 +00:00
|
|
|
|
new_health.insert(peer.onion.clone(), reachable);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let (current_data, _) = state.get_snapshot().await;
|
|
|
|
|
|
if current_data.peer_health != new_health {
|
|
|
|
|
|
let mut data = current_data;
|
|
|
|
|
|
data.peer_health = new_health;
|
|
|
|
|
|
state.update_data(data).await;
|
|
|
|
|
|
debug!("🔗 Peer health updated, broadcasting changes");
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-27 23:21:26 +00:00
|
|
|
|
Ok(())
|
|
|
|
|
|
}
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
|
mod merge_tests {
|
|
|
|
|
|
use super::*;
|
2026-04-28 15:00:58 -04:00
|
|
|
|
use crate::data_model::{Description, Manifest, PackageDataEntry, PackageState, StaticFiles};
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
|
|
|
|
|
|
fn make_manifest() -> Manifest {
|
|
|
|
|
|
Manifest {
|
|
|
|
|
|
id: "lnd".to_string(),
|
|
|
|
|
|
title: "LND".to_string(),
|
|
|
|
|
|
version: "0.18.4".to_string(),
|
|
|
|
|
|
description: Description {
|
|
|
|
|
|
short: "".to_string(),
|
|
|
|
|
|
long: "".to_string(),
|
|
|
|
|
|
},
|
|
|
|
|
|
release_notes: "".to_string(),
|
|
|
|
|
|
license: "".to_string(),
|
|
|
|
|
|
wrapper_repo: "".to_string(),
|
|
|
|
|
|
upstream_repo: "".to_string(),
|
|
|
|
|
|
support_site: "".to_string(),
|
|
|
|
|
|
marketing_site: "".to_string(),
|
|
|
|
|
|
donation_url: None,
|
|
|
|
|
|
author: None,
|
|
|
|
|
|
website: None,
|
|
|
|
|
|
interfaces: None,
|
|
|
|
|
|
tier: None,
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn make_static() -> StaticFiles {
|
|
|
|
|
|
StaticFiles {
|
|
|
|
|
|
license: "".to_string(),
|
|
|
|
|
|
instructions: "".to_string(),
|
|
|
|
|
|
icon: "".to_string(),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn make_entry(state: PackageState, health: Option<&str>) -> PackageDataEntry {
|
|
|
|
|
|
PackageDataEntry {
|
|
|
|
|
|
state,
|
|
|
|
|
|
health: health.map(|s| s.to_string()),
|
|
|
|
|
|
exit_code: None,
|
|
|
|
|
|
static_files: make_static(),
|
|
|
|
|
|
manifest: make_manifest(),
|
|
|
|
|
|
installed: None,
|
|
|
|
|
|
install_progress: None,
|
|
|
|
|
|
uninstall_stage: None,
|
|
|
|
|
|
available_update: None,
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
fix(fips,federation,ui): mesh content browse, removed-node tombstones, modal sizing
FIPS peer content browse over the mesh was failing with "Peer returned
error: 404 Not Found" and never falling back to Tor. `is_peer_allowed_path`
only allowed `/content/<id>` (item fetches) — the catalog endpoint is
exactly `/content` (no trailing slash), so it 404'd over the FIPS peer
listener. A FIPS 404 was also treated as a successful response, so the dial
never retried Tor. Fixes: allow `/content` over the mesh; add
`fips_should_fall_back()` so a FIPS 404/5xx in Auto mode falls back to Tor
(handles version-skew peers reaching a different route). Also correct the
reconnect hint text — the public anchor is TCP/8443, not UDP/8668.
Federation: deleted nodes reappeared because transitive discovery
(`merge` of a peer's advertised trusted peers) re-added any unknown DID.
Add a tombstone store (`removed-nodes.json`): remove_node tombstones the
DID, transitive merge skips tombstoned DIDs, and a remote-triggered
peer-joined is ignored for a removed DID. Explicit local re-add (add_node)
clears the tombstone.
UI: the app credentials modal panel stretched edge-to-edge (height:100%,
max-width:none, items-stretch overlay). Constrain it to a centered card
(max-width 34rem, rounded, dimmed full-screen backdrop) matching the
AppIconGrid / wallet-receive modal.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-15 08:09:26 -04:00
|
|
|
|
#[test]
|
|
|
|
|
|
fn peer_path_filter_allows_content_catalog_and_items() {
|
|
|
|
|
|
// Regression: the content *catalog* is exactly "/content" (no trailing
|
|
|
|
|
|
// slash). It must be reachable over the peer (FIPS) listener, else
|
|
|
|
|
|
// `content.browse-peer` 404s over the mesh. Item fetches are
|
|
|
|
|
|
// "/content/<id>".
|
|
|
|
|
|
assert!(is_peer_allowed_path("/content"), "catalog must be allowed");
|
|
|
|
|
|
assert!(
|
|
|
|
|
|
is_peer_allowed_path("/content/abc123"),
|
|
|
|
|
|
"items must be allowed"
|
|
|
|
|
|
);
|
|
|
|
|
|
assert!(is_peer_allowed_path("/rpc/v1"));
|
|
|
|
|
|
assert!(is_peer_allowed_path("/health"));
|
|
|
|
|
|
// Not on the allow-list → rejected (no broad surface over the mesh).
|
|
|
|
|
|
assert!(!is_peer_allowed_path("/contention"), "must not prefix-leak");
|
|
|
|
|
|
assert!(!is_peer_allowed_path("/"));
|
|
|
|
|
|
assert!(!is_peer_allowed_path("/rpc/v2"));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
#[test]
|
|
|
|
|
|
fn preserves_transitional_state_on_merge() {
|
|
|
|
|
|
// existing: user initiated a stop, spawn_transitional set Stopping.
|
|
|
|
|
|
// fresh: podman hasn't finished the stop yet, still reports Running.
|
|
|
|
|
|
// Expected: merged state stays Stopping — podman's live view must
|
|
|
|
|
|
// not clobber the transitional state owned by the RPC spawn task.
|
|
|
|
|
|
let existing = make_entry(PackageState::Stopping, Some("healthy"));
|
|
|
|
|
|
let fresh = make_entry(PackageState::Running, Some("starting"));
|
2026-06-11 00:24:32 -04:00
|
|
|
|
let merged = merge_preserving_transitional(&existing, &fresh, true);
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
assert_eq!(merged.state, PackageState::Stopping);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-06-11 00:24:32 -04:00
|
|
|
|
#[test]
|
|
|
|
|
|
fn non_user_stopping_recovers_when_container_is_running() {
|
|
|
|
|
|
let existing = make_entry(PackageState::Stopping, Some("unknown"));
|
|
|
|
|
|
let fresh = make_entry(PackageState::Running, Some("healthy"));
|
|
|
|
|
|
let merged = merge_preserving_transitional(&existing, &fresh, false);
|
|
|
|
|
|
assert_eq!(merged.state, PackageState::Running);
|
|
|
|
|
|
assert_eq!(merged.health.as_deref(), Some("healthy"));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
#[test]
|
|
|
|
|
|
fn merges_fresh_observability_fields() {
|
|
|
|
|
|
// Non-state observability fields (health, exit_code, installed)
|
|
|
|
|
|
// MUST come from the fresh scan even while state is preserved —
|
|
|
|
|
|
// the UI still shows live health/health during a transition.
|
|
|
|
|
|
let mut existing = make_entry(PackageState::Stopping, Some("healthy"));
|
|
|
|
|
|
existing.exit_code = None;
|
|
|
|
|
|
let mut fresh = make_entry(PackageState::Running, Some("unhealthy"));
|
|
|
|
|
|
fresh.exit_code = Some(0);
|
2026-06-11 00:24:32 -04:00
|
|
|
|
let merged = merge_preserving_transitional(&existing, &fresh, true);
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
assert_eq!(merged.state, PackageState::Stopping);
|
|
|
|
|
|
assert_eq!(merged.health.as_deref(), Some("unhealthy"));
|
|
|
|
|
|
assert_eq!(merged.exit_code, Some(0));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-05-05 11:29:18 -04:00
|
|
|
|
#[test]
|
|
|
|
|
|
fn stale_removing_recovers_when_container_is_running() {
|
|
|
|
|
|
let existing = make_entry(PackageState::Removing, Some("unknown"));
|
|
|
|
|
|
let fresh = make_entry(PackageState::Running, Some("healthy"));
|
2026-06-11 00:24:32 -04:00
|
|
|
|
let merged = merge_preserving_transitional(&existing, &fresh, false);
|
2026-05-05 11:29:18 -04:00
|
|
|
|
assert_eq!(merged.state, PackageState::Running);
|
|
|
|
|
|
assert_eq!(merged.health.as_deref(), Some("healthy"));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
#[test]
|
|
|
|
|
|
fn is_transitional_covers_all_variants() {
|
|
|
|
|
|
for s in [
|
|
|
|
|
|
PackageState::Installing,
|
|
|
|
|
|
PackageState::Stopping,
|
|
|
|
|
|
PackageState::Starting,
|
|
|
|
|
|
PackageState::Restarting,
|
|
|
|
|
|
PackageState::Updating,
|
|
|
|
|
|
PackageState::Removing,
|
|
|
|
|
|
PackageState::CreatingBackup,
|
|
|
|
|
|
PackageState::RestoringBackup,
|
|
|
|
|
|
PackageState::BackingUp,
|
|
|
|
|
|
] {
|
|
|
|
|
|
assert!(is_transitional(&s), "{:?} should be transitional", s);
|
|
|
|
|
|
}
|
|
|
|
|
|
for s in [
|
|
|
|
|
|
PackageState::Installed,
|
|
|
|
|
|
PackageState::Stopped,
|
|
|
|
|
|
PackageState::Exited,
|
|
|
|
|
|
PackageState::Running,
|
|
|
|
|
|
] {
|
2026-04-28 15:00:58 -04:00
|
|
|
|
assert!(!is_transitional(&s), "{:?} should NOT be transitional", s);
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2026-05-17 22:13:21 -04:00
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
|
fn installing_uses_longer_stale_timeout_than_other_transitions() {
|
|
|
|
|
|
assert!(transitional_stuck_timeout(&PackageState::Installing) > TRANSITIONAL_STUCK_TIMEOUT);
|
|
|
|
|
|
assert_eq!(
|
|
|
|
|
|
transitional_stuck_timeout(&PackageState::Stopping),
|
|
|
|
|
|
TRANSITIONAL_STUCK_TIMEOUT
|
|
|
|
|
|
);
|
|
|
|
|
|
}
|
2026-06-11 00:24:32 -04:00
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
|
fn absent_stopping_transitions_to_stopped() {
|
|
|
|
|
|
assert_eq!(
|
|
|
|
|
|
absent_transitional_replacement(&PackageState::Stopping),
|
|
|
|
|
|
Some(PackageState::Stopped)
|
|
|
|
|
|
);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
|
fn absent_installing_still_waits_for_owner() {
|
|
|
|
|
|
assert_eq!(
|
|
|
|
|
|
absent_transitional_replacement(&PackageState::Installing),
|
|
|
|
|
|
None
|
|
|
|
|
|
);
|
|
|
|
|
|
}
|
fix(state): preserve transitional state across container scans
The 30s package scan loop used to blindly overwrite every package
entry from podman inspect. While a user-initiated Stop / Start /
Restart was in flight, the RPC spawn task would flip the state to
Stopping / Starting / Restarting, the next scan would see podman
still reporting "running" (for the duration of the graceful stop,
up to 600s for bitcoin-core), and clobber the transitional state
back to Running. The dashboard would then flip Running -> Stopping
-> Running -> Stopped, making it look like the stop had silently
failed until it eventually completed.
The merge loop now treats transitional variants (Stopping, Starting,
Restarting, Installing, Updating, Removing, and the three backup
variants) as owned by the RPC spawn task. For those variants,
merge_preserving_transitional keeps the existing state while still
taking live observability fields (health, exit_code, installed,
lan_address, manifest, static_files, available_update) from the
fresh scan so the UI continues to see live health readings.
Adds an escape hatch via a per-scan transitional_since side table:
if a package has been in a transitional state for more than 1200s
(2x the longest graceful stop at 600s on bitcoin-core), the scan
loop assumes the spawn task died without cleanup and overrides with
podman's live state. Prevents a crashed background task from wedging
a package in Stopping forever.
Three unit tests cover the merge rule, the observability passthrough,
and the transitional-variant classifier.
2026-04-23 05:15:13 -04:00
|
|
|
|
}
|