2026-04-30 16:29:56 -04:00
|
|
|
//! Cached Bitcoin node status for browser UIs.
|
|
|
|
|
//!
|
|
|
|
|
//! The bitcoin-ui should not poll Bitcoin RPC directly for display state.
|
|
|
|
|
//! During container restarts, reindexing, and IBD, direct browser RPC polling
|
|
|
|
|
//! turns short RPC gaps into visible UI failures. This module owns the RPC
|
|
|
|
|
//! polling loop, caches the last successful snapshot, and serves stale-but-known
|
|
|
|
|
//! state while the node is reconnecting.
|
|
|
|
|
|
|
|
|
|
use anyhow::{Context, Result};
|
|
|
|
|
use serde::Serialize;
|
|
|
|
|
use std::sync::OnceLock;
|
|
|
|
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
|
|
|
|
use tokio::sync::RwLock;
|
|
|
|
|
use tracing::{debug, warn};
|
|
|
|
|
|
2026-06-11 00:24:32 -04:00
|
|
|
/// Seconds the polling loop sleeps before the next poll after a successful refresh.
const CACHE_REFRESH_SECS: u64 = 10;
|
|
|
|
|
/// Seconds the polling loop sleeps before retrying after a failed refresh.
const CACHE_ERROR_BACKOFF_SECS: u64 = 15;
|
2026-04-30 16:29:56 -04:00
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
|
|
|
pub struct BitcoinNodeStatus {
|
|
|
|
|
pub ok: bool,
|
|
|
|
|
pub stale: bool,
|
|
|
|
|
pub updated_at_ms: u64,
|
|
|
|
|
pub error: Option<String>,
|
|
|
|
|
pub blockchain_info: Option<serde_json::Value>,
|
|
|
|
|
pub network_info: Option<serde_json::Value>,
|
|
|
|
|
pub index_info: Option<serde_json::Value>,
|
|
|
|
|
pub zmq_notifications: Option<serde_json::Value>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Default for BitcoinNodeStatus {
|
|
|
|
|
fn default() -> Self {
|
|
|
|
|
Self {
|
|
|
|
|
ok: false,
|
|
|
|
|
stale: false,
|
|
|
|
|
updated_at_ms: 0,
|
|
|
|
|
error: Some("Connecting to Bitcoin node...".to_string()),
|
|
|
|
|
blockchain_info: None,
|
|
|
|
|
network_info: None,
|
|
|
|
|
index_info: None,
|
|
|
|
|
zmq_notifications: None,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Process-wide slot for the latest node status; lazily initialized by `cache()`.
static STATUS_CACHE: OnceLock<RwLock<BitcoinNodeStatus>> = OnceLock::new();
|
|
|
|
|
|
|
|
|
|
fn cache() -> &'static RwLock<BitcoinNodeStatus> {
|
|
|
|
|
STATUS_CACHE.get_or_init(|| RwLock::new(BitcoinNodeStatus::default()))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch, and
/// saturates at `u64::MAX` instead of silently wrapping if the millisecond
/// count does not fit in a `u64`.
fn now_ms() -> u64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    // `as_millis` returns u128; convert explicitly rather than truncating with `as`.
    u64::try_from(since_epoch.as_millis()).unwrap_or(u64::MAX)
}
|
|
|
|
|
|
|
|
|
|
/// Reports whether an RPC error message looks like a temporary condition
/// (connection trouble, timeouts, a busy node, an undecodable response).
///
/// Matching is a case-insensitive substring search against a fixed list of
/// markers.
fn transient_error(err_msg: &str) -> bool {
    const TRANSIENT_MARKERS: &[&str] = &[
        "connect",
        "reset",
        "refused",
        "timed out",
        "timeout",
        "broken pipe",
        "eof",
        "500 internal server error",
        "503 service unavailable",
        "work queue depth exceeded",
        "decode bitcoin rpc json",
        "error decoding response body",
        "expected value at line 1 column 1",
    ];

    let haystack = err_msg.to_lowercase();
    TRANSIENT_MARKERS
        .iter()
        .any(|marker| haystack.contains(*marker))
}
|
|
|
|
|
|
|
|
|
|
/// Turns a raw RPC error into a user-facing sentence.
///
/// Only the first line of `err_msg` is used, trimmed of surrounding
/// whitespace and trailing periods. The node state named in the sentence is
/// chosen by case-insensitive substring matching on that line.
/// `has_cached_state` selects the wording: whether the UI is showing the last
/// known state or has nothing to show yet.
fn friendly_transient_error(has_cached_state: bool, err_msg: &str) -> String {
    // Checked in order; the first rule with a matching needle wins.
    const STATE_RULES: [(&[&str], &str); 3] = [
        (&["verifying blocks"], "verifying blocks after restart"),
        (
            &["connection refused", "tcp connect error"],
            "waiting for the Bitcoin RPC listener",
        ),
        (
            &["timed out", "timeout"],
            "busy and not answering RPC before the timeout",
        ),
    ];

    let first_line = err_msg.lines().next().unwrap_or(err_msg);
    let detail = first_line.trim().trim_end_matches('.');
    let haystack = detail.to_lowercase();

    let state = STATE_RULES
        .iter()
        .find(|(needles, _)| needles.iter().any(|needle| haystack.contains(*needle)))
        .map_or("starting or busy syncing", |(_, label)| *label);

    if !has_cached_state {
        return format!("Bitcoin node is {state}; retrying automatically. Detail: {detail}");
    }
    format!("Bitcoin node is {state}; showing last known state and retrying. Detail: {detail}")
}
|
|
|
|
|
|
|
|
|
|
pub fn spawn_status_cache() {
|
|
|
|
|
tokio::spawn(async {
|
|
|
|
|
loop {
|
|
|
|
|
let fresh = fetch_bitcoin_status().await;
|
|
|
|
|
let mut cached = cache().write().await;
|
2026-06-11 00:24:32 -04:00
|
|
|
let mut sleep_secs = CACHE_REFRESH_SECS;
|
2026-04-30 16:29:56 -04:00
|
|
|
match fresh {
|
|
|
|
|
Ok(mut status) => {
|
|
|
|
|
status.ok = true;
|
|
|
|
|
status.stale = false;
|
|
|
|
|
status.error = None;
|
|
|
|
|
*cached = status;
|
|
|
|
|
}
|
|
|
|
|
Err(e) => {
|
2026-06-11 00:24:32 -04:00
|
|
|
let err_msg = format!("{e:#}");
|
2026-04-30 16:29:56 -04:00
|
|
|
if transient_error(&err_msg) {
|
|
|
|
|
debug!("Bitcoin status: transient RPC failure: {}", err_msg);
|
|
|
|
|
} else {
|
|
|
|
|
warn!("Bitcoin status: RPC failure: {}", err_msg);
|
|
|
|
|
}
|
2026-06-11 00:24:32 -04:00
|
|
|
sleep_secs = CACHE_ERROR_BACKOFF_SECS;
|
2026-04-30 16:29:56 -04:00
|
|
|
|
|
|
|
|
if cached.blockchain_info.is_some() {
|
|
|
|
|
cached.ok = false;
|
|
|
|
|
cached.stale = true;
|
2026-06-11 00:24:32 -04:00
|
|
|
cached.error = Some(friendly_transient_error(true, &err_msg));
|
2026-04-30 16:29:56 -04:00
|
|
|
} else {
|
|
|
|
|
*cached = BitcoinNodeStatus {
|
|
|
|
|
ok: false,
|
|
|
|
|
stale: false,
|
|
|
|
|
updated_at_ms: now_ms(),
|
2026-06-11 00:24:32 -04:00
|
|
|
error: Some(friendly_transient_error(false, &err_msg)),
|
2026-04-30 16:29:56 -04:00
|
|
|
..BitcoinNodeStatus::default()
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
drop(cached);
|
2026-06-11 00:24:32 -04:00
|
|
|
tokio::time::sleep(Duration::from_secs(sleep_secs)).await;
|
2026-04-30 16:29:56 -04:00
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub async fn get_bitcoin_status() -> BitcoinNodeStatus {
|
|
|
|
|
cache().read().await.clone()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async fn fetch_bitcoin_status() -> Result<BitcoinNodeStatus> {
|
|
|
|
|
let client = reqwest::Client::builder()
|
2026-06-11 00:24:32 -04:00
|
|
|
.timeout(Duration::from_secs(20))
|
2026-04-30 16:29:56 -04:00
|
|
|
.build()
|
|
|
|
|
.context("build Bitcoin status HTTP client")?;
|
|
|
|
|
|
|
|
|
|
let blockchain_info = bitcoin_rpc_call(&client, "getblockchaininfo", serde_json::json!([]))
|
|
|
|
|
.await
|
|
|
|
|
.context("getblockchaininfo")?;
|
|
|
|
|
let network_info = bitcoin_rpc_call(&client, "getnetworkinfo", serde_json::json!([]))
|
|
|
|
|
.await
|
|
|
|
|
.context("getnetworkinfo")
|
|
|
|
|
.ok();
|
|
|
|
|
let index_info = bitcoin_rpc_call(&client, "getindexinfo", serde_json::json!([]))
|
|
|
|
|
.await
|
|
|
|
|
.context("getindexinfo")
|
|
|
|
|
.ok();
|
refactor(install): route orchestrator-managed apps through orchestrator first
Phase 3a of the install path consolidation. Two coupled changes:
1. install.rs handle_package_install: gate the legacy "container exists →
adopt + return" probe on !orchestrator_managed. Apps the orchestrator
knows about (bitcoin-knots, bitcoin-core, lnd, electrumx, fedimint,
filebrowser, btcpay-server stack apps, mempool stack apps, plus the
companion UIs that just moved to Quadlet) skip the legacy probe and
fall straight into the orchestrator branch.
The legacy adopt block was returning success on a bare `podman start`
exit-0 — even when the process inside the container crashed seconds
later. That's the .228 "running but unreachable" failure mode. The
orchestrator's ensure_running honors the manifest's health check and
pre-start hooks (e.g. re-renders bitcoin-ui's nginx.conf if the RPC
password rotated), so this is a behavioral upgrade, not just a
refactor.
2. ProdContainerOrchestrator::install: make idempotent. Previously it
blindly called install_fresh which would fail on `podman create` if
the container name already existed. Now it delegates to ensure_running:
- Container Running + healthy → no-op (refresh hooks, restart if
config rewritten)
- Container Stopped/Exited → start (with hook refresh)
- Container missing → install_fresh
- Container in wedged state (Created/Paused/Unknown) → force-recreate
Without this, change #1 would regress every "container already exists"
case for the 18 orchestrator-managed app IDs. With it, install becomes
the single source of truth for "make app X be in the desired state."
Tests: 654 passed across the workspace (614 unit + 37 orchestration + 3
rpc), 0 failures. The 20 prod_orchestrator tests cover the install /
ensure_running / reconcile paths the new install delegates through.
Net delta: install.rs grows by ~30 lines (gating wrapper + comments),
prod_orchestrator.rs grows by ~30 lines (idempotent install body). Both
are temporary — the larger deletions (~1700 lines) come once every app
has been verified through the orchestrator path in subsequent phases.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 11:12:52 -04:00
|
|
|
let zmq_notifications = bitcoin_rpc_call(&client, "getzmqnotifications", serde_json::json!([]))
|
|
|
|
|
.await
|
|
|
|
|
.context("getzmqnotifications")
|
|
|
|
|
.ok();
|
2026-04-30 16:29:56 -04:00
|
|
|
|
|
|
|
|
Ok(BitcoinNodeStatus {
|
|
|
|
|
ok: true,
|
|
|
|
|
stale: false,
|
|
|
|
|
updated_at_ms: now_ms(),
|
|
|
|
|
error: None,
|
|
|
|
|
blockchain_info: Some(blockchain_info),
|
|
|
|
|
network_info,
|
|
|
|
|
index_info,
|
|
|
|
|
zmq_notifications,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async fn bitcoin_rpc_call(
|
|
|
|
|
client: &reqwest::Client,
|
|
|
|
|
method: &str,
|
|
|
|
|
params: serde_json::Value,
|
|
|
|
|
) -> Result<serde_json::Value> {
|
|
|
|
|
let (rpc_user, rpc_pass) = crate::bitcoin_rpc::bitcoin_rpc_credentials().await;
|
|
|
|
|
let body = serde_json::json!({
|
|
|
|
|
"jsonrpc": "1.0",
|
|
|
|
|
"id": "bitcoin-status",
|
|
|
|
|
"method": method,
|
|
|
|
|
"params": params,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
let resp = client
|
|
|
|
|
.post(crate::constants::BITCOIN_RPC_URL)
|
|
|
|
|
.basic_auth(rpc_user, Some(rpc_pass))
|
|
|
|
|
.header("Content-Type", "application/json")
|
|
|
|
|
.json(&body)
|
|
|
|
|
.send()
|
|
|
|
|
.await
|
|
|
|
|
.context("Bitcoin RPC request failed")?;
|
|
|
|
|
|
|
|
|
|
let status = resp.status();
|
|
|
|
|
let json: serde_json::Value = resp.json().await.context("decode Bitcoin RPC JSON")?;
|
|
|
|
|
if !status.is_success() {
|
|
|
|
|
anyhow::bail!("Bitcoin RPC returned {}: {}", status, json);
|
|
|
|
|
}
|
|
|
|
|
if let Some(error) = json.get("error").filter(|e| !e.is_null()) {
|
|
|
|
|
anyhow::bail!("Bitcoin RPC {} error: {}", method, error);
|
|
|
|
|
}
|
|
|
|
|
json.get("result")
|
|
|
|
|
.cloned()
|
|
|
|
|
.context("missing Bitcoin RPC result")
|
|
|
|
|
}
|
2026-06-11 00:24:32 -04:00
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::friendly_transient_error;

    /// A "Verifying blocks" RPC error gets its own wording, not the generic one.
    #[test]
    fn explains_verifying_blocks_without_generic_timeout_copy() {
        let raw = r#"getblockchaininfo: Bitcoin RPC returned 500 Internal Server Error: {"error":{"code":-28,"message":"Verifying blocks..."}}"#;
        let rendered = friendly_transient_error(false, raw);

        assert!(rendered.contains("verifying blocks after restart"));
        assert!(rendered.contains("retrying automatically"));
    }

    /// A refused connection is explained as the RPC listener not being up yet.
    #[test]
    fn explains_missing_rpc_listener() {
        let raw = "getblockchaininfo: tcp connect error: Connection refused (os error 111)";
        let rendered = friendly_transient_error(true, raw);

        assert!(rendered.contains("waiting for the Bitcoin RPC listener"));
        assert!(rendered.contains("showing last known state"));
    }

    /// A timed-out request is explained as the node being too busy to answer.
    #[test]
    fn explains_rpc_timeout() {
        let raw = "getblockchaininfo: Bitcoin RPC request failed: operation timed out";
        let rendered = friendly_transient_error(false, raw);

        assert!(rendered.contains("busy and not answering RPC before the timeout"));
    }
}
|