archy/core/archipelago/src/bitcoin_status.rs

253 lines
8.2 KiB
Rust

//! Cached Bitcoin node status for browser UIs.
//!
//! The bitcoin-ui should not poll Bitcoin RPC directly for display state.
//! During container restarts, reindexing, and IBD, direct browser RPC polling
//! turns short RPC gaps into visible UI failures. This module owns the RPC
//! polling loop, caches the last successful snapshot, and serves stale-but-known
//! state while the node is reconnecting.
use anyhow::{Context, Result};
use serde::Serialize;
use std::sync::OnceLock;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tokio::sync::RwLock;
use tracing::{debug, warn};
/// Seconds the polling loop sleeps before the next poll after a successful refresh.
const CACHE_REFRESH_SECS: u64 = 10;
/// Seconds the polling loop sleeps before the next poll after a failed refresh.
const CACHE_ERROR_BACKOFF_SECS: u64 = 15;
/// Snapshot of the Bitcoin node's state as served to browser UIs.
///
/// The polling loop overwrites the cached instance on every successful fetch
/// and flags it as stale (keeping the last known RPC payloads) when a fetch
/// fails after at least one success.
#[derive(Debug, Clone, Serialize)]
pub struct BitcoinNodeStatus {
/// `true` when the most recent poll succeeded.
pub ok: bool,
/// `true` when the most recent poll failed and the payloads below are the
/// last successfully fetched ones.
pub stale: bool,
/// Unix time in milliseconds at which this snapshot was written; `0` for the
/// initial placeholder.
pub updated_at_ms: u64,
/// Human-readable description of the current problem, if any.
pub error: Option<String>,
/// Result of the `getblockchaininfo` RPC call.
pub blockchain_info: Option<serde_json::Value>,
/// Result of the `getnetworkinfo` RPC call, when it succeeded.
pub network_info: Option<serde_json::Value>,
/// Result of the `getindexinfo` RPC call, when it succeeded.
pub index_info: Option<serde_json::Value>,
/// Result of the `getzmqnotifications` RPC call, when it succeeded.
pub zmq_notifications: Option<serde_json::Value>,
}
impl Default for BitcoinNodeStatus {
    /// Builds the placeholder snapshot used before any poll has completed:
    /// not ok, not stale, no RPC payloads, and a "connecting" message.
    fn default() -> Self {
        let placeholder = String::from("Connecting to Bitcoin node...");
        Self {
            ok: false,
            stale: false,
            updated_at_ms: 0,
            error: Some(placeholder),
            blockchain_info: None,
            network_info: None,
            index_info: None,
            zmq_notifications: None,
        }
    }
}
/// Process-wide storage for the most recent status snapshot.
static STATUS_CACHE: OnceLock<RwLock<BitcoinNodeStatus>> = OnceLock::new();

/// Returns the shared status cache, creating it with the placeholder
/// snapshot on first access.
fn cache() -> &'static RwLock<BitcoinNodeStatus> {
    let make_initial = || RwLock::new(BitcoinNodeStatus::default());
    STATUS_CACHE.get_or_init(make_initial)
}
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        // Clock is set before 1970: fall back to a zero duration.
        Err(_) => 0,
    }
}
/// Reports whether an error message looks like a temporary RPC outage.
///
/// The check is a case-insensitive substring search against a fixed list of
/// markers covering connection failures, timeouts, HTTP 500/503 replies, and
/// undecodable response bodies.
fn transient_error(err_msg: &str) -> bool {
    const TRANSIENT_MARKERS: &[&str] = &[
        "connect",
        "reset",
        "refused",
        "timed out",
        "timeout",
        "broken pipe",
        "eof",
        "500 internal server error",
        "503 service unavailable",
        "work queue depth exceeded",
        "decode bitcoin rpc json",
        "error decoding response body",
        "expected value at line 1 column 1",
    ];

    let haystack = err_msg.to_lowercase();
    TRANSIENT_MARKERS
        .iter()
        .any(|&marker| haystack.contains(marker))
}
/// Turns a raw RPC error into a user-facing explanation.
///
/// Only the first line of `err_msg` is used as the detail, with surrounding
/// whitespace and trailing periods removed. The detail is matched
/// case-insensitively to pick a description of what the node is doing.
/// `has_cached_state` selects the wording for whether a last known snapshot
/// is still being shown.
fn friendly_transient_error(has_cached_state: bool, err_msg: &str) -> String {
    let first_line = err_msg.lines().next().unwrap_or(err_msg);
    let detail = first_line.trim().trim_end_matches('.');

    let lowered = detail.to_lowercase();
    let mentions = |needle: &str| lowered.contains(needle);

    // Most specific condition first; the final arm is the generic fallback.
    let state = match () {
        _ if mentions("verifying blocks") => "verifying blocks after restart",
        _ if mentions("connection refused") || mentions("tcp connect error") => {
            "waiting for the Bitcoin RPC listener"
        }
        _ if mentions("timed out") || mentions("timeout") => {
            "busy and not answering RPC before the timeout"
        }
        _ => "starting or busy syncing",
    };

    match has_cached_state {
        true => format!(
            "Bitcoin node is {state}; showing last known state and retrying. Detail: {detail}"
        ),
        false => format!("Bitcoin node is {state}; retrying automatically. Detail: {detail}"),
    }
}
/// Starts the background task that keeps the status cache up to date.
///
/// The task loops forever: it fetches a fresh status, stores the outcome in
/// the shared cache, then sleeps for `CACHE_REFRESH_SECS` after a success or
/// `CACHE_ERROR_BACKOFF_SECS` after a failure. On failure the previously
/// fetched payloads are kept and flagged as stale when they exist; otherwise
/// the cache is reset to an error-only snapshot.
pub fn spawn_status_cache() {
    tokio::spawn(async {
        loop {
            // Fetch first so the write lock is never held across the RPC calls.
            let outcome = fetch_bitcoin_status().await;

            let mut slot = cache().write().await;
            let delay_secs = match outcome {
                Ok(snapshot) => {
                    *slot = BitcoinNodeStatus {
                        ok: true,
                        stale: false,
                        error: None,
                        ..snapshot
                    };
                    CACHE_REFRESH_SECS
                }
                Err(err) => {
                    let err_msg = format!("{err:#}");
                    if transient_error(&err_msg) {
                        debug!("Bitcoin status: transient RPC failure: {}", err_msg);
                    } else {
                        warn!("Bitcoin status: RPC failure: {}", err_msg);
                    }

                    let has_snapshot = slot.blockchain_info.is_some();
                    let message = friendly_transient_error(has_snapshot, &err_msg);
                    if has_snapshot {
                        // Keep the last known payloads and their timestamp.
                        slot.ok = false;
                        slot.stale = true;
                        slot.error = Some(message);
                    } else {
                        *slot = BitcoinNodeStatus {
                            ok: false,
                            stale: false,
                            updated_at_ms: now_ms(),
                            error: Some(message),
                            ..BitcoinNodeStatus::default()
                        };
                    }
                    CACHE_ERROR_BACKOFF_SECS
                }
            };
            // Release the lock before sleeping so readers are not blocked.
            drop(slot);

            tokio::time::sleep(Duration::from_secs(delay_secs)).await;
        }
    });
}
/// Returns a copy of the currently cached Bitcoin node status.
///
/// Reads only the shared cache; no RPC call is made here.
pub async fn get_bitcoin_status() -> BitcoinNodeStatus {
    let guard = cache().read().await;
    (*guard).clone()
}
async fn fetch_bitcoin_status() -> Result<BitcoinNodeStatus> {
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(20))
.build()
.context("build Bitcoin status HTTP client")?;
let blockchain_info = bitcoin_rpc_call(&client, "getblockchaininfo", serde_json::json!([]))
.await
.context("getblockchaininfo")?;
let network_info = bitcoin_rpc_call(&client, "getnetworkinfo", serde_json::json!([]))
.await
.context("getnetworkinfo")
.ok();
let index_info = bitcoin_rpc_call(&client, "getindexinfo", serde_json::json!([]))
.await
.context("getindexinfo")
.ok();
let zmq_notifications = bitcoin_rpc_call(&client, "getzmqnotifications", serde_json::json!([]))
.await
.context("getzmqnotifications")
.ok();
Ok(BitcoinNodeStatus {
ok: true,
stale: false,
updated_at_ms: now_ms(),
error: None,
blockchain_info: Some(blockchain_info),
network_info,
index_info,
zmq_notifications,
})
}
async fn bitcoin_rpc_call(
client: &reqwest::Client,
method: &str,
params: serde_json::Value,
) -> Result<serde_json::Value> {
let (rpc_user, rpc_pass) = crate::bitcoin_rpc::bitcoin_rpc_credentials().await;
let body = serde_json::json!({
"jsonrpc": "1.0",
"id": "bitcoin-status",
"method": method,
"params": params,
});
let resp = client
.post(crate::constants::BITCOIN_RPC_URL)
.basic_auth(rpc_user, Some(rpc_pass))
.header("Content-Type", "application/json")
.json(&body)
.send()
.await
.context("Bitcoin RPC request failed")?;
let status = resp.status();
let json: serde_json::Value = resp.json().await.context("decode Bitcoin RPC JSON")?;
if !status.is_success() {
anyhow::bail!("Bitcoin RPC returned {}: {}", status, json);
}
if let Some(error) = json.get("error").filter(|e| !e.is_null()) {
anyhow::bail!("Bitcoin RPC {} error: {}", method, error);
}
json.get("result")
.cloned()
.context("missing Bitcoin RPC result")
}
#[cfg(test)]
mod tests {
    use super::friendly_transient_error;

    /// A "Verifying blocks" RPC error gets its own wording, not the generic one.
    #[test]
    fn explains_verifying_blocks_without_generic_timeout_copy() {
        let raw_error = r#"getblockchaininfo: Bitcoin RPC returned 500 Internal Server Error: {"error":{"code":-28,"message":"Verifying blocks..."}}"#;
        let rendered = friendly_transient_error(false, raw_error);

        assert!(rendered.contains("verifying blocks after restart"));
        assert!(rendered.contains("retrying automatically"));
    }

    /// A refused connection is described as waiting for the RPC listener, and
    /// the cached-state wording is used when a snapshot exists.
    #[test]
    fn explains_missing_rpc_listener() {
        let raw_error = "getblockchaininfo: tcp connect error: Connection refused (os error 111)";
        let rendered = friendly_transient_error(true, raw_error);

        assert!(rendered.contains("waiting for the Bitcoin RPC listener"));
        assert!(rendered.contains("showing last known state"));
    }

    /// A timed-out request is described as the node being too busy to answer.
    #[test]
    fn explains_rpc_timeout() {
        let raw_error = "getblockchaininfo: Bitcoin RPC request failed: operation timed out";
        let rendered = friendly_transient_error(false, raw_error);

        assert!(rendered.contains("busy and not answering RPC before the timeout"));
    }
}