fix: harden ElectrumX status — cached backend, stable frontend

Backend: cache status in RwLock, refresh every 15s via background task.
Eliminates per-request TCP race to ElectrumX that caused volatile errors.
Fix error classification so "Failed to read" is transient, not hard error.

Frontend: keep last-known-good data across failed polls, persist Tor
onion once discovered, adaptive polling (5s active / 30s synced).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Dorian 2026-04-09 10:32:55 +02:00
parent 07dff3e4ca
commit d0b9f168f4
3 changed files with 186 additions and 84 deletions

View File

@ -1,10 +1,14 @@
//! ElectrumX sync status: fetches indexed height from Electrum RPC and network height from Bitcoin Core.
//! Status is cached and refreshed on a background timer to avoid race conditions from concurrent queries.
use anyhow::{Context, Result};
use serde::Serialize;
use std::sync::OnceLock;
use std::time::Duration;
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use tokio::net::TcpStream;
use tokio::sync::RwLock;
use tracing::warn;
const ELECTRUMX_HOST: &str = "127.0.0.1";
const ELECTRUMX_PORT: u16 = 50001;
@ -12,6 +16,9 @@ const ELECTRUMX_DATA_DIR: &str = "/var/lib/archipelago/electrumx";
// Approximate final index size in bytes for mainnet (~130GB for ElectrumX full index as of 2026)
const ESTIMATED_FULL_INDEX_BYTES: f64 = 130_000_000_000.0;
/// Refresh interval for status cache
const CACHE_REFRESH_SECS: u64 = 15;
/// Build Bitcoin RPC Basic auth header using shared credentials.
async fn bitcoin_rpc_auth() -> String {
let (user, pass) = crate::bitcoin_rpc::bitcoin_rpc_credentials().await;
@ -20,7 +27,7 @@ async fn bitcoin_rpc_auth() -> String {
format!("Basic {}", encoded)
}
#[derive(Debug, Serialize)]
#[derive(Debug, Clone, Serialize)]
pub struct ElectrsSyncStatus {
pub indexed_height: u64,
pub network_height: u64,
@ -33,6 +40,48 @@ pub struct ElectrsSyncStatus {
pub tor_onion: Option<String>,
}
impl Default for ElectrsSyncStatus {
fn default() -> Self {
Self {
indexed_height: 0,
network_height: 0,
progress_pct: 0.0,
status: "starting".to_string(),
error: None,
index_size: None,
tor_onion: None,
}
}
}
/// Cached status, initialized once and refreshed by background task.
static STATUS_CACHE: OnceLock<RwLock<ElectrsSyncStatus>> = OnceLock::new();
fn cache() -> &'static RwLock<ElectrsSyncStatus> {
STATUS_CACHE.get_or_init(|| RwLock::new(ElectrsSyncStatus::default()))
}
/// Spawn background task that refreshes ElectrumX status every CACHE_REFRESH_SECS.
pub fn spawn_status_cache() {
tokio::spawn(async {
// Initial delay — let services start up before first query
tokio::time::sleep(Duration::from_secs(5)).await;
let mut interval = tokio::time::interval(Duration::from_secs(CACHE_REFRESH_SECS));
loop {
interval.tick().await;
let fresh = fetch_electrs_sync_status().await;
let mut cached = cache().write().await;
*cached = fresh;
}
});
}
/// Return cached status (non-blocking read).
pub async fn get_electrs_sync_status() -> ElectrsSyncStatus {
cache().read().await.clone()
}
/// Get the total size of a directory in bytes.
async fn dir_size_bytes(path: &str) -> u64 {
let mut total: u64 = 0;
@ -62,6 +111,23 @@ fn format_bytes(bytes: u64) -> String {
}
}
/// Returns true if the error is a transient connection/IO issue (not a data error).
fn is_transient_error(err_msg: &str) -> bool {
let lower = err_msg.to_lowercase();
lower.contains("connect")
|| lower.contains("reset")
|| lower.contains("refused")
|| lower.contains("timed out")
|| lower.contains("timeout")
|| lower.contains("failed to read")
|| lower.contains("failed to write")
|| lower.contains("failed to flush")
|| lower.contains("empty response")
|| lower.contains("broken pipe")
|| lower.contains("eof")
|| lower.contains("connection")
}
/// Fetch ElectrumX indexed height via Electrum protocol (TCP JSON-RPC).
async fn electrumx_indexed_height() -> Result<u64> {
let timeout_duration = Duration::from_secs(5);
@ -135,6 +201,7 @@ async fn bitcoin_network_height() -> Result<u64> {
.header("Content-Type", "application/json")
.header("Authorization", bitcoin_rpc_auth().await)
.body(body.to_string())
.timeout(Duration::from_secs(10))
.send()
.await
.context("Bitcoin RPC request failed")?;
@ -151,8 +218,8 @@ async fn bitcoin_network_height() -> Result<u64> {
Ok(height)
}
/// Get ElectrumX sync status.
pub async fn get_electrs_sync_status() -> ElectrsSyncStatus {
/// Fetch fresh ElectrumX sync status (called by background cache task).
async fn fetch_electrs_sync_status() -> ElectrsSyncStatus {
// Get index data size
let data_bytes = dir_size_bytes(ELECTRUMX_DATA_DIR).await;
let index_size = if data_bytes > 0 {
@ -196,12 +263,13 @@ pub async fn get_electrs_sync_status() -> ElectrsSyncStatus {
let network_height = match bitcoin_network_height().await {
Ok(h) => h,
Err(e) => {
warn!("ElectrumX status: Bitcoin RPC failed: {}", e);
return ElectrsSyncStatus {
indexed_height: 0,
network_height: 0,
progress_pct: 0.0,
status: "error".to_string(),
error: Some(format!("Bitcoin RPC: {}", e)),
status: "waiting".to_string(),
error: Some("Waiting for Bitcoin node...".to_string()),
index_size,
tor_onion,
};
@ -211,41 +279,38 @@ pub async fn get_electrs_sync_status() -> ElectrsSyncStatus {
let indexed_height = match electrumx_indexed_height().await {
Ok(h) => h,
Err(e) => {
// ElectrumX may not be ready on 50001 during initial sync
let err_msg = e.to_string();
let err_lower = err_msg.to_lowercase();
let (status, error) = if err_lower.contains("connect") || err_lower.contains("reset") || err_lower.contains("refused") || err_lower.contains("timed out") {
// Estimate progress from data directory size
let _est_pct = if data_bytes > 0 {
if is_transient_error(&err_msg) {
// ElectrumX is starting up or busy — estimate from data size
let progress_pct = if data_bytes > 0 {
((data_bytes as f64 / ESTIMATED_FULL_INDEX_BYTES) * 100.0).min(99.0)
} else {
0.0
};
let size_str = index_size.clone().unwrap_or_else(|| "0 MB".to_string());
(
"indexing".to_string(),
Some(format!(
"Building index ({} / ~130 GB estimated). Electrum RPC will be available when complete.",
size_str
)),
)
} else {
("error".to_string(), Some(format!("ElectrumX: {}", e)))
};
// Use estimated progress when indexing
let progress_pct = if status == "indexing" && data_bytes > 0 {
((data_bytes as f64 / ESTIMATED_FULL_INDEX_BYTES) * 100.0).min(99.0)
} else {
0.0
};
return ElectrsSyncStatus {
indexed_height: 0,
network_height,
progress_pct,
status,
error,
status: "indexing".to_string(),
error: Some(format!(
"Building index ({} / ~130 GB estimated). Electrum RPC will be available when complete.",
size_str
)),
index_size,
tor_onion: tor_onion.clone(),
tor_onion,
};
}
// Genuine unexpected error
warn!("ElectrumX status: unexpected error: {}", err_msg);
return ElectrsSyncStatus {
indexed_height: 0,
network_height,
progress_pct: 0.0,
status: "error".to_string(),
error: Some(format!("ElectrumX: {}", err_msg)),
index_size,
tor_onion,
};
}
};

View File

@ -139,6 +139,9 @@ async fn main() -> Result<()> {
// Spawn disk space monitor (warns at 85%, auto-cleans at 90%)
disk_monitor::spawn_disk_monitor(config.data_dir.clone());
// Spawn ElectrumX status cache (refreshes every 15s, serves cached data to avoid race conditions)
electrs_status::spawn_status_cache();
let startup_ms = startup_start.elapsed().as_millis();
info!("Server listening on http://{} (startup: {}ms)", addr, startup_ms);
info!("RPC API: http://{}/rpc/v1", addr);

View File

@ -268,6 +268,11 @@
<script>
var currentTab = 'local';
var torOnion = null;
var lastGoodData = null;
var consecutiveFailures = 0;
var pollTimer = null;
var POLL_ACTIVE = 5000; // 5s while syncing/indexing/starting
var POLL_SYNCED = 30000; // 30s when synced
function renderQR(containerId, text) {
var container = document.getElementById(containerId);
@ -353,20 +358,14 @@
}
}
async function updateStatus() {
try {
var resp = await fetch('/electrs-status', { cache: 'no-store' });
if (!resp.ok) {
throw new Error('Backend unavailable (HTTP ' + resp.status + ')');
function schedulePoll(intervalMs) {
if (pollTimer) clearTimeout(pollTimer);
pollTimer = setTimeout(function() { updateStatus(); }, intervalMs);
}
var text = await resp.text();
if (text.trim().charAt(0) !== '{') {
throw new Error('Waiting for Archipelago backend...');
}
var data = JSON.parse(text);
// Extract Tor onion from status response
if (data.tor_onion && !torOnion) {
function applyData(data) {
// Persist tor onion once discovered — never flicker back to "not configured"
if (data.tor_onion) {
applyTorOnion(data.tor_onion);
}
@ -386,7 +385,14 @@
var statusDot = document.getElementById('statusDot');
var syncIcon = document.getElementById('syncIcon');
if (data.status === 'indexing') {
if (data.status === 'starting' || data.status === 'waiting') {
statusTextEl.textContent = data.error || 'Starting up...';
statusTextEl.style.color = '#fbbf24';
statusDot.className = 'status-dot bg-yellow animate-pulse';
document.getElementById('statusText').textContent = 'Starting';
syncIcon.classList.add('animate-spin-slow');
document.getElementById('connSubtitle').textContent = 'Connections will be available once ElectrumX has completed syncing.';
} else if (data.status === 'indexing') {
statusTextEl.textContent = data.error || 'Building index...';
statusTextEl.style.color = '#fbbf24';
statusDot.className = 'status-dot bg-amber animate-pulse';
@ -416,21 +422,49 @@
syncIcon.classList.add('animate-spin-slow');
document.getElementById('connSubtitle').textContent = 'Connections will be available once ElectrumX has completed syncing.';
}
} catch (e) {
var msg = e.message || 'Unknown error';
if (msg.indexOf('HTTP 5') !== -1 || msg.indexOf('Failed to fetch') !== -1 || msg.indexOf('NetworkError') !== -1) {
msg = 'Waiting for Archipelago backend...';
}
document.getElementById('syncStatusText').textContent = msg;
async function updateStatus() {
try {
var resp = await fetch('/electrs-status', { cache: 'no-store' });
if (!resp.ok) {
throw new Error('backend_unavailable');
}
var text = await resp.text();
if (text.trim().charAt(0) !== '{') {
throw new Error('backend_starting');
}
var data = JSON.parse(text);
// Success — reset failure counter, save as last known good
consecutiveFailures = 0;
lastGoodData = data;
applyData(data);
// Adaptive polling — slower when synced
schedulePoll(data.status === 'synced' ? POLL_SYNCED : POLL_ACTIVE);
} catch (e) {
consecutiveFailures++;
if (lastGoodData && consecutiveFailures <= 3) {
// Keep showing last known good data — don't flicker the UI
// Just poll again sooner to recover
schedulePoll(POLL_ACTIVE);
return;
}
// No prior good data, or too many failures — show connecting state
document.getElementById('syncStatusText').textContent = 'Connecting to Archipelago...';
document.getElementById('syncStatusText').style.color = '#fbbf24';
document.getElementById('statusDot').className = 'status-dot bg-yellow animate-pulse';
document.getElementById('statusText').textContent = 'Connecting';
schedulePoll(POLL_ACTIVE);
}
}
updateStatus();
updateConnectionInfo();
setInterval(updateStatus, 5000);
</script>
</body>
</html>