fix(bitcoin,ui): RAM-aware dbcache to stop swap-thrash 502s + snappier status + icon placeholder

Sizes bitcoind -dbcache to host RAM (~1/16, floor 300 MB, cap 4096 MB) instead of
a fixed 2048 MB (low-memory hosts) or 4096 MB. A multi-GB UTXO cache on an 8 GB
node running the full app stack
pushed memory past physical RAM and triggered system-wide swap thrash: the disk
saturated, bitcoind could not answer its own RPC, and the dashboard backend's
sqlite reads stalled — surfacing as fleet-wide /rpc/v1 502s and a blank Bitcoin
UI. Applied in scripts/container-specs.sh (reconciler path) and the config.rs
bitcoin-core path.

Bitcoin status cache now polls every 5s (was 10s, or 15s after an error) with an
8s timeout (was 20s)
and fetches the four RPCs concurrently, so the cached snapshot tracks bitcoind's
responsive windows during IBD and the UI stops dwelling on "reconnecting...".

Unifies the divergent discover AppGrid/FeaturedApps image-error handlers onto the
canonical placeholder fallback so missing app icons render the placeholder.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
archipelago 2026-06-18 09:14:47 -04:00
parent 549c6180a2
commit cc2e055e09
5 changed files with 70 additions and 43 deletions

View File

@ -349,13 +349,37 @@ fn http_probe_cmd(url: &'static str) -> &'static str {
}
}
/// Returns the `-dbcache` value (in MB) to hand to bitcoind, scaled to host RAM.
///
/// The budget is one sixteenth of the `MemTotal` figure reported by
/// `/proc/meminfo`, bounded to 300..=4096 MB (bitcoind's own default is 450).
/// If `/proc/meminfo` cannot be read, or its first `MemTotal:` line does not
/// parse, the host is assumed to have 16000 MB, which yields a 1000 MB cache.
///
/// Why not a fixed value: an oversized UTXO cache on a small machine pushes
/// bitcoind plus the ~20 app containers beyond physical RAM and the box starts
/// swap-thrashing — the disk saturates, bitcoind stops answering its own RPC,
/// and the dashboard backend's sqlite reads stall, which shows up as /rpc/v1
/// 502s and a blank Bitcoin UI. The same sizing rule is implemented in
/// scripts/container-specs.sh; keep the two in step.
pub(super) fn bitcoin_dbcache_mb() -> u64 {
    // Assumed host size when /proc/meminfo is unreadable or unparsable.
    const FALLBACK_TOTAL_MB: u64 = 16000;
    const FLOOR_MB: u64 = 300;
    const CEILING_MB: u64 = 4096;

    let mut mem_total_kb: Option<u64> = None;
    if let Ok(meminfo) = std::fs::read_to_string("/proc/meminfo") {
        for line in meminfo.lines() {
            if let Some(rest) = line.strip_prefix("MemTotal:") {
                // Only the first `MemTotal:` line is consulted; if its value
                // does not parse we fall through to the fallback below.
                mem_total_kb = rest
                    .split_whitespace()
                    .next()
                    .and_then(|field| field.parse::<u64>().ok());
                break;
            }
        }
    }

    let host_mb = match mem_total_kb {
        Some(kb) => kb / 1024,
        None => FALLBACK_TOTAL_MB,
    };

    let budget_mb = host_mb / 16;
    budget_mb.max(FLOOR_MB).min(CEILING_MB)
}
/// Get per-app memory limit.
pub(super) fn get_memory_limit(app_id: &str) -> &'static str {
match app_id {
// Heavy apps. Bitcoin: dbcache uses ~4GB; the daemon also needs
// headroom for mempool + connection buffers + script-verifier
// memory + I/O. 4g caused OOM-cascades during IBD. 8g is the
// floor; ideally this would be host-RAM aware (next pass).
// Heavy apps. Bitcoin: dbcache is now host-RAM-aware (see
// bitcoin_dbcache_mb), so the daemon's footprint scales with the box.
// This cgroup cap is an upper bound for mempool + connection buffers +
// script-verifier memory + I/O; a tight cap (4g) previously caused
// OOM-cascades during IBD, so keep 8g as a generous ceiling rather
// than a tight limit — swap thrash is prevented at the dbcache layer.
"bitcoin" | "bitcoin-core" | "bitcoin-knots" => "8g",
// ElectrumX indexing spikes above its cache size due to Python,
// RocksDB, socket buffers, and reorg/history work. Keep cache
@ -674,9 +698,10 @@ pub(super) async fn get_app_config(
// RPC is reachable from the bitcoin-ui companion container.
//
// Sync-speed flags:
// -dbcache=4096 — UTXO set cache; 4GB is the sweet spot before
// diminishing returns. Container has --memory=8g now so
// there's headroom for mempool + connections.
// -dbcache — UTXO set cache, sized to host RAM via
// bitcoin_dbcache_mb() (see there). A fixed 4GB cache swap-
// thrashed small nodes into fleet-wide 502s; ~1/16 of RAM
// keeps headroom for mempool + connections + the app stack.
// -par=0 — use all available cores for script
// verification (defaults to NCPU-1 capped at 16). Was
// effectively pinned at 2 by --cpus=2 (now removed).
@ -689,7 +714,7 @@ pub(super) async fn get_app_config(
"-rpcport=8332".to_string(),
"-printtoconsole=1".to_string(),
"-datadir=/home/bitcoin/.bitcoin".to_string(),
"-dbcache=4096".to_string(),
format!("-dbcache={}", bitcoin_dbcache_mb()),
"-par=0".to_string(),
"-maxconnections=125".to_string(),
]),

View File

@ -13,8 +13,14 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tokio::sync::RwLock;
use tracing::{debug, warn};
const CACHE_REFRESH_SECS: u64 = 10;
const CACHE_ERROR_BACKOFF_SECS: u64 = 15;
// Poll frequently and recover fast so the cached snapshot tracks bitcoind's
// responsive windows during IBD. During heavy block-connection, getblockchaininfo
// can block briefly; a slow 10s/15s/20s cadence let one missed poll age the
// snapshot past the UI's 30s "stale" threshold, so the UI dwelled on
// "reconnecting…" long after bitcoind was answering again. Tight cadence + short
// timeout keeps last-known state fresh and clears the stale banner promptly.
const CACHE_REFRESH_SECS: u64 = 5;
const CACHE_ERROR_BACKOFF_SECS: u64 = 5;
#[derive(Debug, Clone, Serialize)]
pub struct BitcoinNodeStatus {
@ -147,25 +153,20 @@ pub async fn get_bitcoin_status() -> BitcoinNodeStatus {
async fn fetch_bitcoin_status() -> Result<BitcoinNodeStatus> {
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(20))
.timeout(Duration::from_secs(8))
.build()
.context("build Bitcoin status HTTP client")?;
let blockchain_info = bitcoin_rpc_call(&client, "getblockchaininfo", serde_json::json!([]))
.await
.context("getblockchaininfo")?;
let network_info = bitcoin_rpc_call(&client, "getnetworkinfo", serde_json::json!([]))
.await
.context("getnetworkinfo")
.ok();
let index_info = bitcoin_rpc_call(&client, "getindexinfo", serde_json::json!([]))
.await
.context("getindexinfo")
.ok();
let zmq_notifications = bitcoin_rpc_call(&client, "getzmqnotifications", serde_json::json!([]))
.await
.context("getzmqnotifications")
.ok();
// Fetch all four calls concurrently: getblockchaininfo gates freshness, so a
// slow auxiliary call (network/index/zmq) must not delay the snapshot or block
// the next refresh. Only getblockchaininfo failing marks the status stale.
let (blockchain_info, network_info, index_info, zmq_notifications) = tokio::join!(
bitcoin_rpc_call(&client, "getblockchaininfo", serde_json::json!([])),
bitcoin_rpc_call(&client, "getnetworkinfo", serde_json::json!([])),
bitcoin_rpc_call(&client, "getindexinfo", serde_json::json!([])),
bitcoin_rpc_call(&client, "getzmqnotifications", serde_json::json!([])),
);
let blockchain_info = blockchain_info.context("getblockchaininfo")?;
Ok(BitcoinNodeStatus {
ok: true,
@ -173,9 +174,9 @@ async fn fetch_bitcoin_status() -> Result<BitcoinNodeStatus> {
updated_at_ms: now_ms(),
error: None,
blockchain_info: Some(blockchain_info),
network_info,
index_info,
zmq_notifications,
network_info: network_info.ok(),
index_info: index_info.ok(),
zmq_notifications: zmq_notifications.ok(),
})
}

View File

@ -157,6 +157,7 @@
<script setup lang="ts">
import type { MarketplaceApp } from './types'
import { handleImageError } from '@/views/apps/appsConfig'
defineProps<{
filteredApps: MarketplaceApp[]
@ -181,11 +182,6 @@ defineEmits<{
'install': [app: MarketplaceApp]
'retry-nostr': []
}>()
function handleImageError(event: Event) {
const img = event.target as HTMLImageElement
img.src = '/assets/img/logo-archipelago.svg'
}
</script>
<style scoped>

View File

@ -98,6 +98,7 @@
<script setup lang="ts">
import type { FeaturedApp, MarketplaceApp } from './types'
import { handleImageError } from '@/views/apps/appsConfig'
defineProps<{
featuredApps: FeaturedApp[]
@ -114,9 +115,4 @@ defineEmits<{
'launch': [app: MarketplaceApp]
'install': [app: MarketplaceApp]
}>()
function handleImageError(event: Event) {
const img = event.target as HTMLImageElement
img.src = '/assets/img/logo-archipelago.svg'
}
</script>

View File

@ -39,6 +39,17 @@ detect_environment() {
TOTAL_MEM_MB=$(($(awk '/MemTotal/{print $2}' /proc/meminfo 2>/dev/null || echo 16000000) / 1024))
LOW_MEM=false
[ "$TOTAL_MEM_MB" -lt 12000 ] && LOW_MEM=true
# Bitcoin UTXO cache (dbcache) sized to host RAM, NOT a fixed value.
# A large dbcache on a small box pushes total memory (bitcoind + the ~20 app
# containers) past physical RAM and forces system-wide swap thrash: the disk
# saturates, bitcoind can't answer its own RPC, and the dashboard backend's
# sqlite reads stall — surfacing as fleet-wide /rpc/v1 502s and a blank
# Bitcoin UI. The old binary LOW_MEM->2048 toggle still over-committed 8 GB
# nodes. Budget ~1/16 of RAM for the cache, leaving the bulk for the OS +
# containers; floor 300 MB (bitcoind default is 450), cap 4096 MB.
BTC_DBCACHE=$(( TOTAL_MEM_MB / 16 ))
[ "$BTC_DBCACHE" -lt 300 ] && BTC_DBCACHE=300
[ "$BTC_DBCACHE" -gt 4096 ] && BTC_DBCACHE=4096
HOST_IP=$(hostname -I 2>/dev/null | awk '{print $1}')
HOST_IP=${HOST_IP:-127.0.0.1}
# Stable mDNS hostname for URLs that get baked into federation/consensus data.
@ -175,8 +186,6 @@ load_spec_bitcoin-knots() {
SPEC_TIER="1"
SPEC_DATA_DIR="/var/lib/archipelago/bitcoin"
SPEC_DATA_UID="100101:100101"
local btc_dbcache=4096
[ "${LOW_MEM:-false}" = "true" ] && btc_dbcache=2048
local btc_rpc_headroom="-rpcthreads=16 -rpcworkqueue=256"
local btc_txrelay_flags="-rpcwhitelistdefault=0"
if [ -f "$SECRETS_DIR/bitcoin-rpc-txrelay-rpcauth" ]; then
@ -184,9 +193,9 @@ load_spec_bitcoin-knots() {
fi
# Dynamic: prune on small disk
if [ "${DISK_GB:-0}" -lt 1000 ]; then
SPEC_CUSTOM_ARGS="-server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=${btc_dbcache} -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
SPEC_CUSTOM_ARGS="-server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=${BTC_DBCACHE} -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
else
SPEC_CUSTOM_ARGS="-server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
SPEC_CUSTOM_ARGS="-server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=${BTC_DBCACHE} -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
fi
}