fix(bitcoin,ui): RAM-aware dbcache to stop swap-thrash 502s + snappier status + icon placeholder
Sizes bitcoind -dbcache to host RAM (~1/16, floor 300MB, cap 4096) instead of a fixed 2048/4096. A multi-GB UTXO cache on an 8GB node running the full app stack pushed memory past physical RAM and triggered system-wide swap thrash: the disk saturated, bitcoind could not answer its own RPC, and the dashboard backend's sqlite reads stalled — surfacing as fleet-wide /rpc/v1 502s and a blank Bitcoin UI. Applied in scripts/container-specs.sh (reconciler path) and the config.rs bitcoin-core path. Bitcoin status cache now polls every 5s (was 10/15) with an 8s timeout (was 20s) and fetches the four RPCs concurrently, so the cached snapshot tracks bitcoind's responsive windows during IBD and the UI stops dwelling on "reconnecting...". Unifies the divergent discover AppGrid/FeaturedApps image-error handlers onto the canonical placeholder fallback so missing app icons render the placeholder. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
549c6180a2
commit
cc2e055e09
@ -349,13 +349,37 @@ fn http_probe_cmd(url: &'static str) -> &'static str {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Bitcoin UTXO cache (`-dbcache`) in MB, sized to host RAM.
|
||||||
|
///
|
||||||
|
/// A fixed large dbcache on a small box pushes bitcoind + the ~20 app
|
||||||
|
/// containers past physical RAM and triggers system-wide swap thrash: the
|
||||||
|
/// disk saturates, bitcoind can't answer its own RPC, and the dashboard
|
||||||
|
/// backend's sqlite reads stall — surfacing as /rpc/v1 502s and a blank
|
||||||
|
/// Bitcoin UI. Budget ~1/16 of RAM for the cache (floor 300 MB — bitcoind's
|
||||||
|
/// own default is 450 — cap 4096 MB), mirroring scripts/container-specs.sh.
|
||||||
|
pub(super) fn bitcoin_dbcache_mb() -> u64 {
|
||||||
|
let total_mb = std::fs::read_to_string("/proc/meminfo")
|
||||||
|
.ok()
|
||||||
|
.and_then(|c| {
|
||||||
|
c.lines()
|
||||||
|
.find_map(|l| l.strip_prefix("MemTotal:"))
|
||||||
|
.and_then(|v| v.split_whitespace().next())
|
||||||
|
.and_then(|kb| kb.parse::<u64>().ok())
|
||||||
|
})
|
||||||
|
.map(|kb| kb / 1024)
|
||||||
|
.unwrap_or(16000); // assume a comfortable host if /proc/meminfo is unreadable
|
||||||
|
(total_mb / 16).clamp(300, 4096)
|
||||||
|
}
|
||||||
|
|
||||||
/// Get per-app memory limit.
|
/// Get per-app memory limit.
|
||||||
pub(super) fn get_memory_limit(app_id: &str) -> &'static str {
|
pub(super) fn get_memory_limit(app_id: &str) -> &'static str {
|
||||||
match app_id {
|
match app_id {
|
||||||
// Heavy apps. Bitcoin: dbcache uses ~4GB; the daemon also needs
|
// Heavy apps. Bitcoin: dbcache is now host-RAM-aware (see
|
||||||
// headroom for mempool + connection buffers + script-verifier
|
// bitcoin_dbcache_mb), so the daemon's footprint scales with the box.
|
||||||
// memory + I/O. 4g caused OOM-cascades during IBD. 8g is the
|
// This cgroup cap is an upper bound for mempool + connection buffers +
|
||||||
// floor; ideally this would be host-RAM aware (next pass).
|
// script-verifier memory + I/O; a tight cap (4g) previously caused
|
||||||
|
// OOM-cascades during IBD, so keep 8g as a generous ceiling rather
|
||||||
|
// than a tight limit — swap thrash is prevented at the dbcache layer.
|
||||||
"bitcoin" | "bitcoin-core" | "bitcoin-knots" => "8g",
|
"bitcoin" | "bitcoin-core" | "bitcoin-knots" => "8g",
|
||||||
// ElectrumX indexing spikes above its cache size due to Python,
|
// ElectrumX indexing spikes above its cache size due to Python,
|
||||||
// RocksDB, socket buffers, and reorg/history work. Keep cache
|
// RocksDB, socket buffers, and reorg/history work. Keep cache
|
||||||
@ -674,9 +698,10 @@ pub(super) async fn get_app_config(
|
|||||||
// RPC is reachable from the bitcoin-ui companion container.
|
// RPC is reachable from the bitcoin-ui companion container.
|
||||||
//
|
//
|
||||||
// Sync-speed flags:
|
// Sync-speed flags:
|
||||||
// -dbcache=4096 — UTXO set cache; 4GB is the sweet spot before
|
// -dbcache — UTXO set cache, sized to host RAM via
|
||||||
// diminishing returns. Container has --memory=8g now so
|
// bitcoin_dbcache_mb() (see there). A fixed 4GB cache swap-
|
||||||
// there's headroom for mempool + connections.
|
// thrashed small nodes into fleet-wide 502s; ~1/16 of RAM
|
||||||
|
// keeps headroom for mempool + connections + the app stack.
|
||||||
// -par=0 — use all available cores for script
|
// -par=0 — use all available cores for script
|
||||||
// verification (defaults to NCPU-1 capped at 16). Was
|
// verification (defaults to NCPU-1 capped at 16). Was
|
||||||
// effectively pinned at 2 by --cpus=2 (now removed).
|
// effectively pinned at 2 by --cpus=2 (now removed).
|
||||||
@ -689,7 +714,7 @@ pub(super) async fn get_app_config(
|
|||||||
"-rpcport=8332".to_string(),
|
"-rpcport=8332".to_string(),
|
||||||
"-printtoconsole=1".to_string(),
|
"-printtoconsole=1".to_string(),
|
||||||
"-datadir=/home/bitcoin/.bitcoin".to_string(),
|
"-datadir=/home/bitcoin/.bitcoin".to_string(),
|
||||||
"-dbcache=4096".to_string(),
|
format!("-dbcache={}", bitcoin_dbcache_mb()),
|
||||||
"-par=0".to_string(),
|
"-par=0".to_string(),
|
||||||
"-maxconnections=125".to_string(),
|
"-maxconnections=125".to_string(),
|
||||||
]),
|
]),
|
||||||
|
|||||||
@ -13,8 +13,14 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
|||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
use tracing::{debug, warn};
|
use tracing::{debug, warn};
|
||||||
|
|
||||||
const CACHE_REFRESH_SECS: u64 = 10;
|
// Poll frequently and recover fast so the cached snapshot tracks bitcoind's
|
||||||
const CACHE_ERROR_BACKOFF_SECS: u64 = 15;
|
// responsive windows during IBD. During heavy block-connection, getblockchaininfo
|
||||||
|
// can block briefly; a slow 10s/15s/20s cadence let one missed poll age the
|
||||||
|
// snapshot past the UI's 30s "stale" threshold, so the UI dwelled on
|
||||||
|
// "reconnecting…" long after bitcoind was answering again. Tight cadence + short
|
||||||
|
// timeout keeps last-known state fresh and clears the stale banner promptly.
|
||||||
|
const CACHE_REFRESH_SECS: u64 = 5;
|
||||||
|
const CACHE_ERROR_BACKOFF_SECS: u64 = 5;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct BitcoinNodeStatus {
|
pub struct BitcoinNodeStatus {
|
||||||
@ -147,25 +153,20 @@ pub async fn get_bitcoin_status() -> BitcoinNodeStatus {
|
|||||||
|
|
||||||
async fn fetch_bitcoin_status() -> Result<BitcoinNodeStatus> {
|
async fn fetch_bitcoin_status() -> Result<BitcoinNodeStatus> {
|
||||||
let client = reqwest::Client::builder()
|
let client = reqwest::Client::builder()
|
||||||
.timeout(Duration::from_secs(20))
|
.timeout(Duration::from_secs(8))
|
||||||
.build()
|
.build()
|
||||||
.context("build Bitcoin status HTTP client")?;
|
.context("build Bitcoin status HTTP client")?;
|
||||||
|
|
||||||
let blockchain_info = bitcoin_rpc_call(&client, "getblockchaininfo", serde_json::json!([]))
|
// Fetch all four calls concurrently: getblockchaininfo gates freshness, so a
|
||||||
.await
|
// slow auxiliary call (network/index/zmq) must not delay the snapshot or block
|
||||||
.context("getblockchaininfo")?;
|
// the next refresh. Only getblockchaininfo failing marks the status stale.
|
||||||
let network_info = bitcoin_rpc_call(&client, "getnetworkinfo", serde_json::json!([]))
|
let (blockchain_info, network_info, index_info, zmq_notifications) = tokio::join!(
|
||||||
.await
|
bitcoin_rpc_call(&client, "getblockchaininfo", serde_json::json!([])),
|
||||||
.context("getnetworkinfo")
|
bitcoin_rpc_call(&client, "getnetworkinfo", serde_json::json!([])),
|
||||||
.ok();
|
bitcoin_rpc_call(&client, "getindexinfo", serde_json::json!([])),
|
||||||
let index_info = bitcoin_rpc_call(&client, "getindexinfo", serde_json::json!([]))
|
bitcoin_rpc_call(&client, "getzmqnotifications", serde_json::json!([])),
|
||||||
.await
|
);
|
||||||
.context("getindexinfo")
|
let blockchain_info = blockchain_info.context("getblockchaininfo")?;
|
||||||
.ok();
|
|
||||||
let zmq_notifications = bitcoin_rpc_call(&client, "getzmqnotifications", serde_json::json!([]))
|
|
||||||
.await
|
|
||||||
.context("getzmqnotifications")
|
|
||||||
.ok();
|
|
||||||
|
|
||||||
Ok(BitcoinNodeStatus {
|
Ok(BitcoinNodeStatus {
|
||||||
ok: true,
|
ok: true,
|
||||||
@ -173,9 +174,9 @@ async fn fetch_bitcoin_status() -> Result<BitcoinNodeStatus> {
|
|||||||
updated_at_ms: now_ms(),
|
updated_at_ms: now_ms(),
|
||||||
error: None,
|
error: None,
|
||||||
blockchain_info: Some(blockchain_info),
|
blockchain_info: Some(blockchain_info),
|
||||||
network_info,
|
network_info: network_info.ok(),
|
||||||
index_info,
|
index_info: index_info.ok(),
|
||||||
zmq_notifications,
|
zmq_notifications: zmq_notifications.ok(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -157,6 +157,7 @@
|
|||||||
|
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import type { MarketplaceApp } from './types'
|
import type { MarketplaceApp } from './types'
|
||||||
|
import { handleImageError } from '@/views/apps/appsConfig'
|
||||||
|
|
||||||
defineProps<{
|
defineProps<{
|
||||||
filteredApps: MarketplaceApp[]
|
filteredApps: MarketplaceApp[]
|
||||||
@ -181,11 +182,6 @@ defineEmits<{
|
|||||||
'install': [app: MarketplaceApp]
|
'install': [app: MarketplaceApp]
|
||||||
'retry-nostr': []
|
'retry-nostr': []
|
||||||
}>()
|
}>()
|
||||||
|
|
||||||
function handleImageError(event: Event) {
|
|
||||||
const img = event.target as HTMLImageElement
|
|
||||||
img.src = '/assets/img/logo-archipelago.svg'
|
|
||||||
}
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<style scoped>
|
<style scoped>
|
||||||
|
|||||||
@ -98,6 +98,7 @@
|
|||||||
|
|
||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import type { FeaturedApp, MarketplaceApp } from './types'
|
import type { FeaturedApp, MarketplaceApp } from './types'
|
||||||
|
import { handleImageError } from '@/views/apps/appsConfig'
|
||||||
|
|
||||||
defineProps<{
|
defineProps<{
|
||||||
featuredApps: FeaturedApp[]
|
featuredApps: FeaturedApp[]
|
||||||
@ -114,9 +115,4 @@ defineEmits<{
|
|||||||
'launch': [app: MarketplaceApp]
|
'launch': [app: MarketplaceApp]
|
||||||
'install': [app: MarketplaceApp]
|
'install': [app: MarketplaceApp]
|
||||||
}>()
|
}>()
|
||||||
|
|
||||||
function handleImageError(event: Event) {
|
|
||||||
const img = event.target as HTMLImageElement
|
|
||||||
img.src = '/assets/img/logo-archipelago.svg'
|
|
||||||
}
|
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@ -39,6 +39,17 @@ detect_environment() {
|
|||||||
TOTAL_MEM_MB=$(($(awk '/MemTotal/{print $2}' /proc/meminfo 2>/dev/null || echo 16000000) / 1024))
|
TOTAL_MEM_MB=$(($(awk '/MemTotal/{print $2}' /proc/meminfo 2>/dev/null || echo 16000000) / 1024))
|
||||||
LOW_MEM=false
|
LOW_MEM=false
|
||||||
[ "$TOTAL_MEM_MB" -lt 12000 ] && LOW_MEM=true
|
[ "$TOTAL_MEM_MB" -lt 12000 ] && LOW_MEM=true
|
||||||
|
# Bitcoin UTXO cache (dbcache) sized to host RAM, NOT a fixed value.
|
||||||
|
# A large dbcache on a small box pushes total memory (bitcoind + the ~20 app
|
||||||
|
# containers) past physical RAM and forces system-wide swap thrash: the disk
|
||||||
|
# saturates, bitcoind can't answer its own RPC, and the dashboard backend's
|
||||||
|
# sqlite reads stall — surfacing as fleet-wide /rpc/v1 502s and a blank
|
||||||
|
# Bitcoin UI. The old binary LOW_MEM->2048 toggle still over-committed 8 GB
|
||||||
|
# nodes. Budget ~1/16 of RAM for the cache, leaving the bulk for the OS +
|
||||||
|
# containers; floor 300 MB (bitcoind default is 450), cap 4096 MB.
|
||||||
|
BTC_DBCACHE=$(( TOTAL_MEM_MB / 16 ))
|
||||||
|
[ "$BTC_DBCACHE" -lt 300 ] && BTC_DBCACHE=300
|
||||||
|
[ "$BTC_DBCACHE" -gt 4096 ] && BTC_DBCACHE=4096
|
||||||
HOST_IP=$(hostname -I 2>/dev/null | awk '{print $1}')
|
HOST_IP=$(hostname -I 2>/dev/null | awk '{print $1}')
|
||||||
HOST_IP=${HOST_IP:-127.0.0.1}
|
HOST_IP=${HOST_IP:-127.0.0.1}
|
||||||
# Stable mDNS hostname for URLs that get baked into federation/consensus data.
|
# Stable mDNS hostname for URLs that get baked into federation/consensus data.
|
||||||
@ -175,8 +186,6 @@ load_spec_bitcoin-knots() {
|
|||||||
SPEC_TIER="1"
|
SPEC_TIER="1"
|
||||||
SPEC_DATA_DIR="/var/lib/archipelago/bitcoin"
|
SPEC_DATA_DIR="/var/lib/archipelago/bitcoin"
|
||||||
SPEC_DATA_UID="100101:100101"
|
SPEC_DATA_UID="100101:100101"
|
||||||
local btc_dbcache=4096
|
|
||||||
[ "${LOW_MEM:-false}" = "true" ] && btc_dbcache=2048
|
|
||||||
local btc_rpc_headroom="-rpcthreads=16 -rpcworkqueue=256"
|
local btc_rpc_headroom="-rpcthreads=16 -rpcworkqueue=256"
|
||||||
local btc_txrelay_flags="-rpcwhitelistdefault=0"
|
local btc_txrelay_flags="-rpcwhitelistdefault=0"
|
||||||
if [ -f "$SECRETS_DIR/bitcoin-rpc-txrelay-rpcauth" ]; then
|
if [ -f "$SECRETS_DIR/bitcoin-rpc-txrelay-rpcauth" ]; then
|
||||||
@ -184,9 +193,9 @@ load_spec_bitcoin-knots() {
|
|||||||
fi
|
fi
|
||||||
# Dynamic: prune on small disk
|
# Dynamic: prune on small disk
|
||||||
if [ "${DISK_GB:-0}" -lt 1000 ]; then
|
if [ "${DISK_GB:-0}" -lt 1000 ]; then
|
||||||
SPEC_CUSTOM_ARGS="-server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=${btc_dbcache} -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
|
SPEC_CUSTOM_ARGS="-server=1 -prune=550 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=${BTC_DBCACHE} -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
|
||||||
else
|
else
|
||||||
SPEC_CUSTOM_ARGS="-server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=4096 -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
|
SPEC_CUSTOM_ARGS="-server=1 -txindex=1 -rpcallowip=0.0.0.0/0 -rpcbind=0.0.0.0:8332 -listen=1 -bind=0.0.0.0:8333 -dbcache=${BTC_DBCACHE} -par=0 -maxconnections=125 ${btc_rpc_headroom} ${btc_txrelay_flags}"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user