From c0751e2551f22e2d1e3b554b95c95fa266905d3b Mon Sep 17 00:00:00 2001 From: archipelago Date: Wed, 6 May 2026 09:23:57 -0400 Subject: [PATCH] chore(release): stage v1.7.54-alpha --- CHANGELOG.md | 8 + apps/grafana/manifest.yml | 1 + core/Cargo.lock | 2 +- core/archipelago/Cargo.toml | 2 +- .../archipelago/src/api/rpc/package/config.rs | 20 +- .../src/api/rpc/package/runtime.rs | 39 +- .../archipelago/src/api/rpc/package/stacks.rs | 2 +- core/archipelago/src/bootstrap.rs | 105 +- core/archipelago/src/container/companion.rs | 11 +- .../src/container/prod_orchestrator.rs | 236 +++- core/archipelago/src/container/quadlet.rs | 35 + docs/CHAT_TRANSCRIPT_2026-05-02.md | 317 +++++ docs/CONTAINER_LIFECYCLE_HANDOFF.md | 1033 +++++++++++++++++ .../_archived/build-auto-installer-iso.sh | 26 +- image-recipe/configs/nginx-archipelago.conf | 20 + neode-ui/package-lock.json | 4 +- neode-ui/package.json | 2 +- neode-ui/src/stores/app.ts | 3 +- neode-ui/src/stores/sync.ts | 7 + neode-ui/src/views/AppDetails.vue | 3 +- neode-ui/src/views/Apps.vue | 25 +- .../src/views/appSession/AppSessionFrame.vue | 2 +- neode-ui/src/views/apps/appsConfig.ts | 3 +- release-manifest.json | 34 +- scripts/container-specs.sh | 2 +- scripts/create-release-manifest.sh | 9 +- scripts/deploy-tailscale.sh | 2 +- scripts/deploy-to-target.sh | 4 +- scripts/first-boot-containers.sh | 14 +- scripts/tor/README.md | 2 +- 30 files changed, 1871 insertions(+), 102 deletions(-) create mode 100644 docs/CHAT_TRANSCRIPT_2026-05-02.md create mode 100644 docs/CONTAINER_LIFECYCLE_HANDOFF.md mode change 100644 => 100755 scripts/first-boot-containers.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index b2e7bec3..22e37451 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## v1.7.54-alpha (2026-05-06) + +- Existing installs now self-repair nginx backend proxy locations for `/bitcoin-status` and `/api/app-catalog`, including hosts where `sites-enabled/archipelago` is a copied active file instead of a 
symlink. +- LND UI is consistently served on `18083` across first boot, Tor config, companion Quadlet reconciliation, OTA runtime payloads, and ISO scripts; stale companion units/images are rewritten instead of only checking service active state. +- OTA frontend tarballs now carry a clean runtime payload with updated scripts, docker UI sources, and canonical nginx config, preventing startup promotion from reintroducing stale host assets. +- Release ISO builds now support the primary HTTP app registry when bundling core images, so unbundled media includes File Browser/Cloud support instead of requiring a post-install Marketplace download. +- `.116` was live-updated with the new backend and runtime scripts; focused non-destructive lifecycle audit passes for Bitcoin Knots, LND, BTCPay, Mempool, and Grafana. + ## v1.7.53-alpha (2026-05-05) - Bitcoin Knots/Core config generation no longer duplicates RPC bind and port settings between `bitcoin.conf` and container command args, fixing `Unable to bind all endpoints for RPC server` startup failures. diff --git a/apps/grafana/manifest.yml b/apps/grafana/manifest.yml index 1900c1f0..85fe534a 100644 --- a/apps/grafana/manifest.yml +++ b/apps/grafana/manifest.yml @@ -8,6 +8,7 @@ app: image: grafana/grafana:10.2.0 image_signature: cosign://... 
pull_policy: if-not-present + data_uid: "472:472" dependencies: - storage: 5Gi diff --git a/core/Cargo.lock b/core/Cargo.lock index 44c2c4a6..e5242739 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -80,7 +80,7 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "archipelago" -version = "1.7.53-alpha" +version = "1.7.54-alpha" dependencies = [ "anyhow", "archipelago-container", diff --git a/core/archipelago/Cargo.toml b/core/archipelago/Cargo.toml index 5972d920..be53ccfd 100644 --- a/core/archipelago/Cargo.toml +++ b/core/archipelago/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "archipelago" -version = "1.7.53-alpha" +version = "1.7.54-alpha" edition = "2021" description = "Archipelago Bitcoin Node OS - Native backend" authors = ["Archipelago Team"] diff --git a/core/archipelago/src/api/rpc/package/config.rs b/core/archipelago/src/api/rpc/package/config.rs index 286dd676..842c75a6 100644 --- a/core/archipelago/src/api/rpc/package/config.rs +++ b/core/archipelago/src/api/rpc/package/config.rs @@ -192,7 +192,7 @@ pub(super) fn get_health_check_args(app_id: &str, _rpc_pass: &str) -> Vec ( - "curl -sf http://localhost:3000/api/health || exit 1", + "test -w /var/lib/grafana && test -w /var/lib/grafana/grafana.db && curl -sf http://localhost:3000/api/health || exit 1", "30s", "3", ), @@ -292,7 +292,8 @@ pub(super) fn get_memory_limit(app_id: &str) -> &'static str { "nginx-proxy-manager" => "256m", // Databases "archy-btcpay-db" | "archy-mempool-db" | "mysql-mempool" => "512m", - "immich_postgres" | "penpot-postgres" => "256m", + "immich_postgres" => "2g", + "penpot-postgres" => "256m", "immich_redis" | "penpot-valkey" => "128m", // Default _ => "512m", @@ -428,7 +429,7 @@ pub(super) async fn get_containers_for_app(package_id: &str) -> Result Result<()> { for name in to_start { ensure_runtime_host_port_listener(name).await?; } + if to_start.iter().any(|name| name == "indeedhub") { + 
super::install::patch_indeedhub_nostr_provider().await; + } Ok(()) } @@ -826,6 +829,9 @@ async fn do_package_restart(containers: &[String]) -> Result<()> { } ensure_runtime_host_port_listener(name).await?; } + if containers.iter().any(|name| name == "indeedhub") { + super::install::patch_indeedhub_nostr_provider().await; + } if !errors.is_empty() { return Err(anyhow::anyhow!("Restart failed: {}", errors.join("; "))); } @@ -842,7 +848,10 @@ async fn repair_before_package_start(container_name: &str) { "btcpay-server" | "archy-nbxplorer" => repair_btcpay_dirs().await, "indeedhub-postgres" | "indeedhub-redis" | "indeedhub-minio" | "indeedhub-relay" | "indeedhub-api" | "indeedhub-ffmpeg" | "indeedhub" => repair_indeedhub_network().await, - "grafana" => cleanup_stale_pasta_port("3000").await, + "grafana" => { + repair_grafana_dirs().await; + cleanup_stale_pasta_port("3000").await; + } "gitea" => cleanup_gitea_stale_ports().await, _ => {} } @@ -943,6 +952,34 @@ async fn repair_btcpay_dirs() { repair_btcpay_database_password().await; } +async fn repair_grafana_dirs() { + let _ = tokio::process::Command::new("sudo") + .args(["mkdir", "-p", "/var/lib/archipelago/grafana"]) + .output() + .await; + let podman_chown = tokio::process::Command::new("podman") + .args([ + "unshare", + "chown", + "-R", + "472:472", + "/var/lib/archipelago/grafana", + ]) + .output() + .await; + if !podman_chown.as_ref().is_ok_and(|o| o.status.success()) { + let _ = tokio::process::Command::new("sudo") + .args([ + "chown", + "-R", + "100471:100471", + "/var/lib/archipelago/grafana", + ]) + .output() + .await; + } +} + async fn repair_btcpay_database_password() { let Ok(db_pass) = tokio::fs::read_to_string("/var/lib/archipelago/secrets/btcpay-db-password").await diff --git a/core/archipelago/src/api/rpc/package/stacks.rs b/core/archipelago/src/api/rpc/package/stacks.rs index 2b5a6748..dae05f81 100644 --- a/core/archipelago/src/api/rpc/package/stacks.rs +++ 
b/core/archipelago/src/api/rpc/package/stacks.rs @@ -450,7 +450,7 @@ impl RpcHandler { "--cap-add=SETGID", "--cap-add=SETUID", "--security-opt=no-new-privileges:true", - "--memory=512m", + "--memory=2g", "--pids-limit=4096", "--health-cmd=pg_isready -U postgres || exit 1", "--health-interval=30s", diff --git a/core/archipelago/src/bootstrap.rs b/core/archipelago/src/bootstrap.rs index bf62dd16..399998b0 100644 --- a/core/archipelago/src/bootstrap.rs +++ b/core/archipelago/src/bootstrap.rs @@ -8,8 +8,8 @@ //! //! Two things are synced on startup: //! 1. Doctor artifacts (container-doctor.sh + service + timer). -//! 2. An nginx `location /api/app-catalog` proxy block — required for -//! the App Store catalog proxy to actually reach the backend. +//! 2. Missing nginx backend proxy blocks required for frontend fetches to +//! reach the backend instead of the SPA fallback. //! //! Idempotent: no-ops on boxes that are already in sync. All work is //! best-effort — failures are logged but never abort the backend. @@ -31,6 +31,7 @@ const DOCTOR_SERVICE_PATH: &str = "/etc/systemd/system/archipelago-doctor.servic const DOCTOR_TIMER_PATH: &str = "/etc/systemd/system/archipelago-doctor.timer"; const NGINX_CONF_PATH: &str = "/etc/nginx/sites-available/archipelago"; +const NGINX_ENABLED_CONF_PATH: &str = "/etc/nginx/sites-enabled/archipelago"; const RUNTIME_ASSETS_DIR: &str = "/opt/archipelago/web-ui/archipelago-runtime"; /// Inserted into every server block of the nginx config that lacks the @@ -38,6 +39,8 @@ const RUNTIME_ASSETS_DIR: &str = "/opt/archipelago/web-ui/archipelago-runtime"; /// image-recipe/configs/nginx-archipelago.conf. const NGINX_APP_CATALOG_BLOCK: &str = "\n # App Store catalog proxy — backend fetches from configured registries\n # so the browser doesn't hit CORS/CSP. 
Without this block nginx falls\n # through to the SPA index.html and the frontend gets HTML back instead\n # of JSON.\n location /api/app-catalog {\n proxy_pass http://127.0.0.1:5678;\n proxy_http_version 1.1;\n proxy_set_header Host $host;\n proxy_set_header X-Real-IP $remote_addr;\n proxy_set_header Cookie $http_cookie;\n proxy_connect_timeout 15s;\n proxy_read_timeout 30s;\n proxy_send_timeout 15s;\n error_page 502 503 = @backend_unavailable;\n error_page 504 = @backend_timeout;\n }\n\n"; +const NGINX_BITCOIN_STATUS_BLOCK: &str = "\n location /bitcoin-status {\n proxy_pass http://127.0.0.1:5678/bitcoin-status;\n proxy_http_version 1.1;\n proxy_set_header Host $host;\n proxy_connect_timeout 10s;\n proxy_read_timeout 10s;\n proxy_send_timeout 5s;\n error_page 502 503 = @backend_unavailable;\n error_page 504 = @backend_timeout;\n }\n"; + /// Entry point called from main startup. Never returns an error to the caller — /// failing to bootstrap host artifacts must not prevent the backend from serving. pub async fn ensure_doctor_installed() { @@ -57,8 +60,8 @@ pub async fn ensure_doctor_installed() { Err(e) => warn!("Doctor bootstrap failed (non-fatal): {:#}", e), } match run_nginx().await { - Ok(true) => info!("Patched nginx config to proxy /api/app-catalog"), - Ok(false) => debug!("Nginx already has /api/app-catalog block"), + Ok(true) => info!("Patched nginx config to proxy missing backend endpoints"), + Ok(false) => debug!("Nginx backend endpoint proxy blocks already present"), Err(e) => warn!("Nginx bootstrap failed (non-fatal): {:#}", e), } match run_bitcoin_rpc_repair().await { @@ -444,13 +447,10 @@ async fn write_root_if_needed(path: &str, content: &str) -> Result { Ok(true) } -/// Patch the nginx site config to add a `/api/app-catalog` proxy block if -/// it's missing. 
The original ISO shipped individual per-endpoint `location` -/// blocks and no catch-all `/api/`, so `/api/app-catalog` silently fell -/// through to the SPA `index.html` and the frontend got HTML instead of -/// JSON. We anchor the insert to the DWN comment that already sits right -/// after the `/api/blob` block, so the new block lands in both the HTTP -/// and HTTPS server blocks. +/// Patch the nginx site config to add missing backend proxy blocks. Older ISO +/// configs shipped individual per-endpoint `location` blocks, so missing +/// endpoints silently fell through to the SPA `index.html` and the frontend +/// got HTML instead of JSON. /// /// Validates via `nginx -t` before reloading. On failure the patch is /// rolled back from a backup written just before the write. @@ -465,51 +465,90 @@ async fn run_nginx() -> Result { return Ok(false); } - if !Path::new(NGINX_CONF_PATH).exists() { - debug!("{} missing — skipping nginx bootstrap", NGINX_CONF_PATH); - return Ok(false); + let mut changed = false; + let mut patched_paths = Vec::::new(); + for path in [NGINX_CONF_PATH, NGINX_ENABLED_CONF_PATH] { + let candidate = Path::new(path); + if !candidate.exists() { + debug!("{} missing — skipping nginx bootstrap", path); + continue; + } + let canonical = fs::canonicalize(candidate) + .await + .unwrap_or_else(|_| candidate.to_path_buf()); + if patched_paths.iter().any(|p| p == &canonical) { + continue; + } + patched_paths.push(canonical); + changed |= patch_nginx_conf(path).await?; } + Ok(changed) +} - let content = fs::read_to_string(NGINX_CONF_PATH) +async fn patch_nginx_conf(path: &str) -> Result { + let content = fs::read_to_string(path) .await - .with_context(|| format!("read {}", NGINX_CONF_PATH))?; - if content.contains("location /api/app-catalog") { + .with_context(|| format!("read {}", path))?; + let missing_app_catalog = !content.contains("location /api/app-catalog"); + let missing_bitcoin_status = !content.contains("location /bitcoin-status"); + if 
!missing_app_catalog && !missing_bitcoin_status { return Ok(false); } - // The DWN comment sits at the same indent right after the `/api/blob` - // block in both server blocks — a stable anchor that existed on every - // ISO shipped to date. If it's absent (config got heavily customized), - // we bail rather than guess where to splice. - let anchor = " # DWN endpoints — peer access over Tor (no auth)"; - if !content.contains(anchor) { - warn!("nginx conf missing DWN anchor — skipping /api/app-catalog patch"); - return Ok(false); + let mut patched = content.clone(); + + if missing_bitcoin_status { + let anchor = " location /electrs-status {"; + if !patched.contains(anchor) { + warn!("nginx conf missing electrs-status anchor — skipping /bitcoin-status patch"); + } else { + let replacement = format!("{}{}", NGINX_BITCOIN_STATUS_BLOCK, anchor); + patched = patched.replace(anchor, &replacement); + } } - let replacement = format!("{}{}", NGINX_APP_CATALOG_BLOCK, anchor); - let patched = content.replace(anchor, &replacement); + if missing_app_catalog { + // The DWN comment sits at the same indent right after the `/api/blob` + // block in both server blocks — a stable anchor that existed on every + // ISO shipped to date. If it's absent (config got heavily customized), + // skip rather than guess where to splice. + let anchor = " # DWN endpoints — peer access over Tor (no auth)"; + if !patched.contains(anchor) { + warn!("nginx conf missing DWN anchor — skipping /api/app-catalog patch"); + } else { + let replacement = format!("{}{}", NGINX_APP_CATALOG_BLOCK, anchor); + patched = patched.replace(anchor, &replacement); + } + } + + if patched == content { + return Ok(false); + } // Write patched config via a user-owned tmp + sudo mv, after stashing - // a backup so we can revert if `nginx -t` hates what we produced. + // a backup outside nginx include dirs so validation cannot load it too. 
let pid = std::process::id(); let tmp = format!("/tmp/archipelago-nginx-{}.conf", pid); fs::write(&tmp, &patched) .await .with_context(|| format!("write {}", tmp))?; - let backup = format!("/tmp/archipelago-nginx-backup-{}.conf", pid); - if let Err(e) = host_sudo(&["cp", NGINX_CONF_PATH, &backup]).await { + let backup = format!( + "/tmp/archipelago-nginx-backup-{}-{}.conf", + pid, + patched.len() + ); + if let Err(e) = host_sudo(&["cp", path, &backup]).await { let _ = fs::remove_file(&tmp).await; return Err(e.context("backup nginx conf")); } - let mv = host_sudo(&["mv", &tmp, NGINX_CONF_PATH]).await; + let mv = host_sudo(&["mv", &tmp, path]).await; match mv { Ok(s) if s.success() => {} Ok(s) => { let _ = fs::remove_file(&tmp).await; - anyhow::bail!("sudo mv nginx conf exited with {}", s); + anyhow::bail!("sudo mv nginx conf to {} exited with {}", path, s); } Err(e) => { let _ = fs::remove_file(&tmp).await; @@ -522,7 +561,7 @@ async fn run_nginx() -> Result { let valid = matches!(&test, Ok(s) if s.success()); if !valid { warn!("nginx -t failed after patch — reverting"); - let _ = host_sudo(&["mv", &backup, NGINX_CONF_PATH]).await; + let _ = host_sudo(&["mv", &backup, path]).await; if let Err(e) = test { return Err(e.context("nginx -t")); } diff --git a/core/archipelago/src/container/companion.rs b/core/archipelago/src/container/companion.rs index a0bb0ccd..5cca19e0 100644 --- a/core/archipelago/src/container/companion.rs +++ b/core/archipelago/src/container/companion.rs @@ -186,6 +186,7 @@ pub async fn install_one(spec: &CompanionSpec) -> Result<()> { /// URL for pull). 
async fn ensure_image_present(spec: &CompanionSpec) -> Result { let local_image = format!("localhost/{}:latest", spec.image_base); + let local_image_compat = format!("localhost/{}:local", spec.image_base); let registry_image = format!("{}/{}:latest", COMPANION_REGISTRY, spec.image_base); // Prefer local build — companions can carry build-time customizations @@ -193,6 +194,9 @@ async fn ensure_image_present(spec: &CompanionSpec) -> Result { for dir in spec.build_dir_candidates { let dockerfile = PathBuf::from(dir).join("Dockerfile"); if fs::try_exists(&dockerfile).await.unwrap_or(false) { + if image_exists(&local_image_compat).await { + return Ok(local_image_compat); + } if image_exists(&local_image).await { return Ok(local_image); } @@ -335,13 +339,18 @@ pub async fn reconcile(installed_apps: &[String]) -> Vec<(String, anyhow::Error) } /// Does this companion need install_one to be re-run? Returns true if -/// the unit file is missing OR the service is not active. +/// the unit file is missing, stale, or the service is not active. 
async fn needs_repair(spec: &CompanionSpec) -> Result { let dir = quadlet::unit_dir().await?; let unit_path = dir.join(format!("{}.container", spec.name)); if !fs::try_exists(&unit_path).await.unwrap_or(false) { return Ok(true); } + let expected_image = ensure_image_present(spec).await?; + let expected_unit = build_unit(spec, &expected_image); + if expected_unit.render() != fs::read_to_string(&unit_path).await.unwrap_or_default() { + return Ok(true); + } let svc = format!("{}.service", spec.name); Ok(!quadlet::is_active(&svc).await) } diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs index a766a0f0..4d03c296 100644 --- a/core/archipelago/src/container/prod_orchestrator.rs +++ b/core/archipelago/src/container/prod_orchestrator.rs @@ -113,6 +113,118 @@ async fn chown_for_rootless_container(uid_gid: &str, path: &str) -> Result<()> { )) } +async fn wait_for_host_port(port: u16, timeout_secs: u64) -> bool { + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(timeout_secs); + loop { + if tokio::net::TcpStream::connect(("127.0.0.1", port)) + .await + .is_ok() + { + return true; + } + if std::time::Instant::now() >= deadline { + return false; + } + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } +} + +async fn patch_indeedhub_nostr_provider() { + let _ = tokio::process::Command::new("podman") + .args([ + "exec", + "indeedhub", + "sed", + "-i", + "/X-Frame-Options/d", + "/etc/nginx/conf.d/default.conf", + ]) + .output() + .await; + + let provider_src = "/opt/archipelago/web-ui/nostr-provider.js"; + if tokio::fs::metadata(provider_src).await.is_ok() { + let _ = tokio::process::Command::new("podman") + .args([ + "cp", + provider_src, + "indeedhub:/usr/share/nginx/html/nostr-provider.js", + ]) + .output() + .await; + } + + let check = tokio::process::Command::new("podman") + .args([ + "exec", + "indeedhub", + "grep", + "-q", + "nostr-provider", + 
"/etc/nginx/conf.d/default.conf", + ]) + .output() + .await; + let already_patched = check.map(|o| o.status.success()).unwrap_or(false); + + if !already_patched { + let cat_out = tokio::process::Command::new("podman") + .args(["exec", "indeedhub", "cat", "/etc/nginx/conf.d/default.conf"]) + .output() + .await; + if let Ok(out) = cat_out { + if out.status.success() { + let conf = String::from_utf8_lossy(&out.stdout).to_string(); + let conf = conf.replace( + "location = /sw.js {", + "location = /nostr-provider.js {\n\ + add_header Cache-Control \"no-cache, no-store, must-revalidate\";\n\ + expires off;\n\ + }\n\n\ + location = /sw.js {", + ); + let conf = if conf.contains("try_files") && !conf.contains("sub_filter") { + conf.replacen( + "try_files $uri $uri/ /index.html;", + "try_files $uri $uri/ /index.html;\n\ + sub_filter_once on;\n\ + sub_filter '' '';", + 1, + ) + } else { + conf + }; + + let tmp_path = "/tmp/indeedhub-nginx-patch.conf"; + if tokio::fs::write(tmp_path, &conf).await.is_ok() { + let _ = tokio::process::Command::new("podman") + .args(["cp", tmp_path, "indeedhub:/etc/nginx/conf.d/default.conf"]) + .output() + .await; + let _ = tokio::fs::remove_file(tmp_path).await; + } + } + } + } + + let _ = tokio::process::Command::new("podman") + .args([ + "exec", + "indeedhub", + "sed", + "-i", + "s|proxy_set_header X-Forwarded-Prefix /api;|proxy_set_header X-Forwarded-Prefix $http_x_forwarded_prefix/api;|", + "/etc/nginx/conf.d/default.conf", + ]) + .output() + .await; + + let _ = tokio::process::Command::new("podman") + .args(["exec", "indeedhub", "nginx", "-s", "reload"]) + .output() + .await; +} + /// Outcome of `reconcile_all` for a single app. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ReconcileAction { @@ -501,9 +613,10 @@ impl ProdContainerOrchestrator { let app_id = lm.manifest.app.id.clone(); if app_id == "indeedhub" { // IndeedHub is a multi-container stack installed by the package - // stack path. 
Reconciling its single manifest races stack installs - // and can recreate a broken frontend container with the same name. - return Ok(ReconcileAction::Left("stack-managed".to_string())); + // stack path. Boot reconcile must not fresh-install the catalog + // manifest, but it does need to start/repair an already-installed + // stack and reapply the frontend's Nostr provider patch after boot. + return self.reconcile_indeedhub_stack(mode).await; } let lock = self.app_lock(&app_id).await; let _guard = lock.lock().await; @@ -720,10 +833,24 @@ impl ProdContainerOrchestrator { async fn run_post_data_uid_hooks(&self, app_id: &str) -> Result<()> { match app_id { "fedimint" | "fedimint-gateway" => self.ensure_fedimint_dirs().await, + "grafana" => self.ensure_grafana_dirs().await, _ => Ok(()), } } + async fn ensure_grafana_dirs(&self) -> Result<()> { + let dir = "/var/lib/archipelago/grafana"; + let mkdir = host_sudo(&["mkdir", "-p", dir]) + .await + .context("mkdir grafana data dir")?; + if !mkdir.success() { + return Err(anyhow::anyhow!("mkdir -p {dir} failed with status {mkdir}")); + } + chown_for_rootless_container("472:472", dir) + .await + .context("chown grafana data dir for rootless uid 472") + } + /// Phase 3.3 in-place migration. When `use_quadlet_backends` flips /// from off → on, existing nodes have backend containers parented /// under archipelago.service's cgroup (the bad shape). They need to @@ -1138,6 +1265,59 @@ impl ProdContainerOrchestrator { Ok(()) } + async fn reconcile_indeedhub_stack(&self, mode: ReconcileMode) -> Result { + let frontend_status = match self.runtime.get_container_status("indeedhub").await { + Ok(status) => status, + Err(_) => { + if mode == ReconcileMode::ExistingOnly { + return Ok(ReconcileAction::Left("absent".to_string())); + } + // Fresh stack creation is owned by package::stacks so we do not + // create a single broken frontend container from the manifest. 
+ return Ok(ReconcileAction::Left("stack-managed".to_string())); + } + }; + + self.start_indeedhub_backends().await?; + + let mut started = false; + match frontend_status.state { + ContainerState::Running => {} + ContainerState::Stopped | ContainerState::Exited | ContainerState::Created => { + self.runtime + .start_container("indeedhub") + .await + .context("start IndeedHub frontend during reconcile")?; + started = true; + } + ContainerState::Paused => return Ok(ReconcileAction::Left("paused".to_string())), + ContainerState::Unknown(s) => return Ok(ReconcileAction::Left(s)), + } + + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + self.repair_indeedhub_network_aliases().await; + patch_indeedhub_nostr_provider().await; + + if !wait_for_host_port(7778, 10).await { + tracing::warn!( + "IndeedHub frontend running but host port 7778 is not listening; restarting" + ); + let _ = self.runtime.stop_container("indeedhub").await; + self.runtime + .start_container("indeedhub") + .await + .context("restart IndeedHub frontend after missing host port")?; + tokio::time::sleep(std::time::Duration::from_secs(5)).await; + patch_indeedhub_nostr_provider().await; + } + + if started { + Ok(ReconcileAction::Started) + } else { + Ok(ReconcileAction::NoOp) + } + } + async fn repair_indeedhub_network_aliases(&self) { for (container, alias) in [ ("indeedhub-postgres", "postgres"), @@ -1302,6 +1482,10 @@ impl ProdContainerOrchestrator { return false; } + if self.container_command_drifted(name, manifest).await { + return true; + } + let inspect = tokio::process::Command::new("podman") .args([ "inspect", @@ -1334,6 +1518,52 @@ impl ProdContainerOrchestrator { }) } + async fn container_command_drifted(&self, name: &str, manifest: &AppManifest) -> bool { + if manifest.app.container.entrypoint.is_none() + && manifest.app.container.custom_args.is_empty() + { + return false; + } + + let inspect = tokio::process::Command::new("podman") + .args([ + "inspect", + name, + "--format", + 
"entry={{json .Config.Entrypoint}}\ncmd={{json .Config.Cmd}}", + ]) + .output() + .await; + let Ok(output) = inspect else { + return false; + }; + if !output.status.success() { + return false; + } + + let text = String::from_utf8_lossy(&output.stdout); + let current_entry = text + .lines() + .find_map(|line| line.strip_prefix("entry=")) + .and_then(|json| serde_json::from_str::>>(json).ok()) + .flatten() + .unwrap_or_default(); + let current_cmd = text + .lines() + .find_map(|line| line.strip_prefix("cmd=")) + .and_then(|json| serde_json::from_str::>>(json).ok()) + .flatten() + .unwrap_or_default(); + + let expected_entry = manifest + .app + .container + .entrypoint + .clone() + .unwrap_or_default(); + current_entry != expected_entry || current_cmd != manifest.app.container.custom_args + } + async fn apply_data_uid(&self, manifest: &AppManifest) -> Result<()> { let Some(uid_gid) = manifest.app.container.data_uid.as_ref() else { return Ok(()); diff --git a/core/archipelago/src/container/quadlet.rs b/core/archipelago/src/container/quadlet.rs index ce805e41..b99086c1 100644 --- a/core/archipelago/src/container/quadlet.rs +++ b/core/archipelago/src/container/quadlet.rs @@ -829,6 +829,41 @@ app: assert_eq!(u.restart_policy, RestartPolicy::OnFailure); } + #[test] + fn from_manifest_preserves_grafana_data_uid_and_volume_shape() { + let yaml = r#" +app: + id: grafana + name: Grafana + version: 10.2.0 + container: + image: grafana/grafana:10.2.0 + data_uid: "472:472" + volumes: + - type: bind + source: /var/lib/archipelago/grafana + target: /var/lib/grafana + options: [rw] + resources: + memory_limit: 1g +"#; + let m = AppManifest::parse(yaml).unwrap(); + assert_eq!(m.app.container.data_uid.as_deref(), Some("472:472")); + + let u = QuadletUnit::from_manifest(&m, "grafana"); + assert_eq!(u.memory_mb, Some(1024)); + assert_eq!(u.bind_mounts.len(), 1); + assert_eq!( + u.bind_mounts[0].host, + PathBuf::from("/var/lib/archipelago/grafana") + ); + assert_eq!( + 
u.bind_mounts[0].container, + PathBuf::from("/var/lib/grafana") + ); + assert!(!u.bind_mounts[0].read_only); + } + #[test] fn from_manifest_marks_ro_volumes_read_only() { let yaml = r#" diff --git a/docs/CHAT_TRANSCRIPT_2026-05-02.md b/docs/CHAT_TRANSCRIPT_2026-05-02.md new file mode 100644 index 00000000..71a926a2 --- /dev/null +++ b/docs/CHAT_TRANSCRIPT_2026-05-02.md @@ -0,0 +1,317 @@ +# Chat Transcript And Working Notes + +Date: 2026-05-02 + +This file captures the current chat context, decisions, progress, and next steps so work can continue from another device/session. + +## User Request + +The user asked to continue hardening Archipelago app/container lifecycle, then asked multiple times to save the plan/progress/next steps and finally to save the entire chat to Markdown. + +Key user constraints and corrections: + +- Continue if next steps are clear; ask only if blocked. +- Exhaustively harden app/container lifecycle before release. +- Preserve data during destructive lifecycle testing unless explicitly instructed otherwise. +- Do not rely on `/app/...` proxy paths for app launch/testing. The user corrected: “we never use paths only ports.” +- LND/Electrum wallet-connect tests must validate real connection details and QR, including Tor. + +## Earlier Progress Summary + +Before the latest work, the project already had substantial lifecycle hardening in progress: + +- Remote lifecycle harness exists at `tests/lifecycle/remote-lifecycle.sh`. +- `.198` SSH works with `/home/archipelago/.ssh/id_ed25519`. +- `.228` RPC works, but SSH is blocked with `Permission denied (publickey,password)`. +- Multiple backend release binaries were built and deployed to `.198` with backups in `/usr/local/bin/archipelago.bak-*`. +- Fixed stale package scanner state recovery from `Removing -> Running` when a container is actually live. +- Fixed startup ordering so crash recovery runs before BootReconciler. 
+- Removed dangerous automatic Podman runtime directory deletion on `podman info` failure. +- Narrowed generic crash recovery to safe legacy containers. +- Fixed companion reconciliation on install/start/restart. +- Fixed uninstall/reinstall behavior so uninstall disables manifest apps instead of deleting manifest availability, and reinstall re-enables them. +- Fixed LND config generation/repair: + - `bitcoin.active=true` + - `bitcoin.mainnet=true` + - `bitcoin.node=bitcoind` + - `bitcoind.rpchost=bitcoin-knots:8332` + - sudo fallback for writing container-owned config paths. +- `.198` had previously passed focused lifecycle for `filebrowser`, `bitcoin-knots`, and a looser LND launch test. + +## Major Files Touched In This Session + +- `docs/CONTAINER_LIFECYCLE_HANDOFF.md` +- `docs/CHAT_TRANSCRIPT_2026-05-02.md` +- `tests/lifecycle/remote-lifecycle.sh` +- `core/archipelago/src/container/lnd.rs` +- `core/archipelago/src/container/companion.rs` +- `core/archipelago/src/container/prod_orchestrator.rs` +- `core/archipelago/src/container/docker_packages.rs` +- `core/container/src/podman_client.rs` +- `core/archipelago/src/port_allocator.rs` +- `apps/lnd-ui/manifest.yml` +- `neode-ui/src/views/appSession/appSessionConfig.ts` +- `neode-ui/src/stores/container.ts` +- `neode-ui/src/stores/appLauncher.ts` +- `neode-ui/src/views/appDetails/appDetailsData.ts` +- nginx config/snippet files under `scripts/` and `image-recipe/` + +## LND Wallet Bootstrap Investigation + +Initial strict LND probe failed because `/lnd-connect-info` could not read `admin.macaroon`: + +```text +Failed to read LND admin macaroon — is LND installed? +direct: Permission denied (os error 13) +sudo: cat: /var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/admin.macaroon: No such file or directory +``` + +LND logs showed the wallet was uninitialized/locked: + +```text +Waiting for wallet encryption password. Use lncli create... 
+``` + +Tests showed `lncli create` is interactive and does not support `--stdin`: + +```text +[lncli] flag provided but not defined: -stdin +``` + +`lncli unlock --stdin` is supported, so the final approach was: + +- Use LND REST unlocker endpoints for new wallet creation. +- Use `lncli unlock --stdin` only for an existing wallet. +- Treat “wallet already exists” from REST as a signal to unlock. +- Use sudo-aware checks/reads for wallet artifacts because LND data directories are container-owned and `0700`. + +Implemented in `core/archipelago/src/container/lnd.rs`: + +- `ensure_wallet_initialized()` +- `file_exists_as_root()` +- `read_file_as_root()` +- `init_wallet_via_rest()` +- `get_lnd_unlocker_json()` +- `post_lnd_unlocker_json()` +- `unlock_existing_wallet()` +- `wait_for_admin_macaroon()` +- `lnd_getinfo_ready()` + +Focused Rust test passes: + +```bash +cd /home/archipelago/Projects/archy/core +cargo test -p archipelago --bin archipelago lnd +``` + +Result: + +```text +7 passed; 0 failed +``` + +## LND UI Port Collision + +The strict LND UI test then failed with `502`. + +Investigation found a real port collision: + +- `nostr-rs-relay` uses host `8081`. +- Old `archy-lnd-ui` also used host `8081`. +- nginx `/app/lnd/` proxy also pointed at `8081`. + +Fix implemented: + +- Move LND UI companion to host port `18083`, container port `80`. +- Keep `nostr-rs-relay` on `8081`. +- Update app metadata/routing to `18083`. +- Update tests to expect direct port launch. + +Important correction from user: + +```text +we never use paths only ports, how many times do you need to be told +``` + +Action taken after correction: + +- Stop validating through `/app/lnd/` and `/app/electrumx/` in the lifecycle harness. +- Switch `launch_url_for()` to direct app ports. +- Switch app session resolver to direct `http://host:port` launch, even from HTTPS parent pages. +- Remove use of `HTTPS_PROXY_PATHS[id]` in `resolveAppUrl()`. 
+ +Direct-port LND audit command: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd tests/lifecycle/remote-lifecycle.sh +``` + +Result: + +```text +### 192.168.1.198 iteration 1 / 1 ### +lnd state=running +all checks passed +``` + +The audit now validates `http://192.168.1.198:18083/`, not `/app/lnd/`. + +## Lifecycle Harness Changes + +`tests/lifecycle/remote-lifecycle.sh` changes made: + +- Normalize package states with `ascii_downcase` because API returned `Running`. +- Direct port launch URLs: + - LND: `http://${ARCHY_HOST}:18083/` + - Electrum/Electrs: `http://${ARCHY_HOST}:50002/` + - Bitcoin UI: `http://${ARCHY_HOST}:8334/` + - Other apps mapped to direct ports where known. +- LND probe checks: + - `Connect Your Wallet` + - `id="lndQrBox"` + - `id="connHost"` + - `value="rest-tor"` + - `value="grpc-tor"` + - `value="rest-local"` + - `value="grpc-local"` + - `Copy lndconnect URI` + - `/lnd-connect-info` cert, macaroon, ports, and Tor onion. +- Electrum probe checks: + - local QR container and address field + - Tor QR container and onion field + - port `50001` + - QR renderer + - direct `http://${ARCHY_HOST}:50002/qrcode.js` + - `/electrs-status` Tor onion. +- Full lifecycle now fails immediately on any failed phase with `|| return 1` so a later reinstall cannot mask a failed restart/probe. 
+ +## Deployments To `.198` + +Several release builds were made and deployed: + +```bash +cd /home/archipelago/Projects/archy/core +cargo build -p archipelago --bin archipelago --release +``` + +Deploy pattern: + +```bash +scp -i /home/archipelago/.ssh/id_ed25519 -o StrictHostKeyChecking=no \ + /home/archipelago/Projects/archy/core/target/release/archipelago \ + archipelago@192.168.1.198:/tmp/archipelago.new + +ssh -i /home/archipelago/.ssh/id_ed25519 -o StrictHostKeyChecking=no \ + archipelago@192.168.1.198 \ + "sudo cp /usr/local/bin/archipelago /usr/local/bin/archipelago.bak- && \ + sudo install -m 0755 /tmp/archipelago.new /usr/local/bin/archipelago && \ + sudo systemctl restart archipelago.service && \ + systemctl is-active archipelago.service" +``` + +Latest deploy returned: + +```text +active +``` + +## `.198` Current Observations + +After forcing LND package restart, companion reconciliation succeeded: + +```text +nostr-rs-relay Up ... 0.0.0.0:8081->8080/tcp +lnd Up ... 0.0.0.0:8080->8080/tcp, 0.0.0.0:9735->9735/tcp, 0.0.0.0:10009->10009/tcp +archy-lnd-ui Up ... 0.0.0.0:18083->80/tcp +``` + +Direct UI test from `.198` returned `200`: + +```bash +curl -i http://127.0.0.1:18083/ +``` + +Strict direct-port LND audit is green: + +```text +lnd state=running +all checks passed +``` + +## Full LND Lifecycle Status + +Full direct-port lifecycle was started: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh +``` + +It reached: + +```text +### 192.168.1.198 iteration 1 / 1 ### +== lnd: install == +== lnd: stop == +``` + +Then the user aborted the command while asking to save memory/transcript. + +The next continuation point is to rerun full LND direct-port lifecycle from scratch and inspect the stop phase if it hangs/fails. 
+ +## Handoff File + +A durable handoff file was also created: + +```text +docs/CONTAINER_LIFECYCLE_HANDOFF.md +``` + +It contains the plan, progress, current blockers, and next steps. + +## Immediate Next Steps + +1. Rerun full strict LND direct-port lifecycle: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh +``` + +2. If it hangs/fails at `stop`, inspect package runtime stop path and logs: + +```bash +ssh -i /home/archipelago/.ssh/id_ed25519 -o StrictHostKeyChecking=no archipelago@192.168.1.198 \ + 'journalctl -u archipelago.service -n 260 --no-pager | egrep -i "package\.(stop|start|restart|install|uninstall)|lnd|companion|error|failed" | sed -n "1,220p"; podman ps -a --format "{{.Names}} {{.Status}} {{.Ports}}" | egrep "lnd|nostr" || true' +``` + +3. If stop is unreliable, inspect/fix: + +- `core/archipelago/src/api/rpc/package/runtime.rs` +- `core/archipelago/src/container/prod_orchestrator.rs` + +Likely causes to check: + +- Reconciler restarting LND while stop is expected. +- State scanner reporting stale `running`. +- Companion handling interfering with parent app state. +- Async lifecycle returning before actual stop completes. + +4. Once LND full lifecycle is green, run Electrum strict lifecycle with direct port `50002`: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=electrumx ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh +``` + +5. Continue with app groups after LND/Electrum: + +- `filebrowser` +- `bitcoin-knots` +- `lnd` +- `electrumx` +- `mempool` +- `btcpay-server` +- `fedimint` +- remaining catalog apps. + +## Important Instruction To Preserve + +Use ports only for app launch/testing. Do not add or rely on `/app/...` path proxy launch behavior unless the user explicitly changes this requirement. 
diff --git a/docs/CONTAINER_LIFECYCLE_HANDOFF.md b/docs/CONTAINER_LIFECYCLE_HANDOFF.md new file mode 100644 index 00000000..19b0ce90 --- /dev/null +++ b/docs/CONTAINER_LIFECYCLE_HANDOFF.md @@ -0,0 +1,1033 @@ +# Container Lifecycle Handoff + +Last updated: 2026-05-06 + +## Resume Prompt + +```text +Resume Archipelago lifecycle testing from /home/archipelago/Projects/archy. Read docs/CONTAINER_LIFECYCLE_HANDOFF.md first. Preserve data unless explicitly told otherwise. Do not revert unrelated dirty worktree changes. Keep untracked docs/CONTAINER_LIFECYCLE_HANDOFF.md and docs/CHAT_TRANSCRIPT_2026-05-02.md. + +Current focus: multi-node non-destructive hardening across .228, .116, and .67. .228 was live-repaired and verified for dashboard, Bitcoin UI, LND UI, Immich, and authenticated Bitcoin RPC. .116 was live-repaired for stale Bitcoin Knots command drift, Grafana rootless ownership, nginx /bitcoin-status proxying, and stale LND UI companion image/unit drift; focused non-destructive lifecycle audit now passes for bitcoin-knots,lnd,btcpay-server,mempool,grafana. v1.7.54-alpha release artifacts were regenerated from current source and verified to carry runtime payload fixes. .67 remains unreachable from this workspace despite confirmed credentials archipelago/archipelago. + +Durable fixes implemented locally: Bitcoin container entrypoint/cmd drift recreation, Grafana data_uid/rootless ownership repair, Immich Postgres 2g memory, IndeedHub boot/start Nostr provider reapply, Apps loading/launch readiness UI fixes, nginx /bitcoin-status backend proxy repair, and LND UI 18083 companion/spec drift repair. + +Latest deployed backend checksum on `.116` after live deploy: `c6c7830f14dc80b0e22d803997ad3df31c9ab3d4b08829b3bddc1b03ce77bd0a`. 
Latest live verification: nginx `/bitcoin-status` returns JSON, LND UI `http://127.0.0.1:18083/` returns HTTP 200 from `localhost/lnd-ui:local`, runtime payload scripts and promoted `/opt/archipelago/scripts` both carry `18083`, and focused non-destructive lifecycle audit passed for `bitcoin-knots,lnd,btcpay-server,mempool,grafana`. Next action: publish/tag v1.7.54-alpha if approved, then continue to `.228` deploy or `.67` reachability. + +Regenerated release artifacts: +- `releases/v1.7.54-alpha/archipelago`: `77e3a236a6196a5ab9ec2411b150490e78ffc95ea6ab8eb34ab29b3df53cd632` +- `releases/v1.7.54-alpha/archipelago-frontend-1.7.54-alpha.tar.gz`: `a010ac43a2dd02f528202cb2f7b99b61ceab80adc6827877594e41df4ea951fb` +- `releases/manifest.json` and `release-manifest.json`: `0fb73c808ef87c1535c5e5f560ea331bacaded86c8c81abd5cdd2893a0415b6f` +- Unbundled ISO: `image-recipe/results/archipelago-installer-1.7.54-alpha-unbundled-x86_64.iso`, sha256 `9828b244e6ffdd5f1b1d5184c1b22bef7474b32078b1ceb4ec3584d9bdb6775b`, size `2.3G`. +``` + +## 2026-05-06 Resume Checkpoint + +- Goal: make container lifecycle and health recovery durable for every install and existing Archipelago server, while preserving app data. +- `.228` state: + - SSH key auth still fails, but password SSH works with password `archipelago`. + - Quarantined stale Quadlet blocker `~/.config/containers/systemd/bitcoin-core.container.disabled-20260506`. + - Started companion Bitcoin/LND UI services; external ports `8334` and `18083` return HTTP 200. + - Recreated stale `bitcoin-knots` container record only, preserving `/var/lib/archipelago/bitcoin` and `BITCOIN_RPC_PASS`; authenticated local RPC works. + - Diagnosed Immich reset loop as `immich_postgres` memory cap `512MiB`; raised live cap to `2g`/`4g` swap and made it persistent in code. + - Final external checks passed: dashboard 200, Bitcoin UI 200, LND UI 200, Immich 200, Bitcoin RPC unauthenticated 405 expected. 
+- `.116` state: + - Removed stale update override `/etc/systemd/system/archipelago.service.d/update-url.conf`. + - Valid RPC/password auth is `archipelago`; `password123` failed. + - Recreated stale `bitcoin-knots` preserving data and RPC password; direct authenticated RPC works. + - Fixed Grafana with `podman unshare chown -R 472:472 /var/lib/archipelago/grafana`; Grafana health returns 200. + - Deployed locally built fixed backend to `/usr/local/bin/archipelago`; previous binary was backed up and service restarted. + - Backend deploy checksum now `c6c7830f14dc80b0e22d803997ad3df31c9ab3d4b08829b3bddc1b03ce77bd0a`. + - Repaired active nginx config and canonical config so `curl http://127.0.0.1/bitcoin-status` returns JSON instead of SPA HTML. + - Repaired LND UI companion drift: generated quadlet was using stale `localhost/lnd-ui:latest`, whose nginx listened on container port 8081 while the unit mapped `18083:80`. Updated the live unit to use `localhost/lnd-ui:local`; `http://127.0.0.1:18083/` returns HTTP 200 and survives `systemctl --user restart archy-lnd-ui.service`. + - Focused non-destructive lifecycle audit passed: `ARCHY_HOST=192.168.1.116 ARCHY_SCHEME=http ARCHY_PASSWORD=archipelago ARCHY_APPS=bitcoin-knots,lnd,btcpay-server,mempool,grafana ARCHY_STABILITY_SECONDS=5 ARCHY_TIMEOUT=300 tests/lifecycle/remote-lifecycle.sh`. + - Deployed newest local backend and script fixes live to `.116`, restarted Archipelago twice, and re-ran the focused non-destructive audit successfully. Important release/OTA note: startup promoted stale `/opt/archipelago/web-ui/archipelago-runtime/scripts` over `/opt/archipelago/scripts` once; after refreshing the runtime payload scripts too, restart preserved `18083` everywhere. + - Recent Bitcoin/ElectrumX status warnings appear transient during Bitcoin IBD/UTXO flushes. 
Live `/bitcoin-status` is `ok=true`, `stale=false`; ElectrumX reports `waiting` because it is indexed beyond the local Bitcoin node and is waiting for Bitcoin catch-up. +- `.67` state: + - User confirmed credentials `archipelago`/`archipelago`. + - This workspace cannot reach it: SSH `No route to host`, HTTP `000`, ping 100% loss, neighbor incomplete/failed. + - IndeedHub reboot/Nostr signing fix still needs live verification from a host that can reach `.67`. +- Local durable fixes in progress/done: + - Bitcoin/Grafana/Immich/IndeedHub backend fixes are implemented locally. + - UI loading/launch readiness fixes are implemented locally. + - Nginx canonical config now includes `/bitcoin-status` proxy next to `/electrs-status`. + - Startup bootstrap now patches older nginx configs that are missing `/bitcoin-status` and still patches `/api/app-catalog` when needed. It handles both `sites-available/archipelago` and copied `sites-enabled/archipelago` layouts. + - LND UI companion/spec drift is fixed locally: first-boot/container specs now use host `18083`, and companion reconcile now rewrites stale quadlet units/images instead of only checking active state. + - Release packaging now includes `image-recipe/configs/nginx-archipelago.conf` in the OTA runtime payload and strips `__pycache__`, `.pyc`, `.bak`, `.bak-*`, and logs from runtime assets. + - Regenerated `v1.7.54-alpha` frontend tarball was explicitly verified to contain LND UI `18083`, LND UI container nginx `listen 80`, and `/bitcoin-status` nginx blocks; no pycache/pyc/bak junk remains. + - ISO builder now configures both `146.59.87.168:3000` and `git.tx1138.com` as insecure for Podman and passes `--tls-verify=false` for primary HTTP registry pulls. The unbundled ISO now successfully pulls and saves `filebrowser.tar` instead of warning that Cloud/File Browser will be missing. + - ISO output filenames now include the release version and alpha suffix, e.g. `archipelago-installer-1.7.54-alpha-unbundled-x86_64.iso`. 
+- Verification already passed before latest nginx change: + - `cargo fmt` + - `cargo check -p archipelago --bin archipelago` + - `cargo build -p archipelago --bin archipelago --release` + - `bash -n scripts/first-boot-containers.sh` + - `bash -n image-recipe/build-debian-iso.sh image-recipe/archipelago-scripts/install-to-disk.sh image-recipe/write-usb-dd.sh image-recipe/create-fat32-usb.sh image-recipe/_archived/build-auto-installer-iso.sh scripts/create-release-manifest.sh scripts/container-specs.sh scripts/first-boot-containers.sh scripts/self-update.sh` + - `cd neode-ui && npm run build` + - `cd neode-ui && npm run type-check` + - `cd neode-ui && npm test -- appsConfig.test.ts appLauncher.test.ts --run` + - `scripts/check-release-manifest.sh` + - `sudo -n env UNBUNDLED=1 BUILD_FROM_SOURCE=1 bash build-debian-iso.sh` from `image-recipe/` passed and produced the v1.7.54-alpha unbundled ISO. +- Next steps: + - Re-check `.116` Archipelago logs for `Bitcoin status: RPC failure: getblockchaininfo` after Bitcoin IBD/UTXO flushing calms down. + - Deploy the fixed backend to `.228` if desired so durable repairs run there too. + - Optional next gate: run a full bundled/core-image ISO build if you need offline app images. The prior File Browser HTTP registry blocker is fixed for the builder path. + - Verify IndeedHub on `.67` only from a reachable network path. + +## 2026-05-05 Botfights, Gitea, Icons + +## 2026-05-06 Multi-Node Non-Destructive Audit + +### 2026-05-06 `.228` Live Repair + +- Access notes: + - SSH key auth to `.228` still fails, but password SSH works with password `archipelago`. + - Dashboard/RPC health reports `version=1.7.53-alpha`. +- Companion UI repair: + - Root cause: a stale rootless Quadlet unit at `~/.config/containers/systemd/bitcoin-core.container` blocked user Quadlet generation, so `archy-bitcoin-ui.service` and `archy-lnd-ui.service` were missing even though their `.container` files existed. 
+ - Quarantined only the stale blocker: `~/.config/containers/systemd/bitcoin-core.container.disabled-20260506`. + - Ran user daemon reload and started generated companion services. + - Final verification: `archy-bitcoin-ui.service` and `archy-lnd-ui.service` are active; external `http://192.168.1.228:8334/` and `http://192.168.1.228:18083/` both return HTTP 200. +- Bitcoin Knots repair: + - Root cause: existing `bitcoin-knots` container record was stale and still launched `exec bitcoind`; current image only provides `/opt/bitcoin-29.3.knots20260210/bin/bitcoind` on PATH/fallback. + - Removed and recreated only the `bitcoin-knots` container record, preserving `/var/lib/archipelago/bitcoin` and the existing `BITCOIN_RPC_PASS`. + - New command matches the deployed manifest fallback: resolve `command -v bitcoind`, then search `/opt -path '*/bin/bitcoind'`. + - Final verification: container is running, ports `8332`/`8333` are listening, authenticated local RPC `getblockchaininfo` works, and the node is in initial block/header sync. +- Immich repair: + - Root cause: `immich_postgres` was capped at `512MiB`; during Immich v2.7.4 reverse-geocoding geodata import, Postgres child processes were SIGKILLed while bulk inserting into `geodata_places`, forcing DB recovery and causing `immich_server` to reset connections on `2283`. + - Raised only the Postgres container memory limit with `podman update --memory=2g --memory-swap=4g immich_postgres`, then restarted `immich_postgres` and `immich_server`; preserved `/var/lib/archipelago/immich-db` and `/var/lib/archipelago/immich`. + - Final logs showed `Successfully imported 224210 geodata records`, `Initialized local reverse geocoder`, and both Immich API/microservices successfully started. + - Final external verification: `http://192.168.1.228:2283/` returns HTTP 200. +- Final `.228` external status after repair: + - Dashboard `http://192.168.1.228/`: HTTP 200. + - Bitcoin UI `http://192.168.1.228:8334/`: HTTP 200. 
+ - LND UI `http://192.168.1.228:18083/`: HTTP 200. + - Immich `http://192.168.1.228:2283/`: HTTP 200. + - Bitcoin RPC no-auth probe `http://192.168.1.228:8332/`: HTTP 405, expected for reachable RPC without credentials. +- Still outstanding from this audit: + - `.116` has the same stale Bitcoin Knots container-command symptom but RPC password `password123` fails; do not repair until valid auth/SSH access is confirmed. + - `.67` remains unreachable from this machine even with confirmed credentials `archipelago`/`archipelago`: SSH reports `No route to host`, HTTP probes return `000`, local route is via `wlp3s0` from `192.168.1.116`, and ping has 100% packet loss. IndeedHub reboot behavior still needs diagnosis from a host that can reach `.67`. + - The `.228` ad-hoc Immich Postgres memory repair was made persistent locally after the live fix: `install_immich_stack` now creates `immich_postgres` with `--memory=2g`, and `get_memory_limit("immich_postgres")` returns `2g`. Verification passed with `cargo fmt` and `cargo check -p archipelago --bin archipelago`. +- IndeedHub reboot/Nostr signing root cause and local fix: + - User confirmed IndeedHub works after a manual restart, but after server boot it fails to come back correctly and forgets the Nostr signing/provider behavior. + - Root cause in code: `ProdContainerOrchestrator::ensure_running_with_mode` returned `stack-managed` immediately for `indeedhub`, so the boot reconciler never started/repaired the installed stack and never reapplied the imperative frontend nginx/Nostr-provider mutation. + - Additional gap: package start/restart repaired IndeedHub network aliases but did not reapply `nostr-provider.js` / nginx patch after the frontend container was started. 
+ - Local fix: boot reconcile now handles an existing IndeedHub stack without fresh-installing the single manifest: starts backend containers, starts frontend if stopped/exited/created, repairs network aliases, reapplies the Nostr provider/nginx patch, and restarts the frontend if host port `7778` is not listening. + - Local fix: package start/restart now reapplies the IndeedHub Nostr provider patch whenever `indeedhub` is in the started/restarted set. + - Verification passed locally with `cargo fmt` and `cargo check -p archipelago --bin archipelago`. + - Not live-verified on `.67` because this workspace still cannot reach `.67`; deploy the backend build to a reachable test node or run from a host that can reach `.67`, then reboot and confirm `http://<host>:7778/` plus Nostr signing in the iframe. +- Bitcoin/Grafana permanent repair notes: + - `.116` showed `Unable to connect to Bitcoin node` because `bitcoin-knots` had the same stale container command as `.228`: existing container record still executed bare `bitcoind`, but the current image only has `/opt/bitcoin-29.3.knots20260210/bin/bitcoind` discoverable via PATH/fallback. + - Local permanent fix: `ProdContainerOrchestrator::container_env_drifted` now also checks entrypoint/cmd drift against the current manifest. Existing stale containers whose command no longer matches the deployed manifest are removed/recreated by boot reconcile/start/install flows, preserving bind-mounted data. + - `.116` Grafana served `/api/health` but logs showed `GF_PATHS_DATA='/var/lib/grafana' is not writable` and repeated `attempt to write a readonly database`; live data ownership had mixed rootless mapped owners. + - Local permanent fix: `apps/grafana/manifest.yml` now declares `data_uid: "472:472"`, and Grafana start/reconcile paths repair `/var/lib/archipelago/grafana` ownership before start/restart. This makes fresh installs and already-installed nodes self-heal instead of relying on manual `chown`.
+ - Verification passed with `cargo fmt` and `cargo check -p archipelago --bin archipelago`. + +- Current local branch state during audit: + - `main` is 31 commits ahead of `tx1138/main`. + - Tracked worktree is clean. + - Untracked docs: `docs/CONTAINER_LIFECYCLE_HANDOFF.md` and `docs/CHAT_TRANSCRIPT_2026-05-02.md`. +- Connectivity and service health: + - `.198`: SSH reachable with `/home/archipelago/.ssh/id_ed25519`; `archipelago.service` active; local health returns `status=ok`, `version=1.7.53-alpha`. + - `.116`: SSH reachable with `/home/archipelago/.ssh/id_ed25519`; `archipelago.service` active; local health returns `status=ok`, `version=1.7.51-alpha`. + - `.228`: SSH still blocked with `Permission denied (publickey,password)`; dashboard/RPC is reachable over HTTP/HTTPS. +- Broad non-destructive lifecycle audit results: + - `.198` passed cleanly: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_STABILITY_SECONDS=5 ARCHY_TIMEOUT=180 tests/lifecycle/remote-lifecycle.sh`. + - `.228` failed two checks with RPC-only audit: Bitcoin Knots UI direct port `http://192.168.1.228:8334/` returned `status=000`, and LND UI direct port `http://192.168.1.228:18083/` returned `status=000`. Dashboard itself returns HTTP 200. SSH-level diagnosis is blocked until credentials/key access are fixed. + - `.116` audit did not complete within 15 minutes and showed degraded state: `container-health` returned `unknown` for `bitcoin-knots`, `btcpay-server`, and `lnd`; LND direct port `http://192.168.1.116:18083/` returned `status=000`. Direct probes showed dashboard HTTP 200, Bitcoin UI `http://192.168.1.116:8334/` HTTP 200, old LND UI `http://192.168.1.116:8081/` HTTP 200, BTCPay `http://192.168.1.116:23000/` HTTP 302, and Mempool `http://192.168.1.116:4080/` HTTP 200. +- `.116` live diagnostics: + - Deployed backend checksum: `f761e659d661f0a83cd3a67a086bb2279398bc05e50ee3c52e769e52d11e476c`. 
+ - Service has `ARCHIPELAGO_DEV_MODE=true` override and `ARCHIPELAGO_UPDATE_URL=http://192.168.1.116:3000/lfg2025/archy/raw/branch/main/releases/manifest.json`. + - `archy-lnd-ui` is still mapped to `0.0.0.0:8081->80/tcp`, while the current lifecycle harness expects LND UI on `18083`; treat `.116` as stale relative to the current LND port migration. + - `lnd` is `Up ... (unhealthy)` on `8080`, `9735`, and `10009`. + - `btcpay-server` is `Up ... (unhealthy)` on `23000`. + - `bitcoin-knots` is `Up ... (reset)` and backend logs show repeated Bitcoin RPC failures for `getblockchaininfo`. + - Backend logs show ElectrumX status also failing Bitcoin RPC. +- `.198` live diagnostics: + - Deployed backend checksum observed during this audit: `86cf408ed84c7a7a72d1b5529aa97561dd02db38aab57c523999d1f5e7bf48b7`. +- Local smoke verification passed: + - `cargo check -p archipelago --bin archipelago` from `core/`. + - `npm run type-check` from `neode-ui/`. + - `npm test -- appsConfig.test.ts appLauncher.test.ts --run` from `neode-ui/` (`27 passed`). +- Next focused actions: + - Fix `.228` SSH access first if deeper runtime diagnosis is required; RPC-only audit already identifies closed/unreachable direct app ports `8334` and `18083`. + - Bring `.116` forward to the current deployed release/runtime expectations before treating lifecycle failures as fresh regressions. It is on `1.7.51-alpha`, has dev-mode/update-url overrides, and still launches LND UI on legacy port `8081`. + - After `.116` is updated, rerun focused non-destructive checks for `bitcoin-knots`, `lnd`, `btcpay-server`, `mempool`, and ElectrumX/Bitcoin RPC status before a full broad audit. + +## 2026-05-05 Tailscale And Grafana Recheck + +## 2026-05-05 Release v1.7.52-alpha Staging + +- Release target corrected to `1.7.52-alpha`. 
+- Version bumped locally in: + - `core/archipelago/Cargo.toml` + - `core/Cargo.lock` + - `neode-ui/package.json` + - `neode-ui/package-lock.json` +- `.52` release notes added to `CHANGELOG.md`. +- Debian 13/Trixie security mitigation added for rebuilt media: + - `_archived/build-auto-installer-iso.sh` now runs `apt-get -y full-upgrade` after enabling Debian/Trixie security repositories during rootfs, Tailscale, FIPS, and installer environment creation. + - `image-recipe/archipelago-scripts/install-to-disk.sh` now runs `apt-get -y full-upgrade` after writing `trixie-security` sources and before installing kernel/bootloader/packages. + - This does not retroactively patch already-built ISOs; `.52` media must be rebuilt. +- Active ISO command restored: + - Added `image-recipe/build-debian-iso.sh` wrapper around the archived builder so documented ISO commands no longer point at a missing script. + - USB helper scripts now default to `results/archipelago-installer-x86_64.iso` / unbundled fallback and allow `ARCHIPELAGO_ISO=/path/to.iso`. +- `.52` release artifacts staged: + - `releases/v1.7.52-alpha/archipelago` + - `releases/v1.7.52-alpha/archipelago-frontend-1.7.52-alpha.tar.gz` + - `releases/manifest.json` + - `release-manifest.json` +- Manifest validation passed: `scripts/check-release-manifest.sh`. +- Frontend dependency audit: + - Ran `npm audit fix`, removing the critical `protobufjs` advisory and high advisories. + - Remaining audit finding is moderate `uuid <14` via `dockerode`; `npm audit fix --force` would upgrade to breaking `dockerode@5.0.0`, so this was not forced during release staging. +- Final verification passed: + - `cargo build -p archipelago --bin archipelago --release` with existing `reconcile_all` dead-code warning. + - `cargo check -p archipelago --bin archipelago` with same warning. + - `cd neode-ui && npm run build`. + - `cd neode-ui && npm run type-check && npm test -- appsConfig.test.ts appLauncher.test.ts --run`. 
+ - `bash -n image-recipe/build-debian-iso.sh image-recipe/archipelago-scripts/install-to-disk.sh image-recipe/write-usb-dd.sh image-recipe/create-fat32-usb.sh image-recipe/_archived/build-auto-installer-iso.sh`. + - `npm audit --audit-level=high` reports only moderate findings and exits with the remaining moderate `dockerode`/`uuid` issue. +- Not yet done in this pass: + - Full bundled ISO build was not run; unbundled ISO build passed. + - `.52` release artifacts were staged locally but not committed, tagged, or pushed. + - No git commit was created. + +### 2026-05-05 Warning Fix And ISO Build + +- Removed the `reconcile_all` dead-code warning by making the install-missing reconcile helper test-only with `#[cfg(test)]`; production uses `reconcile_existing`. +- Verification now passes without Rust warnings: + - `cargo check -p archipelago --bin archipelago` + - `cargo build -p archipelago --bin archipelago --release` +- Refreshed `.52` backend artifact and manifests after the warning fix: + - `scripts/check-release-manifest.sh` passes. + - Backend sha256: `fc47c3bc42f67472252cb854bb03e200a92929ab38aeac519422704486af18d4`. + - Frontend tarball sha256: `329e57a0491e91966afcd5a82f5c00920657695b01ecc6c9e99c6814b44abf29`. +- Built unbundled `.52` Debian ISO: + - Command: `sudo -n env UNBUNDLED=1 BUILD_FROM_SOURCE=1 bash image-recipe/build-debian-iso.sh` from `image-recipe/`. + - Output: `image-recipe/results/archipelago-installer-unbundled-x86_64.iso`. + - Size: `2.3G`. + - sha256: `547ba5dcd0ad61aeaa52ce0beaff4f447e2ab2c59bf6b1fa127529606fe0209d`. +- ISO build note: + - The unbundled ISO completed successfully. + - Optional File Browser core image pull failed during Step 3b because `146.59.87.168:3000` answered HTTP while Podman tried HTTPS: `server gave HTTP response to HTTPS client`. 
+ - This was non-fatal for unbundled media; Cloud/File Browser may need post-install Marketplace download unless registry TLS/insecure registry config is corrected before a bundled/core-image ISO. + +- Backend build deployed to `.198`: `eb539aaa11b32776888be1b23b90c9c0c78b46d8a86dc55ccce7f5b15bbda16e`. +- Tailscale is now qualified: + - Root cause: container command started `tailscale web` before `tailscaled`, so the web UI exited because `/var/run/tailscale/tailscaled.sock` did not exist yet. + - Fixed backend config and first-boot script to start `tailscaled --tun=userspace-networking` first, then bind `tailscale web --listen 0.0.0.0:8240`. + - Removed only the stale `tailscale` container on `.198`; preserved `/var/lib/archipelago/tailscale`. + - Full preserve-data lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=tailscale ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Frontend launch now opens local app port `http://<host>:8240/` instead of the external Tailscale admin site. + - Browser launch passed: `ARCHY_BASE_URL=http://192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APP_ID=tailscale ARCHY_APP_TITLE=Tailscale ARCHY_APP_CARD_TITLE=Tailscale ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:8240/ ARCHY_EXPECTED_LAUNCH_MODE=popup ARCHY_EXPECTED_BODY_PATTERN='Tailscale|Connect|Login|Sign|Authorize|Machines|Admin|Tailnet|VPN' npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line`. +- Grafana regression was found during broad audit: + - RPC/container state was `running`, but direct launch failed on `http://192.168.1.198:3000/` with `status=000`; Podman reported a port mapping while `ss` had no host listener. + - Extended existing host-port listener repair to include Grafana port `3000` on install/adoption/start/restart paths. + - Full Grafana lifecycle passed after repair, then focused Grafana audit passed.
+- Broad `.198` audit passed after Tailscale and Grafana repairs: + - Command: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_STABILITY_SECONDS=5 ARCHY_TIMEOUT=300 tests/lifecycle/remote-lifecycle.sh`. + - Running apps included `tailscale`, `grafana`, and the previously qualified app set. + - Absent and tolerated: `ollama`, `photoprism`, `electrumx`, `dwn`. +- Local verification passed: + - `cargo fmt` + - `cargo build -p archipelago --bin archipelago --release` with existing `reconcile_all` dead-code warning. + - `cargo check -p archipelago --bin archipelago` with same warning. + - `bash -n scripts/first-boot-containers.sh` + - `cd neode-ui && npm run build` + - `cd neode-ui && npm run type-check` + - `cd neode-ui && npm test -- appsConfig.test.ts appLauncher.test.ts --run` + +- Backend build deployed to `.198`: `4b92ecea7d0a988c4ebe814b47f49f00277867d5f1eb0dca2cb1cd906b536fe6`. +- Gitea regression re-tested and repaired after later launch failure: + - Failure reproduced during full lifecycle after restart: `launch failed: gitea http://192.168.1.198:3001/ status=000 bytes=0`. + - Live diagnosis: Gitea was healthy internally on container port `3000` and `ROOT_URL` was correct, but Podman's rootless `pasta` host listener on `:3001` accepted no traffic. + - Changed Gitea install networking in `core/archipelago/src/api/rpc/package/install.rs` to `--network=slirp4netns:allow_host_loopback=true`, matching the Uptime Kuma rootless listener repair path. + - Backend build deployed to `.198`: `9db6c192c2e633c4648fafc0372ea0f3cb0749aacc5396bb12f7710c8bac4aa7`. + - Full preserve-data lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=gitea ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Direct check passed: `http://192.168.1.198:3001/` returned `HTTP 200`; final container inspect showed `network=slirp4netns` and `rootlessport` listening on `:3001`. 
+- Botfights is qualified: + - Initial failure was stale `pasta.avx2` listener on host port `9100`; no Botfights container owned it. + - Killed stale pid `211879` and reran full lifecycle. + - Full lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=botfights ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. +- Gitea is qualified: + - User-visible launch error was broken asset root: Gitea generated `/app/gitea/assets/...` URLs while the UI/lifecycle launched direct port `http://192.168.1.198:3001/`. + - Fixed backend post-install hook in `core/archipelago/src/api/rpc/package/install.rs` to set `ROOT_URL = http://<host>:3001/` instead of `/app/gitea/`. + - Added install/start/restart stale listener cleanup and host-port verification for Gitea host ports `3001`, `2222`, and legacy stale `3000`. + - Full lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=gitea ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. +- Icons updated locally: + - Replacement files found at `/home/archipelago/immich.png`, `/home/archipelago/electrumx.png`, and `/home/archipelago/grafana.png`. + - Replaced `neode-ui/public/assets/img/app-icons/immich.png`, `neode-ui/public/assets/img/app-icons/grafana.png`, and `neode-ui/public/assets/img/grafana.png`. + - Added `neode-ui/public/assets/img/app-icons/electrumx.png` and updated catalog/curated/marketplace references from `.webp` to `.png`. + - Installed Gitea icon now falls back to existing `/assets/img/app-icons/gitea.svg` instead of nonexistent `/assets/img/app-icons/gitea.png`. + - `AppHeroSection.vue` now uses `resolveAppIcon()` so app details uses the same fallback behavior. + - Verification passed: `npm test -- appsConfig.test.ts --run`.
+ +## 2026-05-05 Nextcloud, Uptime Kuma, ElectrumX Warning + +- Backend build deployed to `.198`: `1796cccd44e7d8f34b495b2dc04bc933d85a32c8c77cee31800653cc5f7b05d0`. +- Nextcloud live `403 Forbidden` was caused by unreadable Apache/PHP entry files inside the container: + - `.htaccess`, `index.php`, and `status.php` were `0600 root:root`. + - Added targeted Nextcloud permission repair in `core/archipelago/src/api/rpc/package/install.rs` instead of broad recursive ownership/mode changes. + - Manually repaired live container file modes and restarted Nextcloud. + - Retested `http://192.168.1.198:8085/status.php` and `http://192.168.1.198:8085/`; both returned `HTTP/1.1 200 OK`. +- Uptime Kuma root cause was rootless host port listener instability: + - The app was healthy internally on `127.0.0.1:3001` and returned `302 /dashboard`, while the host `3002` listener was missing despite Podman showing a mapping. + - Changed Uptime Kuma install networking in `core/archipelago/src/api/rpc/package/install.rs` to `--network=slirp4netns:allow_host_loopback=true`. + - Ran `cargo fmt`, `cargo check -p archipelago --bin archipelago`, and `cargo build -p archipelago --bin archipelago --release` successfully before deploy. + - Recreated Uptime Kuma through local backend RPC on `.198` with preserve-data uninstall/reinstall; preserved `/var/lib/archipelago/uptime-kuma`. + - Retested `http://192.168.1.198:3002/`; final response was `HTTP/1.1 302 Found` with `Location: /dashboard`. +- ElectrumX archival-node UI warning implemented in `neode-ui`: + - `Marketplace.vue`, `MarketplaceAppDetails.vue`, and `Discover.vue` fetch `/bitcoin-status` and only block ElectrumX/electrs/mempool-electrs installs when `blockchain_info.pruned === true`. + - Failed or unavailable prune-status fetches fail open: they do not block install attempts. + - Warning text shown via toast/error paths: `You need a full archival bitcoin node before downloading ElectrumX`.
+ - The blocked-state warning button in `MarketplaceAppCard.vue` stays clickable so that clicking it shows the warning toast, instead of the button being silently disabled. + - Frontend verification passed: `npm run type-check` from `neode-ui`. +- Icon replacement remains blocked: + - Searched likely upload locations and repo icon paths; no replacement icon files were found. + - Existing icon directory is `neode-ui/public/assets/img/app-icons/`. + - Continue once the actual replacement files/path are provided. + +## 2026-05-04 Testing Continuation + +- SearXNG rootless listener fix deployed and qualified after reconnection: + - Backend build deployed to `.198`: `0773e8719cfd1099ffeae27d9f046749353ebb7fa795c36097b674bd54c28820`. + - Root cause: the new-container install path repaired a missing rootless `pasta` host listener on port `8888`, but the legacy "container already exists, adopt it" path could return success without the same repair. This left Podman reporting `0.0.0.0:8888->8080/tcp` while `ss` showed no listener and launch probes returned `000`. + - Code fix: `core/archipelago/src/api/rpc/package/install.rs` now calls `ensure_host_port_listener(package_id, package_id)` before returning success from the existing-container adoption path. + - Full lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=searxng ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=180 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Browser launch passed in panel mode: `ARCHY_BASE_URL=http://192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APP_ID=searxng ARCHY_APP_TITLE=SearXNG ARCHY_APP_CARD_TITLE=SearXNG ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:8888/ ARCHY_EXPECTED_LAUNCH_MODE=panel ARCHY_EXPECTED_BODY_PATTERN='SearXNG|Search' npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line`.
+- Jellyfin is qualified: + - Full lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=jellyfin ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Browser launch passed in panel mode: `ARCHY_BASE_URL=http://192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APP_ID=jellyfin ARCHY_APP_TITLE=Jellyfin ARCHY_APP_CARD_TITLE=Jellyfin ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:8096/ ARCHY_EXPECTED_LAUNCH_MODE=panel ARCHY_EXPECTED_BODY_PATTERN='Jellyfin|jellyfin' npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line`. +- ElectrumX is blocked on `.198`: + - Reproduced failure: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=electrumx ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=300 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` stayed `absent` after install. + - Backend log shows install was rejected before container creation: `electrumx requires an unpruned Bitcoin node while indexing. Current Bitcoin is pruned`. + - Direct Bitcoin RPC confirmed `pruned: true`, `prune_target_size: 576716800`, IBD `blocks=472928`, `headers=947914`. + - Disk check showed `/var/lib/archipelago` has about `384G` free, likely not enough for unpruned mainnet plus ElectrumX index. User selected `Mark blocked`; do not reconfigure Bitcoin on `.198` unless explicitly requested. +- PhotoPrism is pending/blocked on image pull speed: + - `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=photoprism ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` stayed `installing` because the container image was still pulling. + - No `photoprism` container was created yet; no port `2342` listener. 
+ - Backend logs show `146.59.87.168:3000/lfg2025/photoprism:240915` timed out after 600s, then `git.tx1138.com/lfg2025/photoprism:240915` timed out after 600s, then retry attempt 1/3 restarted the primary registry pull. + - Treat as image/registry-pull pending rather than app runtime failure unless a later pull completes and the container fails to start. +- Stuck-installing backend fix deployed after PhotoPrism exposed long pull retries: + - Backend build deployed to `.198`: `1f0dd8b9fe801d289557ac050f68011c395374f2b0d5c4677b884d6081612de0`. + - Single-container image pulls now try the configured registry list once with a 300s per-URL timeout instead of repeating the whole list three times with 600s per URL. This turns missing/stalled image pulls into visible failed installs instead of leaving cards in `installing` for close to an hour. + - Scanner now removes stale absent transitional entries after `TRANSITIONAL_STUCK_TIMEOUT`; previously an `Installing` entry with no container could survive indefinitely after a backend restart or killed pull task. + - Verified PhotoPrism state recovered to `absent` with `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=photoprism ARCHY_TIMEOUT=60 ARCHY_STABILITY_SECONDS=1 tests/lifecycle/remote-lifecycle.sh`. +- Nginx Proxy Manager is qualified: + - Full lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=nginx-proxy-manager ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. 
+ - Browser launch passed as a new-tab app: `ARCHY_BASE_URL=http://192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APP_ID=nginx-proxy-manager ARCHY_APP_TITLE='Nginx Proxy Manager' ARCHY_APP_CARD_TITLE='Nginx Proxy Manager' ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:81/ ARCHY_EXPECTED_LAUNCH_MODE=popup ARCHY_EXPECTED_BODY_PATTERN='Nginx|Proxy|Manager|Sign in|Email' npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line`. +- Portainer is qualified: + - Full lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=portainer ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Browser launch passed as a new-tab app: `ARCHY_BASE_URL=http://192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APP_ID=portainer ARCHY_APP_TITLE=Portainer ARCHY_APP_CARD_TITLE=Portainer ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:9000/ ARCHY_EXPECTED_LAUNCH_MODE=popup ARCHY_EXPECTED_BODY_PATTERN='Portainer|Username|Password|Create administrator' npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line`. +- Uptime Kuma is blocked on `.198`: + - Initial failure was a recipe bug: code overrode the image entrypoint to `/usr/bin/dumb-init` but did not pass a program, causing repeated `dumb-init` usage exits. + - Fixed recipe by passing `-- node server/server.js`; deployed backend `540aefb2e1d19aa64b7a5da316bf12c1933145d7ea536afedffb6068371a476f`. + - Added install/start/restart listener repair for host port `3002`; latest deployed backend is `bbcba3f32fab8e11349962f8bb5227ec0374cf36200a768a716c00485dcd121b`. + - Remaining blocker: Uptime Kuma container stays healthy and listens internally on `3001`, Podman reports `0.0.0.0:3002->3001/tcp`, but `ss` loses the actual host listener and direct curl returns `000`. 
+ - Manual `podman restart uptime-kuma` makes `127.0.0.1:3002` return `302 32` for about 105 seconds, then the listener disappears while the container remains healthy. Treat as unstable rootless `pasta` listener, not an app process crash. +- Immich is qualified: + - Backend build deployed to `.198`: `22c8129b8f4e93b58cce9baef8f9e1d071cb243faf85bee1b56457d48f46bbfc`. + - Root cause of lifecycle failure: `container-health` was called with app id `immich`, but the fallback health/status aliases only inspected `immich` and `archy-immich`; the stack's real service container is `immich_server`. The scanner already reports the stack as `immich`, so state was running while health returned `unknown`. + - Code fix: `core/archipelago/src/api/rpc/container.rs` now includes `immich_server` in health/status app-id and container-name candidates for `immich`. + - Full lifecycle passed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=immich ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=1800 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Browser launch passed in panel mode from `neode-ui`: `ARCHY_BASE_URL=http://192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APP_ID=immich ARCHY_APP_TITLE=Immich ARCHY_APP_CARD_TITLE=Immich ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:2283/ ARCHY_EXPECTED_LAUNCH_MODE=panel ARCHY_EXPECTED_BODY_PATTERN='Immich|Login|Admin|Photos' npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line`. + - Note: an earlier `/tmp/archipelago.new` transfer was truncated/mismatched and crashed with `SIGSEGV`; restored `bbcba3f32fab8e11349962f8bb5227ec0374cf36200a768a716c00485dcd121b`, recopied verified local release to `/tmp/archipelago.local-release`, then deployed it successfully. 
+- DWN is blocked on missing/unpullable image: + - Full lifecycle failed: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=dwn ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Failure: `dwn did not reach running within 900s (last=absent)`. + - Backend journal shows both pull attempts failed before container creation: `146.59.87.168:3000/lfg2025/dwn-server:main` and `git.tx1138.com/lfg2025/dwn-server:main`, ending with `Image pull failed from all 2 configured registries`. + - No `dwn` container or image exists on `.198`; treat as image/catalog publishing blocker unless a local fallback image is built or registry image is restored. +- Botfights handoff point: + - Lifecycle command was started but user interrupted during install while switching computers: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=botfights ARCHY_FULL_LIFECYCLE=1 ARCHY_TIMEOUT=900 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Last visible output before abort: `== botfights: install ==`. + - On resume, inspect current `botfights` state/container/image before rerunning because the backend install task may have continued after the local harness was aborted. + +- Broad `.198` audit passed: + - Command: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` + - Running/healthy enough for audit: `bitcoin-knots`, `btcpay-server`, `lnd`, `mempool`, `homeassistant`, `grafana`, `searxng`, `nextcloud`, `vaultwarden`, `filebrowser`, `fedimint`, `indeedhub`. + - Absent and tolerated by audit at the time: `ollama`, `jellyfin`, `photoprism`, `immich`, `nginx-proxy-manager`, `portainer`, `uptime-kuma`, `electrumx`, `dwn`, `botfights`, `gitea`. 
+- Focused full preserve-data lifecycle passed in this continuation: + - `btcpay-server`: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=btcpay-server ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` + - `nextcloud`: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=nextcloud ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` + - `mempool`: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=mempool ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` + - `homeassistant`: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=homeassistant ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` + - `grafana`: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=grafana ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` + - `vaultwarden`: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=vaultwarden ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` + - `filebrowser`: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=filebrowser ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` +- Focused full preserve-data lifecycle still known-passing from prior handoff: `lnd`, `bitcoin-knots`, `fedimint`, `indeedhub`. +- SearXNG regression reproduced: + - Command failed at install launch probe: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=searxng ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` + - Failure: `launch failed: searxng http://192.168.1.198:8888/ status=000 bytes=0`. + - Post-failure state: container `searxng` is `Up ... 
(healthy)` and `podman port searxng` reports `8080/tcp -> 0.0.0.0:8888`, but `ss -ltn` has no `*:8888` listener and both `curl http://127.0.0.1:8888/` and `curl http://192.168.1.198:8888/` return `000 0`. + - A `package.restart` temporarily recreated the listener and direct curl returned `200 6316`, but the next full lifecycle reinstall reproduced the missing listener. +- Remaining focused full-lifecycle candidates after this continuation: + - Blocked on `.198`: `electrumx`, `uptime-kuma`. + - Pending on image pull: `photoprism`. + - Absent apps not yet qualified in this pass: `botfights`, `gitea`. + - Botfights lifecycle attempt was interrupted during install; inspect state first on resume. + - Blocked on missing image: `dwn`. + - Skip `ollama` until image/manifest/catalog entry is restored. + - `electrumx` is absent but was mentioned as a possible follow-up in earlier handoff; run only if it remains in scope. + +## 2026-05-04 IndeedHub And LND Update + +- Latest deployed backend hash observed on `.198`: `83ad80ec793095f2b19746ad8c3d76ab2e7b57b132e4182a28ea9ff86067908b`. +- Frontend bundle redeployed to `/opt/archipelago/web-ui`; dashboard `Last-Modified: Mon, 04 May 2026 10:15:11 GMT`. +- LND was intentionally switched back to panel/iframe launch per user request: + - Removed `lnd` from `NEW_TAB_APPS`, `TAB_LAUNCH_APPS`, and `NEW_TAB_APP_IDS`. + - Browser panel launch qualification passed against `http://192.168.1.198:18083/`. +- IndeedHub is now qualified: + - Full backend/container lifecycle passed. + - Browser Launch qualification passed in panel/iframe mode. + - `/nostr-provider.js` is served by IndeedHub and contains the NIP-07/NIP-98 bridge markers. + +### IndeedHub Issues Fixed + +- Stack restart failed because restarted backend containers lost network aliases (`minio`, `postgres`, `redis`, `relay`, `api`). 
+- Added alias repair for IndeedHub stack restart/start paths: + - `core/archipelago/src/api/rpc/package/stacks.rs` + - `core/archipelago/src/api/rpc/package/runtime.rs` + - `core/archipelago/src/container/prod_orchestrator.rs` +- The frontend nginx container failed under read-only root with: + - `open() "/run/nginx.pid" failed (30: Read-only file system)` +- Added writable tmpfs mounts for stack-created IndeedHub frontend: + - `/run` + - `/var/cache/nginx` +- The boot reconciler raced the async stack installer by recreating the single-container manifest `indeedhub:latest` while `package.install indeedhub` was still pulling stack images. This stole the `indeedhub` container name and caused stack frontend creation to fail. +- Fixed by marking IndeedHub as stack-managed in `ProdContainerOrchestrator::ensure_running_with_mode`, so generic manifest reconciliation no longer installs/recreates it. +- Lifecycle harness now waits for async install transition states to settle before checking `running`, avoiding stale-container false positives. 
+ +### Passing Commands + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=indeedhub ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh +``` + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_BASE_URL=http://192.168.1.198 \ +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=indeedhub \ +ARCHY_APP_TITLE=IndeedHub \ +ARCHY_APP_CARD_TITLE=IndeedHub \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:7778/ \ +ARCHY_EXPECTED_LAUNCH_MODE=panel \ +ARCHY_EXPECTED_BODY_PATTERN='Indee|Indeed|Bitcoin|documentary|nostr' \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_BASE_URL=http://192.168.1.198 \ +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=lnd \ +ARCHY_APP_TITLE=LND \ +ARCHY_APP_CARD_TITLE=LND \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:18083/ \ +ARCHY_EXPECTED_LAUNCH_MODE=panel \ +ARCHY_EXPECTED_BODY_PATTERN='Connect Your Wallet|lndconnect|REST|gRPC|Copy lndconnect URI' \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +### Next Recommended Work After IndeedHub + +- Grafana is now qualified: + - Full backend/container lifecycle passed. + - Browser Launch qualification passed against `http://192.168.1.198:3000/` / `/login`. +- Home Assistant is now qualified: + - Full backend/container lifecycle passed. + - Browser Launch qualification passed; first-run redirect to `/onboarding.html` is accepted. +- SearXNG is now qualified: + - Full backend/container lifecycle passed. + - Browser Launch qualification passed in panel/iframe mode against `http://192.168.1.198:8888/`. + - Fixed stale rootless `pasta` listener recovery for port `8888` before install/retry. 
+ - Fixed manifest image drift by aligning `apps/searxng/manifest.yml` with package install image `146.59.87.168:3000/lfg2025/searxng:latest`; backend restart was required on `.198` to reload the deployed manifest. +- SearXNG recheck after user reported UI not loading: + - RPC/container state showed `running` and Podman reported `0.0.0.0:8888->8080/tcp`, but `ss` showed no actual listener and direct `curl http://192.168.1.198:8888/` failed. + - Restarted SearXNG through `package.restart`, which recreated the rootless port listener on `*:8888`. + - Re-ran audit: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=searxng ARCHY_TIMEOUT=180 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` passed. + - Re-ran browser launch qualification for SearXNG in panel mode; Playwright passed. +- Ollama is currently blocked/unqualified: + - `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=ollama ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh` failed after install because `container-list` stayed `absent` for 900s. + - No `apps/ollama/manifest.yml` exists and `ollama` is absent from `app-catalog/catalog.json` / `neode-ui/public/catalog.json`. + - Confirmed configured image is missing: `podman manifest inspect --tls-verify=false 146.59.87.168:3000/lfg2025/ollama:latest` returns `manifest unknown`. + - This matches `CHANGELOG.md` v1.7.45 note that Ollama was removed because it hung installs due to no source image in registries. +- Nextcloud is now qualified: + - Full backend/container lifecycle passed with preserve-data uninstall/reinstall: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=nextcloud ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Browser Launch qualification passed as a new-tab app against `http://192.168.1.198:8085/`. 
+ - Note: Nextcloud sends `X-Frame-Options: SAMEORIGIN`; panel/iframe launch leaves an empty iframe body from dashboard origin, so qualify it with `ARCHY_EXPECTED_LAUNCH_MODE=popup`. +- Vaultwarden is now qualified: + - Initial audit found `vaultwarden` absent by RPC but a stale rootless `pasta` listener still bound to `*:8082`; cleared with `pkill -f "pasta.*8082"` before install. + - Full backend/container lifecycle passed with preserve-data uninstall/reinstall: `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=vaultwarden ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh`. + - Browser Launch qualification passed as a new-tab app against `http://192.168.1.198:8082/`. +- Continue one-by-one lifecycle/browser qualification with `jellyfin`, `photoprism`, `immich`, `nginx-proxy-manager`, `portainer`, `uptime-kuma`, `dwn`, `botfights`, and `gitea`. Skip Ollama until an image/manifest/catalog entry is restored. + +## 2026-05-04 Fedimint Update + +- Latest deployed backend hash observed on `.198`: `cb464ede6625c00f4fa9e8940d933d7a69d29b0537cfabd8da783f0116a0c587`. +- Fedimint Guardian is now qualified under the current release standard: + - Full backend/container lifecycle passed with preserve-data uninstall/reinstall. + - Browser Launch qualification passed in panel/iframe mode against `http://192.168.1.198:8175/`. +- Root-cause fix: Fedimint image runs as uid `0` inside the rootless container, so its bind-mounted data directory must be host-owned by `1000:1000`, not subuid `100000:100000`. +- Implemented ownership repair in `core/archipelago/src/container/prod_orchestrator.rs` via the Fedimint pre-start/data-dir hook. 
+- Passing lifecycle command: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=fedimint ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh +``` + +- Passing browser launch command: + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_BASE_URL=http://192.168.1.198 \ +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=fedimint \ +ARCHY_APP_TITLE='Fedimint Guardian' \ +ARCHY_APP_CARD_TITLE='Fedimint Guardian' \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:8175/ \ +ARCHY_EXPECTED_LAUNCH_MODE=panel \ +ARCHY_EXPECTED_BODY_PATTERN='Fedimint|Guardian|Federation|Mint|Bitcoin' \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +- Result: `1 passed (11.7s)`. +- Note: backend scanner currently reports Fedimint `lan_address` from the first exposed port (`8173`), but the frontend app-session mapping correctly launches the UI on `8175`. + +### Next Recommended Work After Fedimint + +- Continue with IndeedHub full lifecycle and browser Launch qualification. + +## 2026-05-04 Mempool Update + +- Latest deployed backend hash on `.198`: `02d79360df86d653c9e7b06a05bdf039a0454b81a65220dbe16fa57cafeed236`. +- Mempool is now qualified: + - Full backend/container lifecycle passed. + - Browser Launch qualification passed in panel/iframe mode. + +### Mempool Issues Fixed + +- Initial Mempool lifecycle failed after install with `bad health: mempool is unknown`. +- Root cause: package id `mempool` maps to manifest/app id `archy-mempool-web` with container name `mempool`; `container-health` called `orchestrator.health("mempool")` directly and bypassed alias candidates. +- Added alias handling in `core/archipelago/src/api/rpc/container.rs`: + - `mempool` / `mempool-web` status candidates include `archy-mempool-web`. + - specific `container-health { app_id: "mempool" }` now tries alias candidates and direct Podman container-name fallback. 
+- After deploy, short audit passed: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=mempool ARCHY_TIMEOUT=60 ARCHY_STABILITY_SECONDS=0 tests/lifecycle/remote-lifecycle.sh +``` + +- Mempool full lifecycle passed: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=mempool ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +``` + +- Result: `all checks passed`. + +### Mempool Browser Launch + +- Mempool is an in-panel/iframe app, not a new-tab app. +- Initial browser test failed because the generic spec expected a popup. +- Updated `neode-ui/e2e/app-launch.spec.ts`: + - `ARCHY_EXPECTED_LAUNCH_MODE=panel` verifies an app session iframe instead of popup. + - Card selection now matches a card heading exactly via `APP_CARD_TITLE`/`APP_TITLE`, avoiding false matches from description text (ElectrumX description mentions Mempool). + - Panel iframe selector tolerates source URLs without a trailing slash. +- Passing command: + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_BASE_URL=http://192.168.1.198 \ +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=mempool \ +ARCHY_APP_TITLE=Mempool \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:4080/ \ +ARCHY_EXPECTED_LAUNCH_MODE=panel \ +ARCHY_EXPECTED_BODY_PATTERN='Mempool|Bitcoin|Block|Transaction' \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +- Result: `1 passed (15.8s)`. + +### Next Recommended Work After Mempool + +- Continue installed app qualification with `electrumx` or `filebrowser`. +- ElectrumX already had prior focused work but should get the current browser launch standard if not already rerun after these Playwright spec changes. 
+- Suggested ElectrumX backend lifecycle: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=electrumx ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +``` + +- Suggested ElectrumX browser launch: + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_BASE_URL=http://192.168.1.198 \ +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=electrumx \ +ARCHY_APP_TITLE=ElectrumX \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:50002/ \ +ARCHY_EXPECTED_LAUNCH_MODE=panel \ +ARCHY_EXPECTED_BODY_PATTERN='ElectrumX|Connect Your Wallet|50001' \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +## 2026-05-04 Resume Snapshot + +- Another agent changed the worktree before this session; do not revert unrelated dirty files. +- `.198` service is active, `archipelago-doctor.timer` inactive, `archipelago-reconcile.timer` inactive. +- Latest deployed backend hash on `.198`: `02d79360df86d653c9e7b06a05bdf039a0454b81a65220dbe16fa57cafeed236`. +- LND remains qualified from prior session: full backend lifecycle passed and browser Launch opens `http://192.168.1.198:18083/` with wallet-connect content. +- BTCPay is now qualified: + - Full backend/container lifecycle passed after stop-state normalization fix. + - Browser Launch qualification passed against `.198`; first-run redirect to `/register` is accepted. + +### 2026-05-04 Work Completed + +- Rechecked local/remote state after separate-agent work. +- Ran BTCPay full lifecycle: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=btcpay-server ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +``` + +- Initial BTCPay run failed at stop because BTCPay containers were explicitly stopped, but Podman reports stopped containers as `exited`; scanner overwrote package state from `Stopped` to `Exited`, and the harness waited for `stopped`. 
+- Fixed scanner merge path in `core/archipelago/src/server.rs`: scanned `Exited` package entries are normalized to `Stopped` when the app id is present in `/var/lib/archipelago/user-stopped.json` via configured `data_dir`. +- Rebuilt and deployed backend to `.198`; new hash `6bd9db024ab37017cadd684cb3296c6adbcf290ac27e1238a6bf1e7c0f883e3e`. +- Verified BTCPay then reports `state=stopped` after explicit stop. +- Reran BTCPay full lifecycle; result: `all checks passed`. +- Updated `neode-ui/e2e/app-launch.spec.ts` to support app-specific URL/body regexes: + - `ARCHY_EXPECTED_LAUNCH_URL_PATTERN` + - `ARCHY_EXPECTED_BODY_PATTERN` +- Ran BTCPay browser launch qualification: + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_BASE_URL=http://192.168.1.198 \ +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=btcpay-server \ +ARCHY_APP_TITLE=BTCPay \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:23000/ \ +ARCHY_EXPECTED_LAUNCH_URL_PATTERN='^http://192\.168\.1\.198:23000/(register)?$' \ +ARCHY_EXPECTED_BODY_PATTERN='BTCPay|Create.*account|Register|Store' \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +- Result: `1 passed (10.3s)`. + +### Next Recommended Work + +- Mempool is now complete. Continue app-by-app qualification with ElectrumX or File Browser. 
+- Prior suggested Mempool command, now passing: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=mempool ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +``` + +- If Mempool backend lifecycle passes, run browser launch qualification: + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_BASE_URL=http://192.168.1.198 \ +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=mempool \ +ARCHY_APP_TITLE=Mempool \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:4080/ \ +ARCHY_EXPECTED_BODY_PATTERN='Mempool|Bitcoin|Block|Transaction' \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +### Updated Resume Prompt + +```text +Resume Archipelago container lifecycle hardening from /home/archipelago/Projects/archy. Read docs/CONTAINER_LIFECYCLE_HANDOFF.md first. Remote node is 192.168.1.198, SSH key /home/archipelago/.ssh/id_ed25519, ARCHY_PASSWORD=password123. Preserve data unless explicitly told otherwise. Keep archipelago-doctor.timer and archipelago-reconcile.timer paused. Do not revert unrelated dirty worktree changes because another agent has been working too. LND, BTCPay, and Mempool now have full backend lifecycle plus browser Launch qualification passing. Latest deployed backend hash on .198 is 02d79360df86d653c9e7b06a05bdf039a0454b81a65220dbe16fa57cafeed236. Continue with the next installed app, likely ElectrumX or File Browser, using full lifecycle and then Playwright browser launch qualification. +``` + +## 2026-05-03 Resume Snapshot + +- Remote node under test: `192.168.1.198`. +- SSH key: `/home/archipelago/.ssh/id_ed25519`. +- Lifecycle password: `ARCHY_PASSWORD=password123`. +- Current qualification target: BTCPay full lifecycle. LND user-facing launch flow is now qualified. +- Do not proceed to broad release/audit until app launch qualification includes a real browser click/open-tab check, not just backend/direct-port curl. 
+- Preserve data during lifecycle testing unless explicitly told otherwise. +- Legacy timers should remain paused during deterministic qualification: `archipelago-doctor.timer` and `archipelago-reconcile.timer` inactive/disabled. + +### Latest Deployed State On `.198` + +- Backend deployed to `/usr/local/bin/archipelago`; service observed active. +- Latest backend hash observed on `.198`: `abbd9fa4e6beace75f590c1988a1904b9de62b4b21fade1291926ac039c4747b`. +- Frontend bundle was rebuilt with LND new-tab config and deployed to `/opt/archipelago/web-ui`. +- Dashboard entrypoint at `http://192.168.1.198/` returns `200` and fresh `Last-Modified: Sun, 03 May 2026 20:09:08 GMT`. +- Dashboard CSP allows direct app ports via `connect-src ... http://192.168.1.198:*` and `frame-src ... http://192.168.1.198:*`. +- LND direct UI still works from the test environment: + +```bash +curl -fsS -D - http://192.168.1.198:18083/ -o /tmp/opencode/lnd-ui.html +``` + +Expected: `HTTP/1.1 200 OK`, wallet-connect page content including `Connect Your Wallet`, `lndQrBox`, `rest-tor`, `grpc-tor`, and `Copy lndconnect URI`. + +### LND Status + +- Backend/container lifecycle for LND passed after the latest backend changes: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +``` + +- Result: `all checks passed` through install, stop, start, restart, preserve-data uninstall, reinstall. +- Direct LND UI is reachable at `http://192.168.1.198:18083/`. +- Product/UI launch is now qualified by Playwright against `.198`. User previously saw browser launch failures (`refused to connect` / `This site can't be reached`), but the deployed frontend/backend now open the direct LND UI URL successfully. +- Frontend changes intended to fix this: + - `neode-ui/src/views/appSession/appSessionConfig.ts`: `lnd` added to `NEW_TAB_APPS`. 
+ - `neode-ui/src/views/apps/appsConfig.ts`: `lnd` added to `TAB_LAUNCH_APPS`. + - `neode-ui/src/stores/appLauncher.ts`: `lnd` added to `NEW_TAB_APP_IDS`. + +### Browser-Level Launch Check Added + +- Added `neode-ui/e2e/app-launch.spec.ts` as a reusable Playwright qualification test. +- Intended run command: + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=lnd \ +ARCHY_APP_TITLE=LND \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:18083/ \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +- Current result: passing against `.198`. +- Passing command: + +```bash +cd /home/archipelago/Projects/archy/neode-ui +ARCHY_BASE_URL=http://192.168.1.198 \ +ARCHY_PASSWORD=password123 \ +ARCHY_APP_ID=lnd \ +ARCHY_APP_TITLE=LND \ +ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:18083/ \ +npx playwright test e2e/app-launch.spec.ts --config=playwright.config.ts --project=chromium --reporter=line +``` + +- Result: `1 passed (12.3s)`. +- The test clicks the real My Apps `Launch` button, waits for the popup, verifies URL `http://192.168.1.198:18083/`, and checks wallet-connect text in the popup body. + +### New Root-Cause Findings To Continue + +- `AppDetails` can render `App Not Found` before package data has arrived. The route still does not wait for the WebSocket initial package snapshot; the launch qualification now uses My Apps card launch, which matches user behavior. +- `server.get-state` frontend call was broken against the deployed backend: + +```text +RPC method: server.get-state +RPC error on server.get-state: Unknown method: server.get-state +``` + +- Fixed by adding `server.get-state` dispatch support in `core/archipelago/src/api/rpc/dispatcher.rs` and deploying the new backend to `.198`. +- Verified browser-authenticated `server.get-state` returns `hasLnd=true`, `status=200`, `error=null`. 
+- WebSocket initial data still works; logs showed `WebSocket /ws/db connected` and initial state dumps. +- Earlier browser-test failures were due to wrong Playwright `baseURL` defaulting to `.228` and/or empty package state on that node, not LND direct UI reachability. +- Direct unauthenticated `container-list` is allowed by auth rules, but authenticated browser calls without CSRF fail with `403`; the Playwright test should not rely on raw RPC calls without CSRF unless using exempt read-only methods. + +### Immediate Resume Steps + +1. Proceed to BTCPay full lifecycle: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=btcpay-server ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +``` + +2. If BTCPay passes backend lifecycle, add/run browser-level launch qualification for BTCPay using the same Playwright spec with `ARCHY_APP_ID=btcpay-server`, `ARCHY_APP_TITLE=BTCPay`, and `ARCHY_EXPECTED_LAUNCH_URL=http://192.168.1.198:23000/`. + +3. Fix stale `boot_reconciler` unit tests for existing-only production behavior if running the full backend test suite. + +### Verification Commands Before Resuming + +```bash +ssh -i /home/archipelago/.ssh/id_ed25519 -o StrictHostKeyChecking=no archipelago@192.168.1.198 'systemctl is-active archipelago.service; systemctl is-active archipelago-doctor.timer 2>/dev/null || true; systemctl is-active archipelago-reconcile.timer 2>/dev/null || true; podman ps -a --format "{{.Names}} {{.Status}} {{.Ports}}" | egrep "lnd|btcpay|nbxplorer|bitcoin|electrs" || true' +``` + +```bash +curl -fsS -D - http://192.168.1.198:18083/ -o /tmp/opencode/lnd-ui.html +``` + +### Files Touched In This Latest Session + +- `neode-ui/e2e/app-launch.spec.ts`: new parameterized Playwright launch qualification spec. +- `neode-ui/playwright.config.ts`: `baseURL` can now be overridden with `ARCHY_BASE_URL`. +- `core/archipelago/src/api/rpc/dispatcher.rs`: added `server.get-state` dispatch handler. 
+- `neode-ui/src/views/appSession/appSessionConfig.ts`: LND forced new-tab session behavior. +- `neode-ui/src/views/apps/appsConfig.ts`: LND marked as tab-launch app. +- `neode-ui/src/stores/appLauncher.ts`: LND forced new-tab from legacy/open URL path. +- `docs/CONTAINER_LIFECYCLE_HANDOFF.md`: this handoff update. + +### Still Dirty / Important + +- Worktree is dirty with many lifecycle/backend/frontend changes and untracked files. Do not revert other changes. +- `git status --short` currently includes untracked `tests/lifecycle/remote-lifecycle.sh`, `core/archipelago/src/container/lnd.rs`, `neode-ui/e2e/app-launch.spec.ts`, and this handoff doc. +- No commit was created. + +### Resume Prompt + +Use this prompt in a fresh remote session: + +```text +Resume Archipelago lifecycle hardening from /home/archipelago/Projects/archy. Read docs/CONTAINER_LIFECYCLE_HANDOFF.md first. Current remote node is 192.168.1.198, SSH key /home/archipelago/.ssh/id_ed25519, ARCHY_PASSWORD=password123. LND backend lifecycle and browser launch qualification are now passing; latest deployed backend hash on .198 is abbd9fa4e6beace75f590c1988a1904b9de62b4b21fade1291926ac039c4747b. Continue with BTCPay full lifecycle, then add/run the same browser launch qualification for BTCPay. Preserve data unless explicitly told otherwise, keep doctor/reconcile timers paused, and do not revert unrelated dirty worktree changes. +``` + +## Operator Snapshot + +- Plan: harden app/container lifecycle before release using strict lifecycle tests and app-specific probes. +- Current target: run broad `.198` audit after focused fixes for LND, Bitcoin Knots, Fedimint, and IndeedHub. +- LND status on `.198`: strict audit and full preserve-data lifecycle passed on 2026-05-02. +- Bitcoin Knots status on `.198`: full preserve-data lifecycle passed on 2026-05-02. +- Fedimint status on `.198`: full preserve-data lifecycle passed on 2026-05-02. 
+- IndeedHub status on `.198`: full preserve-data lifecycle passed on 2026-05-02. +- Last known local status: focused lifecycle/orchestrator/container unit tests pass and release build succeeds. +- Do not release until broad audit and app-specific UI probes pass. + +## Goal + +Harden and verify Archipelago app/container lifecycle before release. Required coverage is install, launch, stop, start, restart, uninstall with `preserve_data=true`, reinstall, and launch again. UI checks must validate app-specific functionality, not only HTTP 200. + +## Current Focus + +Run broad lifecycle audit on node `192.168.1.198`, then continue app-by-app for any installed package that is non-running or unhealthy. LND, Bitcoin Knots, Fedimint, and IndeedHub have each passed focused strict lifecycle validation. + +Strict LND criteria: + +- `lnd` container reaches `running`. +- `archy-lnd-ui` companion serves `/app/lnd/`. +- LND wallet is initialized or unlocked non-interactively. +- `/var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/admin.macaroon` exists. +- `/lnd-connect-info` returns certificate, macaroon, REST/gRPC ports, and Tor onion. +- LND UI contains all connection modes: REST local, REST Tor, gRPC local, gRPC Tor. +- QR/connect controls are present and backed by real connection info. + +## Important Nodes + +- `.198`: SSH works with `/home/archipelago/.ssh/id_ed25519`. +- `.228`: RPC works, SSH still blocked with `Permission denied (publickey,password)`. + +## Test Harness + +Primary remote harness: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd tests/lifecycle/remote-lifecycle.sh +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh +``` + +Harness changes made: + +- Normalizes package states with `ascii_downcase` because API can return `Running`. +- Audit mode allows `absent`, fails installed non-running states. +- Full lifecycle uses preserve-data uninstall. 
+- LND probe checks DOM, all four connection modes, `/lnd-connect-info`, macaroon/cert lengths, REST/gRPC ports, and Tor onion. +- Electrum probe now checks local and Tor QR containers/fields, `qrcode.js`, and `/electrs-status` Tor onion. +- Added `ARCHY_STABILITY_SECONDS` observation window, default `15`, so a single `running` snapshot is not enough. +- Audit/full lifecycle now call `container-health` after install/start/restart/reinstall and fail anything other than `healthy`. +- Focused validation passed for LND, Bitcoin Knots, Fedimint, and IndeedHub. + +## Implemented Backend Changes + +### Lifecycle/Reconcile + +- `core/archipelago/src/server.rs` + - Scanner merge now recovers stale `Removing -> Running` if the container is actually live. + - Added stale-removing recovery test. +- `core/archipelago/src/main.rs` + - Crash recovery now runs synchronously before BootReconciler. +- `core/archipelago/src/bootstrap.rs` + - Removed automatic deletion of `/run/user/1000/{containers,libpod}` when `podman info` fails. +- `core/archipelago/src/crash_recovery.rs` + - Generic boot recovery narrowed to safe containers only. +- `core/archipelago/src/container/prod_orchestrator.rs` + - Uninstall disables manifests rather than deleting manifest availability. + - Explicit reinstall re-enables disabled manifests. + - LND pre-start writes/repairs config. + - LND post-start initializes/unlocks wallet in production. + - Post-start hook is skipped in `cfg(test)` so unit tests do not mutate host LND state. + - `stop` disables desired-state reconcile until explicit start. + - Reconciler respects `/var/lib/archipelago/user-stopped.json` across daemon restarts. + - Start path recreates containers when stale rootless Podman runtime state prevents startup. +- `core/archipelago/src/api/rpc/package/install.rs` + - Install reconciles companion UIs synchronously. +- `core/archipelago/src/api/rpc/package/runtime.rs` + - Start/restart reconcile companions. 
+ - Missing known companion containers are tolerated during stop/restart. +- `core/archipelago/src/health_monitor.rs` + - Added Bitcoin variant conflict guard for auto-restart: `bitcoin-core` and `bitcoin-knots` can both be installed, but the monitor must not auto-start one into default `8332/8333` while the other is already running. + - Added unit tests for the conflict guard. +- `core/archipelago/src/api/rpc/package/install.rs` + - Removed install-time hard block between `bitcoin-core` and `bitcoin-knots`; users may install both. Runtime still needs alternate ports or one inactive variant to run both simultaneously. +- `core/archipelago/src/api/rpc/package/config.rs` + - Bitcoin variant container resolution is precise, so package operations for one variant do not target the other. +- `core/container/src/podman_client.rs` + - Custom network containers now receive container-name DNS aliases. + - Containers get `host.archipelago:10.89.0.1` for host RPC access from rootless networks. +- `apps/fedimint/manifest.yml` and `apps/fedimint-gateway/manifest.yml` + - Fedimint data owner fixed to `1000:1000`. + - Bitcoin RPC host changed to `http://host.archipelago:8332`. + +### Companions + +- `core/archipelago/src/container/companion.rs` + - LND UI uses bridge networking, not host networking. + - LND UI moved from host `8081` to host `18083` to avoid `nostr-rs-relay` conflict. + - Test updated to expect `18083:80`. +- Routing/metadata moved LND UI to `18083`: + - `apps/lnd-ui/manifest.yml` + - `core/archipelago/src/container/docker_packages.rs` + - `core/container/src/podman_client.rs` + - `core/archipelago/src/port_allocator.rs` + - `neode-ui/src/views/appSession/appSessionConfig.ts` + - `neode-ui/src/stores/container.ts` + - `neode-ui/src/stores/appLauncher.ts` + - `neode-ui/src/views/appDetails/appDetailsData.ts` + - nginx snippets/configs for `/app/lnd/` now proxy to `127.0.0.1:18083`. + +### LND + +- New/expanded `core/archipelago/src/container/lnd.rs`. 
+- `ensure_config()` writes required Bitcoin backend flags: + - `bitcoin.active=true` + - `bitcoin.mainnet=true` + - `bitcoin.node=bitcoind` + - `bitcoind.rpchost=bitcoin-knots:8332` +- Handles permission denied writing `lnd.conf` via sudo. +- `ensure_wallet_initialized()` now: + - Checks wallet/macaroons via sudo-aware helpers because LND data is container-owned `0700`. + - Uses REST unlocker `GET /v1/genseed` and `POST /v1/initwallet` for new wallets. + - Falls back to `lncli unlock --stdin` if wallet already exists. + - Uses sudo-aware read for macaroon when checking `/v1/getinfo` readiness. + +## Verified Locally + +Recent focused test passes: + +```bash +cd /home/archipelago/Projects/archy/core +cargo test -p archipelago --bin archipelago health_monitor +cargo test -p archipelago --bin archipelago prod_orchestrator +cargo test -p archipelago --bin archipelago bitcoin_variant_container_names_are_precise +cargo test -p archipelago-container podman_network_settings_uses_networks_map_for_custom_networks +bash -n ../tests/lifecycle/remote-lifecycle.sh +``` + +Release build succeeds: + +```bash +cd /home/archipelago/Projects/archy/core +cargo build -p archipelago --bin archipelago --release +``` + +## `.198` Current State + +Recent deployment: + +- Built release binary with sudo-aware LND wallet checks and LND UI port `18083`. +- Deployed to `/usr/local/bin/archipelago` on `.198` with backup. +- Restarted `archipelago.service`; it returned `active`. +- nginx on `.198` was already updated so `/app/lnd/` proxies to `127.0.0.1:18083`. + +Known `.198` observations: + +- LND wallet artifacts exist after previous bootstrap: + - `/var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/admin.macaroon` + - `/var/lib/archipelago/lnd/data/chain/bitcoin/mainnet/wallet.db` +- `nostr-rs-relay` occupies `8081`; LND UI must stay on `18083`. 
+- LND strict audit passed on 2026-05-02: + - `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd tests/lifecycle/remote-lifecycle.sh` +- LND full preserve-data lifecycle passed on 2026-05-02: + - `ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh` +- Final observed state after LND lifecycle: + - `archipelago.service` active. + - `nginx` active. + - `lnd` running on `8080`, `9735`, and `10009`. + - `archy-lnd-ui` running on `18083`. + - `archy-electrs-ui` running and `50002` listening. +- Active default Bitcoin backend is currently `bitcoin-knots`; `bitcoin-core` is installed but user-stopped. +- `/var/lib/archipelago/user-stopped.json` should include `bitcoin-core` so daemon restart does not resurrect it into a default-port conflict. +- Fedimint fixed issues: + - stale rootless Podman runtime storage was handled by recreate-on-start-failure path. + - data ownership fixed for gateway and federation DB lock files. + - Bitcoin RPC DNS fixed via `host.archipelago` host alias. +- IndeedHub full lifecycle passed after forcing the dedicated stack installer path, which removes stale stack containers and recreates network aliases and volumes. + +## Focused Remote Passes + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=bitcoin-knots ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=fedimint ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=indeedhub ARCHY_FULL_LIFECYCLE=1 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +``` + +Result for each focused run: `all checks passed`. 
+ +## Immediate Next Steps + +1. Run broad audit: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_STABILITY_SECONDS=5 tests/lifecycle/remote-lifecycle.sh +``` + +2. Continue app-by-app for any installed package that broad audit reports as non-running or unhealthy. + +3. Resume Electrum full lifecycle with strict Tor/QR checks if Electrum remains in scope. Previous run was user-aborted during `electrumx: install`: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=electrumx ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh +``` + +4. If Electrum fails, capture current service and port state: + +```bash +ssh -i /home/archipelago/.ssh/id_ed25519 -o StrictHostKeyChecking=no archipelago@192.168.1.198 'systemctl is-active archipelago.service; systemctl is-active nginx; ss -ltn | grep -E ":(50001|50002|18083|8081|8080|10009|9735)" || true; podman ps -a --format "{{.Names}} {{.Status}} {{.Ports}}" | egrep "electrs|electrum|lnd|nostr" || true' +``` + +5. LND commands that passed and can be rerun as a regression check: + +```bash +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd tests/lifecycle/remote-lifecycle.sh +ARCHY_HOST=192.168.1.198 ARCHY_PASSWORD=password123 ARCHY_APPS=lnd ARCHY_FULL_LIFECYCLE=1 tests/lifecycle/remote-lifecycle.sh +``` + +6. If `/app/lnd/` regresses to `502`, inspect companion unit and logs: + +```bash +ssh -i /home/archipelago/.ssh/id_ed25519 -o StrictHostKeyChecking=no archipelago@192.168.1.198 'systemctl --user status archy-lnd-ui.service --no-pager -l 2>&1 | sed -n "1,160p"; test -f ~/.config/containers/systemd/archy-lnd-ui.container && sed -n "1,160p" ~/.config/containers/systemd/archy-lnd-ui.container || true; journalctl --user -u archy-lnd-ui.service -n 160 --no-pager 2>&1 | sed -n "1,160p"' +``` + +7. 
If `package.stop lnd` regresses and does not stop the container, inspect the runtime stop path in: + +- `core/archipelago/src/api/rpc/package/runtime.rs` +- `core/archipelago/src/container/prod_orchestrator.rs` + +Likely issue: state scanner/reconciler or companion handling restarts LND during stop/uninstall, or the stop path waits on package state while the container is being reconciled. + +## Previously Fixed Live Issues On `.198` + +- Stale `fedimint=removing` recovered. +- Orphaned `filebrowser` rootlessport on `8083` cleared. +- Orphaned `bitcoin-core` rootlessport on `8332/8333` cleared. +- LND missing `bitcoin.active`/backend config fixed. +- LND config permission-denied error fixed via sudo write. +- Companion start/restart race mostly fixed by synchronous companion reconciliation. +- Bitcoin Core/Knots install-time conflict removed while preserving runtime default-port safety. +- Bitcoin Core unintended resurrection after daemon restart fixed through persistent user-stopped state. +- Fedimint DB lock permission errors fixed through `1000:1000` data ownership. +- Fedimint Bitcoin RPC DNS errors fixed through `host.archipelago`. +- IndeedHub stale stopped stack fixed by reinstalling through the dedicated stack installer. + +## Do Not Forget + +- Do not release until strict lifecycle and app-specific UI probes pass. +- Preserve data during destructive lifecycle testing unless explicitly instructed otherwise. +- Do not revert user/other-agent worktree changes. +- `.228` still needs SSH fixed or must be tested RPC/UI-only. 
diff --git a/image-recipe/_archived/build-auto-installer-iso.sh b/image-recipe/_archived/build-auto-installer-iso.sh index c178df34..a4f647cb 100755 --- a/image-recipe/_archived/build-auto-installer-iso.sh +++ b/image-recipe/_archived/build-auto-installer-iso.sh @@ -211,10 +211,15 @@ check_tools() { fi fi - # Ensure insecure registry config for Archipelago app registry (HTTP) - if [ "$CONTAINER_CMD" = "podman" ]; then + # Ensure insecure registry config for Archipelago app registries that are + # intentionally served over HTTP during ISO builds. + if [[ "$CONTAINER_CMD" == podman* ]]; then mkdir -p /etc/containers/registries.conf.d cat > /etc/containers/registries.conf.d/archipelago.conf <<'REGCONF' +[[registry]] +location = "146.59.87.168:3000" +insecure = true + [[registry]] location = "git.tx1138.com" insecure = true @@ -227,6 +232,15 @@ check_tools mkdir -p "$WORK_DIR" mkdir -p "$OUTPUT_DIR" +container_pull() { + local image="$1" + if [[ "$CONTAINER_CMD" == podman* && "$image" == 146.59.87.168:3000/* ]]; then + $CONTAINER_CMD pull --tls-verify=false --platform "$CONTAINER_PLATFORM" "$image" + else + $CONTAINER_CMD pull --platform "$CONTAINER_PLATFORM" "$image" + fi +} + # ============================================================================= # STEP 1: Build complete root filesystem using Docker # ============================================================================= @@ -1289,7 +1303,7 @@ if [ "$UNBUNDLED" = "1" ]; then echo " ✅ Using cached: $CORE_FILE" else echo " Pulling $CORE_IMAGE ($CONTAINER_PLATFORM)..." 
- if $CONTAINER_CMD pull --platform $CONTAINER_PLATFORM "$CORE_IMAGE"; then + if container_pull "$CORE_IMAGE"; then $CONTAINER_CMD save "$CORE_IMAGE" -o "$IMAGES_DIR/$CORE_FILE" 2>/dev/null && \ echo " ✅ Saved core: $CORE_FILE ($(du -h "$IMAGES_DIR/$CORE_FILE" | cut -f1))" || \ echo " ⚠️ Failed to save $CORE_IMAGE" @@ -1367,7 +1381,7 @@ echo "$CONTAINER_IMAGES" | while read -r image filename; do echo " ✅ Using cached: $filename" else echo " Pulling $image ($CONTAINER_PLATFORM)..." - if $CONTAINER_CMD pull --platform $CONTAINER_PLATFORM "$image"; then + if container_pull "$image"; then echo " Saving $filename..." if $CONTAINER_CMD save "$image" -o "$tarpath" 2>/dev/null; then echo " ✅ Saved: $(du -h "$tarpath" | cut -f1)" @@ -3456,9 +3470,9 @@ echo "" echo "Step 6: Creating bootable ISO..." if [ "$UNBUNDLED" = "1" ]; then - OUTPUT_ISO="$OUTPUT_DIR/archipelago-installer-unbundled-${ARCH}.iso" + OUTPUT_ISO="$OUTPUT_DIR/archipelago-installer-${BUILD_VERSION}-unbundled-${ARCH}.iso" else - OUTPUT_ISO="$OUTPUT_DIR/archipelago-installer-${ARCH}.iso" + OUTPUT_ISO="$OUTPUT_DIR/archipelago-installer-${BUILD_VERSION}-${ARCH}.iso" fi # Use the proven MBR code for hybrid USB boot diff --git a/image-recipe/configs/nginx-archipelago.conf b/image-recipe/configs/nginx-archipelago.conf index 5d436450..139ec083 100644 --- a/image-recipe/configs/nginx-archipelago.conf +++ b/image-recipe/configs/nginx-archipelago.conf @@ -156,6 +156,16 @@ server { error_page 502 503 = @backend_unavailable; error_page 504 = @backend_timeout; } + location /bitcoin-status { + proxy_pass http://127.0.0.1:5678/bitcoin-status; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_connect_timeout 10s; + proxy_read_timeout 10s; + proxy_send_timeout 5s; + error_page 502 503 = @backend_unavailable; + error_page 504 = @backend_timeout; + } location /electrs-status { proxy_pass http://127.0.0.1:5678/electrs-status; proxy_http_version 1.1; @@ -969,6 +979,16 @@ server { error_page 502 503 = 
@backend_unavailable; error_page 504 = @backend_timeout; } + location /bitcoin-status { + proxy_pass http://127.0.0.1:5678/bitcoin-status; + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_connect_timeout 10s; + proxy_read_timeout 10s; + proxy_send_timeout 5s; + error_page 502 503 = @backend_unavailable; + error_page 504 = @backend_timeout; + } location /electrs-status { proxy_pass http://127.0.0.1:5678/electrs-status; proxy_http_version 1.1; diff --git a/neode-ui/package-lock.json b/neode-ui/package-lock.json index c3bab477..14ca6f38 100644 --- a/neode-ui/package-lock.json +++ b/neode-ui/package-lock.json @@ -1,12 +1,12 @@ { "name": "neode-ui", - "version": "1.7.53-alpha", + "version": "1.7.54-alpha", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "neode-ui", - "version": "1.7.53-alpha", + "version": "1.7.54-alpha", "dependencies": { "@types/dompurify": "^3.0.5", "@vue-leaflet/vue-leaflet": "^0.10.1", diff --git a/neode-ui/package.json b/neode-ui/package.json index bade5448..d3bc74f0 100644 --- a/neode-ui/package.json +++ b/neode-ui/package.json @@ -1,7 +1,7 @@ { "name": "neode-ui", "private": true, - "version": "1.7.53-alpha", + "version": "1.7.54-alpha", "type": "module", "scripts": { "start": "./start-dev.sh", diff --git a/neode-ui/src/stores/app.ts b/neode-ui/src/stores/app.ts index 319da6eb..a54de62a 100644 --- a/neode-ui/src/stores/app.ts +++ b/neode-ui/src/stores/app.ts @@ -14,7 +14,7 @@ export const useAppStore = defineStore('app', () => { // Writable refs — delegate reads and writes to the sub-stores const { isAuthenticated, isLoading, error } = storeToRefs(auth) - const { data, isConnected, isReconnecting } = storeToRefs(sync) + const { data, isConnected, isReconnecting, hasLoadedInitialData } = storeToRefs(sync) // Read-only computed — delegate to sub-stores const { serverInfo, packages, peerHealth, uiData } = storeToRefs(sync) @@ -30,6 +30,7 @@ export const useAppStore = defineStore('app', () => { data, isConnected, 
isReconnecting, + hasLoadedInitialData, // Sync computed (read-only) serverInfo, diff --git a/neode-ui/src/stores/sync.ts b/neode-ui/src/stores/sync.ts index 75f992d2..38284333 100644 --- a/neode-ui/src/stores/sync.ts +++ b/neode-ui/src/stores/sync.ts @@ -11,6 +11,7 @@ export const useSyncStore = defineStore('sync', () => { const data = ref(null) const isConnected = ref(false) const isReconnecting = ref(false) + const hasLoadedInitialData = ref(false) let isWsSubscribed = false let isWsConnecting = false @@ -47,12 +48,14 @@ export const useSyncStore = defineStore('sync', () => { if (update?.type === 'initial' && update?.data) { if (import.meta.env.DEV) console.log('[Store] Received initial data from mock backend') data.value = update.data + hasLoadedInitialData.value = true isConnected.value = true isReconnecting.value = false } // Handle real backend format: {rev: 0, data: {...}} else if (update?.data && update?.rev !== undefined) { data.value = update.data + hasLoadedInitialData.value = true isConnected.value = true isReconnecting.value = false } @@ -90,6 +93,7 @@ export const useSyncStore = defineStore('sync', () => { const freshState = await rpcClient.call<{ data: DataModel }>({ method: 'server.get-state' }) if (freshState?.data) { data.value = freshState.data + hasLoadedInitialData.value = true } } catch { // Non-fatal: WebSocket patches will still work @@ -149,11 +153,13 @@ export const useSyncStore = defineStore('sync', () => { theme: 'dark', }, } + hasLoadedInitialData.value = false } /** Reset sync state on logout — called by auth store */ function resetOnLogout(): void { data.value = null + hasLoadedInitialData.value = false isWsSubscribed = false wsClient.disconnect() isConnected.value = false @@ -165,6 +171,7 @@ export const useSyncStore = defineStore('sync', () => { data, isConnected, isReconnecting, + hasLoadedInitialData, // Computed serverInfo, diff --git a/neode-ui/src/views/AppDetails.vue b/neode-ui/src/views/AppDetails.vue index 
10e7ff43..e8d65a52 100644 --- a/neode-ui/src/views/AppDetails.vue +++ b/neode-ui/src/views/AppDetails.vue @@ -267,8 +267,7 @@ const canLaunch = computed(() => { if (!pkg.value) return false if (isWebOnly.value) return true const hasUI = !!(pkg.value.manifest.interfaces?.main?.ui || pkg.value.installed?.['interface-addresses']?.main) - const isRunning = pkg.value.state === 'running' - return hasUI && isRunning + return hasUI && pkg.value.state === 'running' && pkg.value.health !== 'starting' && pkg.value.health !== 'unhealthy' }) const features = computed(() => [ diff --git a/neode-ui/src/views/Apps.vue b/neode-ui/src/views/Apps.vue index b332a010..f1d72aee 100644 --- a/neode-ui/src/views/Apps.vue +++ b/neode-ui/src/views/Apps.vue @@ -40,19 +40,14 @@ -
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+ + + + +

Loading apps

+

Checking the latest app status before showing launch controls.

@@ -222,6 +217,8 @@ const packages = computed(() => { const categoriesWithApps = useCategoriesWithApps(packages, ALL_CATEGORIES) +const isLoadingApps = computed(() => !store.hasLoadedInitialData && !connectionError.value) + // Connection error state const connectionError = ref('') let connectionTimer: ReturnType | undefined @@ -230,7 +227,7 @@ onMounted(() => { appsAnimationDone = true if (!store.isConnected) { connectionTimer = setTimeout(() => { - if (!store.isConnected && sortedPackageEntries.value.length === 0) { + if (!store.hasLoadedInitialData && sortedPackageEntries.value.length === 0) { connectionError.value = 'Unable to connect to server. Check that the backend is running.' } }, 15000) diff --git a/neode-ui/src/views/appSession/AppSessionFrame.vue b/neode-ui/src/views/appSession/AppSessionFrame.vue index d8e1205b..4848b3a7 100644 --- a/neode-ui/src/views/appSession/AppSessionFrame.vue +++ b/neode-ui/src/views/appSession/AppSessionFrame.vue @@ -34,7 +34,7 @@

-
+