diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs index 1abc5dee..e0b54a2c 100644 --- a/core/archipelago/src/container/prod_orchestrator.rs +++ b/core/archipelago/src/container/prod_orchestrator.rs @@ -532,13 +532,37 @@ impl ProdContainerOrchestrator { lm: &LoadedManifest, name: &str, ) -> Result> { + // Skip companion apps — bitcoin-ui / electrs-ui / lnd-ui have shipped + // via Quadlet since v1.7.41 (companion.rs renders the unit). Running + // migration for them races companion rendering: when migration ran + // for archy-bitcoin-ui on .228 2026-05-02 it tore down the running + // companion + tried to start a duplicate unit and failed. Companions + // are managed by their own path, never the legacy podman-create one, + // so there's nothing here to migrate. + let app_id = lm.manifest.app.id.as_str(); + if UI_APP_IDS.contains(&app_id) { + return Ok(None); + } + let unit_dir = quadlet::unit_dir() .await .context("locate user quadlet unit dir for migration check")?; let unit_path = unit_dir.join(format!("{name}.container")); - if tokio::fs::try_exists(&unit_path).await.unwrap_or(false) { - // Already on the Quadlet path — nothing to migrate. - return Ok(None); + // Treat any IO error from try_exists as "the unit might be there" — + // migrating on top of an existing unit is destructive (we'd podman rm + // the running container then fight the existing service over startup + // ordering), so be cautious when we can't read the answer. + match tokio::fs::try_exists(&unit_path).await { + Ok(true) => return Ok(None), + Ok(false) => {} // proceed + Err(e) => { + tracing::warn!( + path = %unit_path.display(), + error = %e, + "Phase 3.3: skipping migration — couldn't determine if Quadlet unit already exists" + ); + return Ok(None); + } } // No unit on disk. If a pre-Phase-3 container exists for this diff --git a/core/archipelago/src/container/quadlet.rs b/core/archipelago/src/container/quadlet.rs index 5c3a06b8..ecd5b07a 100644 --- a/core/archipelago/src/container/quadlet.rs +++ b/core/archipelago/src/container/quadlet.rs @@ -365,8 +365,14 @@ impl QuadletUnit { /// /// Supported shapes: /// - type: tcp, endpoint: "host:port" → `nc -z host port` -/// - type: http, endpoint: "host:port", path → `curl -fsS http://host:port` +/// - type: http, endpoint: "host:port" or "http(s)://host:port", path → curl /// - type: cmd, endpoint: "" → `` verbatim +/// +/// For type=http we accept the endpoint with or without scheme; manifests +/// in the wild use both forms (`localhost:8175` and +/// `http://localhost:8175/`). Earlier we blindly prepended `http://` even +/// when one was already there, producing `http://http://...` HealthCmds +/// that pasted on .228 2026-05-02 and failed every probe. fn translate_health_check( hc: &archipelago_container::HealthCheck, ) -> Option { @@ -379,11 +385,34 @@ fn translate_health_check( format!("nc -z {host} {port}") } "http" => { - let endpoint = hc.endpoint.as_deref()?; - let path = hc.path.as_deref().unwrap_or("/"); + let endpoint = hc.endpoint.as_deref()?.trim(); + // Accept either bare host:port or a full URL. If endpoint + // already includes a scheme we use it as-is; otherwise we + // prepend http://. This keeps existing http://foo manifests + // working and stops the http://http:// double-prefix bug. + let url = if endpoint.starts_with("http://") || endpoint.starts_with("https://") { + endpoint.to_string() + } else { + format!("http://{endpoint}") + }; + // If the endpoint already carried a path component, honour it + // and ignore hc.path (manifests that bake the path into the + // endpoint don't expect to merge a separate path field). + // Otherwise append hc.path (default "/"). + let already_has_path = url + .splitn(4, '/') + .nth(3) + .map(|p| !p.is_empty()) + .unwrap_or(false); + let final_url = if already_has_path { + url + } else { + let path = hc.path.as_deref().unwrap_or("/"); + format!("{url}{path}") + }; // -fsS: fail on non-2xx, silent except on error, show errors. // -m 5: per-request timeout matches the default manifest timeout. - format!("curl -fsS -m 5 http://{endpoint}{path}") + format!("curl -fsS -m 5 {final_url}") } "cmd" => hc.endpoint.as_deref()?.to_string(), _ => return None, @@ -921,6 +950,38 @@ app: assert!(translate_health_check(&badtcp).is_none()); } + #[test] + fn translate_health_check_http_does_not_double_prefix_scheme() { + // Regression: on .228 2026-05-02 we shipped HealthCmds reading + // `curl -fsS -m 5 http://http://localhost:8175/` because manifests + // in the wild carry the scheme inside the endpoint string. Every + // probe failed and the unit looped. Now we accept either form. + use archipelago_container::HealthCheck; + let with_scheme = HealthCheck { + check_type: "http".into(), + endpoint: Some("http://localhost:8175".into()), + path: Some("/".into()), + interval: "30s".into(), + timeout: "5s".into(), + retries: 3, + }; + let h = translate_health_check(&with_scheme).expect("with-scheme must translate"); + assert_eq!(h.cmd, "curl -fsS -m 5 http://localhost:8175/"); + assert!(!h.cmd.contains("http://http://"), "got: {}", h.cmd); + + let with_https = HealthCheck { + check_type: "http".into(), + endpoint: Some("https://example.local/health".into()), + path: None, + interval: "30s".into(), + timeout: "5s".into(), + retries: 3, + }; + let h = translate_health_check(&with_https).expect("https must translate"); + // Endpoint already has /health → don't append the default "/". + assert_eq!(h.cmd, "curl -fsS -m 5 https://example.local/health"); + } + #[test] fn from_manifest_picks_up_health_check() { let yaml = r#"