fix(rpc): async container stop/start/restart; widen state mapping
RPC handlers no longer block on podman operations. container-stop on
bitcoin-core used to hold the connection for up to 600s while the UI
showed a frozen spinner; it now returns in under a second with
{status: stopping} after flipping the package state to Stopping and
broadcasting over WebSocket. Same treatment for container-start and
the new container-restart route.
Widens container-list state mapping to emit the transitional variants
(stopping, starting, restarting, installing, updating, removing,
installed, and the backup states) instead of collapsing them to
"unknown". Keeps the mapping in sync with the UI ContainerStatus.state
union so the dashboard can render the right transitional label.
Mirrors the treatment in package/runtime.rs for package.start,
package.stop, and package.restart. The body of each handler is lifted
into pure do_package_* helpers that the background task runs; state
flipping is bracketed around the spawn with revert on error. The
pre-existing post-start exit-check verification and restart stop+start
fallback run inside the spawned task, not the RPC body.
Adds container-restart route to the dispatcher. mark_user_stopped
continues to run BEFORE the spawn, preserving the ordering contract
with the crash recovery layer at runtime.rs:145-148.
This commit is contained in:
parent
5baced5f5b
commit
39dd1d9dcc
@ -1,4 +1,5 @@
|
||||
use super::package::validate_app_id;
|
||||
use super::transitional::Op;
|
||||
use super::RpcHandler;
|
||||
use anyhow::{Context, Result};
|
||||
|
||||
@ -62,11 +63,6 @@ impl RpcHandler {
|
||||
&self,
|
||||
params: Option<serde_json::Value>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let orchestrator = self
|
||||
.orchestrator
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow::anyhow!("Container orchestrator not available"))?;
|
||||
|
||||
let params = params.ok_or_else(|| anyhow::anyhow!("Missing params"))?;
|
||||
let app_id = params
|
||||
.get("app_id")
|
||||
@ -74,23 +70,23 @@ impl RpcHandler {
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing app_id"))?;
|
||||
validate_app_id(app_id)?;
|
||||
|
||||
orchestrator
|
||||
.start(app_id)
|
||||
.await
|
||||
.context("Failed to start container")?;
|
||||
// User explicitly started the app — clear the user-stopped marker so
|
||||
// crash recovery / health monitor won't second-guess it. Must happen
|
||||
// BEFORE the spawn (see runtime.rs:145-148 for the symmetric stop
|
||||
// side and the ordering contract crash recovery depends on).
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, app_id).await;
|
||||
|
||||
Ok(serde_json::json!({ "status": "started" }))
|
||||
// spawn_transitional returns as soon as the background task is
|
||||
// launched (<1s). The UI sees Starting… immediately via WebSocket.
|
||||
self.spawn_transitional(Op::Start, app_id.to_string()).await?;
|
||||
|
||||
Ok(serde_json::json!({ "status": "starting" }))
|
||||
}
|
||||
|
||||
pub(super) async fn handle_container_stop(
|
||||
&self,
|
||||
params: Option<serde_json::Value>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let orchestrator = self
|
||||
.orchestrator
|
||||
.as_ref()
|
||||
.ok_or_else(|| anyhow::anyhow!("Container orchestrator not available"))?;
|
||||
|
||||
let params = params.ok_or_else(|| anyhow::anyhow!("Missing params"))?;
|
||||
let app_id = params
|
||||
.get("app_id")
|
||||
@ -98,12 +94,40 @@ impl RpcHandler {
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing app_id"))?;
|
||||
validate_app_id(app_id)?;
|
||||
|
||||
orchestrator
|
||||
.stop(app_id)
|
||||
.await
|
||||
.context("Failed to stop container")?;
|
||||
// Mark as user-stopped BEFORE the spawn — ordering is load-bearing
|
||||
// (crash recovery / health monitor inspect this flag concurrently
|
||||
// with the in-flight stop; see runtime.rs:145-148 for the package
|
||||
// path that also writes this in the same order).
|
||||
crate::crash_recovery::mark_user_stopped(&self.config.data_dir, app_id).await;
|
||||
|
||||
Ok(serde_json::json!({ "status": "stopped" }))
|
||||
// podman stop -t 600 (bitcoin-core) / -t 330 (lnd) runs in the
|
||||
// background; the RPC returns now with "stopping".
|
||||
self.spawn_transitional(Op::Stop, app_id.to_string()).await?;
|
||||
|
||||
Ok(serde_json::json!({ "status": "stopping" }))
|
||||
}
|
||||
|
||||
pub(super) async fn handle_container_restart(
|
||||
&self,
|
||||
params: Option<serde_json::Value>,
|
||||
) -> Result<serde_json::Value> {
|
||||
let params = params.ok_or_else(|| anyhow::anyhow!("Missing params"))?;
|
||||
let app_id = params
|
||||
.get("app_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing app_id"))?;
|
||||
validate_app_id(app_id)?;
|
||||
|
||||
// Restart does not mark user-stopped (the user wants the app to
|
||||
// keep running). Clear the marker as a defensive measure in case a
|
||||
// prior stop left it set and the restart is intended to revive the
|
||||
// normal running state.
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, app_id).await;
|
||||
|
||||
self.spawn_transitional(Op::Restart, app_id.to_string())
|
||||
.await?;
|
||||
|
||||
Ok(serde_json::json!({ "status": "restarting" }))
|
||||
}
|
||||
|
||||
pub(super) async fn handle_container_remove(
|
||||
@ -145,12 +169,25 @@ impl RpcHandler {
|
||||
.package_data
|
||||
.iter()
|
||||
.map(|(id, pkg)| {
|
||||
// Keep this mapping in sync with the UI's
|
||||
// ContainerStatus.state union in
|
||||
// neode-ui/src/api/container-client.ts. The UI maps
|
||||
// transitional variants to single-button labels
|
||||
// (Stopping… / Starting… / Restarting…).
|
||||
let state = match &pkg.state {
|
||||
crate::data_model::PackageState::Running => "running",
|
||||
crate::data_model::PackageState::Stopped => "stopped",
|
||||
crate::data_model::PackageState::Exited => "exited",
|
||||
crate::data_model::PackageState::Starting => "created",
|
||||
_ => "unknown",
|
||||
crate::data_model::PackageState::Starting => "starting",
|
||||
crate::data_model::PackageState::Stopping => "stopping",
|
||||
crate::data_model::PackageState::Restarting => "restarting",
|
||||
crate::data_model::PackageState::Installing => "installing",
|
||||
crate::data_model::PackageState::Installed => "installed",
|
||||
crate::data_model::PackageState::Updating => "updating",
|
||||
crate::data_model::PackageState::Removing => "removing",
|
||||
crate::data_model::PackageState::CreatingBackup => "creating-backup",
|
||||
crate::data_model::PackageState::RestoringBackup => "restoring-backup",
|
||||
crate::data_model::PackageState::BackingUp => "backing-up",
|
||||
};
|
||||
let lan = pkg
|
||||
.installed
|
||||
|
||||
@ -36,6 +36,7 @@ impl RpcHandler {
|
||||
"container-install" => self.handle_container_install(params).await,
|
||||
"container-start" => self.handle_container_start(params).await,
|
||||
"container-stop" => self.handle_container_stop(params).await,
|
||||
"container-restart" => self.handle_container_restart(params).await,
|
||||
"container-remove" => self.handle_container_remove(params).await,
|
||||
"container-list" => self.handle_container_list().await,
|
||||
"container-status" => self.handle_container_status(params).await,
|
||||
|
||||
@ -3,7 +3,10 @@ use super::dependencies::ordered_containers_for_start;
|
||||
use super::install::install_log;
|
||||
use super::validation::validate_app_id;
|
||||
use crate::api::rpc::RpcHandler;
|
||||
use crate::data_model::PackageState;
|
||||
use anyhow::{Context, Result};
|
||||
use std::sync::Arc;
|
||||
use tracing::warn;
|
||||
|
||||
/// Per-container graceful shutdown timeout in seconds.
|
||||
/// Bitcoin Core needs 600s to flush UTXO set, LND 330s for channel state,
|
||||
@ -25,6 +28,12 @@ pub fn stop_timeout_secs(container_name: &str) -> &'static str {
|
||||
|
||||
impl RpcHandler {
|
||||
/// Start a package: start all containers in dependency order.
|
||||
///
|
||||
/// Returns immediately with `{ "status": "starting" }` after flipping
|
||||
/// the package state to `Starting` in the StateManager. The actual
|
||||
/// podman-start sequence + post-start exit verification runs in a
|
||||
/// background task. On success the state becomes `Running`; on error
|
||||
/// it reverts to the pre-transition state.
|
||||
pub(in crate::api::rpc) async fn handle_package_start(
|
||||
&self,
|
||||
params: Option<serde_json::Value>,
|
||||
@ -42,83 +51,52 @@ impl RpcHandler {
|
||||
return Err(anyhow::anyhow!("No containers found for {}", package_id));
|
||||
}
|
||||
|
||||
// Clear user-stopped flag — user explicitly started this app
|
||||
// Clear user-stopped flag — user explicitly started this app.
|
||||
// Must happen BEFORE the spawn (ordering contract with crash recovery).
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, package_id).await;
|
||||
for name in &to_start {
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, name).await;
|
||||
}
|
||||
|
||||
let package_id_owned = package_id.to_string();
|
||||
let state_manager = Arc::clone(&self.state_manager);
|
||||
let pre_state =
|
||||
flip_package_state(&state_manager, &package_id_owned, PackageState::Starting).await;
|
||||
|
||||
install_log(&format!(
|
||||
"START: {} (containers: {:?})",
|
||||
package_id, to_start
|
||||
package_id_owned, to_start
|
||||
))
|
||||
.await;
|
||||
let mut errors = Vec::new();
|
||||
for (i, name) in to_start.iter().enumerate() {
|
||||
// Brief delay between dependent containers to allow initialization
|
||||
if i > 0 {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
||||
}
|
||||
tracing::info!("Starting container: {}", name);
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["start", name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman start {}", name))?;
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::error!("Failed to start {}: {}", name, stderr);
|
||||
install_log(&format!("START FAIL: {} — {}", name, stderr)).await;
|
||||
errors.push(format!("{}: {}", name, stderr));
|
||||
}
|
||||
}
|
||||
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Start failed: {}", errors.join("; ")));
|
||||
}
|
||||
|
||||
// Verify containers actually reached running state (podman start can
|
||||
// succeed even if the container exits immediately after)
|
||||
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
|
||||
for name in &to_start {
|
||||
let status = tokio::process::Command::new("podman")
|
||||
.args(["inspect", name, "--format", "{{.State.Status}}"])
|
||||
.output()
|
||||
.await;
|
||||
if let Ok(o) = status {
|
||||
let state = String::from_utf8_lossy(&o.stdout).trim().to_string();
|
||||
if state == "exited" {
|
||||
let logs = tokio::process::Command::new("podman")
|
||||
.args(["logs", "--tail", "5", name])
|
||||
.output()
|
||||
tokio::spawn(async move {
|
||||
match do_package_start(&to_start).await {
|
||||
Ok(()) => {
|
||||
set_package_state(&state_manager, &package_id_owned, PackageState::Running)
|
||||
.await;
|
||||
let log_text = logs
|
||||
.map(|o| {
|
||||
let combined = format!(
|
||||
"{}{}",
|
||||
String::from_utf8_lossy(&o.stdout),
|
||||
String::from_utf8_lossy(&o.stderr)
|
||||
);
|
||||
combined.chars().take(200).collect::<String>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
tracing::error!("Container {} exited after start: {}", name, log_text);
|
||||
install_log(&format!("START EXITED: {} — {}", name, log_text)).await;
|
||||
errors.push(format!("{}: exited after start", name));
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("package.start {} failed: {:#}", package_id_owned, e);
|
||||
install_log(&format!("START FAIL: {} — {:#}", package_id_owned, e)).await;
|
||||
if let Some(prev) = pre_state {
|
||||
set_package_state(&state_manager, &package_id_owned, prev).await;
|
||||
} else {
|
||||
warn!(
|
||||
"package.start {}: no pre-state recorded; relying on next scan",
|
||||
package_id_owned
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Containers exited after start: {}",
|
||||
errors.join("; ")
|
||||
));
|
||||
}
|
||||
Ok(serde_json::Value::Null)
|
||||
Ok(serde_json::json!({ "status": "starting" }))
|
||||
}
|
||||
|
||||
/// Stop a package: mark as user-stopped and stop all containers.
|
||||
///
|
||||
/// Returns immediately with `{ "status": "stopping" }`. podman stop
|
||||
/// (up to 600s for bitcoin-core) runs in the background.
|
||||
pub(in crate::api::rpc) async fn handle_package_stop(
|
||||
&self,
|
||||
params: Option<serde_json::Value>,
|
||||
@ -136,43 +114,48 @@ impl RpcHandler {
|
||||
return Err(anyhow::anyhow!("No containers found for {}", package_id));
|
||||
}
|
||||
|
||||
install_log(&format!(
|
||||
"STOP: {} (containers: {:?})",
|
||||
package_id, containers
|
||||
))
|
||||
.await;
|
||||
// Mark as user-stopped so health monitor and crash recovery don't auto-restart
|
||||
// Mark as user-stopped BEFORE the spawn so health monitor and
|
||||
// crash recovery don't auto-restart mid-flight. Ordering is
|
||||
// load-bearing — see runtime.rs:145-148 original note.
|
||||
crate::crash_recovery::mark_user_stopped(&self.config.data_dir, package_id).await;
|
||||
for name in &containers {
|
||||
crate::crash_recovery::mark_user_stopped(&self.config.data_dir, name).await;
|
||||
}
|
||||
|
||||
let mut errors = Vec::new();
|
||||
for name in &containers {
|
||||
tracing::info!(
|
||||
"Stopping container: {} (timeout: {}s)",
|
||||
name,
|
||||
stop_timeout_secs(name)
|
||||
);
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["stop", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman stop {}", name))?;
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::error!("Failed to stop {}: {}", name, stderr);
|
||||
errors.push(format!("{}: {}", name, stderr));
|
||||
}
|
||||
}
|
||||
let package_id_owned = package_id.to_string();
|
||||
let state_manager = Arc::clone(&self.state_manager);
|
||||
let pre_state =
|
||||
flip_package_state(&state_manager, &package_id_owned, PackageState::Stopping).await;
|
||||
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Stop failed: {}", errors.join("; ")));
|
||||
}
|
||||
Ok(serde_json::Value::Null)
|
||||
install_log(&format!(
|
||||
"STOP: {} (containers: {:?})",
|
||||
package_id_owned, containers
|
||||
))
|
||||
.await;
|
||||
|
||||
tokio::spawn(async move {
|
||||
match do_package_stop(&containers).await {
|
||||
Ok(()) => {
|
||||
set_package_state(&state_manager, &package_id_owned, PackageState::Stopped)
|
||||
.await;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("package.stop {} failed: {:#}", package_id_owned, e);
|
||||
install_log(&format!("STOP FAIL: {} — {:#}", package_id_owned, e)).await;
|
||||
if let Some(prev) = pre_state {
|
||||
set_package_state(&state_manager, &package_id_owned, prev).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(serde_json::json!({ "status": "stopping" }))
|
||||
}
|
||||
|
||||
/// Restart a package: restart all containers.
|
||||
///
|
||||
/// Returns immediately with `{ "status": "restarting" }`. The restart
|
||||
/// (up to 600s per container for bitcoin-core) runs in the background.
|
||||
pub(in crate::api::rpc) async fn handle_package_restart(
|
||||
&self,
|
||||
params: Option<serde_json::Value>,
|
||||
@ -190,55 +173,42 @@ impl RpcHandler {
|
||||
return Err(anyhow::anyhow!("No containers found for {}", package_id));
|
||||
}
|
||||
|
||||
// Restart does not mark user-stopped; user wants the app to keep
|
||||
// running. Clear any lingering marker so downstream layers don't
|
||||
// interpret the brief podman stop as user intent.
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, package_id).await;
|
||||
for name in &containers {
|
||||
crate::crash_recovery::clear_user_stopped(&self.config.data_dir, name).await;
|
||||
}
|
||||
|
||||
let package_id_owned = package_id.to_string();
|
||||
let state_manager = Arc::clone(&self.state_manager);
|
||||
let pre_state =
|
||||
flip_package_state(&state_manager, &package_id_owned, PackageState::Restarting).await;
|
||||
|
||||
install_log(&format!(
|
||||
"RESTART: {} (containers: {:?})",
|
||||
package_id, containers
|
||||
package_id_owned, containers
|
||||
))
|
||||
.await;
|
||||
let mut errors = Vec::new();
|
||||
for name in &containers {
|
||||
tracing::info!("Restarting container: {}", name);
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["restart", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman restart {}", name))?;
|
||||
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::warn!(
|
||||
"podman restart {} failed: {}, trying stop+start",
|
||||
name,
|
||||
stderr
|
||||
);
|
||||
|
||||
// Fallback: stop then start (handles rootless podman loopback issues)
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["stop", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await;
|
||||
let start_out = tokio::process::Command::new("podman")
|
||||
.args(["start", name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman start {}", name))?;
|
||||
|
||||
if !start_out.status.success() {
|
||||
let start_err = String::from_utf8_lossy(&start_out.stderr)
|
||||
.trim()
|
||||
.to_string();
|
||||
tracing::error!("stop+start {} also failed: {}", name, start_err);
|
||||
errors.push(format!("{}: {}", name, start_err));
|
||||
} else {
|
||||
tracing::info!("Restarted {} via stop+start fallback", name);
|
||||
tokio::spawn(async move {
|
||||
match do_package_restart(&containers).await {
|
||||
Ok(()) => {
|
||||
set_package_state(&state_manager, &package_id_owned, PackageState::Running)
|
||||
.await;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("package.restart {} failed: {:#}", package_id_owned, e);
|
||||
install_log(&format!("RESTART FAIL: {} — {:#}", package_id_owned, e)).await;
|
||||
if let Some(prev) = pre_state {
|
||||
set_package_state(&state_manager, &package_id_owned, prev).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Restart failed: {}", errors.join("; ")));
|
||||
}
|
||||
Ok(serde_json::Value::Null)
|
||||
Ok(serde_json::json!({ "status": "restarting" }))
|
||||
}
|
||||
|
||||
/// Uninstall a package: stop and remove all related containers, clean data.
|
||||
@ -579,3 +549,186 @@ impl RpcHandler {
|
||||
Ok(serde_json::json!({ "status": "stopped", "app_id": app_id }))
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Background workers for async package lifecycle RPCs.
|
||||
//
|
||||
// Extracted from the pre-async RPC handlers so the transitional state is
|
||||
// visible to the UI immediately. Each worker is pure IO over podman + the
|
||||
// crash_recovery helpers — no StateManager access here so we don't need
|
||||
// a handler reference. The caller does state flipping before/after.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Start containers in dependency order. Includes the post-start 3s wait +
|
||||
/// exit-check verification from the original synchronous handler (critical
|
||||
/// for catching "podman start succeeded but container immediately exited"
|
||||
/// failure modes).
|
||||
async fn do_package_start(to_start: &[String]) -> Result<()> {
|
||||
let mut errors = Vec::new();
|
||||
for (i, name) in to_start.iter().enumerate() {
|
||||
if i > 0 {
|
||||
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
||||
}
|
||||
tracing::info!("Starting container: {}", name);
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["start", name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman start {}", name))?;
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::error!("Failed to start {}: {}", name, stderr);
|
||||
install_log(&format!("START FAIL: {} — {}", name, stderr)).await;
|
||||
errors.push(format!("{}: {}", name, stderr));
|
||||
}
|
||||
}
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Start failed: {}", errors.join("; ")));
|
||||
}
|
||||
|
||||
// Post-start exit verification (podman start can succeed even if the
|
||||
// container exits immediately after).
|
||||
tokio::time::sleep(std::time::Duration::from_secs(3)).await;
|
||||
for name in to_start {
|
||||
let status = tokio::process::Command::new("podman")
|
||||
.args(["inspect", name, "--format", "{{.State.Status}}"])
|
||||
.output()
|
||||
.await;
|
||||
if let Ok(o) = status {
|
||||
let state = String::from_utf8_lossy(&o.stdout).trim().to_string();
|
||||
if state == "exited" {
|
||||
let logs = tokio::process::Command::new("podman")
|
||||
.args(["logs", "--tail", "5", name])
|
||||
.output()
|
||||
.await;
|
||||
let log_text = logs
|
||||
.map(|o| {
|
||||
let combined = format!(
|
||||
"{}{}",
|
||||
String::from_utf8_lossy(&o.stdout),
|
||||
String::from_utf8_lossy(&o.stderr)
|
||||
);
|
||||
combined.chars().take(200).collect::<String>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
tracing::error!("Container {} exited after start: {}", name, log_text);
|
||||
install_log(&format!("START EXITED: {} — {}", name, log_text)).await;
|
||||
errors.push(format!("{}: exited after start", name));
|
||||
}
|
||||
}
|
||||
}
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Containers exited after start: {}",
|
||||
errors.join("; ")
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Stop all containers with their per-container graceful-shutdown timeout.
|
||||
async fn do_package_stop(containers: &[String]) -> Result<()> {
|
||||
let mut errors = Vec::new();
|
||||
for name in containers {
|
||||
tracing::info!(
|
||||
"Stopping container: {} (timeout: {}s)",
|
||||
name,
|
||||
stop_timeout_secs(name)
|
||||
);
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["stop", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman stop {}", name))?;
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::error!("Failed to stop {}: {}", name, stderr);
|
||||
errors.push(format!("{}: {}", name, stderr));
|
||||
}
|
||||
}
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Stop failed: {}", errors.join("; ")));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Restart via `podman restart`, falling back to stop+start when restart
|
||||
/// fails (rootless podman loopback issues).
|
||||
async fn do_package_restart(containers: &[String]) -> Result<()> {
|
||||
let mut errors = Vec::new();
|
||||
for name in containers {
|
||||
tracing::info!("Restarting container: {}", name);
|
||||
let out = tokio::process::Command::new("podman")
|
||||
.args(["restart", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman restart {}", name))?;
|
||||
|
||||
if !out.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
|
||||
tracing::warn!(
|
||||
"podman restart {} failed: {}, trying stop+start",
|
||||
name,
|
||||
stderr
|
||||
);
|
||||
// Fallback: stop then start
|
||||
let _ = tokio::process::Command::new("podman")
|
||||
.args(["stop", "-t", stop_timeout_secs(name), name])
|
||||
.output()
|
||||
.await;
|
||||
let start_out = tokio::process::Command::new("podman")
|
||||
.args(["start", name])
|
||||
.output()
|
||||
.await
|
||||
.context(format!("Failed to exec podman start {}", name))?;
|
||||
if !start_out.status.success() {
|
||||
let start_err = String::from_utf8_lossy(&start_out.stderr)
|
||||
.trim()
|
||||
.to_string();
|
||||
tracing::error!("stop+start {} also failed: {}", name, start_err);
|
||||
errors.push(format!("{}: {}", name, start_err));
|
||||
} else {
|
||||
tracing::info!("Restarted {} via stop+start fallback", name);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !errors.is_empty() {
|
||||
return Err(anyhow::anyhow!("Restart failed: {}", errors.join("; ")));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Flip the primary package entry's state and return the pre-transition
|
||||
/// state for revert on error. Mirrors `transitional::flip_to_transitional`
|
||||
/// but lives here because the package path keys by `package_id` (which may
|
||||
/// differ from the container name used by orchestrator-level entries).
|
||||
async fn flip_package_state(
|
||||
state_manager: &crate::state::StateManager,
|
||||
package_id: &str,
|
||||
transitional: PackageState,
|
||||
) -> Option<PackageState> {
|
||||
let (mut data, _) = state_manager.get_snapshot().await;
|
||||
let prev = data.package_data.get(package_id).map(|e| e.state.clone());
|
||||
if let Some(entry) = data.package_data.get_mut(package_id) {
|
||||
entry.state = transitional;
|
||||
state_manager.update_data(data).await;
|
||||
}
|
||||
prev
|
||||
}
|
||||
|
||||
/// Write the package entry's final state. No-op if the entry has since
|
||||
/// been removed (uninstall race).
|
||||
async fn set_package_state(
|
||||
state_manager: &crate::state::StateManager,
|
||||
package_id: &str,
|
||||
new_state: PackageState,
|
||||
) {
|
||||
let (mut data, _) = state_manager.get_snapshot().await;
|
||||
if let Some(entry) = data.package_data.get_mut(package_id) {
|
||||
if entry.state != new_state {
|
||||
entry.state = new_state;
|
||||
state_manager.update_data(data).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user