- Fedimint ecash alongside Cashu: fedimint-clientd (fmcd) HTTP bridge, fedimint_client, fedimint RPC, wallet wiring - Paid peer content: content invoices + streaming content server + content RPCs - Seed-phrase ceremony/reveal RPCs and CLI ceremony tool - LND wallet, mesh status/messaging, app-stack (netbird HTTPS), and decoupled-update wiring; Fedimint Client core app in catalog Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2234 lines
92 KiB
Rust
2234 lines
92 KiB
Rust
//! Update system: check for updates, download deltas, apply with rollback.
|
|
|
|
use anyhow::{Context, Result};
|
|
use chrono::Timelike;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::path::Path;
|
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
|
use tokio::fs;
|
|
use tracing::{debug, info, warn};
|
|
|
|
/// Bytes received so far by the in-flight download. Bumped by
/// `download_component_resumable` as data arrives and read by the
/// `update.status` RPC, so the UI can render a genuine progress bar.
/// Process-global: only one download runs at a time, so per-handler
/// state would buy nothing.
pub static DOWNLOAD_BYTES: AtomicU64 = AtomicU64::new(0);

/// Companion counter to [`DOWNLOAD_BYTES`] for the same progress display.
pub static DOWNLOAD_TOTAL: AtomicU64 = AtomicU64::new(0);

/// Cancellation request flag. When true, the download loop is expected to
/// stop at its next chunk boundary. Polled through `is_canceled`, cleared
/// at the start of every `download_update` run, and set by the
/// `cancel_download` RPC.
pub static DOWNLOAD_CANCEL: AtomicBool = AtomicBool::new(false);

/// Millisecond timestamp of the most recent advance of [`DOWNLOAD_BYTES`].
/// `update.status` uses it to report a download as "stalled" after a quiet
/// period, which justifies a Cancel button better than "stuck at 0%" does.
///
/// NOTE(review): this was described as a monotonic timestamp; if it is
/// stamped with `now_ms` it is wall-clock (UNIX epoch) time instead —
/// confirm at the write site.
pub static DOWNLOAD_PROGRESS_AT: AtomicU64 = AtomicU64::new(0);
|
|
|
|
/// Current wall-clock time as milliseconds since the UNIX epoch.
///
/// Returns 0 if the system clock reports a time before the epoch rather
/// than propagating the error.
fn now_ms() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        Err(_) => 0,
    }
}
|
|
|
|
fn is_canceled() -> bool {
|
|
DOWNLOAD_CANCEL.load(Ordering::Relaxed)
|
|
}
|
|
|
|
/// Split a `MAJOR.MINOR.PATCH[-suffix]` version string into its three
/// numeric fields.
///
/// Everything from the first `-` onwards is discarded, and any dotted
/// fields after the third are not inspected. Returns `None` when one of
/// the first three fields is missing or is not a valid `u32`; callers are
/// expected to fall back to plain string comparison in that case so an
/// unfamiliar version format is never silently mis-ranked.
fn parse_version_triple(v: &str) -> Option<(u32, u32, u32)> {
    let numeric = match v.split_once('-') {
        Some((head, _suffix)) => head,
        None => v,
    };
    // Lazily parse each dotted field; a field that fails to parse yields
    // `Some(None)`, which the double `?` below turns into an early `None`.
    let mut fields = numeric.split('.').map(|field| field.parse::<u32>().ok());
    let major = fields.next()??;
    let minor = fields.next()??;
    let patch = fields.next()??;
    Some((major, minor, patch))
}
|
|
|
|
/// Is `candidate` strictly newer than `current`? Used to guard against
|
|
/// the manifest offering a version we've already passed (e.g. a stale
|
|
/// cached manifest or a node that sideloaded past the manifest's
|
|
/// latest). Falls back to string inequality if either version doesn't
|
|
/// parse, preserving the old behaviour for unusual version strings.
|
|
fn is_newer(candidate: &str, current: &str) -> bool {
|
|
match (
|
|
parse_version_triple(candidate),
|
|
parse_version_triple(current),
|
|
) {
|
|
(Some(a), Some(b)) => a > b,
|
|
_ => candidate != current,
|
|
}
|
|
}
|
|
|
|
/// Manifest URL used when neither an env override nor a saved mirror list
/// supplies one.
///
/// NOTE(review): this is plain HTTP to a raw IP. Component hashes travel
/// in the same manifest, so integrity rests on whatever additional
/// verification happens elsewhere — confirm that is sufficient.
const DEFAULT_UPDATE_MANIFEST_URL: &str =
    "http://146.59.87.168:3000/lfg2025/archy/raw/branch/main/releases/manifest.json";

/// File name (inside the data dir) of the persisted `UpdateState`.
const UPDATE_STATE_FILE: &str = "update_state.json";

/// File name (inside the data dir) of the operator's mirror list.
const UPDATE_MIRRORS_FILE: &str = "update-mirrors.json";

/// File name of the post-apply verification marker. apply_update() writes
/// it right before restarting the service, and verify_pending_update()
/// consumes it in the NEW binary's startup path: the new binary probes the
/// frontend and, if the probe fails, rollback_update() runs and the
/// service restarts on the old binary. This closes the "OTA broke nginx
/// fleet-wide with no auto-rollback" failure mode from 2026-04-22
/// (v1.7.38/39 tarball-perms bug).
const PENDING_VERIFY_FILE: &str = "update-pending-verify.json";

/// Total time budget, retries included, for the frontend health probe.
/// Deliberately generous: the new binary must come fully up, the health
/// monitor must settle, and nginx must re-read any snippet changes. 90s
/// comfortably exceeds the slowest startup observed.
const PENDING_VERIFY_WINDOW_SECS: u64 = 90;

/// Markers older than this when read are treated as stale and removed
/// without probing. Stops a node that never ran verification (for example
/// because it crashed during startup) from spontaneously rolling back days
/// later when the user reboots.
const PENDING_VERIFY_MAX_AGE_SECS: i64 = 600;
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
|
pub struct UpdateMirror {
|
|
/// Full URL to `manifest.json`. Download URLs in the fetched
|
|
/// manifest are origin-rewritten to match this URL's scheme+host+
|
|
/// port, so hitting a mirror pulls its components from the same
|
|
/// mirror rather than whatever absolute host the publisher baked in.
|
|
pub url: String,
|
|
/// Human-readable label for the UI ("Server 1", "Home VPS", …).
|
|
#[serde(default)]
|
|
pub label: String,
|
|
}
|
|
|
|
fn mirrors_path(data_dir: &Path) -> std::path::PathBuf {
|
|
data_dir.join(UPDATE_MIRRORS_FILE)
|
|
}
|
|
|
|
fn default_mirrors() -> Vec<UpdateMirror> {
|
|
vec![UpdateMirror {
|
|
url: DEFAULT_UPDATE_MANIFEST_URL.to_string(),
|
|
label: "Server 1 (OVH)".to_string(),
|
|
}]
|
|
}
|
|
|
|
/// Load the operator-configured mirror list. Returns defaults if the
|
|
/// file doesn't exist yet, so a node OTA'd from a pre-mirrors release
|
|
/// starts with the current default mirrors available without any
|
|
/// manual config.
|
|
///
|
|
/// Migration: any default mirror URL that isn't already in the saved
|
|
/// list gets appended at the end. This lets us add new default mirrors
|
|
/// (e.g. a new Server 3) and have them appear on existing nodes after
|
|
/// an update, without requiring manual config edits. Explicit removals
|
|
/// stick — once an operator removes a URL it stays gone unless it's
|
|
/// later re-added to defaults.
|
|
pub async fn load_mirrors(data_dir: &Path) -> Result<Vec<UpdateMirror>> {
|
|
let path = mirrors_path(data_dir);
|
|
if !path.exists() {
|
|
return Ok(default_mirrors());
|
|
}
|
|
let bytes = fs::read(&path)
|
|
.await
|
|
.with_context(|| format!("read {}", path.display()))?;
|
|
let mut list: Vec<UpdateMirror> =
|
|
serde_json::from_slice(&bytes).with_context(|| format!("parse {}", path.display()))?;
|
|
if list.is_empty() {
|
|
return Ok(default_mirrors());
|
|
}
|
|
|
|
// One-time migrations: drop decommissioned release servers that may be
|
|
// baked into existing nodes' saved mirror lists. Strip them on load so
|
|
// we don't spend seconds per install timing out against a dead/stale host.
|
|
// - 23.182.128.160: Hetzner VPS, decommissioned 2026-04-23.
|
|
// - git.tx1138.com: retired as a release server 2026-06-13 — its main
|
|
// branch had diverged and stopped receiving releases, so it only
|
|
// ever served a stale manifest as the secondary mirror.
|
|
// Exception to the usual "explicit removals stick" rule: the user never
|
|
// chose to add these — they were defaults.
|
|
let before = list.len();
|
|
list.retain(|m| !m.url.contains("23.182.128.160") && !m.url.contains("git.tx1138.com"));
|
|
let mut changed = list.len() != before;
|
|
|
|
// Merge in any default URLs the saved config is missing.
|
|
let known: std::collections::HashSet<String> = list.iter().map(|m| m.url.clone()).collect();
|
|
let defaults = default_mirrors();
|
|
for def in &defaults {
|
|
if !known.contains(&def.url) {
|
|
list.push(def.clone());
|
|
changed = true;
|
|
}
|
|
}
|
|
let before_order: Vec<String> = list.iter().map(|m| m.url.clone()).collect();
|
|
force_ovh_update_primary(&mut list);
|
|
changed = changed || before_order != list.iter().map(|m| m.url.clone()).collect::<Vec<_>>();
|
|
if changed {
|
|
let _ = save_mirrors(data_dir, &list).await;
|
|
}
|
|
Ok(list)
|
|
}
|
|
|
|
fn force_ovh_update_primary(list: &mut Vec<UpdateMirror>) {
|
|
let defaults = default_mirrors();
|
|
for def in &defaults {
|
|
if !list.iter().any(|m| m.url == def.url) {
|
|
list.push(def.clone());
|
|
}
|
|
}
|
|
for mirror in list.iter_mut() {
|
|
if mirror.url == DEFAULT_UPDATE_MANIFEST_URL {
|
|
mirror.label = "Server 1 (OVH)".to_string();
|
|
}
|
|
}
|
|
list.sort_by_key(|m| {
|
|
if m.url == DEFAULT_UPDATE_MANIFEST_URL {
|
|
0
|
|
} else {
|
|
1
|
|
}
|
|
});
|
|
}
|
|
|
|
pub async fn save_mirrors(data_dir: &Path, mirrors: &[UpdateMirror]) -> Result<()> {
|
|
fs::create_dir_all(data_dir)
|
|
.await
|
|
.with_context(|| format!("mkdir {}", data_dir.display()))?;
|
|
let path = mirrors_path(data_dir);
|
|
let tmp = path.with_extension("json.tmp");
|
|
let json = serde_json::to_vec_pretty(mirrors).context("serialize mirrors")?;
|
|
fs::write(&tmp, json)
|
|
.await
|
|
.with_context(|| format!("write {}", tmp.display()))?;
|
|
fs::rename(&tmp, &path)
|
|
.await
|
|
.with_context(|| format!("rename {} -> {}", tmp.display(), path.display()))?;
|
|
Ok(())
|
|
}
|
|
|
|
// ─── Update/app fetch source (origin vs DHT swarm) ──────────────────────────
//
// A per-node, user-selectable setting persisted in
// `data_dir/update-source.json`. It is the live-testing switch:
//   - `Origin` (the default) pulls releases/app blobs purely over HTTP from
//     the configured mirrors — the known-good path.
//   - `Swarm` is for test nodes that should exercise the DHT (iroh
//     swarm-assist); the origin still always wins as fallback.
// The setting is independent of the compile-time `iroh-swarm` feature and
// of the `swarm_enabled` config: without a swarm engine, `Swarm` simply has
// no peers to consult and behaves like `Origin`.

/// File name (inside the data dir) holding the persisted swarm preferences.
const UPDATE_SOURCE_FILE: &str = "update-source.json";
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
|
|
#[serde(rename_all = "lowercase")]
|
|
pub enum UpdateSource {
|
|
/// HTTP origin/mirrors only. The safe default and the universal fallback.
|
|
#[default]
|
|
Origin,
|
|
/// Try DHT swarm peers first for content-addressed blobs, origin always wins.
|
|
Swarm,
|
|
}
|
|
|
|
/// Serde default provider for boolean fields that should be `true` when
/// absent from the serialized form.
fn default_true() -> bool {
    true
}
|
|
|
|
/// Node-level swarm preferences, persisted together in `update-source.json`.
|
|
/// Two independent switches:
|
|
/// - `source`: where THIS node fetches (origin vs swarm). Default origin.
|
|
/// - `provide_dht`: whether this node SEEDS/serves blobs to peers. Default on
|
|
/// (opt-out) so the swarm has providers; nodes that don't want to serve can
|
|
/// turn it off without affecting how they fetch.
|
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
|
struct SwarmPrefs {
|
|
#[serde(default)]
|
|
source: UpdateSource,
|
|
#[serde(default = "default_true")]
|
|
provide_dht: bool,
|
|
}
|
|
|
|
impl Default for SwarmPrefs {
|
|
fn default() -> Self {
|
|
Self {
|
|
source: UpdateSource::default(),
|
|
provide_dht: true,
|
|
}
|
|
}
|
|
}
|
|
|
|
fn update_source_path(data_dir: &Path) -> std::path::PathBuf {
|
|
data_dir.join(UPDATE_SOURCE_FILE)
|
|
}
|
|
|
|
async fn load_swarm_prefs(data_dir: &Path) -> SwarmPrefs {
|
|
match fs::read_to_string(update_source_path(data_dir)).await {
|
|
Ok(s) => serde_json::from_str::<SwarmPrefs>(&s).unwrap_or_default(),
|
|
Err(_) => SwarmPrefs::default(),
|
|
}
|
|
}
|
|
|
|
async fn save_swarm_prefs(data_dir: &Path, prefs: &SwarmPrefs) -> Result<()> {
|
|
fs::create_dir_all(data_dir)
|
|
.await
|
|
.with_context(|| format!("mkdir {}", data_dir.display()))?;
|
|
let path = update_source_path(data_dir);
|
|
let tmp = path.with_extension("json.tmp");
|
|
let json = serde_json::to_vec_pretty(prefs).context("serialize swarm prefs")?;
|
|
fs::write(&tmp, json)
|
|
.await
|
|
.with_context(|| format!("write {}", tmp.display()))?;
|
|
fs::rename(&tmp, &path)
|
|
.await
|
|
.with_context(|| format!("rename {} -> {}", tmp.display(), path.display()))?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Load the node's selected fetch source. Missing/corrupt file → `Origin`.
|
|
pub async fn load_update_source(data_dir: &Path) -> UpdateSource {
|
|
load_swarm_prefs(data_dir).await.source
|
|
}
|
|
|
|
/// Persist the node's selected fetch source (preserving `provide_dht`).
|
|
pub async fn save_update_source(data_dir: &Path, source: UpdateSource) -> Result<()> {
|
|
let mut prefs = load_swarm_prefs(data_dir).await;
|
|
prefs.source = source;
|
|
save_swarm_prefs(data_dir, &prefs).await
|
|
}
|
|
|
|
/// Whether this node seeds/serves blobs to peers. Default true (opt-out).
|
|
pub async fn load_provide_dht(data_dir: &Path) -> bool {
|
|
load_swarm_prefs(data_dir).await.provide_dht
|
|
}
|
|
|
|
/// Persist whether this node provides to the swarm (preserving `source`).
|
|
pub async fn save_provide_dht(data_dir: &Path, provide: bool) -> Result<()> {
|
|
let mut prefs = load_swarm_prefs(data_dir).await;
|
|
prefs.provide_dht = provide;
|
|
save_swarm_prefs(data_dir, &prefs).await
|
|
}
|
|
|
|
/// Extract the `scheme://host[:port]` prefix of an `http://` or `https://`
/// URL.
///
/// Used by `rewrite_manifest_origins` so a manifest fetched from a mirror
/// points component downloads back at that same mirror rather than at
/// whatever absolute URL the publisher baked in.
///
/// The authority ends at the first `/`, `?` or `#` (or at the end of the
/// string), so a URL such as `http://host?x=1` yields `http://host` rather
/// than an "origin" that still contains the query.
///
/// Returns `None` for any other scheme (the match is case-sensitive) and
/// for URLs with an empty authority such as `http:///path`.
fn manifest_origin(manifest_url: &str) -> Option<String> {
    let (scheme, after_scheme) = if let Some(rest) = manifest_url.strip_prefix("https://") {
        ("https", rest)
    } else if let Some(rest) = manifest_url.strip_prefix("http://") {
        ("http", rest)
    } else {
        return None;
    };

    let authority_end = after_scheme
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(after_scheme.len());
    let host_and_port = &after_scheme[..authority_end];
    if host_and_port.is_empty() {
        return None;
    }
    Some(format!("{}://{}", scheme, host_and_port))
}
|
|
|
|
/// Rewrite every component `download_url` so its origin matches the
|
|
/// manifest URL we just fetched. Preserves the path portion (which is
|
|
/// consistent across mirrors — every gitea serves `/lfg2025/archy/raw/…`).
|
|
/// Leaves URLs with a different path shape untouched (some operator
|
|
/// might mirror with a custom layout; in that case we don't guess).
|
|
fn rewrite_manifest_origins(manifest: &mut UpdateManifest, manifest_url: &str) {
|
|
let Some(new_origin) = manifest_origin(manifest_url) else {
|
|
return;
|
|
};
|
|
for c in manifest.components.iter_mut() {
|
|
if let Some(orig_origin) = manifest_origin(&c.download_url) {
|
|
if orig_origin != new_origin {
|
|
let path = c.download_url.trim_start_matches(&orig_origin).to_string();
|
|
c.download_url = format!("{}{}", new_origin, path);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Which manifest URL to try FIRST — operator override via env wins,
|
|
/// otherwise the first entry in the mirrors list, otherwise the hard
|
|
/// default. Callers that need the full mirror walk should use
|
|
/// `load_mirrors` directly.
|
|
fn update_manifest_url() -> String {
|
|
std::env::var("ARCHIPELAGO_UPDATE_URL")
|
|
.unwrap_or_else(|_| DEFAULT_UPDATE_MANIFEST_URL.to_string())
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct UpdateManifest {
|
|
pub version: String,
|
|
pub release_date: String,
|
|
pub changelog: Vec<String>,
|
|
pub components: Vec<ComponentUpdate>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct ComponentUpdate {
|
|
pub name: String,
|
|
pub current_version: String,
|
|
pub new_version: String,
|
|
pub download_url: String,
|
|
pub sha256: String,
|
|
pub size_bytes: u64,
|
|
/// DHT Phase 1: BLAKE3 content address (bare hex or `"blake3:<hex>"`), the
|
|
/// iroh-native, range-verifiable hash. Optional during the migration
|
|
/// window — when present it is verified ALONGSIDE the mandatory SHA-256.
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
pub blake3: Option<String>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
|
#[serde(rename_all = "snake_case")]
|
|
#[derive(Default)]
|
|
pub enum UpdateSchedule {
|
|
Manual,
|
|
#[default]
|
|
DailyCheck,
|
|
AutoApply,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct UpdateState {
|
|
pub current_version: String,
|
|
pub last_check: Option<String>,
|
|
pub available_update: Option<UpdateManifest>,
|
|
pub update_in_progress: bool,
|
|
pub rollback_available: bool,
|
|
#[serde(default)]
|
|
pub schedule: UpdateSchedule,
|
|
/// URL of the mirror whose manifest populated `available_update`.
|
|
/// Surfaces in the UI so operators can tell at a glance which mirror
|
|
/// their node actually hit (vs. just which is configured primary).
|
|
#[serde(default)]
|
|
pub manifest_mirror: Option<String>,
|
|
}
|
|
|
|
impl Default for UpdateState {
|
|
fn default() -> Self {
|
|
Self {
|
|
current_version: env!("CARGO_PKG_VERSION").to_string(),
|
|
last_check: None,
|
|
available_update: None,
|
|
update_in_progress: false,
|
|
rollback_available: false,
|
|
schedule: UpdateSchedule::DailyCheck,
|
|
manifest_mirror: None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Marker written by apply_update() just before the service restart and
|
|
/// consumed by verify_pending_update() in the NEW binary's startup path.
|
|
/// See PENDING_VERIFY_FILE for the full rationale — this is the hook
|
|
/// that turns "nginx 500 on every page after OTA" from an unrecoverable
|
|
/// field incident into an automatic rollback.
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct PendingVerification {
|
|
/// RFC3339 timestamp of the apply that wrote this marker.
|
|
pub applied_at: String,
|
|
/// Version we just applied (what the NEW binary should be running).
|
|
pub new_version: String,
|
|
/// Version the outgoing binary was running (what we roll back to).
|
|
pub previous_version: String,
|
|
/// Unix epoch seconds after which the probe should give up and
|
|
/// trigger rollback. Prevents a probe from retrying forever if e.g.
|
|
/// nginx is totally wedged.
|
|
pub deadline_ts: i64,
|
|
}
|
|
|
|
async fn write_pending_verification(data_dir: &Path, marker: &PendingVerification) -> Result<()> {
|
|
let path = data_dir.join(PENDING_VERIFY_FILE);
|
|
let data = serde_json::to_string_pretty(marker).context("serialize pending-verify marker")?;
|
|
fs::write(&path, data)
|
|
.await
|
|
.with_context(|| format!("write pending-verify marker to {}", path.display()))?;
|
|
Ok(())
|
|
}
|
|
|
|
async fn read_pending_verification(data_dir: &Path) -> Option<PendingVerification> {
|
|
let path = data_dir.join(PENDING_VERIFY_FILE);
|
|
let data = fs::read_to_string(&path).await.ok()?;
|
|
serde_json::from_str(&data).ok()
|
|
}
|
|
|
|
async fn clear_pending_verification(data_dir: &Path) {
|
|
let path = data_dir.join(PENDING_VERIFY_FILE);
|
|
let _ = fs::remove_file(&path).await;
|
|
}
|
|
|
|
/// Probe the local frontend through nginx. Returns Ok(()) on the first
|
|
/// response that's 2xx or 3xx; errors on timeout / connection refused /
|
|
/// any 4xx/5xx. `accept_self_signed` because nodes use a self-signed
|
|
/// cert the reqwest default root-set doesn't trust.
|
|
async fn probe_frontend_once() -> Result<()> {
|
|
let client = reqwest::Client::builder()
|
|
.danger_accept_invalid_certs(true)
|
|
.timeout(std::time::Duration::from_secs(5))
|
|
.build()
|
|
.context("build probe client")?;
|
|
// Prefer HTTPS since that's the failure mode we're catching (nginx
|
|
// 500 on the PWA). HTTP usually redirects to HTTPS and would mask
|
|
// the bug. BUT not every node binds 443 on loopback (.116 serves
|
|
// plain HTTP; 443 there belongs to tailscale) — on a *connect*
|
|
// error, fall back to HTTP so a healthy node isn't "verified" into
|
|
// a rollback. An HTTP error status stays fatal on whichever scheme
|
|
// answered.
|
|
let resp = match client.get("https://127.0.0.1/").send().await {
|
|
Ok(resp) => resp,
|
|
Err(e) if e.is_connect() => client
|
|
.get("http://127.0.0.1/")
|
|
.send()
|
|
.await
|
|
.context("probe GET http://127.0.0.1/ (https not bound on loopback)")?,
|
|
Err(e) => return Err(e).context("probe GET https://127.0.0.1/"),
|
|
};
|
|
let status = resp.status();
|
|
if status.is_success() || status.is_redirection() {
|
|
return Ok(());
|
|
}
|
|
anyhow::bail!("frontend probe returned HTTP {}", status);
|
|
}
|
|
|
|
/// Called from main.rs startup. If a pending-verification marker is
|
|
/// present, probe the frontend; on failure, trigger rollback and
|
|
/// restart the service so the OLD binary boots.
|
|
///
|
|
/// This is the "post-OTA auto-rollback" guardrail. If ANY problem in
|
|
/// the new version takes down the PWA (bad tarball perms as in v1.7.38,
|
|
/// a broken service worker, a missing asset, a backend panic on first
|
|
/// boot), the node self-heals back to the previous working state
|
|
/// without SSH intervention.
|
|
pub async fn verify_pending_update(data_dir: &Path) {
|
|
let marker = match read_pending_verification(data_dir).await {
|
|
Some(m) => m,
|
|
None => return, // No update pending; nothing to verify.
|
|
};
|
|
|
|
// Guard against a marker left behind by some earlier crash path —
|
|
// don't want a user who reboots days later to suddenly get
|
|
// rolled back because the marker was never cleared.
|
|
let applied_at = chrono::DateTime::parse_from_rfc3339(&marker.applied_at);
|
|
if let Ok(ts) = applied_at {
|
|
let age = chrono::Utc::now() - ts.with_timezone(&chrono::Utc);
|
|
if age.num_seconds() > PENDING_VERIFY_MAX_AGE_SECS {
|
|
tracing::warn!(
|
|
age_secs = age.num_seconds(),
|
|
"pending-verify marker is stale, clearing without probing"
|
|
);
|
|
clear_pending_verification(data_dir).await;
|
|
return;
|
|
}
|
|
}
|
|
|
|
info!(
|
|
new_version = %marker.new_version,
|
|
previous_version = %marker.previous_version,
|
|
"Post-OTA verification: probing frontend at https://127.0.0.1/"
|
|
);
|
|
|
|
// Give the new service time to bind its listeners + nginx to
|
|
// pick up any config changes. 15s matches what we observed on
|
|
// .116 during the v1.7.40 rollout recovery.
|
|
tokio::time::sleep(std::time::Duration::from_secs(15)).await;
|
|
|
|
let deadline =
|
|
std::time::Instant::now() + std::time::Duration::from_secs(PENDING_VERIFY_WINDOW_SECS);
|
|
let mut attempt = 0u32;
|
|
let mut last_err: Option<String> = None;
|
|
|
|
while std::time::Instant::now() < deadline {
|
|
attempt += 1;
|
|
match probe_frontend_once().await {
|
|
Ok(()) => {
|
|
info!(attempt, "Post-OTA verification succeeded — clearing marker");
|
|
clear_pending_verification(data_dir).await;
|
|
return;
|
|
}
|
|
Err(e) => {
|
|
let msg = e.to_string();
|
|
tracing::warn!(attempt, error = %msg, "Post-OTA probe failed, retrying");
|
|
last_err = Some(msg);
|
|
}
|
|
}
|
|
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
|
|
}
|
|
|
|
tracing::error!(
|
|
attempts = attempt,
|
|
window_secs = PENDING_VERIFY_WINDOW_SECS,
|
|
last_error = last_err.as_deref().unwrap_or(""),
|
|
new_version = %marker.new_version,
|
|
previous_version = %marker.previous_version,
|
|
"Post-OTA verification FAILED — rolling back"
|
|
);
|
|
|
|
// Restore web-ui.bak on top of web-ui. update.rs keeps web-ui.bak
|
|
// from the previous apply; moving it back is the frontend half of
|
|
// the rollback. The binary half is handled by rollback_update().
|
|
let web_ui_bak = Path::new("/opt/archipelago/web-ui.bak");
|
|
let web_ui = "/opt/archipelago/web-ui";
|
|
if web_ui_bak.exists() {
|
|
let ts = chrono::Utc::now().timestamp_millis();
|
|
let quarantine = format!("/opt/archipelago/web-ui.failed.{}", ts);
|
|
let _ = host_sudo(&["mv", web_ui, &quarantine]).await;
|
|
let _ = host_sudo(&["mv", web_ui_bak.to_str().unwrap_or(""), web_ui]).await;
|
|
tracing::info!(quarantined = %quarantine, "Restored web-ui from web-ui.bak");
|
|
} else {
|
|
tracing::warn!("web-ui.bak not present — frontend cannot be rolled back, only binary");
|
|
}
|
|
|
|
if let Err(e) = rollback_update(data_dir).await {
|
|
tracing::error!(error = %e, "rollback_update() failed during post-OTA verification");
|
|
// Leave the marker in place so a future boot gets another shot.
|
|
return;
|
|
}
|
|
|
|
clear_pending_verification(data_dir).await;
|
|
|
|
// Record why we rolled back so the UI can show it on the next boot.
|
|
if let Ok(mut state) = load_state(data_dir).await {
|
|
state.current_version = marker.previous_version.clone();
|
|
if let Err(e) = save_state(data_dir, &state).await {
|
|
tracing::warn!(error = %e, "Failed to update state after rollback");
|
|
}
|
|
}
|
|
|
|
// Restart so the old binary takes over. --no-block because we're
|
|
// the service; systemd can't wait for us to exit before starting
|
|
// the old process.
|
|
let _ = host_sudo(&["systemctl", "--no-block", "restart", "archipelago"]).await;
|
|
}
|
|
|
|
pub async fn load_state(data_dir: &Path) -> Result<UpdateState> {
|
|
let path = data_dir.join(UPDATE_STATE_FILE);
|
|
if !path.exists() {
|
|
let state = UpdateState::default();
|
|
save_state(data_dir, &state).await?;
|
|
return Ok(state);
|
|
}
|
|
let data = fs::read_to_string(&path)
|
|
.await
|
|
.context("Reading update state")?;
|
|
let mut state: UpdateState = serde_json::from_str(&data).context("Parsing update state")?;
|
|
|
|
let mut changed = false;
|
|
|
|
// Keep current_version in sync with the binary. Sideloaded nodes
|
|
// (ssh + cp /usr/local/bin/archipelago) don't touch the state file,
|
|
// so without this the running 1.7.0-alpha binary would keep seeing
|
|
// `current_version: "1.6.0-alpha"` and re-offer itself as an update.
|
|
let running = env!("CARGO_PKG_VERSION");
|
|
if state.current_version != running {
|
|
state.current_version = running.to_string();
|
|
// Binary version changed (sideload or apply). Any stored
|
|
// `available_update` is either redundant (points at the running
|
|
// version) or stale (points at a version we've already passed —
|
|
// which would surface as a "downgrade" offer in the UI). Clear
|
|
// it unconditionally; the next check_for_updates will repopulate
|
|
// if there's genuinely something newer.
|
|
state.available_update = None;
|
|
state.manifest_mirror = None;
|
|
changed = true;
|
|
}
|
|
|
|
// `update_in_progress` means a manifest OTA is downloaded and staged,
|
|
// ready for apply. Older git/self-build update paths could leave this
|
|
// flag stuck true without a staging directory, which traps the UI in an
|
|
// unrecoverable state. Heal that on every state load.
|
|
if state.update_in_progress && !has_staged_update(data_dir).await {
|
|
warn!(
|
|
staging = %data_dir.join("update-staging").display(),
|
|
"Clearing stale update_in_progress without staged OTA files"
|
|
);
|
|
state.update_in_progress = false;
|
|
changed = true;
|
|
}
|
|
|
|
if changed {
|
|
save_state(data_dir, &state).await?;
|
|
}
|
|
Ok(state)
|
|
}
|
|
|
|
/// Marker written only after EVERY component has downloaded and verified.
|
|
/// Distinguishes a complete, install-ready staging from the partial files a
|
|
/// resumable-but-failed download leaves behind.
|
|
const STAGED_COMPLETE_MARKER: &str = ".download-complete";
|
|
|
|
async fn has_staged_update(data_dir: &Path) -> bool {
|
|
// A *complete* staged update carries the marker. A partial/failed download
|
|
// leaves component files (kept for resume) but no marker, so it reads as
|
|
// "not staged" — the state self-heal then clears update_in_progress and the
|
|
// UI returns to Download instead of stranding the user on Install.
|
|
fs::metadata(data_dir.join("update-staging").join(STAGED_COMPLETE_MARKER))
|
|
.await
|
|
.is_ok()
|
|
}
|
|
|
|
pub async fn save_state(data_dir: &Path, state: &UpdateState) -> Result<()> {
|
|
let path = data_dir.join(UPDATE_STATE_FILE);
|
|
let data = serde_json::to_string_pretty(state)?;
|
|
fs::write(&path, data).await.context("Writing update state")
|
|
}
|
|
|
|
/// Check for available updates by walking the mirror list. The first
|
|
/// mirror that returns a parseable manifest with a strictly-newer
|
|
/// version wins; if no mirror offers a newer version, the node is
|
|
/// reported as up-to-date. Per-mirror we retry up to 3 times on
|
|
/// transient failures.
|
|
///
|
|
/// Manifest `download_url`s are origin-rewritten to match the mirror
|
|
/// we fetched from, so switching mirrors in the UI also switches where
|
|
/// component downloads come from — even if the publisher baked an
|
|
/// absolute URL pointing at a different server into the manifest.
|
|
pub async fn check_for_updates(data_dir: &Path) -> Result<UpdateState> {
|
|
let mut state = load_state(data_dir).await?;
|
|
|
|
info!("Checking for updates...");
|
|
let client = reqwest::Client::builder()
|
|
// Short per-attempt HTTP timeout so a wedged mirror doesn't
|
|
// delay the whole check — we'd rather move on to the next
|
|
// mirror quickly than sit waiting on a slow one. 15s covers
|
|
// slow but alive mirrors.
|
|
.timeout(std::time::Duration::from_secs(15))
|
|
.connect_timeout(std::time::Duration::from_secs(10))
|
|
.build()
|
|
.context("Failed to create HTTP client")?;
|
|
|
|
// Env override (ARCHIPELAGO_UPDATE_URL) short-circuits the mirror
|
|
// list — used on dev boxes that point at a local gitea. Otherwise
|
|
// walk the operator-configured list and fall through on failure.
|
|
let mirrors: Vec<String> = if std::env::var("ARCHIPELAGO_UPDATE_URL").is_ok() {
|
|
vec![update_manifest_url()]
|
|
} else {
|
|
load_mirrors(data_dir)
|
|
.await
|
|
.unwrap_or_else(|_| default_mirrors())
|
|
.into_iter()
|
|
.map(|m| m.url)
|
|
.collect()
|
|
};
|
|
|
|
let mut last_err: Option<String> = None;
|
|
let mut handled = false;
|
|
'mirrors: for manifest_url in mirrors.iter() {
|
|
for attempt in 1..=3u8 {
|
|
if attempt > 1 {
|
|
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
|
}
|
|
match client.get(manifest_url).send().await {
|
|
Ok(resp) if resp.status().is_success() => match resp.json::<UpdateManifest>().await
|
|
{
|
|
Ok(mut manifest) => {
|
|
rewrite_manifest_origins(&mut manifest, manifest_url);
|
|
if is_newer(&manifest.version, &state.current_version) {
|
|
info!(
|
|
current = %state.current_version,
|
|
available = %manifest.version,
|
|
mirror = %manifest_url,
|
|
"Update available"
|
|
);
|
|
state.available_update = Some(manifest);
|
|
state.manifest_mirror = Some(manifest_url.clone());
|
|
} else {
|
|
// Manifest version matches us or is behind
|
|
// us — either we're current, or this mirror
|
|
// is stale. Try the next mirror; if all are
|
|
// stale or at our version we'll fall through
|
|
// to "up to date".
|
|
debug!(
|
|
current = %state.current_version,
|
|
manifest = %manifest.version,
|
|
mirror = %manifest_url,
|
|
"No newer version in manifest"
|
|
);
|
|
state.manifest_mirror = None;
|
|
state.available_update = None;
|
|
handled = true;
|
|
continue 'mirrors;
|
|
}
|
|
handled = true;
|
|
break 'mirrors;
|
|
}
|
|
Err(e) => last_err = Some(format!("{}: parse: {}", manifest_url, e)),
|
|
},
|
|
Ok(resp) => {
|
|
last_err = Some(format!("{}: HTTP {}", manifest_url, resp.status()));
|
|
}
|
|
Err(e) => {
|
|
last_err = Some(format!("{}: {}", manifest_url, e));
|
|
}
|
|
}
|
|
}
|
|
tracing::debug!(mirror = %manifest_url, "Mirror exhausted, trying next");
|
|
}
|
|
if !handled {
|
|
if let Some(e) = last_err {
|
|
debug!("Update check failed across all mirrors: {}", e);
|
|
}
|
|
}
|
|
|
|
state.last_check = Some(chrono::Utc::now().to_rfc3339());
|
|
save_state(data_dir, &state).await?;
|
|
Ok(state)
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
pub struct MirrorTestResult {
|
|
pub reachable: bool,
|
|
pub latency_ms: u64,
|
|
pub http_status: Option<u16>,
|
|
pub error: Option<String>,
|
|
}
|
|
|
|
/// Ping a mirror's manifest URL and return reachability + wall-clock
|
|
/// latency. Used by the "Test mirror" button so operators can sanity-
|
|
/// check a newly added mirror without running a full update check.
|
|
pub async fn test_mirror(url: &str) -> MirrorTestResult {
|
|
let client = match reqwest::Client::builder()
|
|
.timeout(std::time::Duration::from_secs(10))
|
|
.connect_timeout(std::time::Duration::from_secs(5))
|
|
.build()
|
|
{
|
|
Ok(c) => c,
|
|
Err(e) => {
|
|
return MirrorTestResult {
|
|
reachable: false,
|
|
latency_ms: 0,
|
|
http_status: None,
|
|
error: Some(format!("client build failed: {}", e)),
|
|
}
|
|
}
|
|
};
|
|
let start = std::time::Instant::now();
|
|
match client.get(url).send().await {
|
|
Ok(resp) => {
|
|
let latency_ms = start.elapsed().as_millis() as u64;
|
|
let status = resp.status();
|
|
if status.is_success() {
|
|
MirrorTestResult {
|
|
reachable: true,
|
|
latency_ms,
|
|
http_status: Some(status.as_u16()),
|
|
error: None,
|
|
}
|
|
} else {
|
|
MirrorTestResult {
|
|
reachable: false,
|
|
latency_ms,
|
|
http_status: Some(status.as_u16()),
|
|
error: Some(format!("HTTP {}", status.as_u16())),
|
|
}
|
|
}
|
|
}
|
|
Err(e) => {
|
|
let latency_ms = start.elapsed().as_millis() as u64;
|
|
MirrorTestResult {
|
|
reachable: false,
|
|
latency_ms,
|
|
http_status: None,
|
|
error: Some(e.to_string()),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Get current update status without checking remote.
|
|
pub async fn get_status(data_dir: &Path) -> Result<UpdateState> {
|
|
load_state(data_dir).await
|
|
}
|
|
|
|
/// Dismiss the available update notification.
|
|
pub async fn dismiss_update(data_dir: &Path) -> Result<()> {
|
|
let mut state = load_state(data_dir).await?;
|
|
state.available_update = None;
|
|
save_state(data_dir, &state).await
|
|
}
|
|
|
|
/// Download update components to a staging directory.
|
|
/// Verifies SHA256 hash for each component.
|
|
///
|
|
/// Robustness: each component download is **resumable** via HTTP Range
|
|
/// requests and retried up to 6 times with exponential backoff. When
|
|
/// gitea drops the connection mid-stream (happens regularly at slow
|
|
/// raw-file throughput), the next attempt picks up where the previous
|
|
/// one left off instead of restarting from byte zero. SHA256 is
|
|
/// verified over the complete file at the end of each component, so a
|
|
/// partially-corrupt resume still fails cleanly.
|
|
pub async fn download_update(data_dir: &Path) -> Result<DownloadProgress> {
|
|
let mut state = load_state(data_dir).await?;
|
|
if state.available_update.is_none() {
|
|
state = check_for_updates(data_dir).await?;
|
|
}
|
|
let manifest = state
|
|
.available_update
|
|
.as_ref()
|
|
.ok_or_else(|| anyhow::anyhow!("No update is available to download"))?;
|
|
|
|
let staging_dir = data_dir.join("update-staging");
|
|
fs::create_dir_all(&staging_dir)
|
|
.await
|
|
.context("Failed to create staging dir")?;
|
|
|
|
let client = reqwest::Client::builder()
|
|
// Per-request budget; each attempt gets the full window, and a retry
|
|
// resumes via Range from the partial file (download_component_resumable),
|
|
// so this is an upper bound per attempt, not the whole download. Sized
|
|
// generously for slow machines on slow links: a ~200MB release at a
|
|
// crawling ~50KB/s is ~70min, which the old 1h budget could cut off
|
|
// mid-attempt. 3h leaves ample headroom; raising it cannot slow down or
|
|
// break a fast download (those finish well inside the old limit).
|
|
.timeout(std::time::Duration::from_secs(10800))
|
|
.connect_timeout(std::time::Duration::from_secs(60))
|
|
.build()
|
|
.context("Failed to create HTTP client")?;
|
|
|
|
let mut downloaded = 0u64;
|
|
let total_bytes: u64 = manifest.components.iter().map(|c| c.size_bytes).sum();
|
|
|
|
info!(
|
|
version = %manifest.version,
|
|
components = manifest.components.len(),
|
|
total_bytes,
|
|
staging = %staging_dir.display(),
|
|
"Starting update download"
|
|
);
|
|
|
|
// Clear any stale cancel flag from a prior aborted run, then seed
|
|
// the live counters so polls during the handshake show the right
|
|
// denominator immediately instead of 0/0 → NaN%.
|
|
DOWNLOAD_CANCEL.store(false, Ordering::Relaxed);
|
|
DOWNLOAD_TOTAL.store(total_bytes, Ordering::Relaxed);
|
|
DOWNLOAD_BYTES.store(0, Ordering::Relaxed);
|
|
DOWNLOAD_PROGRESS_AT.store(now_ms(), Ordering::Relaxed);
|
|
|
|
// Consult swarm peers only when the node has opted into DHT mode. In Origin
|
|
// mode (default) this stays empty so every component goes straight to the
|
|
// HTTP origin — instant, no-rebuild fallback while live-testing the swarm.
|
|
let update_source = load_update_source(data_dir).await;
|
|
let provide_dht = load_provide_dht(data_dir).await;
|
|
let swarm_providers = if update_source == UpdateSource::Swarm {
|
|
crate::swarm::providers()
|
|
} else {
|
|
Vec::new()
|
|
};
|
|
if update_source == UpdateSource::Swarm {
|
|
info!(
|
|
providers = swarm_providers.len(),
|
|
"Update source = DHT swarm (origin still wins as fallback)"
|
|
);
|
|
}
|
|
|
|
for component in &manifest.components {
|
|
if is_canceled() {
|
|
DOWNLOAD_TOTAL.store(0, Ordering::Relaxed);
|
|
DOWNLOAD_BYTES.store(0, Ordering::Relaxed);
|
|
anyhow::bail!("Download canceled");
|
|
}
|
|
info!(name = %component.name, url = %component.download_url, "Downloading component");
|
|
let dest = staging_dir.join(&component.name);
|
|
|
|
// DHT Phase 2: when the manifest pins a BLAKE3 digest, route the fetch
|
|
// through the swarm seam (swarm-assist, origin always wins). With no
|
|
// providers registered (iroh-swarm feature off) this is identical to
|
|
// calling the resumable HTTP origin directly — same bytes, now
|
|
// content-addressed. A swarm hit is BLAKE3-verified inside the seam;
|
|
// we still enforce the mandatory SHA-256 gate on peer bytes here and
|
|
// re-fetch from origin if a (consistency-broken) peer slips through.
|
|
let digest = component.blake3.as_deref().and_then(|b| {
|
|
let s = b.trim();
|
|
let normalized = if s.contains(':') {
|
|
s.to_string()
|
|
} else {
|
|
format!("blake3:{s}")
|
|
};
|
|
crate::content_hash::ContentDigest::parse(&normalized).ok()
|
|
});
|
|
if let Some(digest) = digest {
|
|
let client_ref = &client;
|
|
let dest_ref = &dest;
|
|
let source = crate::swarm::fetch_content_addressed(
|
|
&digest,
|
|
&swarm_providers,
|
|
&dest,
|
|
move || async move {
|
|
download_component_resumable(client_ref, component, dest_ref, downloaded).await
|
|
},
|
|
)
|
|
.await?;
|
|
if source == crate::swarm::FetchSource::Swarm {
|
|
let bytes = tokio::fs::read(&dest).await?;
|
|
if crate::content_hash::sha256_hex(&bytes) != component.sha256 {
|
|
warn!(
|
|
name = %component.name,
|
|
"swarm bytes passed BLAKE3 but failed the SHA-256 manifest gate — re-fetching from origin"
|
|
);
|
|
let _ = tokio::fs::remove_file(&dest).await;
|
|
download_component_resumable(&client, component, &dest, downloaded).await?;
|
|
}
|
|
}
|
|
// This is a PUBLIC release blob and it just passed both the BLAKE3 and
|
|
// SHA-256 gates — announce that we can now seed it to peers. Gated on
|
|
// the node's "provide to swarm" preference (default on); best-effort,
|
|
// inert unless the iroh swarm is active, and never blocks the install.
|
|
// Independent of fetch source: an origin-fetching node can still seed.
|
|
if provide_dht {
|
|
crate::swarm::announce_held_blob(&digest.hex, &dest).await;
|
|
}
|
|
} else {
|
|
download_component_resumable(&client, component, &dest, downloaded).await?;
|
|
}
|
|
downloaded += component.size_bytes;
|
|
DOWNLOAD_BYTES.store(downloaded, Ordering::Relaxed);
|
|
info!(
|
|
name = %component.name,
|
|
bytes = component.size_bytes,
|
|
"Component downloaded and verified"
|
|
);
|
|
}
|
|
|
|
// Mark update as downloaded. Write the completion marker FIRST so a crash
|
|
// between the two can't leave update_in_progress=true without the marker
|
|
// (which the self-heal would then clear, harmlessly forcing a re-download).
|
|
let _ = fs::write(staging_dir.join(STAGED_COMPLETE_MARKER), b"1").await;
|
|
let mut state = load_state(data_dir).await?;
|
|
state.update_in_progress = true;
|
|
save_state(data_dir, &state).await?;
|
|
|
|
Ok(DownloadProgress {
|
|
total_bytes,
|
|
downloaded_bytes: downloaded,
|
|
components_downloaded: manifest.components.len(),
|
|
staging_dir: staging_dir.to_string_lossy().to_string(),
|
|
})
|
|
}
|
|
|
|
/// Download a single component to `dest`, resuming from the end of
|
|
/// any existing partial file via a Range request. Retries up to 6
|
|
/// times with exponential backoff (5s, 15s, 30s, 60s, 120s, 180s).
|
|
/// Verifies the SHA256 over the full file at the end.
|
|
async fn download_component_resumable(
|
|
client: &reqwest::Client,
|
|
component: &ComponentUpdate,
|
|
dest: &Path,
|
|
prior_total: u64,
|
|
) -> Result<()> {
|
|
use sha2::{Digest, Sha256};
|
|
use tokio::io::AsyncWriteExt;
|
|
const MAX_ATTEMPTS: u32 = 6;
|
|
const BACKOFFS: [u64; 5] = [5, 15, 30, 60, 120];
|
|
|
|
let mut last_err: Option<anyhow::Error> = None;
|
|
for attempt in 1..=MAX_ATTEMPTS {
|
|
let existing_len = match tokio::fs::metadata(dest).await {
|
|
Ok(m) => m.len(),
|
|
Err(_) => 0,
|
|
};
|
|
if existing_len >= component.size_bytes {
|
|
// File is already complete — break out and go verify.
|
|
break;
|
|
}
|
|
if attempt > 1 {
|
|
let delay = BACKOFFS[(attempt as usize - 2).min(BACKOFFS.len() - 1)];
|
|
tracing::warn!(
|
|
name = %component.name,
|
|
attempt,
|
|
resume_at = existing_len,
|
|
"Retrying download in {}s (previous error: {})",
|
|
delay,
|
|
last_err.as_ref().map(|e| e.to_string()).unwrap_or_default()
|
|
);
|
|
// Sleep in 500ms slices so a Cancel during backoff wakes
|
|
// promptly instead of waiting out the full exponential window.
|
|
let slices = delay * 2;
|
|
for _ in 0..slices {
|
|
if is_canceled() {
|
|
anyhow::bail!("Download canceled");
|
|
}
|
|
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
|
|
}
|
|
}
|
|
if is_canceled() {
|
|
anyhow::bail!("Download canceled");
|
|
}
|
|
|
|
let mut req = client.get(&component.download_url);
|
|
if existing_len > 0 {
|
|
req = req.header("Range", format!("bytes={}-", existing_len));
|
|
}
|
|
let resp = match req.send().await {
|
|
Ok(r) => r,
|
|
Err(e) => {
|
|
last_err = Some(anyhow::anyhow!(e));
|
|
continue;
|
|
}
|
|
};
|
|
let status = resp.status();
|
|
// 200 OK on a fresh start, 206 Partial Content on a resume
|
|
// that the server honoured. Anything else is a problem.
|
|
let is_resume = existing_len > 0 && status == reqwest::StatusCode::PARTIAL_CONTENT;
|
|
let is_fresh = existing_len == 0 && status.is_success();
|
|
let server_ignored_range = existing_len > 0 && status == reqwest::StatusCode::OK;
|
|
if !is_resume && !is_fresh && !server_ignored_range {
|
|
last_err = Some(anyhow::anyhow!(
|
|
"HTTP {} for {} (resume offset {})",
|
|
status,
|
|
component.name,
|
|
existing_len
|
|
));
|
|
continue;
|
|
}
|
|
// If the server ignored Range (returned 200 with the full
|
|
// body), wipe the partial file and start over.
|
|
let mut file = if server_ignored_range {
|
|
let _ = tokio::fs::remove_file(dest).await;
|
|
tokio::fs::OpenOptions::new()
|
|
.create(true)
|
|
.write(true)
|
|
.truncate(true)
|
|
.open(dest)
|
|
.await
|
|
.context("open staging file")?
|
|
} else if is_resume {
|
|
tokio::fs::OpenOptions::new()
|
|
.append(true)
|
|
.open(dest)
|
|
.await
|
|
.context("open staging file for append")?
|
|
} else {
|
|
tokio::fs::OpenOptions::new()
|
|
.create(true)
|
|
.write(true)
|
|
.truncate(true)
|
|
.open(dest)
|
|
.await
|
|
.context("open staging file")?
|
|
};
|
|
|
|
let mut resp = resp;
|
|
let mut stream_err = false;
|
|
let mut on_disk = existing_len;
|
|
let mut canceled = false;
|
|
loop {
|
|
if is_canceled() {
|
|
canceled = true;
|
|
break;
|
|
}
|
|
match resp.chunk().await {
|
|
Ok(Some(bytes)) => {
|
|
if let Err(e) = file.write_all(&bytes).await {
|
|
last_err = Some(anyhow::anyhow!(e).context("writing chunk"));
|
|
stream_err = true;
|
|
break;
|
|
}
|
|
on_disk += bytes.len() as u64;
|
|
DOWNLOAD_BYTES.store(
|
|
prior_total + on_disk.min(component.size_bytes),
|
|
Ordering::Relaxed,
|
|
);
|
|
DOWNLOAD_PROGRESS_AT.store(now_ms(), Ordering::Relaxed);
|
|
}
|
|
Ok(None) => break, // stream ended cleanly
|
|
Err(e) => {
|
|
last_err = Some(anyhow::anyhow!(e).context("reading chunk"));
|
|
stream_err = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if canceled {
|
|
let _ = file.flush().await;
|
|
drop(file);
|
|
DOWNLOAD_TOTAL.store(0, Ordering::Relaxed);
|
|
DOWNLOAD_BYTES.store(0, Ordering::Relaxed);
|
|
anyhow::bail!("Download canceled");
|
|
}
|
|
let _ = file.flush().await;
|
|
let _ = file.sync_all().await;
|
|
drop(file);
|
|
if stream_err {
|
|
continue;
|
|
}
|
|
|
|
// Stream ended cleanly. If we've got the expected size, verify
|
|
// the SHA and succeed. Otherwise loop to resume from the new
|
|
// offset on the next attempt.
|
|
let final_len = tokio::fs::metadata(dest)
|
|
.await
|
|
.map(|m| m.len())
|
|
.unwrap_or(0);
|
|
if final_len < component.size_bytes {
|
|
last_err = Some(anyhow::anyhow!(
|
|
"download truncated: got {} of {} bytes",
|
|
final_len,
|
|
component.size_bytes
|
|
));
|
|
continue;
|
|
}
|
|
|
|
// Full file — verify hash.
|
|
let bytes = tokio::fs::read(dest)
|
|
.await
|
|
.context("read staging file for hash check")?;
|
|
let hash = hex::encode(Sha256::digest(&bytes));
|
|
if hash == component.sha256 {
|
|
// DHT Phase 1: if the manifest also pins a BLAKE3 digest, it must
|
|
// match too. SHA-256 stays the mandatory gate during migration;
|
|
// BLAKE3 is the hash the iroh swarm will fetch/verify by, so a
|
|
// present-but-wrong BLAKE3 means the bytes aren't swarm-consistent
|
|
// — treat it like a SHA mismatch and re-download.
|
|
if let Some(b3) = component.blake3.as_deref() {
|
|
let expected = b3.trim().strip_prefix("blake3:").unwrap_or(b3.trim());
|
|
let actual = crate::content_hash::blake3_hex(&bytes);
|
|
if !actual.eq_ignore_ascii_case(expected) {
|
|
let _ = tokio::fs::remove_file(dest).await;
|
|
last_err = Some(anyhow::anyhow!(
|
|
"BLAKE3 mismatch for {}: expected {}, got {}",
|
|
component.name,
|
|
expected,
|
|
actual
|
|
));
|
|
continue;
|
|
}
|
|
}
|
|
return Ok(());
|
|
}
|
|
// SHA mismatch — the file on disk is garbage. Nuke it and
|
|
// start over from scratch on the next attempt.
|
|
let _ = tokio::fs::remove_file(dest).await;
|
|
last_err = Some(anyhow::anyhow!(
|
|
"SHA256 mismatch for {}: expected {}, got {}",
|
|
component.name,
|
|
component.sha256,
|
|
hash
|
|
));
|
|
}
|
|
Err(last_err.unwrap_or_else(|| anyhow::anyhow!("download failed without a captured error")))
|
|
}
|
|
|
|
/// Cancel an in-flight download. Sets the cancellation flag so the
|
|
/// download loop bails out at the next chunk or backoff boundary, then
|
|
/// zeros the live counters and wipes the staging directory so the UI
|
|
/// sees "no active download" immediately and the next attempt starts
|
|
/// clean. Safe to call even when no download is running.
|
|
pub async fn cancel_download(data_dir: &Path) -> Result<()> {
|
|
DOWNLOAD_CANCEL.store(true, Ordering::Relaxed);
|
|
DOWNLOAD_BYTES.store(0, Ordering::Relaxed);
|
|
DOWNLOAD_TOTAL.store(0, Ordering::Relaxed);
|
|
let staging = data_dir.join("update-staging");
|
|
let wiped = if staging.exists() {
|
|
tokio::fs::remove_dir_all(&staging).await.is_ok()
|
|
} else {
|
|
false
|
|
};
|
|
// Clear the "downloaded, ready to apply" marker too — a canceled
|
|
// download is not a staged update.
|
|
let mut cleared_marker = false;
|
|
if let Ok(mut state) = load_state(data_dir).await {
|
|
if state.update_in_progress {
|
|
state.update_in_progress = false;
|
|
let _ = save_state(data_dir, &state).await;
|
|
cleared_marker = true;
|
|
}
|
|
}
|
|
info!(
|
|
staging = %staging.display(),
|
|
wiped,
|
|
cleared_marker,
|
|
"Update download canceled"
|
|
);
|
|
Ok(())
|
|
}
|
|
|
|
/// Run a command as root, but *outside* the archipelago service's
|
|
/// restricted mount namespace.
|
|
///
|
|
/// archipelago.service uses `ProtectSystem=strict`, which makes `/opt`
|
|
/// and `/usr` read-only inside the service — and sudo inherits the
|
|
/// namespace, so `sudo mv /opt/archipelago/...` fails with EROFS even
|
|
/// though sudo itself is root. `systemd-run --wait` spawns a transient
|
|
/// service unit that inherits systemd's default protections (i.e. none
|
|
/// of ours), escaping the namespace.
|
|
pub(crate) async fn host_sudo(args: &[&str]) -> Result<std::process::ExitStatus> {
|
|
let mut full: Vec<&str> = vec![
|
|
"systemd-run",
|
|
"--wait",
|
|
"--quiet",
|
|
"--collect",
|
|
"--pipe",
|
|
"--",
|
|
];
|
|
full.extend_from_slice(args);
|
|
tokio::process::Command::new("sudo")
|
|
.args(&full)
|
|
.status()
|
|
.await
|
|
.context("sudo systemd-run spawn failed")
|
|
}
|
|
|
|
/// Apply a downloaded update. Backs up current binaries, replaces with staged versions.
|
|
pub async fn apply_update(data_dir: &Path) -> Result<()> {
|
|
let staging_dir = data_dir.join("update-staging");
|
|
if !staging_dir.exists() {
|
|
anyhow::bail!("No staged update found. Download first.");
|
|
}
|
|
|
|
let backup_dir = data_dir.join("update-backup");
|
|
fs::create_dir_all(&backup_dir)
|
|
.await
|
|
.context("Failed to create backup dir")?;
|
|
|
|
info!(
|
|
staging = %staging_dir.display(),
|
|
backup = %backup_dir.display(),
|
|
"Applying staged update"
|
|
);
|
|
|
|
// Back up current backend binary
|
|
let current_binary = Path::new("/usr/local/bin/archipelago");
|
|
if current_binary.exists() {
|
|
let backup_path = backup_dir.join("archipelago");
|
|
// A leftover backup from an earlier rollback can be root-owned
|
|
// (rollback used to chown it in place), and fs::copy O_TRUNCs the
|
|
// existing file — EACCES as the service user, wedging every apply
|
|
// (seen on .116, v1.7.86 OTA). Unlink first; the dir is
|
|
// service-owned so unlink works even when the file isn't ours.
|
|
if backup_path.exists() {
|
|
if let Err(e) = fs::remove_file(&backup_path).await {
|
|
tracing::warn!(error = %e, "unlink of stale binary backup failed, retrying via host_sudo");
|
|
let _ = host_sudo(&["rm", "-f", &backup_path.to_string_lossy()]).await;
|
|
}
|
|
}
|
|
fs::copy(current_binary, &backup_path)
|
|
.await
|
|
.context("Failed to backup current binary")?;
|
|
info!("Current binary backed up");
|
|
}
|
|
|
|
// Apply staged components
|
|
let mut entries = fs::read_dir(&staging_dir)
|
|
.await
|
|
.context("Failed to read staging dir")?;
|
|
|
|
while let Some(entry) = entries.next_entry().await? {
|
|
let name = entry.file_name().to_string_lossy().to_string();
|
|
let src = entry.path();
|
|
|
|
match name.as_str() {
|
|
"archipelago" => {
|
|
// Two namespace gotchas this block works around:
|
|
// 1. We're running FROM /usr/local/bin/archipelago, so
|
|
// `install`/`cp` (O_TRUNC + write) fail with ETXTBSY.
|
|
// Use `mv`, which is atomic rename() and tolerates a
|
|
// busy destination.
|
|
// 2. archipelago.service sets ProtectSystem=strict, so
|
|
// even `sudo mv` into /usr/local/bin/ fails EROFS —
|
|
// sudo inherits the service's mount namespace. Route
|
|
// the rename through systemd-run so it runs in a
|
|
// transient unit with default protections.
|
|
let staged = src.to_string_lossy().to_string();
|
|
let _ = host_sudo(&["chmod", "0755", &staged]).await;
|
|
let _ = host_sudo(&["chown", "root:root", &staged]).await;
|
|
let status = host_sudo(&["mv", &staged, "/usr/local/bin/archipelago"])
|
|
.await
|
|
.with_context(|| format!("Failed to spawn mv for {}", name))?;
|
|
if !status.success() {
|
|
anyhow::bail!(
|
|
"mv into /usr/local/bin failed for {} (exit {:?})",
|
|
name,
|
|
status.code()
|
|
);
|
|
}
|
|
info!(name = %name, "Backend binary applied");
|
|
}
|
|
_ if name.contains("frontend") && name.ends_with(".tar.gz") => {
|
|
// Tarball contents are the *inside* of web-ui/ (root entries
|
|
// `./test-aiui.html`, `./assets/`, ...). Extract into a
|
|
// uniquely-named staging dir, then mv into place. No `rm
|
|
// -rf` pre-cleanup — that's what hit transient EROFS on
|
|
// .198 and aborted the apply mid-flight.
|
|
let ts = chrono::Utc::now().timestamp_millis();
|
|
let staging_new = format!("/opt/archipelago/web-ui.new.{}", ts);
|
|
let staging_old = format!("/opt/archipelago/web-ui.old.{}", ts);
|
|
let web_ui = "/opt/archipelago/web-ui";
|
|
let backup_path = "/opt/archipelago/web-ui.bak";
|
|
|
|
// All sudo calls that touch /opt/archipelago go through
|
|
// host_sudo so they see a normal root mount namespace.
|
|
let mk = host_sudo(&["mkdir", "-p", &staging_new])
|
|
.await
|
|
.context("Failed to create frontend staging dir")?;
|
|
if !mk.success() {
|
|
anyhow::bail!("mkdir {} failed", staging_new);
|
|
}
|
|
let extract =
|
|
host_sudo(&["tar", "-xzf", &src.to_string_lossy(), "-C", &staging_new])
|
|
.await
|
|
.with_context(|| format!("Failed to extract {}", name))?;
|
|
if !extract.success() {
|
|
let _ = host_sudo(&["rm", "-rf", &staging_new]).await;
|
|
anyhow::bail!("tar extraction failed for {}", name);
|
|
}
|
|
let _ = host_sudo(&["chown", "-R", "archipelago:archipelago", &staging_new]).await;
|
|
|
|
// Set world-readable perms so nginx (runs as www-data)
|
|
// can stat + serve the files. Without this, the tar
|
|
// extraction inherits the staging-dir's 700 mode and
|
|
// nginx returns 403/500 for every request after the
|
|
// swap — exactly what bit .116 on the v1.7.38 rollout.
|
|
let _ = host_sudo(&["chmod", "755", &staging_new]).await;
|
|
let _ = host_sudo(&[
|
|
"find",
|
|
&staging_new,
|
|
"-type",
|
|
"d",
|
|
"-exec",
|
|
"chmod",
|
|
"755",
|
|
"{}",
|
|
"+",
|
|
])
|
|
.await;
|
|
let _ = host_sudo(&[
|
|
"find",
|
|
&staging_new,
|
|
"-type",
|
|
"f",
|
|
"-exec",
|
|
"chmod",
|
|
"644",
|
|
"{}",
|
|
"+",
|
|
])
|
|
.await;
|
|
|
|
// Preserve paths that are installed outside the Vue build
|
|
// (baked in by the ISO or sibling installers) and so
|
|
// aren't in the new tarball. Without this copy, every OTA
|
|
// wipes them — notably aiui/ (Claude Code sidebar) and
|
|
// the companion APK. `cp -a` preserves mode/ownership.
|
|
for preserved in ["aiui", "archipelago-companion.apk"] {
|
|
let src = format!("{}/{}", web_ui, preserved);
|
|
let dst = format!("{}/{}", staging_new, preserved);
|
|
// Only preserve the old copy if the new tarball
|
|
// doesn't already ship a fresher one.
|
|
if Path::new(&src).exists() && !Path::new(&dst).exists() {
|
|
let _ = host_sudo(&["cp", "-a", &src, &dst]).await;
|
|
}
|
|
}
|
|
|
|
// Swap: mv current web-ui aside, then mv new into place.
|
|
if Path::new(web_ui).exists() {
|
|
let mv_old = host_sudo(&["mv", web_ui, &staging_old])
|
|
.await
|
|
.context("Failed to rotate old web-ui")?;
|
|
if !mv_old.success() {
|
|
anyhow::bail!("failed to move old web-ui aside");
|
|
}
|
|
}
|
|
let mv_new = host_sudo(&["mv", &staging_new, web_ui])
|
|
.await
|
|
.context("Failed to swap new web-ui into place")?;
|
|
if !mv_new.success() {
|
|
if Path::new(&staging_old).exists() {
|
|
let _ = host_sudo(&["mv", &staging_old, web_ui]).await;
|
|
}
|
|
anyhow::bail!("failed to move new web-ui into place");
|
|
}
|
|
|
|
// Rotate previous rollback aside and install this apply's
|
|
// old copy as the new rollback.
|
|
if Path::new(&staging_old).exists() {
|
|
if Path::new(backup_path).exists() {
|
|
let _ = host_sudo(&["mv", backup_path, &format!("{}.{}", backup_path, ts)])
|
|
.await;
|
|
}
|
|
let _ = host_sudo(&["mv", &staging_old, backup_path]).await;
|
|
}
|
|
info!(name = %name, "Frontend archive extracted to /opt/archipelago/web-ui");
|
|
}
|
|
_ if name.contains("runtime") && name.ends_with(".tar.gz") => {
|
|
let ts = chrono::Utc::now().timestamp_millis();
|
|
let staging_new = format!("/opt/archipelago/runtime.new.{}", ts);
|
|
let archive = src.to_string_lossy().to_string();
|
|
|
|
let mk = host_sudo(&["mkdir", "-p", &staging_new])
|
|
.await
|
|
.context("Failed to create runtime staging dir")?;
|
|
if !mk.success() {
|
|
anyhow::bail!("mkdir {} failed", staging_new);
|
|
}
|
|
|
|
let extract = host_sudo(&["tar", "-xzf", &archive, "-C", &staging_new])
|
|
.await
|
|
.with_context(|| format!("Failed to extract {}", name))?;
|
|
if !extract.success() {
|
|
let _ = host_sudo(&["rm", "-rf", &staging_new]).await;
|
|
anyhow::bail!("tar extraction failed for {}", name);
|
|
}
|
|
|
|
let runtime_paths = [
|
|
("apps", "apps"),
|
|
("scripts", "scripts"),
|
|
("docker", "docker"),
|
|
(
|
|
"image-recipe/configs/archipelago-doctor.service",
|
|
"archipelago-doctor.service",
|
|
),
|
|
(
|
|
"image-recipe/configs/archipelago-doctor.timer",
|
|
"archipelago-doctor.timer",
|
|
),
|
|
];
|
|
|
|
for (relative, label) in runtime_paths {
|
|
let staged_path = format!("{}/{}", staging_new, relative);
|
|
if !Path::new(&staged_path).exists() {
|
|
tracing::debug!(path = %relative, "Runtime artifact path absent, skipping");
|
|
continue;
|
|
}
|
|
|
|
match label {
|
|
"apps" | "scripts" | "docker" => {
|
|
let dest = format!("/opt/archipelago/{}", label);
|
|
let tmp_dest =
|
|
format!("{}.new.{}", dest, chrono::Utc::now().timestamp_millis());
|
|
let _ = host_sudo(&["mkdir", "-p", &tmp_dest]).await;
|
|
let staged_dot = format!("{}/.", staged_path);
|
|
let copy = host_sudo(&["cp", "-a", &staged_dot, &tmp_dest])
|
|
.await
|
|
.with_context(|| format!("Failed to copy runtime {}", label))?;
|
|
if !copy.success() {
|
|
let _ = host_sudo(&["rm", "-rf", &tmp_dest]).await;
|
|
anyhow::bail!("runtime copy failed for {}", label);
|
|
}
|
|
let _ = host_sudo(&["mkdir", "-p", &dest]).await;
|
|
let clean = host_sudo(&[
|
|
"find",
|
|
&dest,
|
|
"-mindepth",
|
|
"1",
|
|
"-maxdepth",
|
|
"1",
|
|
"-exec",
|
|
"rm",
|
|
"-rf",
|
|
"{}",
|
|
"+",
|
|
])
|
|
.await
|
|
.with_context(|| format!("Failed to clean runtime {}", label))?;
|
|
if !clean.success() {
|
|
let _ = host_sudo(&["rm", "-rf", &tmp_dest]).await;
|
|
anyhow::bail!("runtime clean failed for {}", label);
|
|
}
|
|
let tmp_dot = format!("{}/.", tmp_dest);
|
|
let promote = host_sudo(&["cp", "-a", &tmp_dot, &dest])
|
|
.await
|
|
.with_context(|| format!("Failed to promote runtime {}", label))?;
|
|
let _ = host_sudo(&["rm", "-rf", &tmp_dest]).await;
|
|
if !promote.success() {
|
|
anyhow::bail!("runtime promote failed for {}", label);
|
|
}
|
|
if label == "scripts" {
|
|
let _ = host_sudo(&[
|
|
"find", &dest, "-type", "f", "-name", "*.sh", "-exec", "chmod",
|
|
"755", "{}", "+",
|
|
])
|
|
.await;
|
|
}
|
|
}
|
|
"archipelago-doctor.service" | "archipelago-doctor.timer" => {
|
|
let dest = format!("/etc/systemd/system/{}", label);
|
|
let install = host_sudo(&["install", "-m", "644", &staged_path, &dest])
|
|
.await
|
|
.with_context(|| format!("Failed to install {}", label))?;
|
|
if !install.success() {
|
|
anyhow::bail!("runtime unit install failed for {}", label);
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
if Path::new(&format!("{}/scripts/image-versions.sh", staging_new)).exists() {
|
|
let _ = host_sudo(&[
|
|
"cp",
|
|
&format!("{}/scripts/image-versions.sh", staging_new),
|
|
"/opt/archipelago/image-versions.sh",
|
|
])
|
|
.await;
|
|
}
|
|
|
|
let _ = host_sudo(&["systemctl", "daemon-reload"]).await;
|
|
let _ =
|
|
host_sudo(&["systemctl", "enable", "--now", "archipelago-doctor.timer"]).await;
|
|
let _ = host_sudo(&["rm", "-rf", &staging_new]).await;
|
|
info!(name = %name, "Runtime assets applied to /opt/archipelago");
|
|
}
|
|
_ => {
|
|
debug!(name = %name, "Unknown component, skipping");
|
|
}
|
|
}
|
|
}
|
|
|
|
// Update state
|
|
let previous_version = {
|
|
let state = load_state(data_dir).await?;
|
|
state.current_version.clone()
|
|
};
|
|
let mut state = load_state(data_dir).await?;
|
|
let new_version = if let Some(manifest) = &state.available_update {
|
|
state.current_version = manifest.version.clone();
|
|
manifest.version.clone()
|
|
} else {
|
|
state.current_version.clone()
|
|
};
|
|
state.available_update = None;
|
|
state.update_in_progress = false;
|
|
state.rollback_available = true;
|
|
save_state(data_dir, &state).await?;
|
|
|
|
// Write the post-OTA verification marker BEFORE we schedule the
|
|
// restart. The new binary will read it on startup, probe the
|
|
// frontend, and auto-rollback if nginx is serving 5xx. Covers the
|
|
// class of failure where "apply succeeds, restart succeeds, but
|
|
// the UI is dead" (v1.7.38/39 tarball-perms bug). Best-effort —
|
|
// a failed marker write shouldn't abort the apply.
|
|
let marker = PendingVerification {
|
|
applied_at: chrono::Utc::now().to_rfc3339(),
|
|
new_version,
|
|
previous_version,
|
|
deadline_ts: chrono::Utc::now().timestamp() + PENDING_VERIFY_WINDOW_SECS as i64 + 60,
|
|
};
|
|
if let Err(e) = write_pending_verification(data_dir, &marker).await {
|
|
tracing::warn!(error = %e, "Failed to write post-OTA verify marker — rollback disabled for this OTA");
|
|
} else {
|
|
info!("Post-OTA verify marker written; new binary will probe on boot");
|
|
}
|
|
|
|
// Clean staging
|
|
let _ = fs::remove_dir_all(&staging_dir).await;
|
|
|
|
info!("Update applied — scheduling service restart in 2s so the RPC reply lands first");
|
|
|
|
// Restart asynchronously so the JSON-RPC response actually reaches the
|
|
// UI before systemd kills us. --no-block makes sure systemctl doesn't
|
|
// try to wait for the current service (us) to exit cleanly before
|
|
// starting the new process — it would deadlock otherwise.
|
|
tokio::spawn(async {
|
|
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
|
|
// systemctl talks to PID 1 over D-Bus — doesn't need the host
|
|
// mount namespace, but routing through host_sudo keeps the
|
|
// apply flow's sudo calls uniform.
|
|
let _ = host_sudo(&["systemctl", "--no-block", "restart", "archipelago"]).await;
|
|
});
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Rollback to the previous version from backup.
|
|
pub async fn rollback_update(data_dir: &Path) -> Result<()> {
|
|
let backup_dir = data_dir.join("update-backup");
|
|
if !backup_dir.exists() {
|
|
anyhow::bail!("No rollback backup available");
|
|
}
|
|
|
|
let backup_binary = backup_dir.join("archipelago");
|
|
if backup_binary.exists() {
|
|
// Same two namespace gotchas as apply_update()'s binary swap:
|
|
// `cp` straight onto the running binary is O_TRUNC and fails
|
|
// ETXTBSY (exit 1 — exactly what broke the .116 rollback), and
|
|
// plain sudo inherits ProtectSystem=strict, so everything goes
|
|
// through host_sudo. Copy to a temp name on the same filesystem,
|
|
// fix ownership on the TEMP file (never the stored backup — an
|
|
// in-place chown is what later wedged apply_update), then mv,
|
|
// which is an atomic rename and tolerates a busy destination.
|
|
let backup_str = backup_binary.to_string_lossy().to_string();
|
|
let tmp = format!(
|
|
"/usr/local/bin/.archipelago.rollback.{}",
|
|
chrono::Utc::now().timestamp_millis()
|
|
);
|
|
let copy = host_sudo(&["cp", &backup_str, &tmp])
|
|
.await
|
|
.context("Failed to stage backup binary via host_sudo")?;
|
|
if !copy.success() {
|
|
anyhow::bail!(
|
|
"cp backup binary to {} failed (exit {:?})",
|
|
tmp,
|
|
copy.code()
|
|
);
|
|
}
|
|
let _ = host_sudo(&["chmod", "0755", &tmp]).await;
|
|
let _ = host_sudo(&["chown", "root:root", &tmp]).await;
|
|
let status = host_sudo(&["mv", &tmp, "/usr/local/bin/archipelago"])
|
|
.await
|
|
.context("Failed to restore backup binary via host_sudo")?;
|
|
if !status.success() {
|
|
let _ = host_sudo(&["rm", "-f", &tmp]).await;
|
|
anyhow::bail!(
|
|
"mv backup binary into /usr/local/bin failed (exit {:?})",
|
|
status.code()
|
|
);
|
|
}
|
|
info!("Binary rolled back to previous version");
|
|
}
|
|
|
|
let mut state = load_state(data_dir).await?;
|
|
state.rollback_available = false;
|
|
save_state(data_dir, &state).await?;
|
|
|
|
let _ = fs::remove_dir_all(&backup_dir).await;
|
|
|
|
info!("Rollback complete. Restart service to take effect.");
|
|
Ok(())
|
|
}
|
|
|
|
#[derive(Debug, Serialize, Deserialize)]
|
|
pub struct DownloadProgress {
|
|
pub total_bytes: u64,
|
|
pub downloaded_bytes: u64,
|
|
pub components_downloaded: usize,
|
|
pub staging_dir: String,
|
|
}
|
|
|
|
/// Set the update schedule preference.
|
|
pub async fn set_schedule(data_dir: &Path, schedule: UpdateSchedule) -> Result<()> {
|
|
let mut state = load_state(data_dir).await?;
|
|
state.schedule = schedule;
|
|
save_state(data_dir, &state).await?;
|
|
info!(schedule = ?schedule, "Update schedule changed");
|
|
Ok(())
|
|
}
|
|
|
|
/// Get the current schedule.
|
|
pub async fn get_schedule(data_dir: &Path) -> Result<UpdateSchedule> {
|
|
let state = load_state(data_dir).await?;
|
|
Ok(state.schedule)
|
|
}
|
|
|
|
/// Background update scheduler. Runs in a loop, checking/applying based on schedule.
|
|
/// Call this once at startup via `tokio::spawn`.
|
|
pub async fn run_update_scheduler(data_dir: std::path::PathBuf) {
|
|
use tokio::time::{interval, Duration};
|
|
|
|
// Check every hour; act based on schedule setting
|
|
let mut tick = interval(Duration::from_secs(3600));
|
|
|
|
// Refresh the app catalog once at startup so per-app "update available"
|
|
// badges appear without waiting for the first hourly tick.
|
|
if let Err(e) = crate::container::app_catalog::refresh_catalog(&data_dir).await {
|
|
debug!(
|
|
"Update scheduler: initial app-catalog refresh failed: {}",
|
|
e
|
|
);
|
|
}
|
|
|
|
loop {
|
|
tick.tick().await;
|
|
|
|
// App-catalog refresh is INDEPENDENT of the OTA schedule below: it only
|
|
// populates per-app update availability (the "Update" button still has
|
|
// to be clicked — nothing auto-applies). Best-effort; on failure the
|
|
// previously cached catalog stays in place (origin-always-wins).
|
|
if let Err(e) = crate::container::app_catalog::refresh_catalog(&data_dir).await {
|
|
debug!("Update scheduler: app-catalog refresh failed: {}", e);
|
|
}
|
|
|
|
let state = match load_state(&data_dir).await {
|
|
Ok(s) => s,
|
|
Err(e) => {
|
|
debug!("Update scheduler: failed to load state: {}", e);
|
|
continue;
|
|
}
|
|
};
|
|
|
|
match state.schedule {
|
|
UpdateSchedule::Manual => {
|
|
debug!("Update scheduler: manual mode, skipping");
|
|
continue;
|
|
}
|
|
UpdateSchedule::DailyCheck => {
|
|
// Only check once per day
|
|
if let Some(ref last) = state.last_check {
|
|
if let Ok(last_time) = chrono::DateTime::parse_from_rfc3339(last) {
|
|
let elapsed = chrono::Utc::now() - last_time.with_timezone(&chrono::Utc);
|
|
if elapsed.num_hours() < 24 {
|
|
debug!("Update scheduler: checked recently, skipping");
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
info!("Update scheduler: running daily check");
|
|
if let Err(e) = check_for_updates(&data_dir).await {
|
|
debug!("Update scheduler: check failed: {}", e);
|
|
}
|
|
}
|
|
UpdateSchedule::AutoApply => {
|
|
// Auto-apply: check, download, and apply during 3 AM window
|
|
let hour = chrono::Local::now().hour();
|
|
if hour != 3 {
|
|
// Still do daily check outside the window
|
|
if let Some(ref last) = state.last_check {
|
|
if let Ok(last_time) = chrono::DateTime::parse_from_rfc3339(last) {
|
|
let elapsed =
|
|
chrono::Utc::now() - last_time.with_timezone(&chrono::Utc);
|
|
if elapsed.num_hours() < 24 {
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
info!("Update scheduler: auto-apply check (outside window)");
|
|
if let Err(e) = check_for_updates(&data_dir).await {
|
|
debug!("Update scheduler: check failed: {}", e);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// 3 AM — check, download, and apply
|
|
info!("Update scheduler: 3 AM auto-apply window");
|
|
match check_for_updates(&data_dir).await {
|
|
Ok(s) if s.available_update.is_some() => {
|
|
info!("Update scheduler: downloading update");
|
|
if let Err(e) = download_update(&data_dir).await {
|
|
debug!("Update scheduler: download failed: {}", e);
|
|
continue;
|
|
}
|
|
info!("Update scheduler: applying update");
|
|
if let Err(e) = apply_update(&data_dir).await {
|
|
debug!("Update scheduler: apply failed: {}", e);
|
|
continue;
|
|
}
|
|
info!(
|
|
"Update scheduler: update applied, restart scheduled by apply_update"
|
|
);
|
|
// apply_update has already spawned a 2s-delayed
|
|
// `systemctl restart archipelago`. Don't call
|
|
// std::process::exit here — that kills the runtime
|
|
// before the spawned restart task runs, and since
|
|
// the unit is Restart=on-failure a clean exit(0)
|
|
// leaves the service dead. Fall through; the
|
|
// scheduled restart will bring us back cleanly.
|
|
return;
|
|
}
|
|
Ok(_) => {
|
|
debug!("Update scheduler: no update available");
|
|
}
|
|
Err(e) => {
|
|
debug!("Update scheduler: check failed: {}", e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
fn test_update_schedule_default_is_daily_check() {
    // The `Default` impl must yield the daily-check variant.
    assert_eq!(UpdateSchedule::default(), UpdateSchedule::DailyCheck);
}
|
|
|
|
#[test]
fn test_manifest_origin_parses_https() {
    // Scheme + host are kept; the path is dropped.
    let manifest_url =
        "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/manifest.json";
    let origin = manifest_origin(manifest_url);
    assert_eq!(origin, Some("https://git.tx1138.com".to_string()));
}
|
|
|
|
#[test]
fn test_manifest_origin_parses_http_with_port() {
    // An explicit port is part of the origin and must survive.
    let manifest_url =
        "http://23.182.128.160:3000/lfg2025/archy/raw/branch/main/releases/manifest.json";
    let origin = manifest_origin(manifest_url);
    assert_eq!(origin, Some("http://23.182.128.160:3000".to_string()));
}
|
|
|
|
#[test]
fn test_manifest_origin_rejects_garbage() {
    // Neither free text nor a non-HTTP(S) scheme has a usable origin.
    for rejected in ["not a url", "ftp://git.tx1138.com/x"] {
        assert_eq!(manifest_origin(rejected), None);
    }
}
|
|
|
|
#[test]
fn test_rewrite_manifest_origins_swaps_all_components() {
    // Both components share everything except name, URL, hash and size.
    let component = |name: &str, download_url: &str, sha256: &str, size_bytes| ComponentUpdate {
        name: name.into(),
        current_version: "1.7.25-alpha".into(),
        new_version: "1.7.26-alpha".into(),
        download_url: download_url.into(),
        sha256: sha256.into(),
        size_bytes,
        blake3: None,
    };

    let mut manifest = UpdateManifest {
        version: "1.7.26-alpha".into(),
        release_date: "2026-04-21".into(),
        changelog: vec![],
        components: vec![
            component(
                "archipelago",
                "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/archipelago",
                "x",
                1,
            ),
            component(
                "frontend",
                "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/frontend.tar.gz",
                "y",
                2,
            ),
        ],
    };

    // Re-point every component at the mirror the manifest was fetched from.
    rewrite_manifest_origins(
        &mut manifest,
        "http://23.182.128.160:3000/lfg2025/archy/raw/branch/main/releases/manifest.json",
    );

    let rewritten: Vec<&str> = manifest
        .components
        .iter()
        .map(|c| c.download_url.as_str())
        .collect();
    assert_eq!(
        rewritten,
        [
            "http://23.182.128.160:3000/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/archipelago",
            "http://23.182.128.160:3000/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/frontend.tar.gz",
        ]
    );
}
|
|
|
|
#[tokio::test]
|
|
async fn test_load_mirrors_returns_defaults_when_absent() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let list = load_mirrors(dir.path()).await.unwrap();
|
|
assert_eq!(list.len(), 1);
|
|
assert!(list[0].url.contains("146.59.87.168"));
|
|
assert!(
|
|
!list.iter().any(|m| m.url.contains("git.tx1138.com")),
|
|
"tx1138 was retired as a release server and must not be a default mirror"
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_load_mirrors_strips_retired_tx1138_mirror() {
|
|
// A node that was running before tx1138 was retired has it baked
|
|
// into its saved mirror list. load_mirrors must strip it on load.
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let saved = vec![
|
|
UpdateMirror {
|
|
url: DEFAULT_UPDATE_MANIFEST_URL.to_string(),
|
|
label: "Server 1 (OVH)".to_string(),
|
|
},
|
|
UpdateMirror {
|
|
url: "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/manifest.json"
|
|
.to_string(),
|
|
label: "Server 2 (tx1138)".to_string(),
|
|
},
|
|
];
|
|
save_mirrors(dir.path(), &saved).await.unwrap();
|
|
let list = load_mirrors(dir.path()).await.unwrap();
|
|
assert!(
|
|
!list.iter().any(|m| m.url.contains("git.tx1138.com")),
|
|
"retired tx1138 mirror should be stripped on load; got {:?}",
|
|
list
|
|
);
|
|
assert!(list.iter().any(|m| m.url.contains("146.59.87.168")));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_save_and_load_mirrors_roundtrip() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let list = vec![UpdateMirror {
|
|
url: "https://example.com/m.json".into(),
|
|
label: "Example".into(),
|
|
}];
|
|
save_mirrors(dir.path(), &list).await.unwrap();
|
|
let back = load_mirrors(dir.path()).await.unwrap();
|
|
// load_mirrors merges in any missing default mirrors so a node
|
|
// that explicitly added a single custom mirror still gets the
|
|
// built-in OVH default. The custom mirror is preserved.
|
|
assert!(
|
|
back.iter().any(|m| m.url == "https://example.com/m.json"),
|
|
"custom mirror should round-trip; got {:?}",
|
|
back
|
|
);
|
|
for def in default_mirrors() {
|
|
assert!(
|
|
back.iter().any(|m| m.url == def.url),
|
|
"default mirror {} should be present after load; got {:?}",
|
|
def.url,
|
|
back
|
|
);
|
|
}
|
|
}
|
|
|
|
#[test]
fn test_update_state_default_values() {
    let defaults = UpdateState::default();

    // Version comes from the crate being built; nothing checked or pending.
    assert_eq!(defaults.current_version, env!("CARGO_PKG_VERSION"));
    assert!(defaults.last_check.is_none());
    assert!(defaults.available_update.is_none());

    // Both flags start cleared.
    assert!(!defaults.update_in_progress);
    assert!(!defaults.rollback_available);

    assert_eq!(defaults.schedule, UpdateSchedule::DailyCheck);
}
|
|
|
|
#[test]
fn test_update_state_serialization_roundtrip() {
    let original = UpdateState {
        current_version: "0.2.0".to_string(),
        last_check: Some("2025-01-01T00:00:00Z".to_string()),
        available_update: None,
        update_in_progress: false,
        rollback_available: true,
        schedule: UpdateSchedule::AutoApply,
        manifest_mirror: None,
    };

    // Encode to JSON and straight back again.
    let encoded = serde_json::to_string(&original).unwrap();
    let decoded: UpdateState = serde_json::from_str(&encoded).unwrap();

    assert_eq!(decoded.current_version, "0.2.0");
    assert!(decoded.rollback_available);
    assert_eq!(decoded.schedule, UpdateSchedule::AutoApply);
}
|
|
|
|
#[test]
fn test_update_schedule_serde_rename() {
    // Each variant and the exact JSON string it must serialize to.
    let cases = [
        (UpdateSchedule::DailyCheck, "\"daily_check\""),
        (UpdateSchedule::Manual, "\"manual\""),
        (UpdateSchedule::AutoApply, "\"auto_apply\""),
    ];
    for (variant, expected) in cases {
        let json = serde_json::to_string(&variant).unwrap();
        assert_eq!(json, expected);
    }
}
|
|
|
|
#[test]
fn test_update_state_schedule_defaults_on_missing_field() {
    // A state document with no `schedule` key must still deserialize,
    // with the schedule falling back to DailyCheck.
    let without_schedule = r#"{
"current_version": "0.1.0",
"last_check": null,
"available_update": null,
"update_in_progress": false,
"rollback_available": false
}"#;
    let parsed: UpdateState = serde_json::from_str(without_schedule).unwrap();
    assert_eq!(parsed.schedule, UpdateSchedule::DailyCheck);
}
|
|
|
|
#[test]
fn test_parse_version_triple() {
    // (input, expected): a suffix such as "-alpha" is ignored; anything
    // that is not three numeric parts is rejected.
    let cases = [
        ("1.7.18", Some((1, 7, 18))),
        ("1.7.18-alpha", Some((1, 7, 18))),
        ("0.0.1", Some((0, 0, 1))),
        ("garbage", None),
        ("1.2", None),
    ];
    for (input, expected) in cases {
        assert_eq!(parse_version_triple(input), expected, "input: {:?}", input);
    }
}
|
|
|
|
#[test]
fn test_is_newer() {
    // (candidate, current) pairs where the candidate IS newer.
    let newer = [
        ("1.7.19-alpha", "1.7.18-alpha"),
        ("1.8.0-alpha", "1.7.99-alpha"),
        ("1.7.10-alpha", "1.7.9-alpha"), // numeric, not lexical
    ];
    for (candidate, current) in newer {
        assert!(
            is_newer(candidate, current),
            "{} should be newer than {}",
            candidate,
            current
        );
    }

    // (candidate, current) pairs where it is NOT: equal or older.
    let not_newer = [
        ("1.7.18-alpha", "1.7.18-alpha"),
        ("1.7.17-alpha", "1.7.18-alpha"), // would-be downgrade
        ("1.7.9-alpha", "1.7.10-alpha"),
    ];
    for (candidate, current) in not_newer {
        assert!(
            !is_newer(candidate, current),
            "{} should not be newer than {}",
            candidate,
            current
        );
    }
}
|
|
|
|
#[tokio::test]
|
|
async fn test_load_state_clears_stale_available_on_version_bump() {
|
|
// Simulates a sideload: state file on disk says we're on
|
|
// 1.7.16-alpha with 1.7.17-alpha staged as the pending update,
|
|
// but the running binary is 1.7.18-alpha (skipped a version).
|
|
// load_state must drop the stale available_update so the UI
|
|
// doesn't offer a downgrade.
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let stale = UpdateState {
|
|
current_version: "1.7.16-alpha".to_string(),
|
|
available_update: Some(UpdateManifest {
|
|
version: "1.7.17-alpha".to_string(),
|
|
release_date: "2026-04-20".to_string(),
|
|
changelog: vec![],
|
|
components: vec![],
|
|
}),
|
|
..UpdateState::default()
|
|
};
|
|
save_state(dir.path(), &stale).await.unwrap();
|
|
let loaded = load_state(dir.path()).await.unwrap();
|
|
assert_eq!(loaded.current_version, env!("CARGO_PKG_VERSION"));
|
|
assert!(
|
|
loaded.available_update.is_none(),
|
|
"stale available_update must be cleared after version bump"
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_load_state_creates_default_when_missing() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let state = load_state(dir.path()).await.unwrap();
|
|
assert_eq!(state.current_version, env!("CARGO_PKG_VERSION"));
|
|
assert!(!state.update_in_progress);
|
|
// File should now exist after load created the default
|
|
assert!(dir.path().join(UPDATE_STATE_FILE).exists());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_save_and_load_state_roundtrip() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let staging = dir.path().join("update-staging");
|
|
tokio::fs::create_dir_all(&staging).await.unwrap();
|
|
tokio::fs::write(staging.join("archipelago"), b"staged")
|
|
.await
|
|
.unwrap();
|
|
// A *complete* staged update carries the .download-complete marker;
|
|
// without it has_staged_update() reads the staging as partial and the
|
|
// load_state self-heal clears update_in_progress (see #26). This test
|
|
// simulates a complete staging, so write the marker.
|
|
tokio::fs::write(staging.join(STAGED_COMPLETE_MARKER), b"1")
|
|
.await
|
|
.unwrap();
|
|
let state = UpdateState {
|
|
current_version: "1.0.0".to_string(),
|
|
last_check: Some("2025-06-15T12:00:00Z".to_string()),
|
|
available_update: Some(UpdateManifest {
|
|
version: "1.1.0".to_string(),
|
|
release_date: "2025-06-20".to_string(),
|
|
changelog: vec!["Fix bugs".to_string(), "New feature".to_string()],
|
|
components: vec![ComponentUpdate {
|
|
name: "archipelago".to_string(),
|
|
current_version: "1.0.0".to_string(),
|
|
new_version: "1.1.0".to_string(),
|
|
download_url: "https://example.com/binary".to_string(),
|
|
sha256: "abc123".to_string(),
|
|
size_bytes: 5000,
|
|
blake3: None,
|
|
}],
|
|
}),
|
|
update_in_progress: true,
|
|
rollback_available: false,
|
|
schedule: UpdateSchedule::Manual,
|
|
manifest_mirror: Some(
|
|
"https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/manifest.json"
|
|
.to_string(),
|
|
),
|
|
};
|
|
save_state(dir.path(), &state).await.unwrap();
|
|
let loaded = load_state(dir.path()).await.unwrap();
|
|
// load_state rewrites current_version to match the running
|
|
// binary (sideload self-heal), so don't assert on the saved
|
|
// value. The migration also clears available_update when the
|
|
// version changes — check the other fields survived.
|
|
assert_eq!(loaded.current_version, env!("CARGO_PKG_VERSION"));
|
|
assert!(loaded.update_in_progress);
|
|
assert_eq!(loaded.schedule, UpdateSchedule::Manual);
|
|
assert!(loaded.available_update.is_none());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_load_state_clears_stale_in_progress_without_staging() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let state = UpdateState {
|
|
update_in_progress: true,
|
|
..UpdateState::default()
|
|
};
|
|
save_state(dir.path(), &state).await.unwrap();
|
|
|
|
let loaded = load_state(dir.path()).await.unwrap();
|
|
|
|
assert!(!loaded.update_in_progress);
|
|
let persisted = load_state(dir.path()).await.unwrap();
|
|
assert!(!persisted.update_in_progress);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_dismiss_update_clears_available() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let state = UpdateState {
|
|
available_update: Some(UpdateManifest {
|
|
version: "2.0.0".to_string(),
|
|
release_date: "2025-07-01".to_string(),
|
|
changelog: vec![],
|
|
components: vec![],
|
|
}),
|
|
..UpdateState::default()
|
|
};
|
|
save_state(dir.path(), &state).await.unwrap();
|
|
dismiss_update(dir.path()).await.unwrap();
|
|
let loaded = load_state(dir.path()).await.unwrap();
|
|
assert!(loaded.available_update.is_none());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_set_and_get_schedule() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
// Initialize state
|
|
let _ = load_state(dir.path()).await.unwrap();
|
|
|
|
set_schedule(dir.path(), UpdateSchedule::AutoApply)
|
|
.await
|
|
.unwrap();
|
|
let schedule = get_schedule(dir.path()).await.unwrap();
|
|
assert_eq!(schedule, UpdateSchedule::AutoApply);
|
|
|
|
set_schedule(dir.path(), UpdateSchedule::Manual)
|
|
.await
|
|
.unwrap();
|
|
let schedule = get_schedule(dir.path()).await.unwrap();
|
|
assert_eq!(schedule, UpdateSchedule::Manual);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_get_status_returns_current_state() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let state = UpdateState {
|
|
current_version: "3.0.0".to_string(),
|
|
rollback_available: true,
|
|
..UpdateState::default()
|
|
};
|
|
save_state(dir.path(), &state).await.unwrap();
|
|
let status = get_status(dir.path()).await.unwrap();
|
|
// get_status → load_state, which rewrites current_version to
|
|
// match the running binary (see the sideload-self-heal path).
|
|
assert_eq!(status.current_version, env!("CARGO_PKG_VERSION"));
|
|
assert!(status.rollback_available);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_pending_verification_round_trip() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
let marker = PendingVerification {
|
|
applied_at: chrono::Utc::now().to_rfc3339(),
|
|
new_version: "1.7.41-alpha".into(),
|
|
previous_version: "1.7.40-alpha".into(),
|
|
deadline_ts: chrono::Utc::now().timestamp() + 150,
|
|
};
|
|
write_pending_verification(dir.path(), &marker)
|
|
.await
|
|
.unwrap();
|
|
let read = read_pending_verification(dir.path()).await.unwrap();
|
|
assert_eq!(read.new_version, "1.7.41-alpha");
|
|
assert_eq!(read.previous_version, "1.7.40-alpha");
|
|
clear_pending_verification(dir.path()).await;
|
|
assert!(read_pending_verification(dir.path()).await.is_none());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_pending_verification_absent_is_none() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
assert!(read_pending_verification(dir.path()).await.is_none());
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn test_verify_pending_update_noop_without_marker() {
|
|
let dir = tempfile::tempdir().unwrap();
|
|
// No marker written -- must return quickly without doing anything
|
|
// risky (network probes, rollback calls). We're just asserting
|
|
// it doesn't panic or hang.
|
|
verify_pending_update(dir.path()).await;
|
|
}
|
|
|
|
#[test]
fn test_pending_verify_constants_are_sensible() {
    let window_secs = PENDING_VERIFY_WINDOW_SECS;

    // The window has to stay below the stale-marker threshold, so a normal
    // verification cycle finishes before its marker counts as stale...
    assert!(window_secs < PENDING_VERIFY_MAX_AGE_SECS as u64);
    // ...while still leaving enough time for nginx and the backend to start.
    assert!(window_secs >= 60);
}
|
|
}
|