diff --git a/core/Cargo.lock b/core/Cargo.lock index b1d65f8f..96170111 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -26,7 +26,7 @@ checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" dependencies = [ "cfg-if", "cipher", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -92,6 +92,7 @@ dependencies = [ "bcrypt", "bip39", "bitcoin", + "blake3", "bs58", "bytes", "chacha20poly1305", @@ -202,10 +203,16 @@ checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072" dependencies = [ "base64ct", "blake2", - "cpufeatures", + "cpufeatures 0.2.17", "password-hash", ] +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + [[package]] name = "arrayvec" version = "0.7.6" @@ -424,6 +431,20 @@ dependencies = [ "digest", ] +[[package]] +name = "blake3" +version = "1.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq 0.4.2", + "cpufeatures 0.3.0", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -524,7 +545,7 @@ checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818" dependencies = [ "cfg-if", "cipher", - "cpufeatures", + "cpufeatures 0.2.17", ] [[package]] @@ -604,6 +625,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "core-foundation" version = "0.9.4" @@ -629,6 +656,15 @@ dependencies = [ "libc", ] +[[package]] +name = 
"cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crc" version = "3.4.0" @@ -686,7 +722,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "curve25519-dalek-derive", "digest", "fiat-crypto", @@ -1921,7 +1957,7 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf" dependencies = [ - "cpufeatures", + "cpufeatures 0.2.17", "opaque-debug", "universal-hash", ] @@ -1933,7 +1969,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "opaque-debug", "universal-hash", ] @@ -2478,7 +2514,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -2489,7 +2525,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -2506,7 +2542,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -2972,7 +3008,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f124352108f58ef88299e909f6e9470f1cdc8d2a1397963901b4a6366206bf72" dependencies = [ 
"base32", - "constant_time_eq", + "constant_time_eq 0.3.1", "hmac", "rand 0.9.2", "sha1", diff --git a/core/archipelago/Cargo.toml b/core/archipelago/Cargo.toml index ee7e74f0..f8613089 100644 --- a/core/archipelago/Cargo.toml +++ b/core/archipelago/Cargo.toml @@ -42,6 +42,7 @@ archipelago-performance = { path = "../performance" } # Authentication bcrypt = "0.15" sha2 = "0.10.9" +blake3 = "1" hmac = "0.12.1" uuid = { version = "1.0", features = ["v4"] } regex = "1.10" diff --git a/core/archipelago/src/blobs.rs b/core/archipelago/src/blobs.rs index df17e993..1df52231 100644 --- a/core/archipelago/src/blobs.rs +++ b/core/archipelago/src/blobs.rs @@ -25,6 +25,12 @@ pub const MAX_BLOB_SIZE: u64 = 64 * 1024 * 1024; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BlobMeta { pub cid: String, + /// DHT Phase 1: BLAKE3 hash of the content (iroh-native swarm address). + /// The on-disk path stays SHA-256-keyed (`cid`) for back-compat; this + /// advertises the hash a peer swarm can fetch/range-verify by. Absent in + /// legacy metadata written before Phase 1. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub blake3: Option, pub size: u64, pub mime: String, #[serde(default, skip_serializing_if = "Option::is_none")] @@ -88,6 +94,7 @@ impl BlobStore { let cid = hex::encode(hasher.finalize()); let meta = BlobMeta { cid: cid.clone(), + blake3: Some(crate::content_hash::blake3_hex(bytes)), size: bytes.len() as u64, mime: mime.to_string(), filename, diff --git a/core/archipelago/src/content_hash.rs b/core/archipelago/src/content_hash.rs new file mode 100644 index 00000000..69a4b8bb --- /dev/null +++ b/core/archipelago/src/content_hash.rs @@ -0,0 +1,149 @@ +//! Content hashing for the DHT distribution plan's *integrity & addressing* +//! tier (`docs/dht-distribution-design.md` §4). +//! +//! SHA-256 is the incumbent: it keys `blobs.rs` and verifies OTA components +//! today. BLAKE3 is introduced **alongside** it because iroh-blobs addresses +//! 
and *range-verifies* content by BLAKE3 — essential for resumable downloads +//! and HLS streaming. During the migration window both may be present; SHA-256 +//! stays mandatory and BLAKE3 is verified when supplied. +//! +//! Digests are written multihash-style as `"<alg>:<hex>"`, e.g. +//! `"blake3:ab12…"` / `"sha256:cd34…"`, matching the app-catalog `digest` field. +//! Both algorithms emit 32-byte (64-hex-char) digests. + +use anyhow::{anyhow, bail, Context, Result}; +use sha2::{Digest, Sha256}; + +const DIGEST_LEN: usize = 32; + +/// Supported content-hash algorithms. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum HashAlg { + Sha256, + Blake3, +} + +impl HashAlg { + pub fn as_str(self) -> &'static str { + match self { + HashAlg::Sha256 => "sha256", + HashAlg::Blake3 => "blake3", + } + } +} + +/// Hex-encoded SHA-256 of `bytes`. +pub fn sha256_hex(bytes: &[u8]) -> String { + hex::encode(Sha256::digest(bytes)) +} + +/// Hex-encoded BLAKE3 of `bytes`. +pub fn blake3_hex(bytes: &[u8]) -> String { + blake3::hash(bytes).to_hex().to_string() +} + +/// A parsed `"<alg>:<hex>"` content digest. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ContentDigest { + pub alg: HashAlg, + /// Lowercase hex, validated to the algorithm's length. + pub hex: String, +} + +impl ContentDigest { + /// Parse a multihash-style `"<alg>:<hex>"` string. + pub fn parse(s: &str) -> Result<Self> { + let (alg_part, hex_part) = s + .split_once(':') + .ok_or_else(|| anyhow!("digest must be '<alg>:<hex>', got: {}", s))?; + let alg = match alg_part { + "sha256" => HashAlg::Sha256, + "blake3" => HashAlg::Blake3, + other => bail!("unsupported hash algorithm: {}", other), + }; + let raw = hex::decode(hex_part).context("digest hex is invalid")?; + if raw.len() != DIGEST_LEN { + bail!( + "{} digest must be {} bytes, got {}", + alg.as_str(), + DIGEST_LEN, + raw.len() + ); + } + Ok(Self { + alg, + hex: hex_part.to_ascii_lowercase(), + }) + } + + /// Compute the digest of `bytes` under this digest's algorithm. 
+ pub fn compute_hex(&self, bytes: &[u8]) -> String { + match self.alg { + HashAlg::Sha256 => sha256_hex(bytes), + HashAlg::Blake3 => blake3_hex(bytes), + } + } + + /// Verify `bytes` hash to this digest. Errors (does not panic) on mismatch. + pub fn verify(&self, bytes: &[u8]) -> Result<()> { + let actual = self.compute_hex(bytes); + if actual.eq_ignore_ascii_case(&self.hex) { + Ok(()) + } else { + bail!( + "{} mismatch: expected {}, got {}", + self.alg.as_str(), + self.hex, + actual + ) + } + } +} + +impl std::fmt::Display for ContentDigest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{}", self.alg.as_str(), self.hex) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn digest_lengths_are_32_bytes() { + assert_eq!(sha256_hex(b"hi").len(), 64); + assert_eq!(blake3_hex(b"hi").len(), 64); + } + + #[test] + fn blake3_known_answer() { + // BLAKE3 of the empty input — RFC/reference vector. + assert_eq!( + blake3_hex(b""), + "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262" + ); + } + + #[test] + fn parse_roundtrip() { + let d = ContentDigest::parse(&format!("blake3:{}", blake3_hex(b"x"))).unwrap(); + assert_eq!(d.alg, HashAlg::Blake3); + assert_eq!(d.to_string(), format!("blake3:{}", blake3_hex(b"x"))); + } + + #[test] + fn verify_accepts_and_rejects() { + let d = ContentDigest::parse(&format!("sha256:{}", sha256_hex(b"payload"))).unwrap(); + assert!(d.verify(b"payload").is_ok()); + assert!(d.verify(b"tampered").is_err()); + } + + #[test] + fn parse_rejects_bad_input() { + assert!(ContentDigest::parse("nocolon").is_err()); + assert!(ContentDigest::parse("md5:abcd").is_err()); + assert!(ContentDigest::parse("blake3:nothex").is_err()); + assert!(ContentDigest::parse("blake3:ab").is_err()); // too short + } +} diff --git a/core/archipelago/src/main.rs b/core/archipelago/src/main.rs index be620245..cbfdfdb5 100644 --- a/core/archipelago/src/main.rs +++ b/core/archipelago/src/main.rs @@ -36,6 +36,7 
@@ mod bootstrap; mod config; mod constants; mod container; +mod content_hash; mod content_server; mod crash_recovery; mod credentials; diff --git a/core/archipelago/src/update.rs b/core/archipelago/src/update.rs index c1601123..0d88b8c0 100644 --- a/core/archipelago/src/update.rs +++ b/core/archipelago/src/update.rs @@ -263,6 +263,11 @@ pub struct ComponentUpdate { pub download_url: String, pub sha256: String, pub size_bytes: u64, + /// DHT Phase 1: BLAKE3 content address (bare hex or `"blake3:<hex>"`), the + /// iroh-native, range-verifiable hash. Optional during the migration + /// window — when present it is verified ALONGSIDE the mandatory SHA-256. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub blake3: Option<String>, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] @@ -997,6 +1002,25 @@ async fn download_component_resumable( .context("read staging file for hash check")?; let hash = hex::encode(Sha256::digest(&bytes)); if hash == component.sha256 { + // DHT Phase 1: if the manifest also pins a BLAKE3 digest, it must + // match too. SHA-256 stays the mandatory gate during migration; + // BLAKE3 is the hash the iroh swarm will fetch/verify by, so a + // present-but-wrong BLAKE3 means the bytes aren't swarm-consistent + // — treat it like a SHA mismatch and re-download. + if let Some(b3) = component.blake3.as_deref() { + let expected = b3.trim().strip_prefix("blake3:").unwrap_or(b3.trim()); + let actual = crate::content_hash::blake3_hex(&bytes); + if !actual.eq_ignore_ascii_case(expected) { + let _ = tokio::fs::remove_file(dest).await; + last_err = Some(anyhow::anyhow!( + "BLAKE3 mismatch for {}: expected {}, got {}", + component.name, + expected, + actual + )); + continue; + } + } return Ok(()); } // SHA mismatch — the file on disk is garbage. 
Nuke it and @@ -1679,6 +1703,7 @@ mod tests { download_url: "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/archipelago".into(), sha256: "x".into(), size_bytes: 1, + blake3: None, }, ComponentUpdate { name: "frontend".into(), @@ -1687,6 +1712,7 @@ mod tests { download_url: "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/frontend.tar.gz".into(), sha256: "y".into(), size_bytes: 2, + blake3: None, }, ], }; @@ -1886,6 +1912,13 @@ mod tests { tokio::fs::write(staging.join("archipelago"), b"staged") .await .unwrap(); + // A *complete* staged update carries the .download-complete marker; + // without it has_staged_update() reads the staging as partial and the + // load_state self-heal clears update_in_progress (see #26). This test + // simulates a complete staging, so write the marker. + tokio::fs::write(staging.join(STAGED_COMPLETE_MARKER), b"1") + .await + .unwrap(); let state = UpdateState { current_version: "1.0.0".to_string(), last_check: Some("2025-06-15T12:00:00Z".to_string()), @@ -1900,6 +1933,7 @@ mod tests { download_url: "https://example.com/binary".to_string(), sha256: "abc123".to_string(), size_bytes: 5000, + blake3: None, }], }), update_in_progress: true,