feat(dht): Phase 1 — BLAKE3 content addressing alongside SHA-256
Adds the iroh-native, range-verifiable hash next to the incumbent SHA-256 so the swarm can later fetch/verify by BLAKE3, with the registry/origin as fallback. Non-breaking: SHA-256 stays the mandatory gate; BLAKE3 is verified only when present.

- content_hash.rs: HashAlg + ContentDigest (parse/verify '<alg>:<hex>' multihash strings), blake3_hex/sha256_hex; BLAKE3 known-answer test
- update.rs: ComponentUpdate.blake3 (serde-default); verified ALONGSIDE SHA-256 in the resumable download loop, re-download on mismatch
- blobs.rs: BlobMeta.blake3 computed on put (on-disk path stays SHA-256-keyed for back-compat; advertises the future swarm address)

Drive-by: fix a pre-existing stale test (test_save_and_load_state_roundtrip) that never wrote the .download-complete marker #26 requires, so load_state's self-heal cleared update_in_progress. Unrelated to BLAKE3 — surfaced by running the full update:: suite.

40/40 content_hash/update/blobs tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
27f11bf85a
commit
f0cb91ed76
56
core/Cargo.lock
generated
56
core/Cargo.lock
generated
@ -26,7 +26,7 @@ checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cipher",
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -92,6 +92,7 @@ dependencies = [
|
||||
"bcrypt",
|
||||
"bip39",
|
||||
"bitcoin",
|
||||
"blake3",
|
||||
"bs58",
|
||||
"bytes",
|
||||
"chacha20poly1305",
|
||||
@ -202,10 +203,16 @@ checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072"
|
||||
dependencies = [
|
||||
"base64ct",
|
||||
"blake2",
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
"password-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.6"
|
||||
@ -424,6 +431,20 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blake3"
|
||||
version = "1.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrayvec",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"constant_time_eq 0.4.2",
|
||||
"cpufeatures 0.3.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
@ -524,7 +545,7 @@ checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cipher",
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -604,6 +625,12 @@ version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.4"
|
||||
@ -629,6 +656,15 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crc"
|
||||
version = "3.4.0"
|
||||
@ -686,7 +722,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
"curve25519-dalek-derive",
|
||||
"digest",
|
||||
"fiat-crypto",
|
||||
@ -1921,7 +1957,7 @@ version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf"
|
||||
dependencies = [
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
"opaque-debug",
|
||||
"universal-hash",
|
||||
]
|
||||
@ -1933,7 +1969,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
"opaque-debug",
|
||||
"universal-hash",
|
||||
]
|
||||
@ -2478,7 +2514,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
"digest",
|
||||
]
|
||||
|
||||
@ -2489,7 +2525,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
"digest",
|
||||
]
|
||||
|
||||
@ -2506,7 +2542,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"cpufeatures",
|
||||
"cpufeatures 0.2.17",
|
||||
"digest",
|
||||
]
|
||||
|
||||
@ -2972,7 +3008,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f124352108f58ef88299e909f6e9470f1cdc8d2a1397963901b4a6366206bf72"
|
||||
dependencies = [
|
||||
"base32",
|
||||
"constant_time_eq",
|
||||
"constant_time_eq 0.3.1",
|
||||
"hmac",
|
||||
"rand 0.9.2",
|
||||
"sha1",
|
||||
|
||||
@ -42,6 +42,7 @@ archipelago-performance = { path = "../performance" }
|
||||
# Authentication
|
||||
bcrypt = "0.15"
|
||||
sha2 = "0.10.9"
|
||||
blake3 = "1"
|
||||
hmac = "0.12.1"
|
||||
uuid = { version = "1.0", features = ["v4"] }
|
||||
regex = "1.10"
|
||||
|
||||
@ -25,6 +25,12 @@ pub const MAX_BLOB_SIZE: u64 = 64 * 1024 * 1024;
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BlobMeta {
|
||||
pub cid: String,
|
||||
/// DHT Phase 1: BLAKE3 hash of the content (iroh-native swarm address).
|
||||
/// The on-disk path stays SHA-256-keyed (`cid`) for back-compat; this
|
||||
/// advertises the hash a peer swarm can fetch/range-verify by. Absent in
|
||||
/// legacy metadata written before Phase 1.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub blake3: Option<String>,
|
||||
pub size: u64,
|
||||
pub mime: String,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
@ -88,6 +94,7 @@ impl BlobStore {
|
||||
let cid = hex::encode(hasher.finalize());
|
||||
let meta = BlobMeta {
|
||||
cid: cid.clone(),
|
||||
blake3: Some(crate::content_hash::blake3_hex(bytes)),
|
||||
size: bytes.len() as u64,
|
||||
mime: mime.to_string(),
|
||||
filename,
|
||||
|
||||
149
core/archipelago/src/content_hash.rs
Normal file
149
core/archipelago/src/content_hash.rs
Normal file
@ -0,0 +1,149 @@
|
||||
//! Content hashing for the DHT distribution plan's *integrity & addressing*
|
||||
//! tier (`docs/dht-distribution-design.md` §4).
|
||||
//!
|
||||
//! SHA-256 is the incumbent: it keys `blobs.rs` and verifies OTA components
|
||||
//! today. BLAKE3 is introduced **alongside** it because iroh-blobs addresses
|
||||
//! and *range-verifies* content by BLAKE3 — essential for resumable downloads
|
||||
//! and HLS streaming. During the migration window both may be present; SHA-256
|
||||
//! stays mandatory and BLAKE3 is verified when supplied.
|
||||
//!
|
||||
//! Digests are written multihash-style as `"<alg>:<hex>"`, e.g.
|
||||
//! `"blake3:ab12…"` / `"sha256:cd34…"`, matching the app-catalog `digest` field.
|
||||
//! Both algorithms emit 32-byte (64-hex-char) digests.
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
const DIGEST_LEN: usize = 32;
|
||||
|
||||
/// Content-hash algorithms understood by this module.
///
/// Each variant's canonical lowercase name (see [`HashAlg::as_str`]) is the
/// `<alg>` prefix of an `"<alg>:<hex>"` digest string.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum HashAlg {
    /// SHA-256.
    Sha256,
    /// BLAKE3.
    Blake3,
}

impl HashAlg {
    /// Returns the canonical lowercase name of the algorithm:
    /// `"sha256"` or `"blake3"`.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            HashAlg::Sha256 => "sha256",
            HashAlg::Blake3 => "blake3",
        }
    }
}

/// Formats the algorithm as its canonical name, so it can be used directly in
/// `format!`/`write!` without an explicit `.as_str()` call.
impl std::fmt::Display for HashAlg {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
|
||||
|
||||
/// Hex-encoded SHA-256 of `bytes`.
|
||||
pub fn sha256_hex(bytes: &[u8]) -> String {
|
||||
hex::encode(Sha256::digest(bytes))
|
||||
}
|
||||
|
||||
/// Hex-encoded BLAKE3 of `bytes`.
|
||||
pub fn blake3_hex(bytes: &[u8]) -> String {
|
||||
blake3::hash(bytes).to_hex().to_string()
|
||||
}
|
||||
|
||||
/// A parsed `"<alg>:<hex>"` content digest.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct ContentDigest {
|
||||
pub alg: HashAlg,
|
||||
/// Lowercase hex, validated to the algorithm's length.
|
||||
pub hex: String,
|
||||
}
|
||||
|
||||
impl ContentDigest {
|
||||
/// Parse a multihash-style `"<alg>:<hex>"` string.
|
||||
pub fn parse(s: &str) -> Result<Self> {
|
||||
let (alg_part, hex_part) = s
|
||||
.split_once(':')
|
||||
.ok_or_else(|| anyhow!("digest must be '<alg>:<hex>', got: {}", s))?;
|
||||
let alg = match alg_part {
|
||||
"sha256" => HashAlg::Sha256,
|
||||
"blake3" => HashAlg::Blake3,
|
||||
other => bail!("unsupported hash algorithm: {}", other),
|
||||
};
|
||||
let raw = hex::decode(hex_part).context("digest hex is invalid")?;
|
||||
if raw.len() != DIGEST_LEN {
|
||||
bail!(
|
||||
"{} digest must be {} bytes, got {}",
|
||||
alg.as_str(),
|
||||
DIGEST_LEN,
|
||||
raw.len()
|
||||
);
|
||||
}
|
||||
Ok(Self {
|
||||
alg,
|
||||
hex: hex_part.to_ascii_lowercase(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Compute the digest of `bytes` under this digest's algorithm.
|
||||
pub fn compute_hex(&self, bytes: &[u8]) -> String {
|
||||
match self.alg {
|
||||
HashAlg::Sha256 => sha256_hex(bytes),
|
||||
HashAlg::Blake3 => blake3_hex(bytes),
|
||||
}
|
||||
}
|
||||
|
||||
/// Verify `bytes` hash to this digest. Errors (does not panic) on mismatch.
|
||||
pub fn verify(&self, bytes: &[u8]) -> Result<()> {
|
||||
let actual = self.compute_hex(bytes);
|
||||
if actual.eq_ignore_ascii_case(&self.hex) {
|
||||
Ok(())
|
||||
} else {
|
||||
bail!(
|
||||
"{} mismatch: expected {}, got {}",
|
||||
self.alg.as_str(),
|
||||
self.hex,
|
||||
actual
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ContentDigest {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}:{}", self.alg.as_str(), self.hex)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Both helpers emit 64 hex characters, i.e. 32-byte digests.
    #[test]
    fn digest_lengths_are_32_bytes() {
        assert_eq!(sha256_hex(b"hi").len(), 64);
        assert_eq!(blake3_hex(b"hi").len(), 64);
    }

    #[test]
    fn blake3_known_answer() {
        // BLAKE3 of the empty input, as published in the reference
        // implementation's test vectors (BLAKE3 is not specified by an RFC).
        assert_eq!(
            blake3_hex(b""),
            "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
        );
    }

    #[test]
    fn sha256_known_answer() {
        // SHA-256 of the empty input.
        assert_eq!(
            sha256_hex(b""),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    #[test]
    fn parse_roundtrip() {
        let d = ContentDigest::parse(&format!("blake3:{}", blake3_hex(b"x"))).unwrap();
        assert_eq!(d.alg, HashAlg::Blake3);
        assert_eq!(d.to_string(), format!("blake3:{}", blake3_hex(b"x")));
    }

    #[test]
    fn parse_normalises_hex_case() {
        // Uppercase hex is accepted and stored lowercase, so the parsed value
        // compares equal to one built from the lowercase form.
        let lower = sha256_hex(b"x");
        let d = ContentDigest::parse(&format!("sha256:{}", lower.to_ascii_uppercase())).unwrap();
        assert_eq!(d.hex, lower);
        assert!(d.verify(b"x").is_ok());
    }

    #[test]
    fn verify_accepts_and_rejects() {
        let d = ContentDigest::parse(&format!("sha256:{}", sha256_hex(b"payload"))).unwrap();
        assert!(d.verify(b"payload").is_ok());
        assert!(d.verify(b"tampered").is_err());
    }

    #[test]
    fn parse_rejects_bad_input() {
        assert!(ContentDigest::parse("nocolon").is_err());
        assert!(ContentDigest::parse("md5:abcd").is_err());
        assert!(ContentDigest::parse("blake3:nothex").is_err());
        assert!(ContentDigest::parse("blake3:ab").is_err()); // too short
        assert!(ContentDigest::parse("sha256:").is_err()); // empty hex
        assert!(ContentDigest::parse(&format!("blake3:{}", "ab".repeat(33))).is_err()); // too long
    }
}
|
||||
@ -36,6 +36,7 @@ mod bootstrap;
|
||||
mod config;
|
||||
mod constants;
|
||||
mod container;
|
||||
mod content_hash;
|
||||
mod content_server;
|
||||
mod crash_recovery;
|
||||
mod credentials;
|
||||
|
||||
@ -263,6 +263,11 @@ pub struct ComponentUpdate {
|
||||
pub download_url: String,
|
||||
pub sha256: String,
|
||||
pub size_bytes: u64,
|
||||
/// DHT Phase 1: BLAKE3 content address (bare hex or `"blake3:<hex>"`), the
|
||||
/// iroh-native, range-verifiable hash. Optional during the migration
|
||||
/// window — when present it is verified ALONGSIDE the mandatory SHA-256.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub blake3: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
@ -997,6 +1002,25 @@ async fn download_component_resumable(
|
||||
.context("read staging file for hash check")?;
|
||||
let hash = hex::encode(Sha256::digest(&bytes));
|
||||
if hash == component.sha256 {
|
||||
// DHT Phase 1: if the manifest also pins a BLAKE3 digest, it must
|
||||
// match too. SHA-256 stays the mandatory gate during migration;
|
||||
// BLAKE3 is the hash the iroh swarm will fetch/verify by, so a
|
||||
// present-but-wrong BLAKE3 means the bytes aren't swarm-consistent
|
||||
// — treat it like a SHA mismatch and re-download.
|
||||
if let Some(b3) = component.blake3.as_deref() {
|
||||
let expected = b3.trim().strip_prefix("blake3:").unwrap_or(b3.trim());
|
||||
let actual = crate::content_hash::blake3_hex(&bytes);
|
||||
if !actual.eq_ignore_ascii_case(expected) {
|
||||
let _ = tokio::fs::remove_file(dest).await;
|
||||
last_err = Some(anyhow::anyhow!(
|
||||
"BLAKE3 mismatch for {}: expected {}, got {}",
|
||||
component.name,
|
||||
expected,
|
||||
actual
|
||||
));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
// SHA mismatch — the file on disk is garbage. Nuke it and
|
||||
@ -1679,6 +1703,7 @@ mod tests {
|
||||
download_url: "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/archipelago".into(),
|
||||
sha256: "x".into(),
|
||||
size_bytes: 1,
|
||||
blake3: None,
|
||||
},
|
||||
ComponentUpdate {
|
||||
name: "frontend".into(),
|
||||
@ -1687,6 +1712,7 @@ mod tests {
|
||||
download_url: "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/frontend.tar.gz".into(),
|
||||
sha256: "y".into(),
|
||||
size_bytes: 2,
|
||||
blake3: None,
|
||||
},
|
||||
],
|
||||
};
|
||||
@ -1886,6 +1912,13 @@ mod tests {
|
||||
tokio::fs::write(staging.join("archipelago"), b"staged")
|
||||
.await
|
||||
.unwrap();
|
||||
// A *complete* staged update carries the .download-complete marker;
|
||||
// without it has_staged_update() reads the staging as partial and the
|
||||
// load_state self-heal clears update_in_progress (see #26). This test
|
||||
// simulates a complete staging, so write the marker.
|
||||
tokio::fs::write(staging.join(STAGED_COMPLETE_MARKER), b"1")
|
||||
.await
|
||||
.unwrap();
|
||||
let state = UpdateState {
|
||||
current_version: "1.0.0".to_string(),
|
||||
last_check: Some("2025-06-15T12:00:00Z".to_string()),
|
||||
@ -1900,6 +1933,7 @@ mod tests {
|
||||
download_url: "https://example.com/binary".to_string(),
|
||||
sha256: "abc123".to_string(),
|
||||
size_bytes: 5000,
|
||||
blake3: None,
|
||||
}],
|
||||
}),
|
||||
update_in_progress: true,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user