feat(dht): Phase 1 — BLAKE3 content addressing alongside SHA-256

Adds the iroh-native, range-verifiable hash next to the incumbent SHA-256
so the swarm can later fetch/verify by BLAKE3 with the registry/origin as
fallback. Non-breaking: SHA-256 stays the mandatory gate; BLAKE3 is verified
only when present.

- content_hash.rs: HashAlg + ContentDigest (parse/verify '<alg>:<hex>'
  multihash strings), blake3_hex/sha256_hex; BLAKE3 known-answer test
- update.rs: ComponentUpdate.blake3 (serde-default); verified ALONGSIDE
  SHA-256 in the resumable download loop, re-download on mismatch
- blobs.rs: BlobMeta.blake3 computed on put (on-disk path stays
  SHA-256-keyed for back-compat; advertises the future swarm address)

Drive-by: fix a pre-existing stale test (test_save_and_load_state_roundtrip)
that never wrote the .download-complete marker #26 requires, so load_state's
self-heal cleared update_in_progress. Unrelated to BLAKE3 — surfaced by
running the full update:: suite.

40/40 content_hash/update/blobs tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
archipelago 2026-06-16 13:05:27 -04:00
parent 27f11bf85a
commit f0cb91ed76
6 changed files with 238 additions and 10 deletions

56
core/Cargo.lock generated
View File

@ -26,7 +26,7 @@ checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
"cpufeatures 0.2.17",
]
[[package]]
@ -92,6 +92,7 @@ dependencies = [
"bcrypt",
"bip39",
"bitcoin",
"blake3",
"bs58",
"bytes",
"chacha20poly1305",
@ -202,10 +203,16 @@ checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072"
dependencies = [
"base64ct",
"blake2",
"cpufeatures",
"cpufeatures 0.2.17",
"password-hash",
]
[[package]]
name = "arrayref"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
[[package]]
name = "arrayvec"
version = "0.7.6"
@ -424,6 +431,20 @@ dependencies = [
"digest",
]
[[package]]
name = "blake3"
version = "1.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce"
dependencies = [
"arrayref",
"arrayvec",
"cc",
"cfg-if",
"constant_time_eq 0.4.2",
"cpufeatures 0.3.0",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
@ -524,7 +545,7 @@ checksum = "c3613f74bd2eac03dad61bd53dbe620703d4371614fe0bc3b9f04dd36fe4e818"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
"cpufeatures 0.2.17",
]
[[package]]
@ -604,6 +625,12 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "constant_time_eq"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b"
[[package]]
name = "core-foundation"
version = "0.9.4"
@ -629,6 +656,15 @@ dependencies = [
"libc",
]
[[package]]
name = "cpufeatures"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201"
dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "3.4.0"
@ -686,7 +722,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be"
dependencies = [
"cfg-if",
"cpufeatures",
"cpufeatures 0.2.17",
"curve25519-dalek-derive",
"digest",
"fiat-crypto",
@ -1921,7 +1957,7 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8159bd90725d2df49889a078b54f4f79e87f1f8a8444194cdca81d38f5393abf"
dependencies = [
"cpufeatures",
"cpufeatures 0.2.17",
"opaque-debug",
"universal-hash",
]
@ -1933,7 +1969,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25"
dependencies = [
"cfg-if",
"cpufeatures",
"cpufeatures 0.2.17",
"opaque-debug",
"universal-hash",
]
@ -2478,7 +2514,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c"
dependencies = [
"cfg-if",
"cpufeatures",
"cpufeatures 0.2.17",
"digest",
]
@ -2489,7 +2525,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if",
"cpufeatures",
"cpufeatures 0.2.17",
"digest",
]
@ -2506,7 +2542,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if",
"cpufeatures",
"cpufeatures 0.2.17",
"digest",
]
@ -2972,7 +3008,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f124352108f58ef88299e909f6e9470f1cdc8d2a1397963901b4a6366206bf72"
dependencies = [
"base32",
"constant_time_eq",
"constant_time_eq 0.3.1",
"hmac",
"rand 0.9.2",
"sha1",

View File

@ -42,6 +42,7 @@ archipelago-performance = { path = "../performance" }
# Authentication
bcrypt = "0.15"
sha2 = "0.10.9"
blake3 = "1"
hmac = "0.12.1"
uuid = { version = "1.0", features = ["v4"] }
regex = "1.10"

View File

@ -25,6 +25,12 @@ pub const MAX_BLOB_SIZE: u64 = 64 * 1024 * 1024;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BlobMeta {
pub cid: String,
/// DHT Phase 1: BLAKE3 hash of the content (iroh-native swarm address).
/// The on-disk path stays SHA-256-keyed (`cid`) for back-compat; this
/// advertises the hash a peer swarm can fetch/range-verify by. Absent in
/// legacy metadata written before Phase 1.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub blake3: Option<String>,
pub size: u64,
pub mime: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
@ -88,6 +94,7 @@ impl BlobStore {
let cid = hex::encode(hasher.finalize());
let meta = BlobMeta {
cid: cid.clone(),
blake3: Some(crate::content_hash::blake3_hex(bytes)),
size: bytes.len() as u64,
mime: mime.to_string(),
filename,

View File

@ -0,0 +1,149 @@
//! Content hashing for the DHT distribution plan's *integrity & addressing*
//! tier (`docs/dht-distribution-design.md` §4).
//!
//! SHA-256 is the incumbent: it keys `blobs.rs` and verifies OTA components
//! today. BLAKE3 is introduced **alongside** it because iroh-blobs addresses
//! and *range-verifies* content by BLAKE3 — essential for resumable downloads
//! and HLS streaming. During the migration window both may be present; SHA-256
//! stays mandatory and BLAKE3 is verified when supplied.
//!
//! Digests are written multihash-style as `"<alg>:<hex>"`, e.g.
//! `"blake3:ab12…"` / `"sha256:cd34…"`, matching the app-catalog `digest` field.
//! Both algorithms emit 32-byte (64-hex-char) digests.
use anyhow::{anyhow, bail, Context, Result};
use sha2::{Digest, Sha256};
/// Raw digest length in bytes. SHA-256 and BLAKE3 both emit 32-byte
/// (64-hex-char) digests, so `ContentDigest::parse` checks every supported
/// algorithm against this single constant.
const DIGEST_LEN: usize = 32;
/// Hash algorithms a content digest may be expressed in.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashAlg {
    /// SHA-256, tagged `"sha256"` in digest strings.
    Sha256,
    /// BLAKE3, tagged `"blake3"` in digest strings.
    Blake3,
}

impl HashAlg {
    /// Returns the lowercase tag that prefixes an `"<alg>:<hex>"` digest
    /// string for this algorithm.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Blake3 => "blake3",
            Self::Sha256 => "sha256",
        }
    }
}
/// Returns the SHA-256 digest of `bytes`, hex-encoded.
pub fn sha256_hex(bytes: &[u8]) -> String {
    // Incremental API fed in one shot; yields the same digest as the
    // one-call `Sha256::digest` helper.
    let mut hasher = Sha256::new();
    hasher.update(bytes);
    hex::encode(hasher.finalize())
}
/// Hex-encoded BLAKE3 of `bytes`.
pub fn blake3_hex(bytes: &[u8]) -> String {
blake3::hash(bytes).to_hex().to_string()
}
/// A parsed `"<alg>:<hex>"` content digest.
///
/// Values built through `ContentDigest::parse` carry a hex string that has
/// been length-checked and lowercased. Both fields are public, so a value
/// assembled by hand bypasses those checks; `verify` compares
/// case-insensitively, which tolerates a hand-built uppercase `hex`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentDigest {
    /// Algorithm the digest was produced with.
    pub alg: HashAlg,
    /// Hex-encoded digest. Lowercase and validated to the digest length when
    /// produced by `parse`; not re-validated afterwards.
    pub hex: String,
}
impl ContentDigest {
/// Parse a multihash-style `"<alg>:<hex>"` string.
pub fn parse(s: &str) -> Result<Self> {
let (alg_part, hex_part) = s
.split_once(':')
.ok_or_else(|| anyhow!("digest must be '<alg>:<hex>', got: {}", s))?;
let alg = match alg_part {
"sha256" => HashAlg::Sha256,
"blake3" => HashAlg::Blake3,
other => bail!("unsupported hash algorithm: {}", other),
};
let raw = hex::decode(hex_part).context("digest hex is invalid")?;
if raw.len() != DIGEST_LEN {
bail!(
"{} digest must be {} bytes, got {}",
alg.as_str(),
DIGEST_LEN,
raw.len()
);
}
Ok(Self {
alg,
hex: hex_part.to_ascii_lowercase(),
})
}
/// Compute the digest of `bytes` under this digest's algorithm.
pub fn compute_hex(&self, bytes: &[u8]) -> String {
match self.alg {
HashAlg::Sha256 => sha256_hex(bytes),
HashAlg::Blake3 => blake3_hex(bytes),
}
}
/// Verify `bytes` hash to this digest. Errors (does not panic) on mismatch.
pub fn verify(&self, bytes: &[u8]) -> Result<()> {
let actual = self.compute_hex(bytes);
if actual.eq_ignore_ascii_case(&self.hex) {
Ok(())
} else {
bail!(
"{} mismatch: expected {}, got {}",
self.alg.as_str(),
self.hex,
actual
)
}
}
}
/// Renders the digest back into its `"<alg>:<hex>"` string form.
impl std::fmt::Display for ContentDigest {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.alg.as_str())?;
        f.write_str(":")?;
        f.write_str(&self.hex)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Both algorithms must emit 32-byte digests, i.e. 64 hex characters.
    #[test]
    fn digest_lengths_are_32_bytes() {
        assert_eq!(sha256_hex(b"hi").len(), 64);
        assert_eq!(blake3_hex(b"hi").len(), 64);
    }

    #[test]
    fn blake3_known_answer() {
        // BLAKE3 of the empty input, taken from the official BLAKE3
        // reference test vectors (BLAKE3 is not specified by an RFC).
        assert_eq!(
            blake3_hex(b""),
            "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
        );
    }

    #[test]
    fn sha256_known_answer() {
        // Well-known SHA-256 digest of the empty input. SHA-256 is the
        // mandatory gate, so pin it with a known answer as well.
        assert_eq!(
            sha256_hex(b""),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    #[test]
    fn parse_roundtrip() {
        let d = ContentDigest::parse(&format!("blake3:{}", blake3_hex(b"x"))).unwrap();
        assert_eq!(d.alg, HashAlg::Blake3);
        assert_eq!(d.to_string(), format!("blake3:{}", blake3_hex(b"x")));
    }

    #[test]
    fn parse_normalizes_uppercase_hex() {
        // Uppercase input is accepted, stored lowercase, and still verifies.
        let lower = sha256_hex(b"payload");
        let d = ContentDigest::parse(&format!("sha256:{}", lower.to_ascii_uppercase())).unwrap();
        assert_eq!(d.alg, HashAlg::Sha256);
        assert_eq!(d.hex, lower);
        assert!(d.verify(b"payload").is_ok());
    }

    #[test]
    fn verify_accepts_and_rejects() {
        let d = ContentDigest::parse(&format!("sha256:{}", sha256_hex(b"payload"))).unwrap();
        assert!(d.verify(b"payload").is_ok());
        assert!(d.verify(b"tampered").is_err());
    }

    #[test]
    fn parse_rejects_bad_input() {
        assert!(ContentDigest::parse("nocolon").is_err());
        assert!(ContentDigest::parse("md5:abcd").is_err());
        assert!(ContentDigest::parse("blake3:nothex").is_err());
        assert!(ContentDigest::parse("blake3:ab").is_err()); // too short
        assert!(ContentDigest::parse(&format!("blake3:{}", "ab".repeat(33))).is_err()); // too long
    }
}

View File

@ -36,6 +36,7 @@ mod bootstrap;
mod config;
mod constants;
mod container;
mod content_hash;
mod content_server;
mod crash_recovery;
mod credentials;

View File

@ -263,6 +263,11 @@ pub struct ComponentUpdate {
pub download_url: String,
pub sha256: String,
pub size_bytes: u64,
/// DHT Phase 1: BLAKE3 content address (bare hex or `"blake3:<hex>"`), the
/// iroh-native, range-verifiable hash. Optional during the migration
/// window — when present it is verified ALONGSIDE the mandatory SHA-256.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub blake3: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@ -997,6 +1002,25 @@ async fn download_component_resumable(
.context("read staging file for hash check")?;
let hash = hex::encode(Sha256::digest(&bytes));
if hash == component.sha256 {
// DHT Phase 1: if the manifest also pins a BLAKE3 digest, it must
// match too. SHA-256 stays the mandatory gate during migration;
// BLAKE3 is the hash the iroh swarm will fetch/verify by, so a
// present-but-wrong BLAKE3 means the bytes aren't swarm-consistent
// — treat it like a SHA mismatch and re-download.
if let Some(b3) = component.blake3.as_deref() {
let expected = b3.trim().strip_prefix("blake3:").unwrap_or(b3.trim());
let actual = crate::content_hash::blake3_hex(&bytes);
if !actual.eq_ignore_ascii_case(expected) {
let _ = tokio::fs::remove_file(dest).await;
last_err = Some(anyhow::anyhow!(
"BLAKE3 mismatch for {}: expected {}, got {}",
component.name,
expected,
actual
));
continue;
}
}
return Ok(());
}
// SHA mismatch — the file on disk is garbage. Nuke it and
@ -1679,6 +1703,7 @@ mod tests {
download_url: "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/archipelago".into(),
sha256: "x".into(),
size_bytes: 1,
blake3: None,
},
ComponentUpdate {
name: "frontend".into(),
@ -1687,6 +1712,7 @@ mod tests {
download_url: "https://git.tx1138.com/lfg2025/archy/raw/branch/main/releases/v1.7.26-alpha/frontend.tar.gz".into(),
sha256: "y".into(),
size_bytes: 2,
blake3: None,
},
],
};
@ -1886,6 +1912,13 @@ mod tests {
tokio::fs::write(staging.join("archipelago"), b"staged")
.await
.unwrap();
// A *complete* staged update carries the .download-complete marker;
// without it has_staged_update() reads the staging as partial and the
// load_state self-heal clears update_in_progress (see #26). This test
// simulates a complete staging, so write the marker.
tokio::fs::write(staging.join(STAGED_COMPLETE_MARKER), b"1")
.await
.unwrap();
let state = UpdateState {
current_version: "1.0.0".to_string(),
last_check: Some("2025-06-15T12:00:00Z".to_string()),
@ -1900,6 +1933,7 @@ mod tests {
download_url: "https://example.com/binary".to_string(),
sha256: "abc123".to_string(),
size_bytes: 5000,
blake3: None,
}],
}),
update_in_progress: true,