229 lines
8.5 KiB
Rust
Raw Normal View History

//! Swarm-assist content fetch — the *transport & swarm* tier of the DHT
//! distribution plan (`docs/dht-distribution-design.md` §4).
//!
//! ## Guiding principle: swarm-assist, origin ALWAYS wins
//! The peer swarm is an optimization layered *above* a proven HTTP path, never
//! in place of it. A node asks each available [`BlobProvider`] (e.g. an
//! iroh-blobs swarm) for content by its [`ContentDigest`]; the first peer that
//! serves bytes which **verify** against the digest wins. If no provider has it
//! — or the swarm is disabled, or every peer is offline — we fall back to the
//! origin HTTP download, which is the guaranteed source of truth. Worst case is
//! exactly today's behaviour.
//!
//! Peer-sourced bytes are UNTRUSTED, so this module verifies them against the
//! content digest before accepting. Origin bytes run through the caller's
//! existing verification (e.g. the SHA-256 gate in `update.rs`).
//!
//! The actual iroh-blobs provider is gated behind the `iroh-swarm` feature
//! (heavy QUIC dep tree); with the feature off, [`providers`] is empty and
//! every fetch goes straight to origin — byte-for-byte today's path.
use std::path::Path;
use std::sync::Arc;
use anyhow::Result;
use async_trait::async_trait;
use tracing::{debug, info, warn};
use crate::content_hash::ContentDigest;
/// Identifies where the bytes of a completed fetch came from.
///
/// Returned by [`fetch_content_addressed`] so a caller can tell a swarm hit
/// apart from an origin download.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FetchSource {
    /// A swarm provider supplied the bytes and they verified against the digest.
    Swarm,
    /// No provider succeeded; the origin HTTP fallback supplied the bytes.
    Origin,
}
/// A source that may be able to serve content addressed by its digest.
///
/// Implementors must be `Send + Sync`; this module handles them as
/// `Arc<dyn BlobProvider>` and consults them one at a time, in slice order.
/// Whatever a provider writes is treated as untrusted: after an `Ok(true)`
/// the fetch path re-reads `dest`, checks it against the digest, and deletes
/// the file if the check fails.
#[async_trait]
pub trait BlobProvider: Send + Sync {
/// Short name for logging (e.g. "iroh").
///
/// Within this module the name is only interpolated into log messages.
fn name(&self) -> &str;
/// Try to fetch the content for `digest` into `dest`.
///
/// `digest` identifies the wanted content; `dest` is the file path the
/// bytes should be written to.
///
/// * `Ok(true)` — bytes written to `dest` (caller verifies the digest).
/// * `Ok(false)` — this provider does not have the content; try the next.
/// * `Err(_)` — a transient failure; try the next provider.
///
/// Neither `Ok(false)` nor `Err(_)` aborts the overall fetch: the fetch path
/// logs them at debug level and moves on to the next source.
async fn try_fetch(&self, digest: &ContentDigest, dest: &Path) -> Result<bool>;
}
/// Swarm providers to consult, in order, ahead of the origin download.
///
/// The list stays empty until the `iroh-swarm` feature is enabled and a
/// provider has been registered. Nothing is registered today, so the result is
/// always empty and every fetch goes to origin. The function exists as a seam:
/// wiring up iroh later is a local change here rather than surgery through the
/// download path.
pub fn providers() -> Vec<Arc<dyn BlobProvider>> {
    vec![]
}
/// Fetch content-addressed bytes: swarm-assist, origin always wins.
///
/// Tries each provider in order; the first to write bytes that VERIFY against
/// `digest` wins and returns [`FetchSource::Swarm`]. If none succeed, runs
/// `origin` (the guaranteed HTTP fallback) and returns [`FetchSource::Origin`].
/// A node that obtained bytes from the swarm has, by definition, a verified
/// copy it can itself seed afterwards.
///
/// # Arguments
/// * `digest` — the content digest the fetched bytes must match.
/// * `providers` — swarm sources, consulted in slice order; may be empty.
/// * `dest` — file path the content is written to.
/// * `origin` — invoked at most once, and only when no provider produced
///   verified bytes. Its output is NOT verified here; that is left to the
///   caller.
///
/// # Errors
/// Only a failure of `origin` is returned. Provider errors and verification
/// failures are logged and the next source is tried.
///
/// # Cleanup
/// Whenever a provider fails verification or returns an error, `dest` is
/// removed before moving on, so truncated or tampered peer bytes are never
/// left on disk for a later step to pick up.
pub async fn fetch_content_addressed<F, Fut>(
    digest: &ContentDigest,
    providers: &[Arc<dyn BlobProvider>],
    dest: &Path,
    origin: F,
) -> Result<FetchSource>
where
    F: FnOnce() -> Fut,
    Fut: std::future::Future<Output = Result<()>>,
{
    for provider in providers {
        match provider.try_fetch(digest, dest).await {
            Ok(true) => match verify_dest(digest, dest).await {
                Ok(()) => {
                    info!("swarm: {} served {} (verified)", provider.name(), digest);
                    return Ok(FetchSource::Swarm);
                }
                Err(e) => {
                    // A peer served bytes that don't match the digest — could be
                    // corruption or a malicious seed. Discard and try the next
                    // source; never let unverified peer bytes through.
                    warn!(
                        "swarm: {} served bytes failing verification for {}: {} — discarding",
                        provider.name(),
                        digest,
                        e
                    );
                    discard_unverified(dest).await;
                }
            },
            Ok(false) => debug!("swarm: {} does not have {}", provider.name(), digest),
            Err(e) => {
                debug!("swarm: {} failed for {}: {}", provider.name(), digest, e);
                // A transfer that died part-way may have left a truncated file
                // behind. It is just as unverified as a tampered one, so give
                // the next source a clean slate.
                discard_unverified(dest).await;
            }
        }
    }
    debug!("swarm: no provider served {} — falling back to origin", digest);
    origin().await?;
    Ok(FetchSource::Origin)
}

/// Best-effort removal of unverified peer bytes at `dest`.
///
/// A missing file is not an error (the provider may never have written one).
/// Any other failure is logged rather than propagated, because the origin
/// fallback must still get its chance to run.
async fn discard_unverified(dest: &Path) {
    match tokio::fs::remove_file(dest).await {
        Ok(()) => {}
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        Err(e) => warn!(
            "swarm: could not remove unverified bytes at {}: {}",
            dest.display(),
            e
        ),
    }
}
/// Check that the file at `dest` matches `digest`.
///
/// Loads the whole file into memory and hands the bytes to the digest's
/// `verify` method. Fails if the file cannot be read or if `verify` rejects
/// the bytes.
async fn verify_dest(digest: &ContentDigest, dest: &Path) -> Result<()> {
    let contents = tokio::fs::read(dest).await?;
    digest.verify(&contents)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::atomic::{AtomicBool, Ordering};

    /// Build the BLAKE3 digest that `bytes` is expected to verify against.
    fn digest_of(bytes: &[u8]) -> ContentDigest {
        ContentDigest::parse(&format!("blake3:{}", crate::content_hash::blake3_hex(bytes))).unwrap()
    }

    /// Provider that writes a fixed payload (which may or may not match).
    ///
    /// `payload: None` models a provider that does not have the content.
    struct FixedProvider {
        name: &'static str,
        payload: Option<Vec<u8>>,
    }

    #[async_trait]
    impl BlobProvider for FixedProvider {
        fn name(&self) -> &str {
            self.name
        }

        async fn try_fetch(&self, _d: &ContentDigest, dest: &Path) -> Result<bool> {
            match &self.payload {
                Some(p) => {
                    tokio::fs::write(dest, p).await?;
                    Ok(true)
                }
                None => Ok(false),
            }
        }
    }

    /// Provider that leaves a truncated file behind and then reports an error,
    /// modelling a peer that drops out mid-transfer.
    struct FailingProvider;

    #[async_trait]
    impl BlobProvider for FailingProvider {
        fn name(&self) -> &str {
            "flaky"
        }

        async fn try_fetch(&self, _d: &ContentDigest, dest: &Path) -> Result<bool> {
            tokio::fs::write(dest, b"partial").await?;
            Err(anyhow::anyhow!("peer went offline mid-transfer"))
        }
    }

    /// Erase a `FixedProvider` into the trait object the fetch API takes.
    fn arc(p: FixedProvider) -> Arc<dyn BlobProvider> {
        Arc::new(p)
    }

    #[tokio::test]
    async fn swarm_hit_verifies_and_skips_origin() {
        let dir = tempfile::tempdir().unwrap();
        let dest = dir.path().join("out");
        let content = b"hello swarm".to_vec();
        let digest = digest_of(&content);
        let providers = vec![arc(FixedProvider {
            name: "good",
            payload: Some(content.clone()),
        })];
        let origin_ran = AtomicBool::new(false);
        let src = fetch_content_addressed(&digest, &providers, &dest, || async {
            origin_ran.store(true, Ordering::SeqCst);
            tokio::fs::write(&dest, b"from-origin").await?;
            Ok(())
        })
        .await
        .unwrap();
        assert_eq!(src, FetchSource::Swarm);
        assert!(!origin_ran.load(Ordering::SeqCst), "origin must not run on swarm hit");
        assert_eq!(tokio::fs::read(&dest).await.unwrap(), content);
    }

    #[tokio::test]
    async fn bad_swarm_bytes_are_discarded_and_origin_wins() {
        let dir = tempfile::tempdir().unwrap();
        let dest = dir.path().join("out");
        let content = b"the real bytes".to_vec();
        let digest = digest_of(&content);
        // Provider claims a hit but serves tampered bytes.
        let providers = vec![arc(FixedProvider {
            name: "evil",
            payload: Some(b"TAMPERED".to_vec()),
        })];
        let src = fetch_content_addressed(&digest, &providers, &dest, || async {
            tokio::fs::write(&dest, &content).await?;
            Ok(())
        })
        .await
        .unwrap();
        assert_eq!(src, FetchSource::Origin, "tampered swarm bytes must not be accepted");
        assert_eq!(tokio::fs::read(&dest).await.unwrap(), content);
    }

    #[tokio::test]
    async fn no_providers_goes_straight_to_origin() {
        let dir = tempfile::tempdir().unwrap();
        let dest = dir.path().join("out");
        let content = b"x".to_vec();
        let digest = digest_of(&content);
        let providers: Vec<Arc<dyn BlobProvider>> = vec![];
        let src = fetch_content_addressed(&digest, &providers, &dest, || async {
            tokio::fs::write(&dest, &content).await?;
            Ok(())
        })
        .await
        .unwrap();
        assert_eq!(src, FetchSource::Origin);
        // The origin closure must actually have produced the file.
        assert_eq!(tokio::fs::read(&dest).await.unwrap(), content);
    }

    #[tokio::test]
    async fn falls_through_providers_in_order() {
        let dir = tempfile::tempdir().unwrap();
        let dest = dir.path().join("out");
        let content = b"second wins".to_vec();
        let digest = digest_of(&content);
        let providers = vec![
            arc(FixedProvider { name: "miss", payload: None }),
            arc(FixedProvider { name: "hit", payload: Some(content.clone()) }),
        ];
        let src = fetch_content_addressed(&digest, &providers, &dest, || async {
            tokio::fs::write(&dest, b"origin").await?;
            Ok(())
        })
        .await
        .unwrap();
        assert_eq!(src, FetchSource::Swarm);
        assert_eq!(tokio::fs::read(&dest).await.unwrap(), content);
    }

    #[tokio::test]
    async fn provider_error_falls_back_to_origin() {
        let dir = tempfile::tempdir().unwrap();
        let dest = dir.path().join("out");
        let content = b"origin after error".to_vec();
        let digest = digest_of(&content);
        let flaky: Arc<dyn BlobProvider> = Arc::new(FailingProvider);
        let providers = vec![flaky];
        let src = fetch_content_addressed(&digest, &providers, &dest, || async {
            tokio::fs::write(&dest, &content).await?;
            Ok(())
        })
        .await
        .unwrap();
        assert_eq!(src, FetchSource::Origin, "a provider error must not abort the fetch");
        // Whatever the failed provider left behind must not survive.
        assert_eq!(tokio::fs::read(&dest).await.unwrap(), content);
    }
}