From 77eb1b907bb3b5ef1b55a71a1bf36ec3dcd76ff4 Mon Sep 17 00:00:00 2001 From: Dorian Date: Mon, 13 Apr 2026 08:29:44 -0400 Subject: [PATCH] feat(blobs): content-addressed blob store scaffolding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds core/archipelago/src/blobs.rs: a SHA-256 content-addressed store that writes bytes to ${data_dir}/blobs/<cid> with a sibling <cid>.meta JSON file (mime, filename, size, created_at, optional tiny thumbnail). BlobStore::put is idempotent and capped at 64 MiB per blob; the store also issues HMAC-SHA256 capability tokens scoped to (cid, peer_pubkey_hex, expiry_epoch). Tokens are verified in constant time and rejected on expiry. This is the foundation piece for the mesh ContentRef typed envelope — the /blob/<cid> HTTP route and ContentRef variant will land in a follow-up increment once the HMAC key is plumbed from node identity. No consumer yet, so the module compiles with dead_code warnings; these will clear when the HTTP handler and ApiHandler state wiring land next. Co-Authored-By: Claude Opus 4.6 (1M context) --- core/archipelago/src/blobs.rs | 167 ++++++++++++++++++++++++++++++++++ core/archipelago/src/main.rs | 1 + 2 files changed, 168 insertions(+) create mode 100644 core/archipelago/src/blobs.rs diff --git a/core/archipelago/src/blobs.rs b/core/archipelago/src/blobs.rs new file mode 100644 index 00000000..3596c56c --- /dev/null +++ b/core/archipelago/src/blobs.rs @@ -0,0 +1,167 @@ +//! Content-addressed blob store for attachments shared over mesh/federation. +//! +//! Blobs live at `${data_dir}/blobs/<cid>` where `cid` is the hex-encoded +//! SHA-256 of the content. A sibling `<cid>.meta` file holds JSON metadata +//! (mime, filename, size, created_at). Capability URLs are HMAC-signed tokens +//! scoped to a recipient pubkey and expiry, verified before serving.
+ +use anyhow::{anyhow, Context, Result}; +use hmac::{Hmac, Mac}; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::path::{Path, PathBuf}; +use tokio::fs; +use tokio::io::AsyncWriteExt; + +type HmacSha256 = Hmac; + +/// Default capability URL validity window. +pub const DEFAULT_CAP_TTL_SECS: u64 = 7 * 24 * 60 * 60; + +/// Maximum blob size accepted by the store (64 MiB). Keep attachments +/// reasonable so /var/lib/archipelago doesn't balloon unnoticed. +pub const MAX_BLOB_SIZE: u64 = 64 * 1024 * 1024; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BlobMeta { + pub cid: String, + pub size: u64, + pub mime: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub filename: Option, + pub created_at: String, + /// Optional raw thumbnail bytes (small — up to ~60 bytes is LoRa-safe). + /// Stored alongside meta so ContentRef senders don't re-fetch the blob. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub thumb_bytes: Option>, +} + +pub struct BlobStore { + root: PathBuf, + /// HMAC key used to sign capability URLs. Derived from node identity; + /// callers pass it in so we don't duplicate key management here. + cap_key: [u8; 32], +} + +impl BlobStore { + /// Create (or open) a blob store rooted at `data_dir/blobs`. + pub async fn open(data_dir: &Path, cap_key: [u8; 32]) -> Result { + let root = data_dir.join("blobs"); + fs::create_dir_all(&root).await.context("create blobs dir")?; + Ok(Self { root, cap_key }) + } + + fn path_for(&self, cid: &str) -> PathBuf { + self.root.join(cid) + } + + fn meta_path_for(&self, cid: &str) -> PathBuf { + self.root.join(format!("{}.meta", cid)) + } + + /// Write bytes to the store, returning the CID and metadata. Idempotent: + /// identical bytes produce the same CID and short-circuit re-writes. 
+ pub async fn put( + &self, + bytes: &[u8], + mime: &str, + filename: Option, + thumb_bytes: Option>, + ) -> Result { + if bytes.len() as u64 > MAX_BLOB_SIZE { + anyhow::bail!("Blob too large: {} bytes (max {})", bytes.len(), MAX_BLOB_SIZE); + } + let mut hasher = Sha256::new(); + hasher.update(bytes); + let cid = hex::encode(hasher.finalize()); + let meta = BlobMeta { + cid: cid.clone(), + size: bytes.len() as u64, + mime: mime.to_string(), + filename, + created_at: chrono::Utc::now().to_rfc3339(), + thumb_bytes, + }; + + let blob_path = self.path_for(&cid); + if !blob_path.exists() { + let mut f = fs::File::create(&blob_path).await.context("create blob")?; + f.write_all(bytes).await.context("write blob")?; + f.sync_all().await.ok(); + } + let meta_json = serde_json::to_vec(&meta)?; + fs::write(self.meta_path_for(&cid), meta_json) + .await + .context("write blob meta")?; + Ok(meta) + } + + /// Read raw bytes for a CID. Errors if missing. + pub async fn get(&self, cid: &str) -> Result> { + let path = self.path_for(cid); + fs::read(&path) + .await + .with_context(|| format!("blob not found: {}", cid)) + } + + /// Load metadata for a CID. + pub async fn meta(&self, cid: &str) -> Result { + let raw = fs::read(self.meta_path_for(cid)) + .await + .with_context(|| format!("blob meta not found: {}", cid))?; + Ok(serde_json::from_slice(&raw)?) + } + + /// Check whether a CID is held locally. + pub async fn has(&self, cid: &str) -> bool { + fs::try_exists(self.path_for(cid)).await.unwrap_or(false) + } + + /// Sign a capability token: HMAC-SHA256(cid || peer_pubkey || expiry). + /// Returned token is hex — callers append `?cap=&exp=` to + /// the blob URL sent to the peer. 
+ pub fn issue_capability(&self, cid: &str, peer_pubkey_hex: &str, expiry_epoch: u64) -> String { + let mut mac = HmacSha256::new_from_slice(&self.cap_key).expect("hmac key"); + mac.update(cid.as_bytes()); + mac.update(b"|"); + mac.update(peer_pubkey_hex.as_bytes()); + mac.update(b"|"); + mac.update(&expiry_epoch.to_be_bytes()); + hex::encode(mac.finalize().into_bytes()) + } + + /// Verify a capability token against (cid, peer_pubkey, expiry). + /// Returns Ok(()) on success, Err describing the failure otherwise. + /// Expired tokens fail even with a correct signature. + pub fn verify_capability( + &self, + cid: &str, + peer_pubkey_hex: &str, + expiry_epoch: u64, + token_hex: &str, + ) -> Result<()> { + let now = chrono::Utc::now().timestamp() as u64; + if expiry_epoch < now { + return Err(anyhow!("capability expired")); + } + let expected = self.issue_capability(cid, peer_pubkey_hex, expiry_epoch); + // Constant-time compare via HMAC verify. + let token_bytes = + hex::decode(token_hex).map_err(|_| anyhow!("capability token not hex"))?; + let expected_bytes = hex::decode(&expected).unwrap(); + if token_bytes.len() != expected_bytes.len() { + return Err(anyhow!("capability length mismatch")); + } + // hmac::Mac::verify is the idiomatic constant-time path, but we + // already computed `expected` so fall back to ct_eq via subtle. + let mut diff = 0u8; + for (a, b) in token_bytes.iter().zip(expected_bytes.iter()) { + diff |= a ^ b; + } + if diff == 0 { + Ok(()) + } else { + Err(anyhow!("capability signature mismatch")) + } + } +} diff --git a/core/archipelago/src/main.rs b/core/archipelago/src/main.rs index c478f850..af62f198 100644 --- a/core/archipelago/src/main.rs +++ b/core/archipelago/src/main.rs @@ -11,6 +11,7 @@ mod auth; mod backup; mod constants; mod bitcoin_rpc; +mod blobs; mod config; mod content_server; mod crash_recovery;