diff --git a/core/archipelago/src/transport/chunking.rs b/core/archipelago/src/transport/chunking.rs index d7cd58b0..402d3b8f 100644 --- a/core/archipelago/src/transport/chunking.rs +++ b/core/archipelago/src/transport/chunking.rs @@ -98,7 +98,12 @@ pub fn encode_chunked(data: &[u8]) -> Result> { } let shard_size = MAX_CHUNK_PAYLOAD; - let data_shard_count = data.len().div_ceil(shard_size); + // Reserve the first 4 bytes of shard 0 for a length header so the + // receiver can trim padding after FEC reconstruction. Effective + // payload capacity is therefore (shards * shard_size) - 4. + const LEN_HEADER: usize = 4; + let total_payload = data.len() + LEN_HEADER; + let data_shard_count = total_payload.div_ceil(shard_size); if data_shard_count > MAX_PRACTICAL_CHUNKS { anyhow::bail!( @@ -116,22 +121,25 @@ pub fn encode_chunked(data: &[u8]) -> Result> { anyhow::bail!("Too many shards: {}", total_shards); } - // Split data into equal-size shards + // Build a single contiguous buffer: [len_u32_le][data...][zero_padding] + // then split into equal-size shards. + let buffer_size = data_shard_count * shard_size; + let mut buffer = vec![0u8; buffer_size]; + buffer[..LEN_HEADER].copy_from_slice(&(data.len() as u32).to_le_bytes()); + buffer[LEN_HEADER..LEN_HEADER + data.len()].copy_from_slice(data); + let mut shards: Vec> = Vec::with_capacity(total_shards); for i in 0..data_shard_count { let start = i * shard_size; - let end = (start + shard_size).min(data.len()); - let mut shard = vec![0u8; shard_size]; - shard[..end - start].copy_from_slice(&data[start..end]); - shards.push(shard); + shards.push(buffer[start..start + shard_size].to_vec()); } - - // Add empty parity shards + // Empty parity shards for _ in 0..parity_shard_count { shards.push(vec![0u8; shard_size]); } - // Generate parity + // Generate parity over the data shards (which now correctly include + // the length header in shard 0). let rs = ReedSolomon::new(data_shard_count, parity_shard_count) .context("Failed to create Reed-Solomon codec")?; rs.encode(&mut shards) @@ -152,18 +160,6 @@ pub fn encode_chunked(data: &[u8]) -> Result> { }); } - // Encode the original data length in the first chunk's first 4 bytes - // so the receiver can trim padding after reconstruction. - let data_len = data.len() as u32; - chunks[0].payload[..4].copy_from_slice(&data_len.to_le_bytes()); - // Re-encode FEC to reflect the length header change - let mut shard_data: Vec> = chunks.iter().map(|c| c.payload.clone()).collect(); - rs.encode(&mut shard_data) - .context("Reed-Solomon re-encoding failed")?; - for (i, shard) in shard_data.into_iter().enumerate() { - chunks[i].payload = shard; - } - Ok(chunks) } @@ -318,17 +314,13 @@ mod tests { #[test] fn test_chunk_roundtrip_medium() { - // ~500 bytes: 4 data chunks + 1 parity + // 500 bytes payload + 4-byte length header = 504 bytes. + // ceil(504 / 124) = 5 data shards, plus ceil(5/4) = 2 parity = 7 total. let data: Vec = (0..500).map(|i| (i % 256) as u8).collect(); let chunks = encode_chunked(&data).unwrap(); let data_chunks: Vec<_> = chunks.iter().filter(|c| !c.is_parity).collect(); - let _parity_chunks: Vec<_> = chunks.iter().filter(|c| c.is_parity).collect(); - assert_eq!(data_chunks.len(), 4); // ceil(500/124) = 5... wait - // Actually: ceil(500/124) = ceil(4.03) = 5 data shards - // But the first shard has 4 bytes of length header embedded, so - // the actual data capacity is 124 * N - 0 (length is IN the shard data). - // Let's just check it roundtrips. + assert_eq!(data_chunks.len(), 5); let mut reassembler = ChunkReassembler::new(); let mut result = None;