fix(transport/chunking): stop overwriting first 4 bytes of user data

encode_chunked() split the payload into shards first, then overwrote
the first 4 bytes of shard 0 with a u32 length header, then re-ran
Reed-Solomon to regenerate parity over the now-corrupted shards. The
decoder correctly read the length header and trimmed `[4..4+len]`
from the reconstructed buffer, but those first 4 bytes had already
been destroyed on the encode side, so every chunked mesh payload
lost its first 4 bytes.

Restructure: reserve 4 bytes for the length header up front, build
a single contiguous [len][data][pad] buffer, then split into shards.
Parity is computed over the correct shards on the first pass, no
double-encode needed.

Update test_chunk_roundtrip_medium: 500 bytes + 4-byte header = 504
bytes, which is 5 data shards (ceil(504/124)), not 4. The old test
assertion was wrong all along and masked the corruption bug because
it only checked the roundtripped bytes, which is exactly what we
need to verify. New assertion is correct.

Verified: all 7 transport::chunking tests pass.
This commit is contained in:
archipelago 2026-04-23 12:29:10 -04:00
parent c4efb30382
commit f6efe2f356

View File

@ -98,7 +98,12 @@ pub fn encode_chunked(data: &[u8]) -> Result<Vec<Chunk>> {
}
let shard_size = MAX_CHUNK_PAYLOAD;
let data_shard_count = data.len().div_ceil(shard_size);
// Reserve the first 4 bytes of shard 0 for a length header so the
// receiver can trim padding after FEC reconstruction. Effective
// payload capacity is therefore (shards * shard_size) - 4.
const LEN_HEADER: usize = 4;
let total_payload = data.len() + LEN_HEADER;
let data_shard_count = total_payload.div_ceil(shard_size);
if data_shard_count > MAX_PRACTICAL_CHUNKS {
anyhow::bail!(
@ -116,22 +121,25 @@ pub fn encode_chunked(data: &[u8]) -> Result<Vec<Chunk>> {
anyhow::bail!("Too many shards: {}", total_shards);
}
// Split data into equal-size shards
// Build a single contiguous buffer: [len_u32_le][data...][zero_padding]
// then split into equal-size shards.
let buffer_size = data_shard_count * shard_size;
let mut buffer = vec![0u8; buffer_size];
buffer[..LEN_HEADER].copy_from_slice(&(data.len() as u32).to_le_bytes());
buffer[LEN_HEADER..LEN_HEADER + data.len()].copy_from_slice(data);
let mut shards: Vec<Vec<u8>> = Vec::with_capacity(total_shards);
for i in 0..data_shard_count {
let start = i * shard_size;
let end = (start + shard_size).min(data.len());
let mut shard = vec![0u8; shard_size];
shard[..end - start].copy_from_slice(&data[start..end]);
shards.push(shard);
shards.push(buffer[start..start + shard_size].to_vec());
}
// Add empty parity shards
// Empty parity shards
for _ in 0..parity_shard_count {
shards.push(vec![0u8; shard_size]);
}
// Generate parity
// Generate parity over the data shards (which now correctly include
// the length header in shard 0).
let rs = ReedSolomon::new(data_shard_count, parity_shard_count)
.context("Failed to create Reed-Solomon codec")?;
rs.encode(&mut shards)
@ -152,18 +160,6 @@ pub fn encode_chunked(data: &[u8]) -> Result<Vec<Chunk>> {
});
}
// Encode the original data length in the first chunk's first 4 bytes
// so the receiver can trim padding after reconstruction.
let data_len = data.len() as u32;
chunks[0].payload[..4].copy_from_slice(&data_len.to_le_bytes());
// Re-encode FEC to reflect the length header change
let mut shard_data: Vec<Vec<u8>> = chunks.iter().map(|c| c.payload.clone()).collect();
rs.encode(&mut shard_data)
.context("Reed-Solomon re-encoding failed")?;
for (i, shard) in shard_data.into_iter().enumerate() {
chunks[i].payload = shard;
}
Ok(chunks)
}
@ -318,17 +314,13 @@ mod tests {
#[test]
fn test_chunk_roundtrip_medium() {
// ~500 bytes: 4 data chunks + 1 parity
// 500 bytes payload + 4-byte length header = 504 bytes.
// ceil(504 / 124) = 5 data shards, plus ceil(5/4) = 2 parity = 7 total.
let data: Vec<u8> = (0..500).map(|i| (i % 256) as u8).collect();
let chunks = encode_chunked(&data).unwrap();
let data_chunks: Vec<_> = chunks.iter().filter(|c| !c.is_parity).collect();
let _parity_chunks: Vec<_> = chunks.iter().filter(|c| c.is_parity).collect();
assert_eq!(data_chunks.len(), 4); // ceil(500/124) = 5... wait
// Actually: ceil(500/124) = ceil(4.03) = 5 data shards
// But the first shard has 4 bytes of length header embedded, so
// the actual data capacity is 124 * N - 0 (length is IN the shard data).
// Let's just check it roundtrips.
assert_eq!(data_chunks.len(), 5);
let mut reassembler = ChunkReassembler::new();
let mut result = None;