6 changes: 3 additions & 3 deletions coding/conformance.toml
@@ -8,16 +8,16 @@ hash = "edc22446bb2952609d0c8daccf2d22f8ad2b71eedfcf45296c3f4db49d78404a"

["commonware_coding::reed_solomon::tests::conformance::CodecConformance<Chunk<Sha256>>"]
n_cases = 65536
-hash = "aa8512bb8e86e967833edd1a6cc806280d5e7334e9dc8428a098de9204db12d1"
+hash = "45545e4d4aeb18b8bdb019e630fb9a1fa6dda9ed32b2d529a9213ec07ccab07c"

["commonware_coding::test::conformance::CodecConformance<Config>"]
n_cases = 65536
hash = "1a412c5c279f981857081765537b85474184048d1b17053394f94fc42ac1dbf4"

["commonware_coding::zoda::tests::conformance::CodecConformance<ReShard<Sha256>>"]
n_cases = 65536
-hash = "ebbbe08eb9beb1c5215a5d67ad9deddaef7c54920e53a751b56a8261e60e0e52"
+hash = "0571442797c611b3822c8a9c54138de9f54fc5b9daaf01796f611a5c74466710"

["commonware_coding::zoda::tests::conformance::CodecConformance<Shard<Sha256>>"]
n_cases = 65536
-hash = "929ce4f95f9d5784f995c52b7e5cde8b62663ab068848925314dc9f80eb27d34"
+hash = "fbf783e8550fe15cd7000f8185e1ca3bc9641ba0baf156ba6365d3b224e2222d"
16 changes: 8 additions & 8 deletions coding/src/reed_solomon.rs
@@ -47,13 +47,13 @@ pub struct Chunk<H: Hasher> {
    /// The index of [Chunk] in the original data.
    index: u16,

-    /// The proof of the shard in the [bmt] at the given index.
-    proof: bmt::Proof<H>,
+    /// The multi-proof of the shard in the [bmt] at the given index.
+    proof: bmt::Proof<H::Digest>,
Collaborator:

Chunk could also be parameterized on D: Digest and verify updated to take in a &mut Hasher like the proof verify methods. If we decide to do that, I think it's fine to defer to a future PR to keep this diff smaller.

Collaborator (Author):

Let's do that in a follow-up - #2753
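For illustration, a rough sketch of what that deferred refactor could look like. This is hypothetical (the shape of #2753, not merged code): it assumes a `Digest` trait and `Hasher::Digest` tying the two together, and hasher/proof method names mirroring the ones in this diff.

// Hypothetical sketch of #2753: Chunk parameterized on D: Digest, with the
// hasher threaded into verify like the proof verify methods. Not merged code.
pub struct Chunk<D: Digest> {
    shard: Vec<u8>,
    index: u16,
    proof: bmt::Proof<D>,
}

impl<D: Digest> Chunk<D> {
    /// Verify the chunk against a bmt root, taking the hasher as an argument.
    fn verify<H: Hasher<Digest = D>>(&self, hasher: &mut H, root: &D) -> bool {
        hasher.update(&self.shard);
        let shard_digest = hasher.finalize();
        self.proof
            .verify_element_inclusion(hasher, &shard_digest, self.index as u32, root)
            .is_ok()
    }
}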

}

impl<H: Hasher> Chunk<H> {
    /// Create a new [Chunk] from the given shard, index, and proof.
-    const fn new(shard: Vec<u8>, index: u16, proof: bmt::Proof<H>) -> Self {
+    const fn new(shard: Vec<u8>, index: u16, proof: bmt::Proof<H::Digest>) -> Self {
        Self {
            shard,
            index,
@@ -75,7 +75,7 @@ impl<H: Hasher> Chunk<H> {

        // Verify proof
        self.proof
-            .verify(&mut hasher, &shard_digest, self.index as u32, root)
+            .verify_element_inclusion(&mut hasher, &shard_digest, self.index as u32, root)
            .is_ok()
    }
}
@@ -95,7 +95,7 @@ impl<H: Hasher> Read for Chunk<H> {
    fn read_cfg(reader: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
        let shard = Vec::<u8>::read_range(reader, ..=cfg.maximum_shard_size)?;
        let index = u16::read(reader)?;
-        let proof = bmt::Proof::<H>::read(reader)?;
+        let proof = bmt::Proof::<H::Digest>::read_cfg(reader, &1)?;
        Ok(Self {
            shard,
            index,
@@ -381,7 +381,7 @@ fn decode<H: Hasher, S: Strategy>(
/// The encoder takes input data, splits it into `k` data shards, and generates `m` recovery
/// shards using [Reed-Solomon encoding](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction).
/// All `n = k + m` shards are then used to build a [bmt], producing a single root hash. Each shard
-/// is packaged as a chunk containing the shard data, its index, and a Merkle proof against the [bmt] root.
+/// is packaged as a chunk containing the shard data, its index, and a Merkle multi-proof against the [bmt] root.
///
/// ## Encoding
///
@@ -445,12 +445,12 @@ fn decode<H: Hasher, S: Strategy>(
/// Each chunk contains:
/// - `shard`: The shard data (original or recovery).
/// - `index`: The shard's original index (0 to n-1).
-/// - `proof`: A Merkle proof of the shard's inclusion in the [bmt].
+/// - `proof`: A Merkle multi-proof of the shard's inclusion in the [bmt].
///
/// ## Decoding and Verification
///
/// The decoder requires any `k` chunks to reconstruct the original data.
-/// 1. Each chunk's Merkle proof is verified against the [bmt] root.
+/// 1. Each chunk's Merkle multi-proof is verified against the [bmt] root.
/// 2. The shards from the valid chunks are used to reconstruct the original `k` data shards.
/// 3. To ensure consistency, the recovered data shards are re-encoded, and a new [bmt] root is
/// generated. This new root MUST match the original [bmt] root. This prevents attacks where
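For readers following the doc comments above, the decode-and-verify flow they describe, sketched end to end. This is a hedged outline, not the crate's API: `reconstruct`, `reencode_root`, and `concat` are hypothetical helpers standing in for the Reed-Solomon and bmt plumbing, and the `verify` call shape is illustrative.

// Hedged sketch of the documented decode flow; reconstruct(), reencode_root(),
// and concat() are hypothetical stand-ins, not commonware_coding APIs.
fn decode_sketch<H: Hasher>(
    root: &H::Digest,
    chunks: &[Chunk<H>],
    k: usize,
) -> Option<Vec<u8>> {
    // 1. Keep only chunks whose Merkle multi-proof verifies against the root.
    let valid: Vec<&Chunk<H>> = chunks.iter().filter(|c| c.verify(root)).collect();
    if valid.len() < k {
        return None; // any k valid chunks suffice
    }
    // 2. Reed-Solomon-reconstruct the original k data shards.
    let data_shards = reconstruct(&valid[..k]);
    // 3. Re-encode and rebuild the bmt; the fresh root MUST equal the original,
    //    so an encoder cannot commit to mutually inconsistent shards.
    if reencode_root::<H>(&data_shards) != *root {
        return None;
    }
    Some(concat(data_shards))
}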
85 changes: 47 additions & 38 deletions coding/src/zoda.rs
@@ -126,10 +126,7 @@ use commonware_math::{
    ntt::{EvaluationVector, Matrix},
};
use commonware_parallel::Strategy;
-use commonware_storage::mmr::{
-    mem::DirtyMmr, verification::multi_proof, Error as MmrError, Location, Proof, StandardHasher,
-};
-use futures::executor::block_on;
+use commonware_storage::bmt::{Builder as BmtBuilder, Error as BmtError, Proof};
use rand::seq::SliceRandom as _;
use std::{marker::PhantomData, sync::Arc};
use thiserror::Error;
@@ -439,7 +436,6 @@ impl<H: Hasher> Read for ReShard<H> {
        let max_data_els = F::bits_to_elements(max_data_bits).max(1);
        Ok(Self {
            // Worst case: every row is one data element, and the sample size is all rows.
-            // TODO (#2506): use correct bounds on inclusion proof size
            inclusion_proof: Read::read_cfg(buf, &max_data_els)?,
            shard: Read::read_cfg(buf, &max_data_els)?,
        })
@@ -468,8 +464,15 @@ pub struct CheckedShard {
/// Take indices up to `total`, and shuffle them.
///
/// The shuffle depends, deterministically, on the transcript.
-fn shuffle_indices(transcript: &Transcript, total: usize) -> Vec<Location> {
-    let mut out = (0..total as u64).map(Location::from).collect::<Vec<_>>();
+///
+/// # Panics
+///
+/// Panics if `total` exceeds `u32::MAX`.
+fn shuffle_indices(transcript: &Transcript, total: usize) -> Vec<u32> {
+    let total: u32 = total
+        .try_into()
+        .expect("encoded_rows exceeds u32::MAX; data too large for ZODA");
+    let mut out = (0..total).collect::<Vec<_>>();
    out.shuffle(&mut transcript.noise(b"shuffle"));
    out
}
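For intuition, the shuffle is an ordinary seeded Fisher-Yates; a self-contained analogue using `rand`, with a plain `u64` seed standing in for `transcript.noise(b"shuffle")` (which is project-specific API):

use rand::{rngs::StdRng, seq::SliceRandom as _, SeedableRng};

// Standalone analogue of shuffle_indices: the same seed always yields the same
// permutation, so encoder and verifier derive identical sample indices.
fn shuffle_indices_demo(seed: u64, total: u32) -> Vec<u32> {
    let mut out = (0..total).collect::<Vec<_>>();
    out.shuffle(&mut StdRng::seed_from_u64(seed)); // stand-in for transcript noise
    out
}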
@@ -492,7 +495,7 @@ pub struct CheckingData<H: Hasher> {
    root: H::Digest,
    checking_matrix: Matrix,
    encoded_checksum: Matrix,
-    shuffled_indices: Vec<Location>,
+    shuffled_indices: Vec<u32>,
}

impl<H: Hasher> CheckingData<H> {
@@ -550,24 +553,29 @@ impl<H: Hasher> CheckingData<H> {
        let index = index as usize;
        let these_shuffled_indices = &self.shuffled_indices
            [index * self.topology.samples..(index + 1) * self.topology.samples];
-        let proof_elements = {
-            these_shuffled_indices
-                .iter()
-                .zip(reshard.shard.iter())
-                .map(|(&i, row)| (row_digest::<H>(row), i))
-                .collect::<Vec<_>>()
-        };
-        if !reshard.inclusion_proof.verify_multi_inclusion(
-            &mut StandardHasher::<H>::new(),
-            &proof_elements,
-            &self.root,
-        ) {
+
+        // Build elements for BMT multi-proof verification using the deterministically
+        // computed indices for this shard
+        let proof_elements: Vec<(H::Digest, u32)> = these_shuffled_indices
+            .iter()
+            .zip(reshard.shard.iter())
+            .map(|(&i, row)| (row_digest::<H>(row), i))
+            .collect();
+
+        // Verify the multi-proof
+        let mut hasher = H::new();
+        if reshard
+            .inclusion_proof
+            .verify_multi_inclusion(&mut hasher, &proof_elements, &self.root)
+            .is_err()
+        {
            return Err(Error::InvalidReShard);
        }

        let shard_checksum = reshard.shard.mul(&self.checking_matrix);
        // Check that the shard checksum rows match the encoded checksums
        for (row, &i) in shard_checksum.iter().zip(these_shuffled_indices) {
-            if row != &self.encoded_checksum[u64::from(i) as usize] {
+            if row != &self.encoded_checksum[i as usize] {
                return Err(Error::InvalidReShard);
            }
        }
@@ -591,7 +599,7 @@ pub enum Error {
    #[error("insufficient unique rows {0} < {1}")]
    InsufficientUniqueRows(usize, usize),
    #[error("failed to create inclusion proof: {0}")]
-    FailedToCreateInclusionProof(MmrError),
+    FailedToCreateInclusionProof(BmtError),
}

// TODO (#2506): rename this to `_COMMONWARE_CODING_ZODA`
@@ -642,17 +650,16 @@ impl<H: Hasher> Scheme for Zoda<H> {
            .evaluate()
            .data();

-        // Step 3: Commit to the rows of the data.
-        let mut hasher = StandardHasher::<H>::new();
-        let mut mmr = DirtyMmr::new();
-        let row_hashes = strategy.map_collect_vec(0..encoded_data.rows(), |i| {
+        // Step 3: Commit to the rows of the data using a Binary Merkle Tree.
+        let row_hashes: Vec<H::Digest> = strategy.map_collect_vec(0..encoded_data.rows(), |i| {
            row_digest::<H>(&encoded_data[i])
        });
+        let mut bmt_builder = BmtBuilder::<H>::new(row_hashes.len());
        for hash in &row_hashes {
-            mmr.add(&mut hasher, hash);
+            bmt_builder.add(hash);
        }
-        let mmr = mmr.merkleize(&mut hasher, None);
-        let root = *mmr.root();
+        let bmt = bmt_builder.build();
+        let root = bmt.root();

        // Step 4: Commit to the root, and the size of the data.
        let mut transcript = Transcript::new(NAMESPACE);
@@ -668,20 +675,20 @@ impl<H: Hasher> Scheme for Zoda<H> {
        // Step 6: Multiply the data with the checking matrix.
        let checksum = Arc::new(data.mul(&checking_matrix));

-        // Step 7: Produce the shards.
-        // We can't use "chunks" because we need to handle a sample size of 0
-        let index_chunks = (0..topology.total_shards)
-            .map(|i| &shuffled_indices[i * topology.samples..(i + 1) * topology.samples]);
-        let shards = index_chunks
-            .map(|indices| {
+        // Step 7: Produce the shards in parallel.
+        let shard_results: Vec<Result<Shard<H>, Error>> =
+            strategy.map_collect_vec(0..topology.total_shards, |shard_idx| {
+                let indices = &shuffled_indices
+                    [shard_idx * topology.samples..(shard_idx + 1) * topology.samples];
                let rows = Matrix::init(
                    indices.len(),
                    topology.data_cols,
                    indices
                        .iter()
-                        .flat_map(|&i| encoded_data[u64::from(i) as usize].iter().copied()),
+                        .flat_map(|&i| encoded_data[i as usize].iter().copied()),
                );
-                let inclusion_proof = block_on(multi_proof(&mmr, indices))
+                let inclusion_proof = bmt
+                    .multi_proof(indices)
                    .map_err(Error::FailedToCreateInclusionProof)?;
                Ok(Shard {
                    data_bytes,
@@ -690,7 +697,9 @@ impl<H: Hasher> Scheme for Zoda<H> {
                    rows,
                    checksum: checksum.clone(),
                })
-            })
+            });
+        let shards = shard_results
+            .into_iter()
            .collect::<Result<Vec<_>, Error>>()?;
        Ok((commitment, shards))
    }
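The Step 7 rewrite leans on the standard `Vec<Result<T, E>>` to `Result<Vec<T>, E>` collect: every closure still runs to completion under the parallel `map_collect_vec` (presumably, since the map itself cannot short-circuit), and the final fold surfaces the first proof-generation error. A minimal standalone illustration of that pattern:

// Folding per-shard results into one Result: Ok(all shards) if every proof
// succeeded, otherwise the first Err encountered.
fn main() {
    let ok: Vec<Result<u32, String>> = vec![Ok(1), Ok(2), Ok(3)];
    assert_eq!(ok.into_iter().collect::<Result<Vec<_>, _>>(), Ok(vec![1, 2, 3]));

    let failed: Vec<Result<u32, String>> = vec![Ok(1), Err("bad proof".into())];
    assert!(failed.into_iter().collect::<Result<Vec<_>, _>>().is_err());
}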
6 changes: 3 additions & 3 deletions storage/conformance.toml
@@ -18,11 +18,11 @@ hash = "3cb6882637c1c1a929a50b3ab425311f3ef342184dc46a80b1eae616ca7b64a4"
n_cases = 65536
hash = "20f5ef35a4bbd3a40852e907df519c724e5ce24d9f929e84947fd971a2256d02"

-["commonware_storage::bmt::tests::conformance::CodecConformance<Proof<Sha256>>"]
+["commonware_storage::bmt::tests::conformance::CodecConformance<Proof<Sha256Digest>>"]
n_cases = 65536
-hash = "c1f1d4c35fcd50931d7c36cbcddbb1c0a93afef9a93945cdd3efadf68ff53328"
+hash = "6d6382956289a2f706581a4b1afa08c5cd8e8a4f55b11d454425333b6537cc17"

-["commonware_storage::bmt::tests::conformance::CodecConformance<RangeProof<Sha256>>"]
+["commonware_storage::bmt::tests::conformance::CodecConformance<RangeProof<Sha256Digest>>"]
n_cases = 65536
hash = "6ecb0491b09443f1f93c178af5472f138ddc71b3e8c0c106f32eafca617b56af"