diff --git a/icechunk/src/change_set.rs b/icechunk/src/change_set.rs
index 6958ed09e..9a381d78a 100644
--- a/icechunk/src/change_set.rs
+++ b/icechunk/src/change_set.rs
@@ -716,12 +716,13 @@ mod tests {
     use super::ChangeSet;
     use crate::{
-        change_set::{ArrayData, MoveTracker},
+        change_set::{ArrayData, EditChanges, MoveTracker},
         format::{
            ChunkIndices, NodeId, Path,
            manifest::{ChunkInfo, ChunkPayload, ManifestSplits},
            snapshot::ArrayShape,
        },
+        roundtrip_serialization_tests,
     };

     #[icechunk_macros::test]
@@ -965,4 +966,40 @@ mod tests {
             &Path::new("/other").unwrap(),
         );
     }
+
+    use crate::strategies::{
+        array_data, bytes, chunk_indices2, gen_move, manifest_extents, node_id, path,
+        split_manifest,
+    };
+    use proptest::collection::{btree_map, hash_map, hash_set, vec};
+    use proptest::prelude::*;
+
+    prop_compose! {
+        fn edit_changes()(num_of_dims in any::<u8>().prop_map(usize::from))(
+            new_groups in hash_map(path(), (node_id(), bytes()), 3..7),
+            new_arrays in hash_map(path(), (node_id(), array_data()), 3..7),
+            updated_arrays in hash_map(node_id(), array_data(), 3..7),
+            updated_groups in hash_map(node_id(), bytes(), 3..7),
+            set_chunks in btree_map(node_id(),
+                hash_map(manifest_extents(num_of_dims), split_manifest(), 3..7),
+                3..7),
+            deleted_chunks_outside_bounds in btree_map(node_id(), hash_set(chunk_indices2(), 3..8), 3..7),
+            deleted_groups in hash_set((path(), node_id()), 3..7),
+            deleted_arrays in hash_set((path(), node_id()), 3..7)
+        ) -> EditChanges {
+            EditChanges{new_groups, updated_groups, updated_arrays, set_chunks, deleted_chunks_outside_bounds, deleted_arrays, deleted_groups, new_arrays}
+        }
+    }
+
+    fn move_tracker() -> impl Strategy<Value = MoveTracker> {
+        vec(gen_move(), 1..5).prop_map(MoveTracker).boxed()
+    }
+
+    fn change_set() -> impl Strategy<Value = ChangeSet> {
+        use ChangeSet::*;
+        prop_oneof![edit_changes().prop_map(Edit), move_tracker().prop_map(Rearrange)]
+            .boxed()
+    }
+
+    roundtrip_serialization_tests!(serialize_and_deserialize_change_sets - change_set);
 }
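The `edit_changes()` strategy above uses `prop_compose!`'s two-stage form: values drawn in the first parameter list (here `num_of_dims`) can parameterize the strategies in the second list, which is how every generated `ManifestExtents` key ends up with the same dimensionality. A minimal standalone sketch of that pattern (the `sized_pair` name and types are illustrative, not part of this change):

```rust
use proptest::prelude::*;

prop_compose! {
    // The first list draws `len`; the second list may use it to size other
    // strategies, mirroring how edit_changes() threads num_of_dims into
    // manifest_extents().
    fn sized_pair()(len in 1usize..5)(
        a in proptest::collection::vec(any::<u8>(), len),
        b in proptest::collection::vec(any::<u8>(), len)
    ) -> (Vec<u8>, Vec<u8>) {
        (a, b)
    }
}
```

Both generated vectors always share the length chosen in the first stage, just as all extents in a generated `EditChanges` share one dimensionality.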
diff --git a/icechunk/src/config.rs b/icechunk/src/config.rs
index a5f80bda0..7877c9db4 100644
--- a/icechunk/src/config.rs
+++ b/icechunk/src/config.rs
@@ -632,25 +632,16 @@ pub enum Credentials {
     S3(S3Credentials),
     Gcs(GcsCredentials),
     Azure(AzureCredentials),
 }

-#[cfg(test)]
-#[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
-mod tests {
-    use crate::strategies::{
-        azure_credentials, gcs_static_credentials, repository_config,
-        s3_static_credentials,
-    };
-
-    use proptest::prelude::*;
-
-    // This macro is used for creating property tests
-    // which check that serializing and deserializing
-    // an instance of a type T is equivalent to the
-    // identity function
-    // Given pairs of test names and arbitraries to be used
-    // for the tests, e.g., (n1, a1), (n2, a2),... (nx, ax)
-    // the tests can be created by doing
-    // roundtrip_serialization_tests!(n1 - a1, n2 - a2, .... nx - ax)
-    macro_rules! roundtrip_serialization_tests {
+// This macro is used for creating property tests
+// which check that serializing and deserializing
+// an instance of a type T is equivalent to the
+// identity function
+// Given pairs of test names and arbitraries to be used
+// for the tests, e.g., (n1, a1), (n2, a2),... (nx, ax)
+// the tests can be created by doing
+// roundtrip_serialization_tests!(n1 - a1, n2 - a2, .... nx - ax)
+#[macro_export]
+macro_rules! roundtrip_serialization_tests {
     ($($test_name: ident - $generator: ident), +) => {
         $(
             proptest!{
@@ -663,6 +654,16 @@ mod tests {
         }
     }

+#[cfg(test)]
+#[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
+mod tests {
+    use crate::strategies::{
+        azure_credentials, gcs_static_credentials, repository_config,
+        s3_static_credentials,
+    };
+
+    use proptest::prelude::*;
+
     roundtrip_serialization_tests!(
         test_config_roundtrip - repository_config,
         test_s3_static_credentials_roundtrip - s3_static_credentials,
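With the macro moved to module scope and marked `#[macro_export]`, it resolves at the crate root, so any module's test block can wire a test name to a strategy function the same way config.rs and change_set.rs do. A hypothetical invocation (the test name below is illustrative; `repository_config` is one of the existing public strategies):

```rust
#[cfg(test)]
mod tests {
    use crate::roundtrip_serialization_tests;
    use crate::strategies::repository_config;
    use proptest::prelude::*;

    // Expands to a property test checking that serializing and then
    // deserializing a generated RepositoryConfig yields an equal value.
    roundtrip_serialization_tests!(example_repository_config_roundtrip - repository_config);
}
```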
diff --git a/icechunk/src/format/manifest.rs b/icechunk/src/format/manifest.rs
index 44bcd52e6..68bd647ef 100644
--- a/icechunk/src/format/manifest.rs
+++ b/icechunk/src/format/manifest.rs
@@ -636,7 +636,9 @@ static ROOT_OPTIONS: VerifierOptions = VerifierOptions {
 #[allow(clippy::unwrap_used, clippy::panic)]
 mod tests {
     use super::*;
-    use crate::strategies::{ShapeDim, manifest_extents, shapes_and_dims};
+    use crate::strategies::{
+        ShapeDim, manifest_extents, manifest_extents2, shapes_and_dims,
+    };
     use icechunk_macros;
     use itertools::{all, multizip};
     use proptest::collection::vec;
@@ -683,7 +685,7 @@
     fn test_property_extents_widths(
-        #[strategy(manifest_extents(4))] extent1: ManifestExtents,
+        #[strategy(manifest_extents2(4))] extent1: ManifestExtents,
         #[strategy(vec(0..100, 4))] delta_left: Vec<i32>,
         #[strategy(vec(0..100, 4))] delta_right: Vec<i32>,
     ) {
@@ -732,6 +734,7 @@
                 ..((extent.end as i32 - width - low) as u32)
             }),
         );
+
         prop_assert_eq!(extent2.overlap_with(&extent1), Overlap::None);

         let extent2 = ManifestExtents::from_ranges_iter(
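The property test above now takes `extent1` from `manifest_extents2`, the constructive generator that always yields extents with well-behaved widths, while `manifest_extents` is redefined further down (in strategies.rs) to admit any non-empty range. A standalone sketch of the two generator styles, with illustrative names and bounds:

```rust
use proptest::prelude::*;
use std::ops::Range;

// Filter style, like the redefined manifest_extents(): draw an arbitrary
// range and keep it only if it is non-empty.
fn filtered_range() -> impl Strategy<Value = Range<u32>> {
    any::<Range<u32>>().prop_filter("range must be non-empty", |r| !r.is_empty())
}

// Constructive style, like manifest_extents2(): build a start and a positive
// width, so every generated range is non-empty and nothing gets discarded.
fn constructed_range() -> impl Strategy<Value = Range<u32>> {
    (0u32..1000, 1u32..1000).prop_map(|(start, width)| start..start + width)
}
```

The filter style covers a wider input space but rejects some draws; the constructive style never rejects, which matters for properties that do arithmetic on the extent widths.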
diff --git a/icechunk/src/storage/redirect.rs b/icechunk/src/storage/redirect.rs
index 31993ed4a..0e8b9bf70 100644
--- a/icechunk/src/storage/redirect.rs
+++ b/icechunk/src/storage/redirect.rs
@@ -77,6 +77,7 @@ impl RedirectStorage {
                 "Cannot build http client for redirect Storage instance: {e}"
             )))
         })?;
+
         let req = client.get(self.url.clone()).build().map_err(|e| {
             StorageError::from(StorageErrorKind::BadRedirect(format!(
                 "Cannot build http request for redirect Storage instance: {e}"
diff --git a/icechunk/src/strategies.rs b/icechunk/src/strategies.rs
index 4361a9f2b..b4b1592d9 100644
--- a/icechunk/src/strategies.rs
+++ b/icechunk/src/strategies.rs
@@ -7,24 +7,35 @@ use crate::config::{
     S3StaticCredentials,
 };
 use crate::format::format_constants::SpecVersionBin;
-use crate::format::manifest::ManifestExtents;
+use crate::format::manifest::{
+    ChunkPayload, ChunkRef, ManifestExtents, SecondsSinceEpoch, VirtualChunkLocation,
+    VirtualChunkRef,
+};
 use crate::format::snapshot::{ArrayShape, DimensionName};
-use crate::format::{ChunkIndices, Path};
+use crate::format::{ChunkId, ChunkIndices, NodeId, Path, manifest};
 use crate::session::Session;
 use crate::storage::{
-    ConcurrencySettings, RetriesSettings, Settings, new_in_memory_storage,
+    ConcurrencySettings, ETag, RetriesSettings, Settings, new_in_memory_storage,
 };
 use crate::virtual_chunks::VirtualChunkContainer;
 use crate::{ObjectStoreConfig, Repository, RepositoryConfig};
+use bytes::Bytes;
 use chrono::{DateTime, Utc};
 use prop::string::string_regex;
+use proptest::collection::{btree_map, vec};
 use proptest::prelude::*;
-use proptest::{collection::vec, option, strategy::Strategy};
-use std::collections::HashMap;
+use proptest::{
+    array::{uniform8, uniform12},
+    option,
+    strategy::Strategy,
+};
+use std::collections::{BTreeMap, HashMap};
 use std::num::{NonZeroU16, NonZeroU64};
 use std::ops::{Bound, Range};
 use std::path::PathBuf;
+use crate::change_set::{ArrayData, Move};
+

 const MAX_NDIM: usize = 4;

 pub fn node_paths() -> impl Strategy<Value = Path> {
@@ -108,7 +119,7 @@ pub fn shapes_and_dims(max_ndim: Option<usize>) -> impl Strategy<Value = ShapeDim>
     })
 }

-pub fn manifest_extents(ndim: usize) -> impl Strategy<Value = ManifestExtents> {
+pub fn manifest_extents2(ndim: usize) -> impl Strategy<Value = ManifestExtents> {
     (vec(0u32..1000u32, ndim), vec(1u32..1000u32, ndim)).prop_map(|(start, delta)| {
         let stop = std::iter::zip(start.iter(), delta.iter())
             .map(|(s, d)| s + d)
@@ -117,8 +128,19 @@ pub fn manifest_extents(ndim: usize) -> impl Strategy<Value = ManifestExtents> {
     })
 }

+pub fn manifest_extents(ndim: usize) -> impl Strategy<Value = ManifestExtents> {
+    vec(
+        any::<Range<u32>>()
+            .prop_filter("Could not construct a nonempty range", |range| {
+                !range.is_empty()
+            }),
+        ndim,
+    )
+    .prop_map(ManifestExtents::from_ranges_iter)
+}
+
 prop_compose! {
-    pub fn chunk_indices(dim: usize, values_in: Range<u32>)(v in proptest::collection::vec(values_in, dim..(dim+1))) -> ChunkIndices {
+    pub fn chunk_indices(dim: usize, values_in: Range<u32>)(v in vec(values_in, dim..(dim+1))) -> ChunkIndices {
         ChunkIndices(v)
     }
 }
@@ -219,7 +241,7 @@ pub fn object_store_config() -> BoxedStrategy<ObjectStoreConfig> {
     use ObjectStoreConfig::*;
     prop_oneof![
         Just(InMemory),
-        proptest::collection::vec(string_regex("[a-zA-Z0-9\\-_]+").unwrap(), 1..4)
+        vec(string_regex("[a-zA-Z0-9\\-_]+").unwrap(), 1..4)
            .prop_map(|s| LocalFileSystem(PathBuf::from(s.join("/")))),
         s3_options().prop_map(S3),
         s3_options().prop_map(S3Compatible),
@@ -255,8 +277,8 @@ pub fn manifest_preload_condition() -> BoxedStrategy<ManifestPreloadCondition> {
     ];
     leaf.prop_recursive(4, 20, 5, |inner| {
         prop_oneof![
-            proptest::collection::vec(inner.clone(), 1..4).prop_map(Or),
-            proptest::collection::vec(inner.clone(), 1..4).prop_map(And),
+            vec(inner.clone(), 1..4).prop_map(Or),
+            vec(inner.clone(), 1..4).prop_map(And),
         ]
     })
     .boxed()
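The `manifest_preload_condition()` context above shows proptest's `prop_recursive` combinator: leaves are generated first, then the closure receives a boxed version of the whole strategy so nested `And`/`Or` branches stay depth-bounded. A self-contained sketch of the same pattern with a toy expression type (names here are illustrative):

```rust
use proptest::prelude::*;

#[derive(Debug, Clone)]
enum Expr {
    Lit(bool),
    AnyOf(Vec<Expr>),
    AllOf(Vec<Expr>),
}

fn expr() -> impl Strategy<Value = Expr> {
    let leaf = any::<bool>().prop_map(Expr::Lit);
    // Arguments: maximum recursion depth, desired total size, expected branch size.
    leaf.prop_recursive(4, 20, 5, |inner| {
        prop_oneof![
            proptest::collection::vec(inner.clone(), 1..4).prop_map(Expr::AnyOf),
            proptest::collection::vec(inner, 1..4).prop_map(Expr::AllOf),
        ]
    })
}
```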
@@ -305,7 +327,7 @@
 prop_compose! {
     pub fn split_sizes()
-        (condition in manifest_split_condition(), dims in proptest::collection::vec(manifest_split_dim(), 1..5))
+        (condition in manifest_split_condition(), dims in vec(manifest_split_dim(), 1..5))
        -> (ManifestSplitCondition, Vec<(ManifestSplitDimCondition, u32)>) {
        (condition, dims)
    }
 }
@@ -313,7 +335,7 @@
 prop_compose! {
     pub fn manifest_splitting_config()
-        (sizes in option::of(proptest::collection::vec(split_sizes(), 1..5)))
+        (sizes in option::of(vec(split_sizes(), 1..5)))
        -> ManifestSplittingConfig {
        ManifestSplittingConfig{split_sizes: sizes}
    }
 }
@@ -329,7 +351,7 @@ prop_compose! {
 prop_compose! {
     pub fn virtual_chunk_containers()
-        (containers in proptest::collection::vec(virtual_chunk_container(), 0..10))
+        (containers in vec(virtual_chunk_container(), 0..10))
        -> HashMap<String, VirtualChunkContainer> {
        containers.into_iter().map(|cont| (cont.url_prefix().to_string(), cont)).collect()
    }
 }
@@ -456,3 +478,141 @@ pub fn azure_credentials() -> BoxedStrategy<AzureCredentials> {
     use AzureCredentials::*;
     prop_oneof![Just(FromEnv), azure_static_credentials().prop_map(Static)].boxed()
 }
+
+fn path_component() -> impl Strategy<Value = String> {
+    string_regex("[a-zA-Z0-9]{10}").expect("Could not generate a valid path component")
+}
+
+fn file_path_components() -> impl Strategy<Value = Vec<String>> {
+    vec(path_component(), 8..15)
+}
+
+// Given a collection of directory names, an absolute Unix style path
+// using the directory names in order is generated
+fn to_abs_unix_path(path_components: Vec<String>) -> String {
+    format!("/{}", path_components.join("/"))
+}
+
+// Generates Unix style absolute file paths
+fn absolute_path() -> impl Strategy<Value = String> {
+    file_path_components().prop_map(to_abs_unix_path)
+}
+
+pub fn path() -> impl Strategy<Value = Path> {
+    absolute_path().prop_filter_map("Could not generate a valid path", |abs_path| {
+        Path::new(&abs_path).ok()
+    })
+}
+
+type DimensionShapeInfo = (u64, u64);
+
+prop_compose! {
+    fn dimension_shape_info()(dim_length in any::<u64>(), chunk_length in any::<NonZeroU64>()) -> DimensionShapeInfo {
+        (dim_length, chunk_length.get())
+    }
+}
+
+prop_compose! {
+    fn array_shape()(dimensions in vec(dimension_shape_info(), 5..30)) -> ArrayShape {
+        ArrayShape::new(dimensions).unwrap()
+    }
+}
+
+fn dimension_name() -> impl Strategy<Value = DimensionName> {
+    use DimensionName::*;
+    prop_oneof![Just(NotSpecified), any::<String>().prop_map(Name)]
+}
+
+prop_compose! {
+    pub fn bytes()(random_data in any::<Vec<u8>>()) -> Bytes {
+        Bytes::from(random_data)
+    }
+}
+
+prop_compose! {
+    pub fn array_data()(shape in array_shape(),
+                        dimension_names in option::of(vec(dimension_name(), 5..10)),
+                        user_data in bytes()) -> ArrayData {
+        ArrayData{shape, dimension_names, user_data}
+    }
+}
+
+pub fn node_id() -> impl Strategy<Value = NodeId> {
+    uniform8(any::<u8>()).prop_map(NodeId::new)
+}
+
+fn chunk_id() -> impl Strategy<Value = ChunkId> {
+    uniform12(any::<u8>()).prop_map(ChunkId::new)
+}
+
+prop_compose! {
+    fn chunk_ref()(id in chunk_id(), offset in any::<u64>(), length in any::<u64>()) -> ChunkRef {
+        ChunkRef{ id, offset, length }
+    }
+}
+
+fn etag() -> impl Strategy<Value = ETag> {
+    any::<String>().prop_map(ETag)
+}
+fn checksum() -> impl Strategy<Value = manifest::Checksum> {
+    use manifest::Checksum::*;
+    prop_oneof![
+        any::<u32>().prop_map(SecondsSinceEpoch).prop_map(LastModified),
+        etag().prop_map(ETag)
+    ]
+}
+
+fn non_empty_alphanumeric_string() -> impl Strategy<Value = String> {
+    string_regex("[a-zA-Z0-9]{1,}")
+        .expect("Could not generate a valid nonempty alphanumeric string")
+}
+
+prop_compose! {
+    fn url_with_host_and_path()(protocol in transfer_protocol(),
+                                host in non_empty_alphanumeric_string(),
+                                path in non_empty_alphanumeric_string()) -> String {
+        format!("{}://{}/{}", protocol, host, path)
+    }
+}
+
+fn virtual_chunk_location() -> impl Strategy<Value = VirtualChunkLocation> {
+    url_with_host_and_path()
+        .prop_filter_map("Could not generate url with valid host and path", |url| {
+            VirtualChunkLocation::from_absolute_path(&url).ok()
+        })
+}
+
+prop_compose! {
+    fn virtual_chunk_ref()(location in virtual_chunk_location(),
+                           offset in any::<u64>(),
+                           length in any::<u64>(),
+                           checksum in option::of(checksum())) -> VirtualChunkRef {
+        VirtualChunkRef{ location, offset, length, checksum }
+    }
+}
+
+fn chunk_payload() -> impl Strategy<Value = ChunkPayload> {
+    use ChunkPayload::*;
+    prop_oneof![
+        bytes().prop_map(Inline),
+        virtual_chunk_ref().prop_map(Virtual),
+        chunk_ref().prop_map(Ref)
+    ]
+}
+
+type SplitManifest = BTreeMap<ChunkIndices, Option<ChunkPayload>>;
+
+pub fn chunk_indices2() -> impl Strategy<Value = ChunkIndices> {
+    (any::<u8>().prop_map(usize::from), any::<Range<u32>>())
+        .prop_flat_map(|(dim, data)| chunk_indices(dim, data))
+}
+
+pub fn split_manifest() -> impl Strategy<Value = SplitManifest> {
+    btree_map(chunk_indices2(), option::of(chunk_payload()), 3..10)
+}
+
+prop_compose! {
+    pub fn gen_move()(to in path(), from in path()) -> Move {
+        Move{to, from}
+    }
+}
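`chunk_indices2()` relies on `prop_flat_map` to pick a dimensionality first and then generate coordinates for exactly that many dimensions, the standard way to make one strategy's output shape another's. A standalone analog with illustrative names and bounds:

```rust
use proptest::prelude::*;

// Choose a dimension count, then generate that many coordinates.
fn indices() -> impl Strategy<Value = Vec<u32>> {
    (1usize..=4).prop_flat_map(|dim| proptest::collection::vec(0u32..1000, dim))
}

proptest! {
    #[test]
    fn indices_respect_chosen_dimension(v in indices()) {
        prop_assert!((1..=4usize).contains(&v.len()));
    }
}
```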