diff --git a/Cargo.lock b/Cargo.lock index c7e97db3d..4568027a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1868,6 +1868,7 @@ dependencies = [ "typed-path", "typetag", "url", + "urlencoding", "warp", "zstd", ] diff --git a/icechunk/Cargo.toml b/icechunk/Cargo.toml index d380fe2d9..e3fbe8f2c 100644 --- a/icechunk/Cargo.toml +++ b/icechunk/Cargo.toml @@ -65,8 +65,9 @@ anyhow = { version = "1.0.100", optional = true } dialoguer = { version = "0.12.0", optional = true } dirs = { version = "6.0.0", optional = true } assert_fs = { version = "1.1.3", optional = true } -flatbuffers = "25.2.10" flexbuffers = "25.9.23" +flatbuffers = "25.9.23" +urlencoding = "2.1.3" # reqwest's default-features enables default-tls which ultimately leads to an openssl dependencies # which we are not including as it is not included in manylinux wheels. Instead depend on rustls-tls only diff --git a/icechunk/src/format/manifest.rs b/icechunk/src/format/manifest.rs index ead283ee5..44bcd52e6 100644 --- a/icechunk/src/format/manifest.rs +++ b/icechunk/src/format/manifest.rs @@ -4,6 +4,7 @@ use std::{ convert::Infallible, iter::zip, ops::Range, + string::FromUtf8Error, sync::Arc, }; @@ -241,6 +242,8 @@ pub enum VirtualReferenceErrorKind { InvalidObjectSize { expected: u64, available: u64 }, #[error("azure store configuration must include an account")] AzureConfigurationMustIncludeAccount, + #[error("decoding virtual chunk url")] + Decoding(#[from] FromUtf8Error), #[error("unknown error")] OtherError(#[from] Box), } diff --git a/icechunk/src/virtual_chunks.rs b/icechunk/src/virtual_chunks.rs index bf0062ee3..27e2f412f 100644 --- a/icechunk/src/virtual_chunks.rs +++ b/icechunk/src/virtual_chunks.rs @@ -572,8 +572,8 @@ impl ChunkFetcher for S3Fetcher { ))? }; - let key = chunk_location.path(); - let key = key.strip_prefix('/').unwrap_or(key); + let key = urlencoding::decode(chunk_location.path())?; + let key = key.strip_prefix('/').unwrap_or(key.as_ref()); let mut b = self .client @@ -771,7 +771,7 @@ impl ChunkFetcher for ObjectStoreFetcher { None => {} } - let path = Path::parse(chunk_location.path()) + let path = Path::parse(urlencoding::decode(chunk_location.path())?) .map_err(|e| VirtualReferenceErrorKind::OtherError(Box::new(e)))?; match self.client.get_opts(&path, options).await { diff --git a/icechunk/tests/test_virtual_refs.rs b/icechunk/tests/test_virtual_refs.rs index ef06396ef..6db877cd0 100644 --- a/icechunk/tests/test_virtual_refs.rs +++ b/icechunk/tests/test_virtual_refs.rs @@ -401,7 +401,7 @@ async fn test_repository_with_minio_virtual_refs() -> Result<(), Box> let bytes2 = Bytes::copy_from_slice(b"second0000"); let chunks = [ ("/path/to/chunk-1".into(), bytes1.clone()), - ("/path/to/chunk-2".into(), bytes2.clone()), + ("/path with spaces/to/chunk-2".into(), bytes2.clone()), ]; write_chunks_to_minio(chunks.iter().cloned()).await; @@ -542,7 +542,7 @@ async fn test_zarr_store_virtual_refs_minio_set_and_get() let bytes2 = Bytes::copy_from_slice(b"second0000"); let chunks = [ ("/path/to/chunk-1".into(), bytes1.clone()), - ("/path/to/chunk-2".into(), bytes2.clone()), + ("/path with spaces/to/chunk-2".into(), bytes2.clone()), ]; write_chunks_to_minio(chunks.iter().cloned()).await; @@ -867,7 +867,7 @@ async fn test_zarr_store_with_multiple_virtual_chunk_containers() let minio_bytes3 = Bytes::copy_from_slice(b"modified"); let chunks = [ ("/path/to/chunk-1".into(), minio_bytes1.clone()), - ("/path/to/chunk-2".into(), minio_bytes2.clone()), + ("/path with spaces/to/chunk-2".into(), minio_bytes2.clone()), ("/path/to/chunk-3".into(), minio_bytes3.clone()), ]; write_chunks_to_minio(chunks.iter().cloned()).await; @@ -1047,7 +1047,7 @@ async fn test_zarr_store_with_multiple_virtual_chunk_containers() [ "s3://earthmover-sample-data/netcdf/oscar_vel2018.nc".to_string(), "s3://testbucket/path/to/chunk-1".to_string(), - "s3://testbucket/path/to/chunk-2".to_string(), + "s3://testbucket/path%20with%20spaces/to/chunk-2".to_string(), "s3://testbucket/path/to/chunk-3".to_string(), format!("file://{}", local_chunks[0].0), format!("file://{}", local_chunks[1].0),