earth-mover · dcherian · May 13, 2025 · Feb 27, 2025 · Mar 4, 2025 · Mar 6, 2025
diff --git a/icechunk/src/config.rs b/icechunk/src/config.rs
@@ -9,9 +9,11 @@ use std::{
 use async_trait::async_trait;
 use chrono::{DateTime, Utc};
 pub use object_store::gcp::GcpCredential;
+use regex::bytes::Regex;
 use serde::{Deserialize, Serialize};
 
 use crate::{
+    format::Path,
     storage,
     virtual_chunks::{ContainerName, VirtualChunkContainer, mk_default_containers},
 };
@@ -128,6 +130,82 @@ impl CachingConfig {
     }
 }
 
+#[derive(Debug, PartialEq, Eq, Serialize, Hash, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum ManifestShardCondition {
+    Or(Vec<ManifestShardCondition>), // matches if any inner condition matches
+    And(Vec<ManifestShardCondition>), // matches if all inner conditions match
+    PathMatches { regex: String }, // regex applied to the path's string form
+    NameMatches { regex: String }, // regex applied to `path.name()`; no name, no match
+}
+
+//```yaml
+//rules:
+//  - path: ./2m_temperature  # regex, 3D variable: (null, latitude, longitude)
+//    manifest-split-sizes:
+//      - 0: 120
+//  - path: ./temperature  # 4D variable: (time, level, latitude, longitude)
+//    manifest-split-sizes:
+//      - "level": 1  # alternatively 0: 1
+//      - "time": 12  #           and 1: 12
+//  - path: ./temperature
+//    manifest-split-sizes:
+//      - "level": 1
+//      - "time": 8760  # ~1 year
+//      - "latitude": null  # for unspecified, default is null, which means never split.
+//  - path: ./*   # the default rules
+//    manifest-split-sizes: null  # no splitting, just a single manifest per array
+//```
+
+impl ManifestShardCondition {
+    // Tests `path` against this condition. TODO: add a `from_yaml` constructor?
+    pub fn matches(&self, path: &Path) -> bool {
+        match self {
+            ManifestShardCondition::Or(vec) => vec.iter().any(|c| c.matches(path)),
+            ManifestShardCondition::And(vec) => vec.iter().all(|c| c.matches(path)),
+            // TODO: precompile the regex; for now an invalid regex simply never matches
+            ManifestShardCondition::PathMatches { regex } => Regex::new(regex)
+                .map(|regex| regex.is_match(path.to_string().as_bytes()))
+                .unwrap_or(false),
+            // TODO: precompile the regex; a path without a name never matches
+            ManifestShardCondition::NameMatches { regex } => Regex::new(regex)
+                .map(|regex| {
+                    path.name()
+                        .map(|name| regex.is_match(name.as_bytes()))
+                        .unwrap_or(false)
+                })
+                .unwrap_or(false),
+        }
+    }
+}
+
+// FIXME: isn't this really another condition?
+#[derive(Debug, Hash, PartialEq, Eq, Serialize, Deserialize, Clone)]
+pub enum ShardDimCondition {
+    Axis(usize), // presumably a dimension's position — TODO confirm
+    DimensionName(String), // presumably a dimension's name — TODO confirm
+    // TODO: Since a dimension name can be null,
+    // I don't think we can have DimensionName(r"*") catch the "Any" case
+    Any,
+}
+
+#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
+pub struct ManifestShardingConfig {
+    // TODO: a Vec, not a HashMap: the insertion order of conditions must be preserved
+    pub shard_sizes: Vec<(ManifestShardCondition, Vec<(ShardDimCondition, u32)>)>,
+}
+
+impl Default for ManifestShardingConfig {
+    fn default() -> Self { // one rule: path regex `.*`, any dimension, size u32::MAX
+        let inner = vec![(ShardDimCondition::Any, u32::MAX)];
+        let new = vec![(
+            ManifestShardCondition::PathMatches { regex: r".*".to_string() },
+            inner,
+        )];
+        Self { shard_sizes: new }
+    }
+}
+
 #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
 #[serde(rename_all = "snake_case")]
 pub enum ManifestPreloadCondition {
@@ -206,20 +284,33 @@ static DEFAULT_MANIFEST_PRELOAD_CONDITION: OnceLock<ManifestPreloadCondition> =
 #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Default)]
 pub struct ManifestConfig {
     pub preload: Option<ManifestPreloadConfig>,
+    pub sharding: Option<ManifestShardingConfig>, // `None`: `sharding()` uses the default
 }
 
 static DEFAULT_MANIFEST_PRELOAD_CONFIG: OnceLock<ManifestPreloadConfig> = OnceLock::new();
+static DEFAULT_MANIFEST_SHARDING_CONFIG: OnceLock<ManifestShardingConfig> =
+    OnceLock::new();
 
 impl ManifestConfig {
     pub fn merge(&self, other: Self) -> Self {
-        Self { preload: other.preload.or(self.preload.clone()) }
+        Self {
+            preload: other.preload.or(self.preload.clone()),
+            // FIXME: why prioritize `other` over `self` (`other` wins whenever it is set)?
+            sharding: other.sharding.or(self.sharding.clone()),
+        }
     }
 
     pub fn preload(&self) -> &ManifestPreloadConfig {
         self.preload.as_ref().unwrap_or_else(|| {
             DEFAULT_MANIFEST_PRELOAD_CONFIG.get_or_init(ManifestPreloadConfig::default)
         })
     }
+
+    pub fn sharding(&self) -> &ManifestShardingConfig { // explicit config, else the default
+        self.sharding.as_ref().unwrap_or_else(|| {
+            DEFAULT_MANIFEST_SHARDING_CONFIG.get_or_init(ManifestShardingConfig::default)
+        })
+    }
 }
 
 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default)]

diff --git a/icechunk/src/format/manifest.rs b/icechunk/src/format/manifest.rs
@@ -4,7 +4,7 @@ use crate::format::flatbuffers::generated;
 use bytes::Bytes;
 use flatbuffers::VerifierOptions;
 use futures::{Stream, TryStreamExt};
-use itertools::Itertools;
+use itertools::{Itertools, multiunzip, repeat_n};
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
 
@@ -21,12 +21,6 @@ use super::{
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct ManifestExtents(Vec<Range<u32>>);
 
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct ManifestRef {
-    pub object_id: ManifestId,
-    pub extents: ManifestExtents,
-}
-
 impl ManifestExtents {
     pub fn new(from: &[u32], to: &[u32]) -> Self {
         let v = from
@@ -37,9 +31,83 @@ impl ManifestExtents {
         Self(v)
     }
 
+    pub fn contains(&self, coord: &[u32]) -> bool { // ranks must match: zip truncates
+        self.0.len() == coord.len() && self.iter().zip(coord).all(|(r, c)| r.contains(c))
+    }
+
     pub fn iter(&self) -> impl Iterator<Item = &Range<u32>> {
         self.0.iter()
     }
+
+    pub fn len(&self) -> usize { // number of ranges held
+        self.0.len()
+    }
+
+    pub fn is_empty(&self) -> bool { // true when no ranges are held
+        self.0.is_empty()
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct ManifestRef { // a manifest id together with a set of extents
+    pub object_id: ManifestId,
+    pub extents: ManifestExtents,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ManifestShards(Vec<ManifestExtents>); // one `ManifestExtents` per shard
+
+impl ManifestShards {
+    /// A single shard whose extents are `0..u32::MAX` along each of `ndim` dimensions.
+    pub fn default(ndim: usize) -> Self {
+        Self(vec![ManifestExtents(repeat_n(0..u32::MAX, ndim).collect())])
+    }
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+    /// Builds one shard per combination of consecutive edge pairs, one pair taken from
+    /// each input vector; e.g. edges `[0, 2, 5]` contribute the pairs (0, 2) and (2, 5).
+    pub fn from_edges(iter: impl IntoIterator<Item = Vec<u32>>) -> Self {
+        let res = iter
+            .into_iter()
+            .map(|x| x.into_iter().tuple_windows())
+            .multi_cartesian_product()
+            .map(multiunzip)
+            .map(|(from, to): (Vec<u32>, Vec<u32>)| {
+                ManifestExtents::new(from.as_slice(), to.as_slice())
+            });
+        Self(res.collect())
+    }
+
+    /// Returns the index of the first shard whose extents contain `coord`.
+    ///
+    /// This can be used at write time to split manifests based on the config
+    /// and at read time to choose which manifest to query for chunk payload.
+    ///
+    /// # Errors
+    /// `InvalidIndexForSharding` if no shard contains `coord`: e.g. on a 2x2 shard grid
+    /// where only shards (0,0) and (1,1) exist, a coord located in (1,0) is an error.
+    pub fn which(&self, coord: &ChunkIndices) -> Result<usize, IcechunkFormatError> {
+        // Shards need not form a regular grid, so we iterate and take the first match;
+        // on success, 0 <= return value < self.len().
+        // Shards MUST NOT overlap with each other. How do we ensure this?
+        // TODO: assert that `coord` has the same number of dimensions as the shards.
+        // FIXME: could optimize for unbounded single manifest
+        self.iter().position(|e| e.contains(coord.0.as_slice())).ok_or_else(|| {
+            // Built lazily so that `coord` is only cloned on the error path.
+            IcechunkFormatError::from(IcechunkFormatErrorKind::InvalidIndexForSharding {
+                coords: coord.clone(),
+            })
+        })
+    }
+
+    pub fn iter(&self) -> impl Iterator<Item = &ManifestExtents> {
+        self.0.iter()
+    }
+
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+}
 
 #[derive(Debug, Error)]
@@ -206,7 +274,7 @@ impl Manifest {
         }
 
         if array_manifests.is_empty() {
-            // empty manifet
+            // empty manifest
             return Ok(None);
         }
 

diff --git a/icechunk/src/format/mod.rs b/icechunk/src/format/mod.rs
@@ -245,6 +245,8 @@ pub enum IcechunkFormatErrorKind {
     NodeNotFound { path: Path },
     #[error("chunk coordinates not found `{coords:?}`")]
     ChunkCoordinatesNotFound { coords: ChunkIndices },
+    #[error("invalid chunk index for sharding manifests: {coords:?}")]
+    InvalidIndexForSharding { coords: ChunkIndices },
     #[error("manifest information cannot be found in snapshot `{manifest_id}`")]
     ManifestInfoNotFound { manifest_id: ManifestId },
     #[error("invalid magic numbers in file")]

diff --git a/icechunk/src/format/snapshot.rs b/icechunk/src/format/snapshot.rs
@@ -1,4 +1,6 @@
-use std::{collections::BTreeMap, convert::Infallible, num::NonZeroU64, sync::Arc};
+use std::{
+    collections::BTreeMap, convert::Infallible, num::NonZeroU64, ops::Index, sync::Arc,
+};
 
 use bytes::Bytes;
 use chrono::{DateTime, Utc};
@@ -37,6 +39,17 @@ impl DimensionShape {
 pub struct ArrayShape(Vec<DimensionShape>);
 
 impl ArrayShape {
+    pub fn len(&self) -> usize { // number of dimensions in the shape
+        self.0.len()
+    }
+    pub fn is_empty(&self) -> bool { // true for a shape with no dimensions
+        self.0.is_empty()
+    }
+
+    pub fn num_chunks(&self) -> Vec<u32> { // per dimension: max permitted index + 1
+        self.max_chunk_indices_permitted().map(|x| x + 1).collect()
+    }
+
     pub fn new<I>(it: I) -> Option<Self>
     where
         I: IntoIterator<Item = (u64, u64)>,
@@ -87,6 +100,15 @@ impl ArrayShape {
     }
 }
 
+// Read-only indexing by dimension position; panics when `index` is out of bounds.
+impl Index<usize> for ArrayShape {
+    type Output = DimensionShape;
+
+    fn index(&self, index: usize) -> &DimensionShape {
+        &self.0[index]
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum DimensionName {
     NotSpecified,
@@ -102,6 +124,15 @@ impl From<Option<&str>> for DimensionName {
     }
 }
 
+impl From<DimensionName> for Option<String> {
+    fn from(value: DimensionName) -> Option<String> { // consumes `value`; nothing is cloned
+        match value {
+            DimensionName::NotSpecified => None,
+            DimensionName::Name(name) => Some(name),
+        }
+    }
+}
+
 impl From<&str> for DimensionName {
     fn from(value: &str) -> Self {
         if value.is_empty() {