diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ae08d346b..97ce8e683 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,6 @@ repos: entry: just pre-commit language: system pass_filenames: false - files: "icechunk/" + files: "(^|.*/)(icechunk/|icechunk-python/src/).*" exclude: 'tests/data/.*' diff --git a/Justfile b/Justfile index f511a6931..c927130cb 100644 --- a/Justfile +++ b/Justfile @@ -5,6 +5,9 @@ alias pre := pre-commit test *args='': cargo test --all --all-targets {{args}} +doctest *args='': + cargo test --doc {{args}} + # run all tests with logs enabled test-logs level *args='': RUST_LOG=icechunk={{level}} cargo test --all --all-targets {{args}} -- --nocapture @@ -46,6 +49,7 @@ pre-commit $RUSTFLAGS="-D warnings -W unreachable-pub -W bare-trait-objects": just build just format "--check" just lint "-p icechunk -p icechunk-python" + just doctest just test just run-all-examples just check-deps diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 13f28ad34..c8d506e98 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -6,11 +6,10 @@ build: python: "mambaforge-latest" jobs: - pre_create_environment: - - conda update --yes --quiet --name=base --channel=defaults conda install: - which mamba - - cd icechunk-python && maturin build && pip install ../target/wheels/*.whl && cd ../docs + - cd icechunk-python && maturin build && pip install "$(ls ../target/wheels/*.whl | head -n 1)[docs]" && cd ../docs + - mamba list # - cd icechunk-python && maturin develop && cd ../docs diff --git a/docs/doc-env.yml b/docs/doc-env.yml index 45d16f82a..20dee4679 100644 --- a/docs/doc-env.yml +++ b/docs/doc-env.yml @@ -4,39 +4,8 @@ channels: - conda-forge - nodefaults dependencies: - - python>=3.10 - - "sphinx" + - python>=3.12 - pip - maturin - - xarray - - pooch - - scipy - - dask - - cftime - - distributed - cairo - rust - - maturin - - pip: - - "myst_nb" - - mkdocs-awesome-pages-plugin - - "pydata_sphinx_theme" - - mkdocs-mermaid2-plugin - - markdown-exec - - mkdocs-breadcrumbs-plugin - - mkdocs-minify-plugin - - mkdocs-open-in-new-tab - - mkdocs - - mkdocs-material[imaging] - - mkdocstrings[python] - - mkdocs-jupyter - - mkdocs-git-revision-date-localized-plugin - - mkdocs-git-committers-plugin-2 - - mkdocs-macros-plugin - - mkdocs-include-markdown-plugin - - mkdocs-redirects - - mkdocs-git-authors-plugin - - "sphinx_copybutton" - - "sphinx_design" - - "sphinx_togglebutton" - - "sphinx-autodoc-typehints" diff --git a/docs/docs/icechunk-python/performance.md b/docs/docs/icechunk-python/performance.md new file mode 100644 index 000000000..fc94df52a --- /dev/null +++ b/docs/docs/icechunk-python/performance.md @@ -0,0 +1,90 @@ +# Performance + +!!! info + + This is advanced material, and you will need it only if you have arrays with more than a million chunks. + Icechunk aims to provide an excellent experience out of the box. + +## Preloading manifests + +Coming Soon. + +## Splitting manifests + +Icechunk stores chunk references in a chunk manifest file stored in `manifests/`. +For very large arrays (millions of chunks), these files can get quite large. +By default, Icechunk stores all chunk references in a single manifest file per array. +Requesting even a single chunk requires downloading the entire manifest. +In some cases, this can result in a slow time-to-first-byte or large memory usage. + +!!! note + + Note that the chunk sizes in the following examples are tiny for demonstration purposes. 
+
+To avoid that, Icechunk lets you split the manifest files by specifying a `ManifestSplittingConfig`.
+
+```python exec="on" session="perf" source="material-block"
+import icechunk as ic
+from icechunk import ManifestSplitCondition, ManifestSplittingConfig, ManifestSplitDimCondition
+
+split_config = ManifestSplittingConfig.from_dict(
+    {
+        ManifestSplitCondition.AnyArray(): {
+            ManifestSplitDimCondition.DimensionName("time"): 365 * 24
+        }
+    }
+)
+repo_config = ic.RepositoryConfig(manifest=ic.ManifestConfig(splitting=split_config))
+```
+
+Then pass the config to `Repository.open` or `Repository.create`:
+```python
+repo = ic.Repository.open(..., config=repo_config)
+```
+
+This example splits manifests so that each manifest file contains `365 * 24` chunks along the time dimension, and all chunks along every other dimension.
+
+Options for specifying the arrays whose manifests you want to split are:
+
+1. [`ManifestSplitCondition.name_matches`](./reference.md#icechunk.ManifestSplitCondition.name_matches) takes a regular expression used to match an array's name;
+2. [`ManifestSplitCondition.path_matches`](./reference.md#icechunk.ManifestSplitCondition.path_matches) takes a regular expression used to match an array's path;
+3. [`ManifestSplitCondition.and_conditions`](./reference.md#icechunk.ManifestSplitCondition.and_conditions) to combine (1), (2), and (4) together; and
+4. [`ManifestSplitCondition.or_conditions`](./reference.md#icechunk.ManifestSplitCondition.or_conditions) to combine (1), (2), and (3) together.
+
+`And` and `Or` may be used to combine multiple path and/or name matches. For example,
+```python exec="on" session="perf" source="material-block"
+array_condition = ManifestSplitCondition.or_conditions(
+    [
+        ManifestSplitCondition.name_matches("temperature"),
+        ManifestSplitCondition.name_matches("salinity"),
+    ]
+)
+sconfig = ManifestSplittingConfig.from_dict(
+    {array_condition: {ManifestSplitDimCondition.DimensionName("longitude"): 3}}
+)
+```
+
+Options for specifying how to split along a specific axis or dimension are:
+
+1. [`ManifestSplitDimCondition.Axis`](./reference.md#icechunk.ManifestSplitDimCondition.Axis) takes an integer axis;
+2. [`ManifestSplitDimCondition.DimensionName`](./reference.md#icechunk.ManifestSplitDimCondition.DimensionName) takes a regular expression used to match the dimension names of the array;
+3. [`ManifestSplitDimCondition.Any`](./reference.md#icechunk.ManifestSplitDimCondition.Any) matches any _remaining_ dimension name or axis.
+
+For example, for an array with dimensions `time, latitude, longitude`, the following config
+```python exec="on" session="perf" source="material-block"
+from icechunk import ManifestSplitDimCondition
+
+{
+    ManifestSplitDimCondition.DimensionName("longitude"): 3,
+    ManifestSplitDimCondition.Axis(1): 2,
+    ManifestSplitDimCondition.Any(): 1,
+}
+```
+will result in each manifest file containing (3 longitude chunks x 2 latitude chunks x 1 time chunk) = 6 chunks.
+
+!!! note
+
+    Python dictionaries preserve insertion order, so the first condition encountered takes priority.
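+
+To see the ordering rule in action, consider the `time, latitude, longitude` array again. The following is a minimal sketch (the split sizes are illustrative only): a catch-all `Any()` entry listed first shadows every later, more specific entry.
+
+```python
+from icechunk import ManifestSplitDimCondition
+
+# `Any()` first matches time, latitude, AND longitude,
+# so the more specific entry below is never consulted.
+shadowed = {
+    ManifestSplitDimCondition.Any(): 1,
+    ManifestSplitDimCondition.DimensionName("longitude"): 3,  # never reached
+}
+
+# List specific conditions first, and the catch-all last.
+intended = {
+    ManifestSplitDimCondition.DimensionName("longitude"): 3,
+    ManifestSplitDimCondition.Axis(1): 2,
+    ManifestSplitDimCondition.Any(): 1,  # applies only to the remaining dimension (time)
+}
+```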
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 0574e5e3a..0070c6f47 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -188,6 +188,7 @@ nav: - Quickstart: icechunk-python/quickstart.md - Configuration: icechunk-python/configuration.md - Storage: icechunk-python/storage.md + - Performance: icechunk-python/performance.md - FAQ: icechunk-python/faq.md - Xarray: icechunk-python/xarray.md - Parallel Writes: icechunk-python/parallel.md diff --git a/icechunk-python/benchmarks/conftest.py b/icechunk-python/benchmarks/conftest.py index 76b6bb3c4..d45e2b300 100644 --- a/icechunk-python/benchmarks/conftest.py +++ b/icechunk-python/benchmarks/conftest.py @@ -1,14 +1,13 @@ +import copy from typing import cast import pytest from benchmarks import helpers from benchmarks.datasets import ( - ERA5, - ERA5_ARCO, - ERA5_SINGLE, - GB_8MB_CHUNKS, - GB_128MB_CHUNKS, + LARGE_1D, + LARGE_MANIFEST_SHARDED, + LARGE_MANIFEST_UNSHARDED, PANCAKE_WRITES, SIMPLE_1D, TEST_BUCKETS, @@ -18,15 +17,22 @@ ) from icechunk import Repository, local_filesystem_storage +try: + from icechunk import ManifestSplittingConfig # noqa: F401 + + no_splitting = False +except ImportError: + no_splitting = True + def request_to_dataset(request, moar_prefix: str = "") -> Dataset: extra_prefix = request.config.getoption("--icechunk-prefix") + moar_prefix where = request.config.getoption("--where") - ds = request.param + ds = copy.deepcopy(request.param) if where == "local" and ds.skip_local: pytest.skip() - # for some reason, this gets run multiple times so we apply the prefix repeatedly - # if we don't catch that :( + # this gets run multiple times because the fixture scope is 'function' + # so we need this `force_idempotent` ugliness ds.storage_config = ds.storage_config.with_overwrite( **TEST_BUCKETS[where] ).with_extra(prefix=extra_prefix, force_idempotent=True) @@ -50,13 +56,26 @@ def simple_write_dataset(request) -> BenchmarkWriteDataset: return cast(BenchmarkWriteDataset, ds) +@pytest.fixture(params=[pytest.param(LARGE_1D, id="large-1d")]) +def large_write_dataset(request) -> BenchmarkWriteDataset: + moar_prefix = helpers.rdms() + ds = request_to_dataset(request, moar_prefix=moar_prefix) + return cast(BenchmarkWriteDataset, ds) + + @pytest.fixture( params=[ - pytest.param(GB_8MB_CHUNKS, id="gb-8mb"), - pytest.param(GB_128MB_CHUNKS, id="gb-128mb"), - pytest.param(ERA5_SINGLE, id="era5-single"), - pytest.param(ERA5, id="era5-weatherbench"), - pytest.param(ERA5_ARCO, id="era5-arco"), + # pytest.param(GB_8MB_CHUNKS, id="gb-8mb"), + # pytest.param(GB_128MB_CHUNKS, id="gb-128mb"), + # pytest.param(ERA5_SINGLE, id="era5-single"), + # pytest.param(ERA5, id="era5-weatherbench"), + # pytest.param(ERA5_ARCO, id="era5-arco"), + pytest.param(LARGE_MANIFEST_UNSHARDED, id="large-manifest-no-split"), + pytest.param( + LARGE_MANIFEST_SHARDED, + id="large-manifest-split", + marks=pytest.mark.skipif(no_splitting, reason="no splitting"), + ), ], ) def synth_dataset(request) -> BenchmarkReadDataset: diff --git a/icechunk-python/benchmarks/datasets.py b/icechunk-python/benchmarks/datasets.py index 95d06d749..ea82ef013 100644 --- a/icechunk-python/benchmarks/datasets.py +++ b/icechunk-python/benchmarks/datasets.py @@ -1,4 +1,5 @@ import datetime +import random import time import warnings from collections.abc import Callable @@ -9,11 +10,19 @@ import fsspec import numpy as np import platformdirs +import pytest import icechunk as ic +import icechunk.xarray import xarray as xr import zarr -from benchmarks.helpers import get_coiled_kwargs, rdms, 
setup_logger +from benchmarks.helpers import ( + get_coiled_kwargs, + get_splitting_config, + rdms, + repo_config_with, + setup_logger, +) rng = np.random.default_rng(seed=123) @@ -180,7 +189,9 @@ def storage(self) -> ic.Storage: self._storage = self.storage_config.create() return self._storage - def create(self, clear: bool = False) -> ic.Repository: + def create( + self, clear: bool = False, config: ic.RepositoryConfig | None = None + ) -> ic.Repository: if clear: clear_uri = self.storage_config.clear_uri() if clear_uri is None: @@ -199,7 +210,7 @@ def create(self, clear: bool = False) -> ic.Repository: except FileNotFoundError: pass logger.info(repr(self.storage)) - return ic.Repository.create(self.storage) + return ic.Repository.create(self.storage, config=config) @property def store(self) -> ic.IcechunkStore: @@ -220,8 +231,10 @@ class BenchmarkWriteDataset(Dataset): class BenchmarkReadDataset(Dataset): # data variable to load in `time_xarray_read_chunks` load_variables: list[str] | None = None - # Passed to .isel for `time_xarray_read_chunks` + # Passed to .isel for `time_xarray_read_chunks`, reads a single chunk chunk_selector: dict[str, Any] | None = None + # Passed to .isel for `time_xarray_read_chunks`, reads a large fraction of the data + full_load_selector: dict[str, Any] | None = None # name of (coordinate) variable used for testing "time to first byte" first_byte_variable: str | None # function used to construct the dataset prior to read benchmarks @@ -229,10 +242,10 @@ class BenchmarkReadDataset(Dataset): # whether to skip this one on local runs skip_local: bool = False - def create(self, clear: bool = True): + def create(self, clear: bool = True, config: ic.RepositoryConfig | None = None): if clear is not True: raise ValueError("clear *must* be true for benchmark datasets.") - return super().create(clear=True) + return super().create(clear=True, config=config) def setup(self, force: bool = False) -> None: """ @@ -243,15 +256,15 @@ def setup(self, force: bool = False) -> None: raise NotImplementedError("setupfn has not been provided.") if force: - print("forced re-creating") + logger.info("forced re-creating") self.setupfn(self) return try: _ = self.store except ic.IcechunkError as e: - print("Read of existing store failed. Re-creating") - print(e) + logger.info("Read of existing store failed. 
Re-creating") + logger.info(e) self.setupfn(self) @@ -378,6 +391,31 @@ def setup_era5(*args, **kwargs): return setup_for_benchmarks(*args, **kwargs, arrays_to_write=[]) +def setup_split_manifest_refs(dataset: Dataset, *, split_size: int | None): + shape = (500_000 * 1000,) + chunks = (1000,) + + if split_size is not None: + try: + splitting = get_splitting_config(split_size=split_size) + except ImportError: + logger.info("splitting not supported") + pytest.skip("splitting not supported on this version") + else: + splitting = None + config = repo_config_with(splitting=splitting) + assert config is not None + if hasattr(config.manifest, "splitting"): + assert config.manifest.splitting == splitting + repo = dataset.create(config=config) + logger.info(repo.config) + session = repo.writable_session(branch="main") + ds = xr.Dataset({"array": ("x", np.ones(shape=shape))}) + with zarr.config.set({"async.concurrency": 64}): + ic.xarray.to_icechunk(ds, session, encoding={"array": {"chunks": chunks}}) + session.commit("wrote data") + + ERA5_ARCO_INGEST = IngestDataset( name="ERA5-ARCO", prefix="era5_arco", @@ -448,6 +486,26 @@ def setup_era5(*args, **kwargs): setupfn=partial(setup_synthetic_gb_dataset, chunk_shape=(4, 512, 512)), ) +random_selector = sorted(random.choices(range(500_000 * 1000), k=50_000)) + +# large manifest sharded, unsharded +LARGE_MANIFEST_UNSHARDED = BenchmarkReadDataset( + storage_config=StorageConfig(prefix="large_manifest_no_split"), + chunk_selector={"x": 1}, + full_load_selector={"x": random_selector}, + load_variables=["array"], + first_byte_variable=None, + setupfn=partial(setup_split_manifest_refs, split_size=None), +) +LARGE_MANIFEST_SHARDED = BenchmarkReadDataset( + storage_config=StorageConfig(prefix="large_manifest_split"), + chunk_selector={"x": 1}, + full_load_selector={"x": random_selector}, + load_variables=["array"], + first_byte_variable=None, + setupfn=partial(setup_split_manifest_refs, split_size=100_000), +) + # TODO GPM_IMERG_VIRTUAL = BenchmarkReadDataset( storage_config=StorageConfig( @@ -477,3 +535,9 @@ def setup_era5(*args, **kwargs): shape=(2000 * 1000,), chunks=(1000,), ) +LARGE_1D = BenchmarkWriteDataset( + storage_config=StorageConfig(prefix="large_1d_writes"), + num_arrays=1, + shape=(500_000 * 1000,), + chunks=(1000,), +) diff --git a/icechunk-python/benchmarks/helpers.py b/icechunk-python/benchmarks/helpers.py index 96155ba11..e42c17d0c 100644 --- a/icechunk-python/benchmarks/helpers.py +++ b/icechunk-python/benchmarks/helpers.py @@ -2,6 +2,8 @@ import os import subprocess +import icechunk as ic + def setup_logger(): logger = logging.getLogger("icechunk-bench") @@ -68,3 +70,31 @@ def rdms() -> str: import string return "".join(random.sample(string.ascii_lowercase, k=8)) + + +def repo_config_with( + *, inline_chunk_threshold_bytes: int | None = None, preload=None, splitting=None +) -> ic.RepositoryConfig: + config = ic.RepositoryConfig.default() + if inline_chunk_threshold_bytes is not None: + config.inline_chunk_threshold_bytes = inline_chunk_threshold_bytes + if splitting is not None: + config.manifest = ic.ManifestConfig(preload=preload, splitting=splitting) + return config + + +def get_splitting_config(*, split_size: int): + # helper to allow benchmarking versions before manifest splitting was introduced + from icechunk import ( + ManifestSplitCondition, + ManifestSplitDimCondition, + ManifestSplittingConfig, + ) + + return ManifestSplittingConfig.from_dict( + { + ManifestSplitCondition.path_matches(".*"): { + ManifestSplitDimCondition.Any(): 
split_size + } + } + ) diff --git a/icechunk-python/benchmarks/pytest.ini b/icechunk-python/benchmarks/pytest.ini new file mode 100644 index 000000000..c8c9c7579 --- /dev/null +++ b/icechunk-python/benchmarks/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +asyncio_mode = auto +asyncio_default_fixture_loop_scope = function diff --git a/icechunk-python/benchmarks/runner.py b/icechunk-python/benchmarks/runner.py index 59608c8a5..6b04ee3ed 100644 --- a/icechunk-python/benchmarks/runner.py +++ b/icechunk-python/benchmarks/runner.py @@ -10,6 +10,7 @@ import subprocess import tempfile import tomllib +from collections.abc import Callable from functools import partial import tqdm @@ -33,6 +34,14 @@ assert_cwd_is_icechunk_python() +def process_map(*, serial: bool, func: Callable, iterable): + if serial: + for i in iterable: + func(i) + else: + tqdm.contrib.concurrent.process_map(func, iterable) + + def get_benchmark_deps(filepath: str) -> str: """needed since 1. benchmark deps may have changed in the meantime. @@ -93,13 +102,13 @@ def setup(self, *, force: bool): """Creates datasets for read benchmarks.""" logger.info(f"setup_benchmarks for {self.ref} / {self.commit}") cmd = ( - f"pytest {PYTEST_OPTIONS} -nauto " + f"pytest {PYTEST_OPTIONS} -s -nauto --benchmark-disable " f"-m setup_benchmarks --force-setup={force} " f"--where={self.where} " f"--icechunk-prefix=benchmarks/{self.prefix}/ " "benchmarks/" ) - logger.info(cmd) + logger.info(">>> " + cmd) self.execute(cmd, check=True) def run(self, *, pytest_extra: str = "") -> None: @@ -126,9 +135,9 @@ class LocalRunner(Runner): activate: str = "source .venv/bin/activate" bench_store_dir = CURRENTDIR - def __init__(self, *, ref: str, where: str): - super().__init__(ref=ref, where=where) - suffix = self.ref_commit + def __init__(self, *, ref: str, where: str, save_prefix: str): + super().__init__(ref=ref, where=where, save_prefix="") + suffix = self.commit self.base = f"{TMP}/icechunk-bench-{suffix}" self.cwd = f"{TMP}/icechunk-bench-{suffix}/icechunk" self.pycwd = f"{TMP}/icechunk-bench-{suffix}/icechunk/icechunk-python" @@ -254,12 +263,13 @@ def run_there(where: str, *, args, save_prefix) -> None: ) # we can only initialize in parallel since the two refs may have the same spec version. - tqdm.contrib.concurrent.process_map(partial(init_for_ref), runners) + process_map(serial=args.serial, func=partial(init_for_ref), iterable=runners) if not args.skip_setup: for runner in runners: runner.setup(force=args.force_setup) + # TODO: this could be parallelized for coiled runners for runner in tqdm.tqdm(runners): runner.run(pytest_extra=args.pytest) @@ -268,6 +278,7 @@ def run_there(where: str, *, args, save_prefix) -> None: parser = argparse.ArgumentParser() parser.add_argument("refs", help="refs to run benchmarks for", nargs="+") parser.add_argument("--pytest", help="passed to pytest", default="") + parser.add_argument("--serial", action="store_true", default=False) parser.add_argument( "--where", help="where to run? 
[local|s3|s3_ob|gcs], combinations are allowed: [s3|gcs]", @@ -291,21 +302,24 @@ def run_there(where: str, *, args, save_prefix) -> None: save_prefix = rdms() - tqdm.contrib.concurrent.process_map( - partial(run_there, args=args, save_prefix=save_prefix), where + process_map( + serial=args.serial, + func=partial(run_there, args=args, save_prefix=save_prefix), + iterable=where, ) - subprocess.run( - [ - "aws", - "s3", - "cp", - f"s3://earthmover-scratch/benchmarks/{save_prefix}/", - f"/tmp/benchmarks/{save_prefix}/", - "--recursive", - ], - check=True, - ) + if any(w != "local" for w in where): + subprocess.run( + [ + "aws", + "s3", + "cp", + f"s3://earthmover-scratch/benchmarks/{save_prefix}/", + f"/tmp/benchmarks/{save_prefix}/", + "--recursive", + ], + check=True, + ) refs = args.refs # TODO: clean this up @@ -321,18 +335,18 @@ def run_there(where: str, *, args, save_prefix) -> None: key=os.path.getmtime, reverse=True, ) - # TODO: Use `just` here when we figure that out. - subprocess.run( - [ - "pytest-benchmark", - "compare", - "--group=group,func,param", - "--sort=fullname", - "--columns=median", - "--name=normal", - *files, - ] - ) + # TODO: Use `just` here when we figure that out. + subprocess.run( + [ + "pytest-benchmark", + "compare", + "--group=group,func", + "--sort=fullname", + "--columns=median", + "--name=normal", + *files, + ] + ) # Compare wish-list: diff --git a/icechunk-python/benchmarks/test_benchmark_reads.py b/icechunk-python/benchmarks/test_benchmark_reads.py index b1ba9e6e1..f42dad868 100644 --- a/icechunk-python/benchmarks/test_benchmark_reads.py +++ b/icechunk-python/benchmarks/test_benchmark_reads.py @@ -2,6 +2,7 @@ import pytest +import icechunk as ic import xarray as xr import zarr from benchmarks.datasets import Dataset @@ -9,6 +10,21 @@ # TODO: configurable? zarr.config.set({"async.concurrency": 64}) +pytestmark = pytest.mark.read_benchmark + + +@pytest.fixture( + params=[ + pytest.param(None, id="default"), + pytest.param( + ic.ManifestPreloadConfig(preload_if=ic.ManifestPreloadCondition.false()), + id="off", + ), + ], +) +def preload(request): + return request.param + @pytest.mark.setup_benchmarks @pytest.mark.filterwarnings("ignore:datetime.datetime.utcnow") # bah, boto! 
@@ -30,7 +46,6 @@ def test_recreate_datasets(synth_dataset, request): synth_dataset.setup(force=force) -@pytest.mark.read_benchmark def test_time_create_store(synth_dataset: Dataset, benchmark) -> None: """time to create the icechunk create the Repo, session, and store objects.""" benchmark(operator.attrgetter("store"), synth_dataset) @@ -48,7 +63,7 @@ def test_time_getsize_key(synth_dataset: Dataset, benchmark) -> None: @benchmark def fn(): for array in synth_dataset.load_variables: - if group := synth_dataset.group is not None: + if (group := synth_dataset.group) is not None: prefix = f"{group}/" else: prefix = "" @@ -66,7 +81,6 @@ def test_time_getsize_prefix(synth_dataset: Dataset, benchmark) -> None: benchmark(operator.methodcaller("nbytes_stored"), array) -@pytest.mark.read_benchmark @pytest.mark.benchmark(group="zarr-read") def test_time_zarr_open(synth_dataset: Dataset, benchmark) -> None: """ @@ -79,7 +93,6 @@ def fn(): zarr.open_group(synth_dataset.store, path=synth_dataset.group, mode="r") -@pytest.mark.read_benchmark @pytest.mark.benchmark(group="zarr-read") def test_time_zarr_members(synth_dataset: Dataset, benchmark) -> None: # list_dir, maybe warmup=1 @@ -87,7 +100,6 @@ def test_time_zarr_members(synth_dataset: Dataset, benchmark) -> None: benchmark(operator.methodcaller("members"), group) -@pytest.mark.read_benchmark @pytest.mark.benchmark(group="xarray-read", min_rounds=10) def test_time_xarray_open(synth_dataset: Dataset, benchmark) -> None: @benchmark @@ -100,23 +112,65 @@ def fn(): ) -# TODO: mark as slow? -@pytest.mark.read_benchmark +@pytest.mark.parametrize("selector", ["single-chunk", "full-read"]) @pytest.mark.benchmark(group="xarray-read", min_rounds=2) -def test_time_xarray_read_chunks(synth_dataset: Dataset, benchmark) -> None: +def test_time_xarray_read_chunks_cold_cache( + synth_dataset: Dataset, selector, preload, benchmark +) -> None: """128MB vs 8MB chunks. should see a difference.""" if synth_dataset.load_variables is None: pytest.skip() + + if selector == "single-chunk": + subsetter = synth_dataset.chunk_selector + elif selector == "full-read": + subsetter = synth_dataset.full_load_selector + else: + raise ValueError(f"bad selector: {selector}") + + if subsetter is None: + pytest.skip(f"no selector specified for {selector}") + # TODO: switch out concurrency "ideal_request_size" + @benchmark + def fn(): + repo = ic.Repository.open( + storage=synth_dataset.storage, + config=ic.RepositoryConfig(manifest=ic.ManifestConfig(preload=preload)), + ) + ds = xr.open_zarr( + repo.readonly_session("main").store, + group=synth_dataset.group, + chunks=None, + consolidated=False, + ) + subset = ds.isel(subsetter) + subset[synth_dataset.load_variables].compute() + + +@pytest.mark.benchmark(group="xarray-read", min_rounds=2) +def test_time_xarray_read_chunks_hot_cache( + synth_dataset: Dataset, preload, benchmark +) -> None: + """128MB vs 8MB chunks. 
should see a difference.""" + if synth_dataset.load_variables is None: + pytest.skip() + # TODO: switch out concurrency "ideal_request_size" + repo = ic.Repository.open( + storage=synth_dataset.storage, + config=ic.RepositoryConfig(manifest=ic.ManifestConfig(preload=preload)), + ) ds = xr.open_zarr( - synth_dataset.store, group=synth_dataset.group, chunks=None, consolidated=False + repo.readonly_session("main").store, + group=synth_dataset.group, + chunks=None, + consolidated=False, ) subset = ds.isel(synth_dataset.chunk_selector) # important this cannot be `load` benchmark(operator.methodcaller("compute"), subset[synth_dataset.load_variables]) -@pytest.mark.read_benchmark @pytest.mark.benchmark(group="bytes-read") def test_time_first_bytes(synth_dataset: Dataset, benchmark) -> None: """TODO: this should be sensitive to manifest splitting""" diff --git a/icechunk-python/benchmarks/test_benchmark_writes.py b/icechunk-python/benchmarks/test_benchmark_writes.py index d82742220..8c82fa4c9 100644 --- a/icechunk-python/benchmarks/test_benchmark_writes.py +++ b/icechunk-python/benchmarks/test_benchmark_writes.py @@ -5,15 +5,40 @@ import zarr from benchmarks import lib +from benchmarks.helpers import get_splitting_config, repo_config_with from benchmarks.tasks import Executor, write -from icechunk import Repository, RepositoryConfig, local_filesystem_storage +from icechunk import ( + Repository, + RepositoryConfig, + Session, + VirtualChunkSpec, + local_filesystem_storage, +) NUM_CHUNK_REFS = 10_000 NUM_VIRTUAL_CHUNK_REFS = 100_000 +pytestmark = pytest.mark.write_benchmark + + +@pytest.fixture( + params=[ + pytest.param(None, id="no-splitting"), + pytest.param(10000, id="split-size-10_000"), + ] +) +def splitting(request): + if request.param is None: + return None + else: + try: + return get_splitting_config(split_size=request.param) + except ImportError: + pytest.skip("splitting not supported on this version") + + # FIXME: figure out a reasonable default -@pytest.mark.write_benchmark @pytest.mark.parametrize( "executor", [ @@ -53,8 +78,8 @@ def test_write_chunks_with_tasks(synth_write_dataset, benchmark, executor): benchmark.extra_info["data"] = diags -@pytest.mark.write_benchmark def test_write_simple_1d(benchmark, simple_write_dataset): + """Simple write benchmarks. 
Shows 2-3X slower on GCS compared to S3""" dataset = simple_write_dataset repo = dataset.create() session = repo.writable_session(branch="main") @@ -79,15 +104,6 @@ def write_task(): benchmark(write_task) -def repo_config_with( - *, inline_chunk_threshold_bytes: int | None = None -) -> RepositoryConfig: - config = RepositoryConfig.default() - if inline_chunk_threshold_bytes is not None: - config.inline_chunk_threshold_bytes = inline_chunk_threshold_bytes - - -@pytest.mark.write_benchmark @pytest.mark.benchmark(group="refs-write") @pytest.mark.parametrize("commit", [True, False]) @pytest.mark.parametrize( @@ -113,6 +129,7 @@ def write_chunk_refs(repo) -> None: if commit: session.commit("written!") + assert repo_config is not None repo = Repository.create(storage=local_filesystem_storage(tmpdir), config=repo_config) session = repo.writable_session("main") group = zarr.group(session.store) @@ -132,9 +149,8 @@ def write_chunk_refs(repo) -> None: benchmark(write_chunk_refs, repo) -@pytest.mark.write_benchmark @pytest.mark.benchmark(group="refs-write") -def test_write_many_virtual_chunk_refs(benchmark, repo) -> None: +def test_set_many_virtual_chunk_refs(benchmark, repo) -> None: """Benchmark the setting of many virtual chunk refs.""" session = repo.writable_session("main") store = session.store @@ -162,3 +178,44 @@ def write(): store.set_virtual_ref( f"array/c/{i}", location=f"s3://foo/bar/{i}.nc", offset=0, length=1 ) + + +@pytest.mark.benchmark(group="refs-write") +def test_write_split_manifest_refs(benchmark, splitting, large_write_dataset) -> None: + dataset = large_write_dataset + config = repo_config_with(splitting=splitting) + assert config is not None + if hasattr(config.manifest, "splitting"): + assert config.manifest.splitting == splitting + repo = dataset.create(config=config) + session = repo.writable_session(branch="main") + store = session.store + group = zarr.open_group(store, zarr_format=3) + group.create_array( + "array", + shape=dataset.shape, + chunks=dataset.chunks, + dtype="int8", + fill_value=0, + compressors=None, + ) + session.commit("initialize") + + def write_refs() -> Session: + session = repo.writable_session(branch="main") + num_chunks = dataset.shape[0] // dataset.chunks[0] + chunks = [ + VirtualChunkSpec( + index=[i], location=f"s3://foo/bar/{i}.nc", offset=0, length=1 + ) + for i in range(num_chunks) + ] + + session.store.set_virtual_refs("array", chunks) + # (args, kwargs) + return ((session,), {}) + + def commit(session_from_setup): + session_from_setup.commit("wrote refs") + + benchmark.pedantic(commit, setup=write_refs, iterations=1, rounds=10) diff --git a/icechunk-python/pyproject.toml b/icechunk-python/pyproject.toml index 90b14c175..9c0dd5d83 100644 --- a/icechunk-python/pyproject.toml +++ b/icechunk-python/pyproject.toml @@ -50,6 +50,38 @@ benchmark = [ "tqdm", "humanize", "platformdirs", + "ipdb", +] +docs = [ + "scipy", + "cftime", + "pooch", + "dask>=2024.11.0,<2025.04.0", + "distributed>=2024.11.0,<2025.04.0", + "xarray>=2025.01.2", + "myst_nb", + "pydata_sphinx_theme", + "mkdocs-awesome-pages-plugin", + "mkdocs-mermaid2-plugin", + "markdown-exec", + "mkdocs-breadcrumbs-plugin", + "mkdocs-minify-plugin", + "mkdocs-open-in-new-tab", + "mkdocs", + "mkdocs-material[imaging]", + "mkdocstrings[python]", + "mkdocs-jupyter", + "mkdocs-git-revision-date-localized-plugin", + "mkdocs-git-committers-plugin-2", + "mkdocs-macros-plugin", + "mkdocs-include-markdown-plugin", + "mkdocs-redirects", + "mkdocs-git-authors-plugin", + "sphinx", + "sphinx_copybutton", + 
"sphinx_design", + "sphinx_togglebutton", + "sphinx-autodoc-typehints" ] [dependency-groups] diff --git a/icechunk-python/python/icechunk/__init__.py b/icechunk-python/python/icechunk/__init__.py index 7c3b3c99a..6eaefbc96 100644 --- a/icechunk-python/python/icechunk/__init__.py +++ b/icechunk-python/python/icechunk/__init__.py @@ -1,5 +1,7 @@ # module +from typing import TypeAlias + from icechunk._icechunk_python import ( AzureCredentials, AzureStaticCredentials, @@ -23,6 +25,9 @@ ManifestFileInfo, ManifestPreloadCondition, ManifestPreloadConfig, + ManifestSplitCondition, + ManifestSplitDimCondition, + ManifestSplittingConfig, ObjectStoreConfig, RebaseFailedError, RepositoryConfig, @@ -107,6 +112,9 @@ "ManifestFileInfo", "ManifestPreloadCondition", "ManifestPreloadConfig", + "ManifestSplitCondition", + "ManifestSplitDimCondition", + "ManifestSplittingConfig", "ObjectStoreConfig", "RebaseFailedError", "Repository", @@ -164,4 +172,21 @@ def print_debug_info() -> None: continue +# This monkey patch is a bit annoying. Python dicts preserve insertion order +# But this gets mapped to a Rust HashMap which does *not* preserve order +# So on the python side, we can accept a dict as a nicer API, and immediately +# convert it to tuples that preserve order, and pass those to Rust + +SplitSizesDict: TypeAlias = dict[ + ManifestSplitCondition, dict[ManifestSplitDimCondition, int] +] + + +def from_dict(split_sizes: SplitSizesDict) -> ManifestSplittingConfig: + unwrapped = tuple((k, tuple(v.items())) for k, v in split_sizes.items()) + return ManifestSplittingConfig(unwrapped) + + +ManifestSplittingConfig.from_dict = from_dict # type: ignore[attr-defined] + initialize_logs() diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index fb06b0be7..e508f7dce 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -2,7 +2,7 @@ import abc import datetime from collections.abc import AsyncGenerator, AsyncIterator from enum import Enum -from typing import Any +from typing import Any, TypeAlias class S3Options: """Options for accessing an S3-compatible storage backend""" @@ -482,12 +482,122 @@ class ManifestPreloadConfig: """ ... +class ManifestSplitCondition: + """Configuration for conditions under which manifests will be split into splits""" + + @staticmethod + def or_conditions( + conditions: list[ManifestSplitCondition], + ) -> ManifestSplitCondition: + """Create a splitting condition that matches if any of `conditions` matches""" + ... + @staticmethod + def and_conditions( + conditions: list[ManifestSplitCondition], + ) -> ManifestSplitCondition: + """Create a splitting condition that matches only if all passed `conditions` match""" + ... + @staticmethod + def path_matches(regex: str) -> ManifestSplitCondition: + """Create a splitting condition that matches if the full path to the array matches the passed regex. + + Array paths are absolute, as in `/path/to/my/array` + """ + ... + @staticmethod + def name_matches(regex: str) -> ManifestSplitCondition: + """Create a splitting condition that matches if the array's name matches the passed regex. + + Example, for an array `/model/outputs/temperature`, the following will match: + ``` + name_matches(".*temp.*") + ``` + """ + ... + + @staticmethod + def AnyArray() -> ManifestSplitCondition: + """Create a splitting condition that matches any array.""" + ... 
+
+class ManifestSplitDimCondition:
+    """Conditions for specifying the dimensions along which to split manifests."""
+    class Axis:
+        """Split along the specified integer axis."""
+        def __init__(self, axis: int) -> None: ...
+
+    class DimensionName:
+        """Split along the specified named dimension."""
+        def __init__(self, regex: str) -> None: ...
+
+    class Any:
+        """Split along any other unspecified dimension."""
+        def __init__(self) -> None: ...
+
+DimSplitSize: TypeAlias = int
+SplitSizes: TypeAlias = tuple[
+    tuple[
+        ManifestSplitCondition, tuple[tuple[ManifestSplitDimCondition, DimSplitSize], ...]
+    ],
+    ...,
+]
+
+class ManifestSplittingConfig:
+    """Configuration for manifest splitting."""
+
+    def __init__(self, split_sizes: SplitSizes) -> None:
+        """Configuration for how Icechunk manifests will be split.
+
+        Parameters
+        ----------
+        split_sizes: tuple[tuple[ManifestSplitCondition, tuple[tuple[ManifestSplitDimCondition, int], ...]], ...]
+            The configuration for how Icechunk manifests will be split.
+
+        Examples
+        --------
+
+        Split manifests for the `temperature` array, with 3 chunks per manifest file along the `longitude` dimension.
+        >>> ManifestSplittingConfig.from_dict(
+        ...     {
+        ...         ManifestSplitCondition.name_matches("temperature"): {
+        ...             ManifestSplitDimCondition.DimensionName("longitude"): 3
+        ...         }
+        ...     }
+        ... )
+        """
+        pass
+
+    @property
+    def split_sizes(self) -> SplitSizes:
+        """
+        Configuration for how Icechunk manifests will be split.
+
+        Returns
+        -------
+        tuple[tuple[ManifestSplitCondition, tuple[tuple[ManifestSplitDimCondition, int], ...]], ...]
+            The configuration for how Icechunk manifests will be split.
+        """
+        ...
+
+    @split_sizes.setter
+    def split_sizes(self, value: SplitSizes) -> None:
+        """
+        Set the sizes for how Icechunk manifests will be split.
+
+        Parameters
+        ----------
+        value: tuple[tuple[ManifestSplitCondition, tuple[tuple[ManifestSplitDimCondition, int], ...]], ...]
+            The configuration for how Icechunk manifests will be split.
+        """
+        ...
+
 class ManifestConfig:
     """Configuration for how Icechunk manifests"""
     def __init__(
         self,
         preload: ManifestPreloadConfig | None = None,
+        splitting: ManifestSplittingConfig | None = None,
     ) -> None:
         """
         Create a new `ManifestConfig` object
@@ -496,6 +606,8 @@
         ----------
         preload: ManifestPreloadConfig | None
             The configuration for how Icechunk manifests will be preloaded.
+        splitting: ManifestSplittingConfig | None
+            The configuration for how Icechunk manifests will be split.
         """
         ...
     @property
@@ -521,6 +633,30 @@
         """
         ...
+
+    @property
+    def splitting(self) -> ManifestSplittingConfig | None:
+        """
+        The configuration for how Icechunk manifests will be split.
+
+        Returns
+        -------
+        ManifestSplittingConfig | None
+            The configuration for how Icechunk manifests will be split.
+        """
+        ...
+
+    @splitting.setter
+    def splitting(self, value: ManifestSplittingConfig | None) -> None:
+        """
+        Set the configuration for how Icechunk manifests will be split.
+
+        Parameters
+        ----------
+        value: ManifestSplittingConfig | None
+            The configuration for how Icechunk manifests will be split.
+        """
+        ...
+ class StorageConcurrencySettings: """Configuration for how Icechunk uses its Storage instance""" diff --git a/icechunk-python/src/config.rs b/icechunk-python/src/config.rs index 35ef5fcfb..e968d8898 100644 --- a/icechunk-python/src/config.rs +++ b/icechunk-python/src/config.rs @@ -1,9 +1,11 @@ use async_trait::async_trait; use chrono::{DateTime, Datelike, TimeDelta, Timelike, Utc}; use serde::{Deserialize, Serialize}; +use std::hash::{Hash, Hasher}; use std::{ collections::HashMap, fmt::Display, + hash::DefaultHasher, num::{NonZeroU16, NonZeroU64}, path::PathBuf, sync::Arc, @@ -15,8 +17,9 @@ use icechunk::{ AzureCredentials, AzureStaticCredentials, CachingConfig, CompressionAlgorithm, CompressionConfig, Credentials, GcsBearerCredential, GcsCredentials, GcsCredentialsFetcher, GcsStaticCredentials, ManifestConfig, - ManifestPreloadCondition, ManifestPreloadConfig, S3Credentials, - S3CredentialsFetcher, S3Options, S3StaticCredentials, + ManifestPreloadCondition, ManifestPreloadConfig, ManifestSplitCondition, + ManifestSplitDim, ManifestSplitDimCondition, ManifestSplittingConfig, + S3Credentials, S3CredentialsFetcher, S3Options, S3StaticCredentials, }, storage::{self, ConcurrencySettings}, virtual_chunks::VirtualChunkContainer, @@ -1013,26 +1016,260 @@ impl From for PyManifestPreloadConfig { } } +#[pyclass(name = "ManifestSplitCondition", eq)] +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum PyManifestSplitCondition { + Or(Vec), + And(Vec), + PathMatches { regex: String }, + NameMatches { regex: String }, + AnyArray(), +} + +#[pymethods] +impl PyManifestSplitCondition { + #[staticmethod] + pub fn or_conditions(conditions: Vec) -> Self { + Self::Or(conditions) + } + #[staticmethod] + pub fn and_conditions(conditions: Vec) -> Self { + Self::And(conditions) + } + #[staticmethod] + pub fn path_matches(regex: String) -> Self { + Self::PathMatches { regex } + } + #[staticmethod] + pub fn name_matches(regex: String) -> Self { + Self::NameMatches { regex } + } + + pub fn __repr__(&self) -> String { + use PyManifestSplitCondition::*; + match self { + Or(conditions) => { + let mut res = + conditions.iter().fold("Or(".to_string(), |mut state, condition| { + state.push_str(&condition.__repr__()); + state + }); + res.push(')'); + res + } + And(conditions) => { + let mut res = + conditions.iter().fold("And(".to_string(), |mut state, condition| { + state.push_str(&condition.__repr__()); + state + }); + res.push(')'); + res + } + PathMatches { regex } => format!("PathMatches('{}')", regex), + NameMatches { regex } => format!("NameMatches('{}')", regex), + AnyArray() => "AnyArray".to_string(), + } + } + + fn __hash__(&self) -> usize { + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() as usize + } +} + +impl From<&PyManifestSplitCondition> for ManifestSplitCondition { + fn from(value: &PyManifestSplitCondition) -> Self { + use PyManifestSplitCondition::*; + match value { + Or(vec) => Self::Or(vec.iter().map(|c| c.into()).collect()), + And(vec) => Self::And(vec.iter().map(|c| c.into()).collect()), + PathMatches { regex } => Self::PathMatches { regex: regex.clone() }, + NameMatches { regex } => Self::NameMatches { regex: regex.clone() }, + AnyArray() => Self::AnyArray, + } + } +} + +impl From for PyManifestSplitCondition { + fn from(value: ManifestSplitCondition) -> Self { + use ManifestSplitCondition::*; + match value { + AnyArray => Self::AnyArray(), + Or(vec) => Self::Or(vec.into_iter().map(|c| c.into()).collect()), + And(vec) => Self::And(vec.into_iter().map(|c| 
c.into()).collect()), + PathMatches { regex } => Self::PathMatches { regex }, + NameMatches { regex } => Self::NameMatches { regex }, + } + } +} + +#[pyclass(name = "ManifestSplitDimCondition")] +#[derive(Clone, Debug, Hash)] +pub enum PyManifestSplitDimCondition { + Axis(usize), + DimensionName(String), + Any(), +} + +#[pymethods] +impl PyManifestSplitDimCondition { + pub fn __repr__(&self) -> String { + use PyManifestSplitDimCondition::*; + match self { + Axis(axis) => format!("Axis({})", axis), + DimensionName(name) => format!(r#"DimensionName("{}")"#, name), + Any() => "Rest".to_string(), + } + } + + fn __hash__(&self) -> usize { + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() as usize + } +} + +impl From<&PyManifestSplitDimCondition> for ManifestSplitDimCondition { + fn from(value: &PyManifestSplitDimCondition) -> Self { + use PyManifestSplitDimCondition::*; + match value { + Axis(axis) => ManifestSplitDimCondition::Axis(*axis), + DimensionName(name) => ManifestSplitDimCondition::DimensionName(name.clone()), + Any() => ManifestSplitDimCondition::Any, + } + } +} +impl From for PyManifestSplitDimCondition { + fn from(value: ManifestSplitDimCondition) -> Self { + use ManifestSplitDimCondition::*; + match value { + Axis(a) => PyManifestSplitDimCondition::Axis(a), + DimensionName(name) => PyManifestSplitDimCondition::DimensionName(name), + Any => PyManifestSplitDimCondition::Any(), + } + } +} + +type DimConditions = Vec<(PyManifestSplitDimCondition, u32)>; + +#[pyclass(name = "ManifestSplittingConfig", eq)] +#[derive(Debug)] +pub struct PyManifestSplittingConfig { + #[pyo3(get, set)] + pub split_sizes: Option>, +} + +#[pymethods] +impl PyManifestSplittingConfig { + #[new] + #[pyo3(signature = (split_sizes=None))] + fn new(split_sizes: Option>) -> Self { + Self { split_sizes } + } + + pub fn __repr__(&self) -> String { + match &self.split_sizes { + Some(split_sizes) => { + let reprs: Vec = split_sizes + .iter() + .map(|(condition, dims)| { + let condition_repr = format!("{:?}", condition); // Using Debug for PyManifestSplitCondition + let dims_repr: Vec = dims + .iter() + .map(|(dim_condition, num)| { + format!("({:?}, {})", dim_condition, num) + }) + .collect(); + format!("({}, [{}])", condition_repr, dims_repr.join(", ")) + }) + .collect(); + + format!("ManifestSplittingConfig({})", reprs.join(", ")) + } + None => "ManifestSplittingConfig(None)".to_string(), + } + } +} + +impl PartialEq for PyManifestSplittingConfig { + fn eq(&self, other: &Self) -> bool { + let x: ManifestSplittingConfig = self.into(); + let y: ManifestSplittingConfig = other.into(); + x == y + } +} + +impl From for PyManifestSplittingConfig { + fn from(value: ManifestSplittingConfig) -> Self { + Self { + split_sizes: value.split_sizes.map(|c| { + c.into_iter() + .map(|(x, v)| { + ( + x.into(), + v.into_iter() + .map(|split_dim| { + (split_dim.condition.into(), split_dim.num_chunks) + }) + .collect(), + ) + }) + .collect() + }), + } + } +} + +impl From<&PyManifestSplittingConfig> for ManifestSplittingConfig { + fn from(value: &PyManifestSplittingConfig) -> Self { + Self { + split_sizes: value.split_sizes.as_ref().map(|c| { + c.iter() + .map(|(x, v)| { + ( + x.into(), + v.iter() + .map(|(cond, size)| ManifestSplitDim { + condition: cond.into(), + num_chunks: *size, + }) + .collect(), + ) + }) + .collect() + }), + } + } +} + #[pyclass(name = "ManifestConfig", eq)] #[derive(Debug, Default)] pub struct PyManifestConfig { #[pyo3(get, set)] pub preload: Option>, + #[pyo3(get, set)] + pub 
splitting: Option>, } #[pymethods] impl PyManifestConfig { #[new] - #[pyo3(signature = (preload=None))] - fn new(preload: Option>) -> Self { - Self { preload } + #[pyo3(signature = (preload=None, splitting=None))] + fn new( + preload: Option>, + splitting: Option>, + ) -> Self { + Self { preload, splitting } } pub fn __repr__(&self) -> String { // TODO: improve repr format!( - r#"ManifestConfig(preload={pre})"#, + r#"ManifestConfig(preload={pre}, splitting={sha})"#, pre = format_option_to_string(self.preload.as_ref().map(|l| l.to_string())), + sha = format_option_to_string(self.splitting.as_ref().map(|l| l.to_string())), ) } } @@ -1049,6 +1286,7 @@ impl From<&PyManifestConfig> for ManifestConfig { fn from(value: &PyManifestConfig) -> Self { Python::with_gil(|py| Self { preload: value.preload.as_ref().map(|c| (&*c.borrow(py)).into()), + splitting: value.splitting.as_ref().map(|c| (&*c.borrow(py)).into()), }) } } @@ -1061,6 +1299,10 @@ impl From for PyManifestConfig { Py::new(py, Into::::into(c)) .expect("Cannot create instance of ManifestPreloadConfig") }), + splitting: value.splitting.map(|c| { + Py::new(py, Into::::into(c)) + .expect("Cannot create instance of ManifestSplittingConfig") + }), }) } } diff --git a/icechunk-python/src/lib.rs b/icechunk-python/src/lib.rs index 793e324c1..305cb2564 100644 --- a/icechunk-python/src/lib.rs +++ b/icechunk-python/src/lib.rs @@ -17,6 +17,9 @@ use config::{ PyRepositoryConfig, PyS3Credentials, PyS3Options, PyS3StaticCredentials, PyStorage, PyStorageConcurrencySettings, PyStorageSettings, PyVirtualChunkContainer, }; +use config::{ + PyManifestSplitCondition, PyManifestSplitDimCondition, PyManifestSplittingConfig, +}; use conflicts::{ PyBasicConflictSolver, PyConflict, PyConflictDetector, PyConflictSolver, PyConflictType, PyVersionSelection, @@ -114,6 +117,9 @@ fn _icechunk_python(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/icechunk-python/tests/data/split-repo/chunks/28H289BT7JSZKZ6W5F70 b/icechunk-python/tests/data/split-repo/chunks/28H289BT7JSZKZ6W5F70 new file mode 100644 index 000000000..962a9be12 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/28H289BT7JSZKZ6W5F70 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/29CS30CK3D3CFVFBEZ80 b/icechunk-python/tests/data/split-repo/chunks/29CS30CK3D3CFVFBEZ80 new file mode 100644 index 000000000..edf286185 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/29CS30CK3D3CFVFBEZ80 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/39KTEACWZDMQ42C0TK5G b/icechunk-python/tests/data/split-repo/chunks/39KTEACWZDMQ42C0TK5G new file mode 100644 index 000000000..962a9be12 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/39KTEACWZDMQ42C0TK5G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/3CQH7MY9P8QFNE5TWMZ0 b/icechunk-python/tests/data/split-repo/chunks/3CQH7MY9P8QFNE5TWMZ0 new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/3CQH7MY9P8QFNE5TWMZ0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/3JWEBSQZ1AQT9RK46JHG b/icechunk-python/tests/data/split-repo/chunks/3JWEBSQZ1AQT9RK46JHG new file mode 100644 index 000000000..2c31cb971 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/3JWEBSQZ1AQT9RK46JHG 
differ diff --git a/icechunk-python/tests/data/split-repo/chunks/4DCZD8GC46090M65S46G b/icechunk-python/tests/data/split-repo/chunks/4DCZD8GC46090M65S46G new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/4DCZD8GC46090M65S46G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/4KXQX71CPA8ASE25X02G b/icechunk-python/tests/data/split-repo/chunks/4KXQX71CPA8ASE25X02G new file mode 100644 index 000000000..ac035c9cc Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/4KXQX71CPA8ASE25X02G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/5M551EG8VHX6GK2RVYEG b/icechunk-python/tests/data/split-repo/chunks/5M551EG8VHX6GK2RVYEG new file mode 100644 index 000000000..962a9be12 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/5M551EG8VHX6GK2RVYEG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/8Y59PE3EQ1NHY32W80PG b/icechunk-python/tests/data/split-repo/chunks/8Y59PE3EQ1NHY32W80PG new file mode 100644 index 000000000..13f13092b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/8Y59PE3EQ1NHY32W80PG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/9JSXG2S6R7JW530266FG b/icechunk-python/tests/data/split-repo/chunks/9JSXG2S6R7JW530266FG new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/9JSXG2S6R7JW530266FG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/9Q4DF3ZK93B35EW1Q0W0 b/icechunk-python/tests/data/split-repo/chunks/9Q4DF3ZK93B35EW1Q0W0 new file mode 100644 index 000000000..13f13092b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/9Q4DF3ZK93B35EW1Q0W0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/A14DFW1AMR9GJY82B480 b/icechunk-python/tests/data/split-repo/chunks/A14DFW1AMR9GJY82B480 new file mode 100644 index 000000000..edf286185 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/A14DFW1AMR9GJY82B480 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/C43XA0532YJN8FFJQSV0 b/icechunk-python/tests/data/split-repo/chunks/C43XA0532YJN8FFJQSV0 new file mode 100644 index 000000000..75b907aa6 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/C43XA0532YJN8FFJQSV0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/DB0FK4TGE2CYP5N5H5MG b/icechunk-python/tests/data/split-repo/chunks/DB0FK4TGE2CYP5N5H5MG new file mode 100644 index 000000000..edf286185 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/DB0FK4TGE2CYP5N5H5MG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/DN7QKGTXF72JRSP1R59G b/icechunk-python/tests/data/split-repo/chunks/DN7QKGTXF72JRSP1R59G new file mode 100644 index 000000000..edf286185 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/DN7QKGTXF72JRSP1R59G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/E06NN106F0RD1DKC57B0 b/icechunk-python/tests/data/split-repo/chunks/E06NN106F0RD1DKC57B0 new file mode 100644 index 000000000..f387b8a8b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/E06NN106F0RD1DKC57B0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/E88W8NWPPQ0DDP6MAQE0 b/icechunk-python/tests/data/split-repo/chunks/E88W8NWPPQ0DDP6MAQE0 new file mode 100644 index 000000000..edf286185 Binary files /dev/null and 
b/icechunk-python/tests/data/split-repo/chunks/E88W8NWPPQ0DDP6MAQE0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/EDKREXA5PA5KMVMYT1T0 b/icechunk-python/tests/data/split-repo/chunks/EDKREXA5PA5KMVMYT1T0 new file mode 100644 index 000000000..d35e88dde Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/EDKREXA5PA5KMVMYT1T0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/EMXV4G2NH651YQHCN5SG b/icechunk-python/tests/data/split-repo/chunks/EMXV4G2NH651YQHCN5SG new file mode 100644 index 000000000..d35e88dde Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/EMXV4G2NH651YQHCN5SG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/F2FF589RGM5TQPJ4XS30 b/icechunk-python/tests/data/split-repo/chunks/F2FF589RGM5TQPJ4XS30 new file mode 100644 index 000000000..e06affbd1 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/F2FF589RGM5TQPJ4XS30 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/FX2THCNDEF1TPJVVW6CG b/icechunk-python/tests/data/split-repo/chunks/FX2THCNDEF1TPJVVW6CG new file mode 100644 index 000000000..13f13092b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/FX2THCNDEF1TPJVVW6CG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/GD7FA0K8HRGV0XK609W0 b/icechunk-python/tests/data/split-repo/chunks/GD7FA0K8HRGV0XK609W0 new file mode 100644 index 000000000..edf286185 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/GD7FA0K8HRGV0XK609W0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/HNW4TH4RRN5ECR7PH4Z0 b/icechunk-python/tests/data/split-repo/chunks/HNW4TH4RRN5ECR7PH4Z0 new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/HNW4TH4RRN5ECR7PH4Z0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/JH2K5GX77HJWDR3BRHN0 b/icechunk-python/tests/data/split-repo/chunks/JH2K5GX77HJWDR3BRHN0 new file mode 100644 index 000000000..e06affbd1 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/JH2K5GX77HJWDR3BRHN0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/KYA9AWYNRNH4KDN3YS1G b/icechunk-python/tests/data/split-repo/chunks/KYA9AWYNRNH4KDN3YS1G new file mode 100644 index 000000000..2c31cb971 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/KYA9AWYNRNH4KDN3YS1G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/MHYQZGW0T34H7566ND30 b/icechunk-python/tests/data/split-repo/chunks/MHYQZGW0T34H7566ND30 new file mode 100644 index 000000000..75b907aa6 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/MHYQZGW0T34H7566ND30 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/MJC2EWJ8DTBA7P46REFG b/icechunk-python/tests/data/split-repo/chunks/MJC2EWJ8DTBA7P46REFG new file mode 100644 index 000000000..13f13092b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/MJC2EWJ8DTBA7P46REFG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/N4EXVBQKBGCMTXX6PEJ0 b/icechunk-python/tests/data/split-repo/chunks/N4EXVBQKBGCMTXX6PEJ0 new file mode 100644 index 000000000..75b907aa6 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/N4EXVBQKBGCMTXX6PEJ0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/P6C9986C6R2P9EP30XCG b/icechunk-python/tests/data/split-repo/chunks/P6C9986C6R2P9EP30XCG new file mode 100644 index 000000000..edf286185 Binary files 
/dev/null and b/icechunk-python/tests/data/split-repo/chunks/P6C9986C6R2P9EP30XCG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/P9JDEJ02DTDBGJDASRB0 b/icechunk-python/tests/data/split-repo/chunks/P9JDEJ02DTDBGJDASRB0 new file mode 100644 index 000000000..13f13092b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/P9JDEJ02DTDBGJDASRB0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/PAMW4ZQ7FVP7E1AZR2W0 b/icechunk-python/tests/data/split-repo/chunks/PAMW4ZQ7FVP7E1AZR2W0 new file mode 100644 index 000000000..edf286185 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/PAMW4ZQ7FVP7E1AZR2W0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/PZ5F8FQKKA144F08ANG0 b/icechunk-python/tests/data/split-repo/chunks/PZ5F8FQKKA144F08ANG0 new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/PZ5F8FQKKA144F08ANG0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/QDZDYMBSPJ4M0T34BEPG b/icechunk-python/tests/data/split-repo/chunks/QDZDYMBSPJ4M0T34BEPG new file mode 100644 index 000000000..13f13092b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/QDZDYMBSPJ4M0T34BEPG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/QEDGZDXTPJEX179WCT6G b/icechunk-python/tests/data/split-repo/chunks/QEDGZDXTPJEX179WCT6G new file mode 100644 index 000000000..bb0edad16 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/QEDGZDXTPJEX179WCT6G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/RQM2HG8XAD9KP8S2TRKG b/icechunk-python/tests/data/split-repo/chunks/RQM2HG8XAD9KP8S2TRKG new file mode 100644 index 000000000..bb0edad16 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/RQM2HG8XAD9KP8S2TRKG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/T394Y31WDT1VCVFY3A80 b/icechunk-python/tests/data/split-repo/chunks/T394Y31WDT1VCVFY3A80 new file mode 100644 index 000000000..bb0edad16 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/T394Y31WDT1VCVFY3A80 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/TXZ35HQMB7RHFJKRNXZG b/icechunk-python/tests/data/split-repo/chunks/TXZ35HQMB7RHFJKRNXZG new file mode 100644 index 000000000..13f13092b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/TXZ35HQMB7RHFJKRNXZG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/V66JMMKW77DCFEGRVSK0 b/icechunk-python/tests/data/split-repo/chunks/V66JMMKW77DCFEGRVSK0 new file mode 100644 index 000000000..e06affbd1 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/V66JMMKW77DCFEGRVSK0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/VGFD4TNTAETR77B9RK8G b/icechunk-python/tests/data/split-repo/chunks/VGFD4TNTAETR77B9RK8G new file mode 100644 index 000000000..9fc52b944 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/VGFD4TNTAETR77B9RK8G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/VJM5MNFQ7KQ7ERCWFNT0 b/icechunk-python/tests/data/split-repo/chunks/VJM5MNFQ7KQ7ERCWFNT0 new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/VJM5MNFQ7KQ7ERCWFNT0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/W4G0HS519TZDQBDAD01G b/icechunk-python/tests/data/split-repo/chunks/W4G0HS519TZDQBDAD01G new file mode 100644 index 000000000..13f13092b 
Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/W4G0HS519TZDQBDAD01G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/XZAFNSMV272D9X70HC50 b/icechunk-python/tests/data/split-repo/chunks/XZAFNSMV272D9X70HC50 new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/XZAFNSMV272D9X70HC50 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/YFGVD91WZJM9FKAB4PQG b/icechunk-python/tests/data/split-repo/chunks/YFGVD91WZJM9FKAB4PQG new file mode 100644 index 000000000..2c31cb971 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/YFGVD91WZJM9FKAB4PQG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/YGWDY7HYDBVBPKSJYK40 b/icechunk-python/tests/data/split-repo/chunks/YGWDY7HYDBVBPKSJYK40 new file mode 100644 index 000000000..edf286185 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/YGWDY7HYDBVBPKSJYK40 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/YQ0KA7YQ737KNCT5F0BG b/icechunk-python/tests/data/split-repo/chunks/YQ0KA7YQ737KNCT5F0BG new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/YQ0KA7YQ737KNCT5F0BG differ diff --git a/icechunk-python/tests/data/split-repo/chunks/YRS4NW2FJM8P66MCF8E0 b/icechunk-python/tests/data/split-repo/chunks/YRS4NW2FJM8P66MCF8E0 new file mode 100644 index 000000000..13f13092b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/YRS4NW2FJM8P66MCF8E0 differ diff --git a/icechunk-python/tests/data/split-repo/chunks/YWQDK4GG3H7JF189CN6G b/icechunk-python/tests/data/split-repo/chunks/YWQDK4GG3H7JF189CN6G new file mode 100644 index 000000000..cf0ff5021 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/YWQDK4GG3H7JF189CN6G differ diff --git a/icechunk-python/tests/data/split-repo/chunks/Z50RSERZRJZNTFRPDW80 b/icechunk-python/tests/data/split-repo/chunks/Z50RSERZRJZNTFRPDW80 new file mode 100644 index 000000000..d35e88dde Binary files /dev/null and b/icechunk-python/tests/data/split-repo/chunks/Z50RSERZRJZNTFRPDW80 differ diff --git a/icechunk-python/tests/data/split-repo/config.yaml b/icechunk-python/tests/data/split-repo/config.yaml new file mode 100644 index 000000000..9d0481775 --- /dev/null +++ b/icechunk-python/tests/data/split-repo/config.yaml @@ -0,0 +1,18 @@ +inline_chunk_threshold_bytes: 12 +get_partial_values_concurrency: null +compression: null +caching: null +storage: null +virtual_chunk_containers: null +manifest: + preload: null + splitting: + split_sizes: + - - !name_matches + regex: split_* + - - condition: !Axis 0 + num_chunks: 1 + - condition: !DimensionName longitude + num_chunks: 1 + - condition: Any + num_chunks: 3 diff --git a/icechunk-python/tests/data/split-repo/manifests/15KJFGXKPM1KRGFZ52D0 b/icechunk-python/tests/data/split-repo/manifests/15KJFGXKPM1KRGFZ52D0 new file mode 100644 index 000000000..1afe9ad0b Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/15KJFGXKPM1KRGFZ52D0 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/19KPV7V2275XRM01YAWG b/icechunk-python/tests/data/split-repo/manifests/19KPV7V2275XRM01YAWG new file mode 100644 index 000000000..fe6be90e5 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/19KPV7V2275XRM01YAWG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/2PQ1EA39NB2VK7QK688G 
b/icechunk-python/tests/data/split-repo/manifests/2PQ1EA39NB2VK7QK688G new file mode 100644 index 000000000..71b13f684 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/2PQ1EA39NB2VK7QK688G differ diff --git a/icechunk-python/tests/data/split-repo/manifests/3QYVB4AX7J2XF0QAEX1G b/icechunk-python/tests/data/split-repo/manifests/3QYVB4AX7J2XF0QAEX1G new file mode 100644 index 000000000..3060ec0de Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/3QYVB4AX7J2XF0QAEX1G differ diff --git a/icechunk-python/tests/data/split-repo/manifests/62CS3ENEZP4MJHPAW12G b/icechunk-python/tests/data/split-repo/manifests/62CS3ENEZP4MJHPAW12G new file mode 100644 index 000000000..38cd2951e Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/62CS3ENEZP4MJHPAW12G differ diff --git a/icechunk-python/tests/data/split-repo/manifests/7V2NE5Q4H7SN9QF0ZHJ0 b/icechunk-python/tests/data/split-repo/manifests/7V2NE5Q4H7SN9QF0ZHJ0 new file mode 100644 index 000000000..51c355f50 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/7V2NE5Q4H7SN9QF0ZHJ0 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/7WT30WPG1HF0P2QXGV6G b/icechunk-python/tests/data/split-repo/manifests/7WT30WPG1HF0P2QXGV6G new file mode 100644 index 000000000..40cf62c36 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/7WT30WPG1HF0P2QXGV6G differ diff --git a/icechunk-python/tests/data/split-repo/manifests/88Z1G93EH4VF9X28BBHG b/icechunk-python/tests/data/split-repo/manifests/88Z1G93EH4VF9X28BBHG new file mode 100644 index 000000000..418c8a034 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/88Z1G93EH4VF9X28BBHG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/AAJX4KKQQH72RK949AA0 b/icechunk-python/tests/data/split-repo/manifests/AAJX4KKQQH72RK949AA0 new file mode 100644 index 000000000..97c290f61 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/AAJX4KKQQH72RK949AA0 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/B81H9ZEGPV7E15CWXA10 b/icechunk-python/tests/data/split-repo/manifests/B81H9ZEGPV7E15CWXA10 new file mode 100644 index 000000000..889cefcd0 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/B81H9ZEGPV7E15CWXA10 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/CZMYYDSX2SX36J47WN9G b/icechunk-python/tests/data/split-repo/manifests/CZMYYDSX2SX36J47WN9G new file mode 100644 index 000000000..970f7737f Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/CZMYYDSX2SX36J47WN9G differ diff --git a/icechunk-python/tests/data/split-repo/manifests/D5CVGKP2G5DP6C277YAG b/icechunk-python/tests/data/split-repo/manifests/D5CVGKP2G5DP6C277YAG new file mode 100644 index 000000000..d1b01c50e Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/D5CVGKP2G5DP6C277YAG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/D6QG80A0HJW564ZXRBRG b/icechunk-python/tests/data/split-repo/manifests/D6QG80A0HJW564ZXRBRG new file mode 100644 index 000000000..ed8db30e5 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/D6QG80A0HJW564ZXRBRG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/DDPY0EEPKZXW1ACW41HG b/icechunk-python/tests/data/split-repo/manifests/DDPY0EEPKZXW1ACW41HG new file mode 100644 index 000000000..216f9d01c Binary files /dev/null and 
b/icechunk-python/tests/data/split-repo/manifests/DDPY0EEPKZXW1ACW41HG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/DNTQPT8BPWQJ1N2K3KN0 b/icechunk-python/tests/data/split-repo/manifests/DNTQPT8BPWQJ1N2K3KN0 new file mode 100644 index 000000000..7c4949d0e Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/DNTQPT8BPWQJ1N2K3KN0 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/EN0XHAX4APG96CNX1PF0 b/icechunk-python/tests/data/split-repo/manifests/EN0XHAX4APG96CNX1PF0 new file mode 100644 index 000000000..032d67037 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/EN0XHAX4APG96CNX1PF0 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/F22E2GTDKW73XFV884JG b/icechunk-python/tests/data/split-repo/manifests/F22E2GTDKW73XFV884JG new file mode 100644 index 000000000..83abec9c4 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/F22E2GTDKW73XFV884JG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/FM7YJ4BBE12X6CE0T4AG b/icechunk-python/tests/data/split-repo/manifests/FM7YJ4BBE12X6CE0T4AG new file mode 100644 index 000000000..53d8f5165 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/FM7YJ4BBE12X6CE0T4AG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/HB9W7VCB4QA786S2XJBG b/icechunk-python/tests/data/split-repo/manifests/HB9W7VCB4QA786S2XJBG new file mode 100644 index 000000000..84986daed Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/HB9W7VCB4QA786S2XJBG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/J3ZNDGTGA4AY29S8SJVG b/icechunk-python/tests/data/split-repo/manifests/J3ZNDGTGA4AY29S8SJVG new file mode 100644 index 000000000..ce2cbe5e3 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/J3ZNDGTGA4AY29S8SJVG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/KCR2FSZ5WFVV39SRQWB0 b/icechunk-python/tests/data/split-repo/manifests/KCR2FSZ5WFVV39SRQWB0 new file mode 100644 index 000000000..f24bfaaac Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/KCR2FSZ5WFVV39SRQWB0 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/MZG15YADGEG849HP3DPG b/icechunk-python/tests/data/split-repo/manifests/MZG15YADGEG849HP3DPG new file mode 100644 index 000000000..7a6957c40 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/MZG15YADGEG849HP3DPG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/NGW2FB9H515MEMPRJCNG b/icechunk-python/tests/data/split-repo/manifests/NGW2FB9H515MEMPRJCNG new file mode 100644 index 000000000..351219978 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/NGW2FB9H515MEMPRJCNG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/NYEQXA5QV0780CE19HFG b/icechunk-python/tests/data/split-repo/manifests/NYEQXA5QV0780CE19HFG new file mode 100644 index 000000000..bd45bbc50 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/NYEQXA5QV0780CE19HFG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/PZCTNRCT17DDAH1Q8B00 b/icechunk-python/tests/data/split-repo/manifests/PZCTNRCT17DDAH1Q8B00 new file mode 100644 index 000000000..b6cd4a2d3 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/PZCTNRCT17DDAH1Q8B00 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/Q8WDWGQ0T78AEXKHRV40 
b/icechunk-python/tests/data/split-repo/manifests/Q8WDWGQ0T78AEXKHRV40 new file mode 100644 index 000000000..85f274efa Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/Q8WDWGQ0T78AEXKHRV40 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/QDR73V4TZ4M6CW1PNPSG b/icechunk-python/tests/data/split-repo/manifests/QDR73V4TZ4M6CW1PNPSG new file mode 100644 index 000000000..16f4125ef Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/QDR73V4TZ4M6CW1PNPSG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/QYDWRAXGKCFNV0645F80 b/icechunk-python/tests/data/split-repo/manifests/QYDWRAXGKCFNV0645F80 new file mode 100644 index 000000000..21efa897c Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/QYDWRAXGKCFNV0645F80 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/RF0DB8QD7R68HNHPAXVG b/icechunk-python/tests/data/split-repo/manifests/RF0DB8QD7R68HNHPAXVG new file mode 100644 index 000000000..98b9d1ef4 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/RF0DB8QD7R68HNHPAXVG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/RNXAWAZHFV0CDV8JJVBG b/icechunk-python/tests/data/split-repo/manifests/RNXAWAZHFV0CDV8JJVBG new file mode 100644 index 000000000..723dd9ebc Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/RNXAWAZHFV0CDV8JJVBG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/RVXQ0DMYANQPV6X32690 b/icechunk-python/tests/data/split-repo/manifests/RVXQ0DMYANQPV6X32690 new file mode 100644 index 000000000..cf2941bb7 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/RVXQ0DMYANQPV6X32690 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/TTSZ8VYDX0Y7PB6A13H0 b/icechunk-python/tests/data/split-repo/manifests/TTSZ8VYDX0Y7PB6A13H0 new file mode 100644 index 000000000..4e1b16fd5 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/TTSZ8VYDX0Y7PB6A13H0 differ diff --git a/icechunk-python/tests/data/split-repo/manifests/WS2BZ61Q1V564ZG6GSJG b/icechunk-python/tests/data/split-repo/manifests/WS2BZ61Q1V564ZG6GSJG new file mode 100644 index 000000000..dc8d1f089 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/WS2BZ61Q1V564ZG6GSJG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/XXCGBC5VMRPB89PFMFGG b/icechunk-python/tests/data/split-repo/manifests/XXCGBC5VMRPB89PFMFGG new file mode 100644 index 000000000..2aaa21c73 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/XXCGBC5VMRPB89PFMFGG differ diff --git a/icechunk-python/tests/data/split-repo/manifests/ZEQVP57D4FWAZJ0BJV6G b/icechunk-python/tests/data/split-repo/manifests/ZEQVP57D4FWAZJ0BJV6G new file mode 100644 index 000000000..d928e4237 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/manifests/ZEQVP57D4FWAZJ0BJV6G differ diff --git a/icechunk-python/tests/data/split-repo/refs/branch.main/ref.json b/icechunk-python/tests/data/split-repo/refs/branch.main/ref.json new file mode 100644 index 000000000..6b53d3ec5 --- /dev/null +++ b/icechunk-python/tests/data/split-repo/refs/branch.main/ref.json @@ -0,0 +1 @@ +{"snapshot":"BYJMXJ1CHYE86YSHESR0"} \ No newline at end of file diff --git a/icechunk-python/tests/data/split-repo/snapshots/BYJMXJ1CHYE86YSHESR0 b/icechunk-python/tests/data/split-repo/snapshots/BYJMXJ1CHYE86YSHESR0 new file mode 100644 index 000000000..afdc9e3fc Binary files 
/dev/null and b/icechunk-python/tests/data/split-repo/snapshots/BYJMXJ1CHYE86YSHESR0 differ diff --git a/icechunk-python/tests/data/split-repo/snapshots/EH9VY081CEHEFY5Y11E0 b/icechunk-python/tests/data/split-repo/snapshots/EH9VY081CEHEFY5Y11E0 new file mode 100644 index 000000000..037ac5c78 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/snapshots/EH9VY081CEHEFY5Y11E0 differ diff --git a/icechunk-python/tests/data/split-repo/snapshots/QYX6FZ0NZAKSV5XMDSGG b/icechunk-python/tests/data/split-repo/snapshots/QYX6FZ0NZAKSV5XMDSGG new file mode 100644 index 000000000..3e36d2d58 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/snapshots/QYX6FZ0NZAKSV5XMDSGG differ diff --git a/icechunk-python/tests/data/split-repo/snapshots/W1ZXYGWWEXKXS2J6TQGG b/icechunk-python/tests/data/split-repo/snapshots/W1ZXYGWWEXKXS2J6TQGG new file mode 100644 index 000000000..9cc9f561c Binary files /dev/null and b/icechunk-python/tests/data/split-repo/snapshots/W1ZXYGWWEXKXS2J6TQGG differ diff --git a/icechunk-python/tests/data/split-repo/snapshots/X8NA4KMB3XYEDM2FAJEG b/icechunk-python/tests/data/split-repo/snapshots/X8NA4KMB3XYEDM2FAJEG new file mode 100644 index 000000000..62ab4514d Binary files /dev/null and b/icechunk-python/tests/data/split-repo/snapshots/X8NA4KMB3XYEDM2FAJEG differ diff --git a/icechunk-python/tests/data/split-repo/transactions/BYJMXJ1CHYE86YSHESR0 b/icechunk-python/tests/data/split-repo/transactions/BYJMXJ1CHYE86YSHESR0 new file mode 100644 index 000000000..9442700a1 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/transactions/BYJMXJ1CHYE86YSHESR0 differ diff --git a/icechunk-python/tests/data/split-repo/transactions/QYX6FZ0NZAKSV5XMDSGG b/icechunk-python/tests/data/split-repo/transactions/QYX6FZ0NZAKSV5XMDSGG new file mode 100644 index 000000000..985857cfc Binary files /dev/null and b/icechunk-python/tests/data/split-repo/transactions/QYX6FZ0NZAKSV5XMDSGG differ diff --git a/icechunk-python/tests/data/split-repo/transactions/W1ZXYGWWEXKXS2J6TQGG b/icechunk-python/tests/data/split-repo/transactions/W1ZXYGWWEXKXS2J6TQGG new file mode 100644 index 000000000..b1a08d3b3 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/transactions/W1ZXYGWWEXKXS2J6TQGG differ diff --git a/icechunk-python/tests/data/split-repo/transactions/X8NA4KMB3XYEDM2FAJEG b/icechunk-python/tests/data/split-repo/transactions/X8NA4KMB3XYEDM2FAJEG new file mode 100644 index 000000000..3096c0164 Binary files /dev/null and b/icechunk-python/tests/data/split-repo/transactions/X8NA4KMB3XYEDM2FAJEG differ diff --git a/icechunk-python/tests/test_can_read_old.py b/icechunk-python/tests/test_can_read_old.py index c2ccf45bb..c00e3a579 100644 --- a/icechunk-python/tests/test_can_read_old.py +++ b/icechunk-python/tests/test_can_read_old.py @@ -20,11 +20,21 @@ import icechunk as ic import zarr +UPDATED_SPLITTING_CONFIG = ic.ManifestSplittingConfig.from_dict( + { + ic.ManifestSplitCondition.name_matches("split_*"): { + ic.ManifestSplitDimCondition.Axis(0): 1, + ic.ManifestSplitDimCondition.DimensionName("longitude"): 1, + ic.ManifestSplitDimCondition.Any(): 3, + } + } +) -def mk_repo(*, create: bool, config: ic.RepositoryConfig | None = None) -> ic.Repository: - """Create a store that can access virtual chunks in localhost MinIO""" - store_path = "./tests/data/test-repo" +def mk_repo( + *, create: bool, store_path: str, config: ic.RepositoryConfig | None = None +) -> ic.Repository: + """Create a store that can access virtual chunks in localhost MinIO""" if create and 
config is None: config = ic.RepositoryConfig.default() config.inline_chunk_threshold_bytes = 12 @@ -52,6 +62,107 @@ def mk_repo(*, create: bool, config: ic.RepositoryConfig | None = None) -> ic.Re return repo +async def write_a_split_repo() -> None: + """Write the test repository with manifest splitting.""" + + store_path = "./tests/data/split-repo" + config = ic.RepositoryConfig.default() + config.inline_chunk_threshold_bytes = 12 + config.manifest = ic.ManifestConfig( + splitting=ic.ManifestSplittingConfig.from_dict( + { + ic.ManifestSplitCondition.name_matches("split_*"): { + ic.ManifestSplitDimCondition.Axis(0): 1, + ic.ManifestSplitDimCondition.DimensionName("latitude"): 1, + ic.ManifestSplitDimCondition.Any(): 3, + } + } + ) + ) + + print(f"Writing repository to {store_path}") + repo = mk_repo(create=True, store_path=store_path, config=config) + session = repo.writable_session("main") + store = session.store + + root = zarr.group(store=store) + group1 = root.create_group( + "group1", attributes={"this": "is a nice group", "icechunk": 1, "size": 42.0} + ) + + # these chunks will be materialized + big_chunks = group1.create_array( + "split", + shape=(10, 10), + chunks=(3, 3), + dimension_names=(None, "longitude"), + dtype="float32", + fill_value=float("nan"), + attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0}, + ) + + # these chunks will be inline + small_chunks = group1.create_array( + "small_chunks", + shape=(5,), + chunks=(1,), + dtype="int8", + fill_value=8, + attributes={"this": "is a nice array", "icechunk": 1, "size": 42.0}, + ) + session.commit("empty structure") + + session = repo.writable_session("main") + big_chunks = zarr.open_array(session.store, path="/group1/split", mode="a") + small_chunks = zarr.open_array(session.store, path="/group1/small_chunks", mode="a") + big_chunks[:] = 120 + small_chunks[:] = 0 + session.commit("write data") + + session = repo.writable_session("main") + big_chunks = zarr.open_array(session.store, path="group1/split", mode="a") + small_chunks = zarr.open_array(session.store, path="group1/small_chunks", mode="a") + big_chunks[:] = 12 + small_chunks[:] = 1 + session.commit("write data again") + + ### new config + config = ic.RepositoryConfig.default() + config.inline_chunk_threshold_bytes = 12 + config.manifest = ic.ManifestConfig(splitting=UPDATED_SPLITTING_CONFIG) + repo = mk_repo(create=False, store_path=store_path, config=config) + repo.save_config() + session = repo.writable_session("main") + big_chunks = zarr.open_array(session.store, path="group1/split", mode="a") + small_chunks = zarr.open_array(session.store, path="group1/small_chunks", mode="a") + big_chunks[:] = 14 + small_chunks[:] = 3 + session.commit("write data again with more splits") + + +async def test_icechunk_can_read_old_repo_with_manifest_splitting(): + repo = mk_repo(create=False, store_path="./tests/data/split-repo") + ancestry = list(repo.ancestry(branch="main"))[::-1] + + init_snapshot = ancestry[1] + assert init_snapshot.message == "empty structure" + assert len(init_snapshot.manifests) == 0 + + snapshot = ancestry[2] + assert snapshot.message == "write data" + assert len(snapshot.manifests) == 9 + + snapshot = ancestry[3] + assert snapshot.message == "write data again" + assert len(snapshot.manifests) == 9 + + snapshot = ancestry[4] + assert snapshot.message == "write data again with more splits" + assert len(snapshot.manifests) == 17 + + assert repo.config.manifest.splitting == UPDATED_SPLITTING_CONFIG + + async def write_a_test_repo() -> None: 
"""Write the test repository. @@ -63,7 +174,7 @@ async def write_a_test_repo() -> None: """ print("Writing repository to ./tests/data/test-repo") - repo = mk_repo(create=True) + repo = mk_repo(create=True, store_path="./tests/data/test-repo") session = repo.writable_session("main") store = session.store @@ -169,7 +280,7 @@ async def test_icechunk_can_read_old_repo() -> None: # we import here so it works when the script is ran by pytest from tests.conftest import write_chunks_to_minio - repo = mk_repo(create=False) + repo = mk_repo(create=False, store_path="./tests/data/test-repo") main_snapshot = repo.lookup_branch("main") expected_main_history = [ @@ -282,4 +393,5 @@ async def test_icechunk_can_read_old_repo() -> None: # we import here so it works when the script is ran by pytest - asyncio.run(write_a_test_repo()) + # asyncio.run(write_a_test_repo()) + asyncio.run(write_a_split_repo()) diff --git a/icechunk-python/tests/test_manifest_splitting.py b/icechunk-python/tests/test_manifest_splitting.py new file mode 100644 index 000000000..b29fb7b18 --- /dev/null +++ b/icechunk-python/tests/test_manifest_splitting.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 + +import math +import os +import tempfile + +import numpy as np +import pytest + +import icechunk as ic +import xarray as xr +import zarr +from icechunk import ManifestSplitCondition, ManifestSplitDimCondition +from icechunk.xarray import to_icechunk + +SHAPE = (3, 4, 17) +CHUNKS = (1, 1, 1) +DIMS = ("time", "latitude", "longitude") + + +def test_manifest_splitting_appends(): + array_condition = ManifestSplitCondition.or_conditions( + [ + ManifestSplitCondition.name_matches("temperature"), + ManifestSplitCondition.name_matches("salinity"), + ] + ) + sconfig = ic.ManifestSplittingConfig.from_dict( + {array_condition: {ManifestSplitDimCondition.DimensionName("longitude"): 3}} + ) + config = ic.RepositoryConfig( + inline_chunk_threshold_bytes=0, manifest=ic.ManifestConfig(splitting=sconfig) + ) + with tempfile.TemporaryDirectory() as tmpdir: + ### simple create repo with manifest splitting + storage = ic.local_filesystem_storage(tmpdir) + repo = ic.Repository.create(storage, config=config) + assert repo.config.manifest.splitting is not None + + ds = xr.Dataset( + { + "temperature": (DIMS, np.arange(math.prod(SHAPE)).reshape(SHAPE)), + "salinity": (DIMS, np.arange(math.prod(SHAPE)).reshape(SHAPE)), + } + ) + session = repo.writable_session("main") + with zarr.config.set({"array.write_empty_chunks": True}): + to_icechunk( + ds, + session, + encoding={ + "temperature": {"chunks": CHUNKS}, + "salinity": {"chunks": CHUNKS}, + }, + mode="w", + ) + session.commit("initialize") + roundtripped = xr.open_dataset(session.store, engine="zarr", consolidated=False) + xr.testing.assert_identical(roundtripped, ds) + + nchunks = math.prod(SHAPE) * 2 + nmanifests = 6 * 2 + assert len(os.listdir(f"{tmpdir}/chunks")) == nchunks + assert len(os.listdir(f"{tmpdir}/manifests")) == nmanifests + + #### check that config is persisted and used when writing after re-open + ### append along time - no splitting specified along this dimension + repo = ic.Repository.open(storage) + assert repo.config.manifest.splitting is not None + session = repo.writable_session("main") + with zarr.config.set({"array.write_empty_chunks": True}): + to_icechunk(ds, session, mode="a", append_dim="time") + session.commit("appended") + roundtripped = xr.open_dataset(session.store, engine="zarr", consolidated=False) + xr.testing.assert_identical(roundtripped, xr.concat([ds, ds], dim="time")) + 
nchunks += math.prod(SHAPE) * 2 + nmanifests += 6 * 2 + + assert len(os.listdir(f"{tmpdir}/chunks")) == nchunks + assert len(os.listdir(f"{tmpdir}/manifests")) == nmanifests + + #### check that config is persisted and used when writing after re-open + ### append along longitude - splitting specified + NEWSHAPE = (2 * SHAPE[0], *SHAPE[1:-1], 2) + newds = xr.Dataset( + { + "temperature": (DIMS, np.arange(math.prod(NEWSHAPE)).reshape(NEWSHAPE)), + "salinity": (DIMS, np.arange(math.prod(NEWSHAPE)).reshape(NEWSHAPE)), + } + ) + repo = ic.Repository.open(storage) + assert repo.config.manifest.splitting is not None + session = repo.writable_session("main") + with zarr.config.set({"array.write_empty_chunks": True}): + to_icechunk(newds, session, mode="a", append_dim="longitude") + session.commit("appended") + roundtripped = xr.open_dataset(session.store, engine="zarr", consolidated=False) + xr.testing.assert_identical( + roundtripped, + xr.concat([xr.concat([ds, ds], dim="time"), newds], dim="longitude"), + ) + nchunks += math.prod(NEWSHAPE) * 2 + # the lon size goes from 17 -> 19 so one extra manifest, + # compared to previous writes + nmanifests += 7 * 2 + + assert len(os.listdir(f"{tmpdir}/chunks")) == nchunks + assert len(os.listdir(f"{tmpdir}/manifests")) == nmanifests + + +def test_manifest_overwrite_splitting_config_on_read(): + sconfig = ic.ManifestSplittingConfig.from_dict( + { + ManifestSplitCondition.name_matches("temperature"): { + ManifestSplitDimCondition.DimensionName("longitude"): 3 + } + } + ) + + new_sconfig = ic.ManifestSplittingConfig.from_dict( + { + ManifestSplitCondition.name_matches("temperature"): { + ManifestSplitDimCondition.DimensionName("longitude"): 10 + } + } + ) + + config = ic.RepositoryConfig( + inline_chunk_threshold_bytes=0, manifest=ic.ManifestConfig(splitting=sconfig) + ) + new_config = ic.RepositoryConfig( + inline_chunk_threshold_bytes=0, manifest=ic.ManifestConfig(splitting=new_sconfig) + ) + with tempfile.TemporaryDirectory() as tmpdir: + ### simple create repo with manifest splitting + storage = ic.local_filesystem_storage(tmpdir) + repo = ic.Repository.create(storage, config=config) + assert repo.config.manifest.splitting is not None + + ds = xr.Dataset( + {"temperature": (DIMS, np.arange(math.prod(SHAPE)).reshape(SHAPE))} + ) + session = repo.writable_session("main") + with zarr.config.set({"array.write_empty_chunks": True}): + to_icechunk( + ds, session, encoding={"temperature": {"chunks": CHUNKS}}, mode="w" + ) + session.commit("initialize") + roundtripped = xr.open_dataset(session.store, engine="zarr", consolidated=False) + xr.testing.assert_identical(roundtripped, ds) + + nchunks = math.prod(SHAPE) + nmanifests = 6 + assert len(os.listdir(f"{tmpdir}/chunks")) == nchunks + assert len(os.listdir(f"{tmpdir}/manifests")) == nmanifests + + #### check that config is overwritten on read + ### append along time - no splitting specified along this dimension + repo = ic.Repository.open(storage, config=new_config) + assert repo.config.manifest.splitting is not None + session = repo.writable_session("main") + with zarr.config.set({"array.write_empty_chunks": True}): + to_icechunk(ds, session, mode="a", append_dim="time") + session.commit("appended") + roundtripped = xr.open_dataset(session.store, engine="zarr", consolidated=False) + xr.testing.assert_identical(roundtripped, xr.concat([ds, ds], dim="time")) + nchunks = 2 * math.prod(SHAPE) + nmanifests += 2 + + assert len(os.listdir(f"{tmpdir}/chunks")) == nchunks + assert len(os.listdir(f"{tmpdir}/manifests")) 
== nmanifests + + +def test_manifest_splitting_sparse_regions(): + sconfig = ic.ManifestSplittingConfig.from_dict( + { + ManifestSplitCondition.name_matches("temperature"): { + ManifestSplitDimCondition.DimensionName("longitude"): 3 + } + } + ) + config = ic.RepositoryConfig( + inline_chunk_threshold_bytes=0, manifest=ic.ManifestConfig(splitting=sconfig) + ) + with tempfile.TemporaryDirectory() as tmpdir: + ### simple create repo with manifest splitting + storage = ic.local_filesystem_storage(tmpdir) + repo = ic.Repository.create(storage, config=config) + assert repo.config.manifest.splitting is not None + + ds = xr.Dataset( + {"temperature": (DIMS, np.arange(math.prod(SHAPE)).reshape(SHAPE))}, + coords=dict(zip(DIMS, map(np.arange, SHAPE), strict=False)), + ) + session = repo.writable_session("main") + ds.chunk(-1).to_zarr( + session.store, + encoding={"temperature": {"chunks": CHUNKS}}, + mode="w", + compute=False, + consolidated=False, + ) + session.commit("initialize") + + session = repo.writable_session("main") + with zarr.config.set({"array.write_empty_chunks": False}): + to_icechunk(ds.isel(longitude=slice(1, 4)), session, region="auto") + session.commit("write region 1") + roundtripped = xr.open_dataset(session.store, engine="zarr", consolidated=False) + expected = xr.zeros_like(ds) + expected.loc[{"longitude": slice(1, 3)}] = ds.isel(longitude=slice(1, 4)) + xr.testing.assert_identical(roundtripped, expected) + + nchunks = math.prod(ds.isel(longitude=slice(1, 4)).sizes.values()) + len( + ds.coords + ) + nmanifests = 2 + len(ds.coords) + assert len(os.listdir(f"{tmpdir}/chunks")) == nchunks + assert len(os.listdir(f"{tmpdir}/manifests")) == nmanifests + + +@pytest.mark.parametrize( + "config, expected_split_sizes", + [ + ( + { + ManifestSplitDimCondition.DimensionName("longitude"): 3, + ManifestSplitDimCondition.Axis(1): 2, + ManifestSplitDimCondition.Any(): 1, + }, + (1, 2, 3), + ), + ( + { + ManifestSplitDimCondition.DimensionName("longitude"): 3, + ManifestSplitDimCondition.Any(): 1, + # this next one gets overwritten by Any + ManifestSplitDimCondition.Axis(1): 2, + }, + (1, 1, 3), + ), + ( + { + ManifestSplitDimCondition.DimensionName("longitude"): 3, + ManifestSplitDimCondition.DimensionName("latitude"): 3, + # this next one gets overwritten by DimensionName above. 
+ ManifestSplitDimCondition.Axis(1): 13, + ManifestSplitDimCondition.Any(): 1, + }, + (1, 3, 3), + ), + ], +) +def test_manifest_splitting_complex_config(config, expected_split_sizes): + sconfig = ic.ManifestSplittingConfig.from_dict( + {ManifestSplitCondition.AnyArray(): config} + ) + config = ic.RepositoryConfig( + inline_chunk_threshold_bytes=0, manifest=ic.ManifestConfig(splitting=sconfig) + ) + with tempfile.TemporaryDirectory() as tmpdir: + ### simple create repo with manifest splitting + storage = ic.local_filesystem_storage(tmpdir) + repo = ic.Repository.create(storage, config=config) + assert repo.config.manifest.splitting is not None + + ds = xr.Dataset( + {"temperature": (DIMS, np.arange(math.prod(SHAPE)).reshape(SHAPE))}, + ) + session = repo.writable_session("main") + with zarr.config.set({"array.write_empty_chunks": True}): + to_icechunk( + ds, session, mode="w", encoding={"temperature": {"chunks": CHUNKS}} + ) + session.commit("write") + + nmanifests = math.prod( + math.ceil(nchunks / splitsize) + for nchunks, splitsize in zip(SHAPE, expected_split_sizes, strict=False) + ) + assert len(os.listdir(f"{tmpdir}/manifests")) == nmanifests diff --git a/icechunk/proptest-regressions/format/manifest.txt b/icechunk/proptest-regressions/format/manifest.txt index fa7d6b4d1..e16e02555 100644 --- a/icechunk/proptest-regressions/format/manifest.txt +++ b/icechunk/proptest-regressions/format/manifest.txt @@ -6,3 +6,4 @@ # everyone who runs the test benefits from these saved cases. cc b1f4a1a052fb80b62239be41d1c339fcd58a928a39c8a0eecfbd81ab3180443d # shrinks to input = _TestManifestExtentsArgs { indices: [ChunkIndices([0, 0, 0, 0])] } cc fe24d66f88ea0db85211ef04950903aeb3de6d3086ded541bc8efdb0649258e1 # shrinks to input = _TestManifestExtentsArgs { indices: [ChunkIndices([0, 0, 0, 0]), ChunkIndices([0, 0, 0, 0])] } +cc 4644669257cdfaff2725d7b088a169773dbfbb1f7046da64eca635530d772f45 # shrinks to shape_dim = ShapeDim { shape: ArrayShape([DimensionShape { dim_length: 1, chunk_length: 1 }, DimensionShape { dim_length: 2, chunk_length: 1 }]), dimension_names: None } diff --git a/icechunk/src/change_set.rs b/icechunk/src/change_set.rs index 622cf4595..8a5f880be 100644 --- a/icechunk/src/change_set.rs +++ b/icechunk/src/change_set.rs @@ -31,6 +31,7 @@ pub struct ChangeSet { updated_arrays: HashMap, updated_groups: HashMap, // It's important we keep these sorted, we use this fact in TransactionLog creation + // TODO: could track ManifestExtents set_chunks: BTreeMap>>, deleted_groups: HashSet<(Path, NodeId)>, deleted_arrays: HashSet<(Path, NodeId)>, diff --git a/icechunk/src/config.rs b/icechunk/src/config.rs index d662b8895..1f25418f8 100644 --- a/icechunk/src/config.rs +++ b/icechunk/src/config.rs @@ -9,9 +9,11 @@ use std::{ use async_trait::async_trait; use chrono::{DateTime, Utc}; pub use object_store::gcp::GcpCredential; +use regex::bytes::Regex; use serde::{Deserialize, Serialize}; use crate::{ + format::Path, storage, virtual_chunks::{ContainerName, VirtualChunkContainer, mk_default_containers}, }; @@ -128,6 +130,106 @@ impl CachingConfig { } } +#[derive(Debug, PartialEq, Eq, Serialize, Hash, Deserialize, Clone)] +#[serde(rename_all = "snake_case")] +pub enum ManifestSplitCondition { + Or(Vec), + And(Vec), + PathMatches { regex: String }, + NameMatches { regex: String }, + AnyArray, +} + +//```yaml +//rules: +// - path: ./2m_temperature # regex, 3D variable: (null, latitude, longitude) +// manifest-split-sizes: +// - 0: 120 +// - path: ./temperature # 4D variable: (time, level, latitude, 
longitude) +// manifest-split-sizes: +// - "level": 1 # alternatively 0: 1 +// - "time": 12 # and 1: 12 +// - path: ./temperature +// manifest-split-sizes: +// - "level": 1 +// - "time": 8760 # ~1 year +// - "latitude": null # for unspecified, default is null, which means never split. +// - path: ./* # the default rules +// manifest-split-sizes: null # no splitting, just a single manifest per array +//``` + +impl ManifestSplitCondition { + // from_yaml? + pub fn matches(&self, path: &Path) -> bool { + use ManifestSplitCondition::*; + match self { + AnyArray => true, + Or(vec) => vec.iter().any(|c| c.matches(path)), + And(vec) => vec.iter().all(|c| c.matches(path)), + // TODO: precompile the regex + PathMatches { regex } => Regex::new(regex) + .map(|regex| regex.is_match(path.to_string().as_bytes())) + .unwrap_or(false), + // TODO: precompile the regex + NameMatches { regex } => Regex::new(regex) + .map(|regex| { + path.name() + .map(|name| regex.is_match(name.as_bytes())) + .unwrap_or(false) + }) + .unwrap_or(false), + } + } +} + +#[derive(Debug, Hash, PartialEq, Eq, Serialize, Deserialize, Clone)] +pub enum ManifestSplitDimCondition { + Axis(usize), + DimensionName(String), + // TODO: Since dimension name can be null, + // i don't think we can have DimensionName(r"*") catch the "Any" case + Any, +} + +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)] +pub struct ManifestSplitDim { + pub condition: ManifestSplitDimCondition, + pub num_chunks: u32, +} + +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)] +pub struct ManifestSplittingConfig { + // need to preserve insertion order of conditions, so hashmap doesn't work + pub split_sizes: Option)>>, +} + +impl Default for ManifestSplittingConfig { + fn default() -> Self { + let inner = vec![ManifestSplitDim { + condition: ManifestSplitDimCondition::Any, + num_chunks: u32::MAX, + }]; + let new = vec![( + ManifestSplitCondition::PathMatches { regex: r".*".to_string() }, + inner, + )]; + Self { split_sizes: Some(new) } + } +} + +impl ManifestSplittingConfig { + pub fn with_size(split_size: u32) -> Self { + let split_sizes = vec![( + ManifestSplitCondition::PathMatches { regex: r".*".to_string() }, + vec![ManifestSplitDim { + condition: ManifestSplitDimCondition::Any, + num_chunks: split_size, + }], + )]; + ManifestSplittingConfig { split_sizes: Some(split_sizes) } + } +} + #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)] #[serde(rename_all = "snake_case")] pub enum ManifestPreloadCondition { @@ -206,13 +308,19 @@ static DEFAULT_MANIFEST_PRELOAD_CONDITION: OnceLock = #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Default)] pub struct ManifestConfig { pub preload: Option, + pub splitting: Option, } static DEFAULT_MANIFEST_PRELOAD_CONFIG: OnceLock = OnceLock::new(); +static DEFAULT_MANIFEST_SPLITTING_CONFIG: OnceLock = + OnceLock::new(); impl ManifestConfig { pub fn merge(&self, other: Self) -> Self { - Self { preload: other.preload.or(self.preload.clone()) } + Self { + preload: other.preload.or(self.preload.clone()), + splitting: other.splitting.or(self.splitting.clone()), + } } pub fn preload(&self) -> &ManifestPreloadConfig { @@ -220,6 +328,23 @@ impl ManifestConfig { DEFAULT_MANIFEST_PRELOAD_CONFIG.get_or_init(ManifestPreloadConfig::default) }) } + + pub fn splitting(&self) -> &ManifestSplittingConfig { + self.splitting.as_ref().unwrap_or_else(|| { + DEFAULT_MANIFEST_SPLITTING_CONFIG + .get_or_init(ManifestSplittingConfig::default) + }) + } + // for testing only, create a config with no 
preloading and no splitting + pub fn empty() -> Self { + ManifestConfig { + preload: Some(ManifestPreloadConfig { + max_total_refs: None, + preload_if: None, + }), + splitting: None, + } + } } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Default)] diff --git a/icechunk/src/format/manifest.rs b/icechunk/src/format/manifest.rs index e8af0d7a8..121892d50 100644 --- a/icechunk/src/format/manifest.rs +++ b/icechunk/src/format/manifest.rs @@ -4,7 +4,7 @@ use crate::format::flatbuffers::generated; use bytes::Bytes; use flatbuffers::VerifierOptions; use futures::{Stream, TryStreamExt}; -use itertools::Itertools; +use itertools::{Itertools, multiunzip}; use serde::{Deserialize, Serialize}; use thiserror::Error; @@ -37,11 +37,86 @@ impl ManifestExtents { Self(v) } + pub fn contains(&self, coord: &[u32]) -> bool { + self.iter().zip(coord.iter()).all(|(range, that)| range.contains(that)) + } + pub fn iter(&self) -> impl Iterator> { self.0.iter() } + + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } } +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct ManifestSplits(Vec); + +impl ManifestSplits { + /// Used at read-time + pub fn from_extents(extents: Vec) -> Self { + assert!(!extents.is_empty()); + Self(extents) + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + // Build up ManifestSplits from a iterable of shard edges or boundaries + // along each dimension. + /// # Examples + /// ``` + /// use icechunk::format::manifest::{ManifestSplits, ManifestExtents}; + /// let actual = ManifestSplits::from_edges(vec![vec![0u32, 1, 2], vec![3u32, 4, 5]]); + /// let expected = ManifestSplits::from_extents(vec![ + /// ManifestExtents::new(&[0, 3], &[1, 4]), + /// ManifestExtents::new(&[0, 4], &[1, 5]), + /// ManifestExtents::new(&[1, 3], &[2, 4]), + /// ManifestExtents::new(&[1, 4], &[2, 5]), + /// ] + /// ); + /// assert_eq!(actual, expected); + /// ``` + pub fn from_edges(iter: impl IntoIterator>) -> Self { + // let iter = vec![vec![0u32, 1, 2], vec![3u32, 4, 5]] + let res = iter + .into_iter() + // vec![(0, 1), (1, 2)], vec![(3, 4), (4, 5)] + .map(|x| x.into_iter().tuple_windows()) + // vec![((0, 1), (3, 4)), ((0, 1), (4, 5)), + // ((1, 2), (3, 4)), ((1, 2), (4, 5))] + .multi_cartesian_product() + // vec![((0, 3), (1, 4)), ((0, 4), (1, 5)), + // ((1, 3), (2, 4)), ((1, 4), (2, 5))] + .map(multiunzip) + .map(|(from, to): (Vec, Vec)| { + ManifestExtents::new(from.as_slice(), to.as_slice()) + }); + Self(res.collect()) + } + + pub fn iter(&self) -> impl Iterator { + self.0.iter() + } + + pub fn len(&self) -> usize { + self.0.len() + } +} + +/// Helper function for constructing uniformly spaced manifest split edges +pub fn uniform_manifest_split_edges(num_chunks: u32, split_size: &u32) -> Vec { + (0u32..=num_chunks) + .step_by(*split_size as usize) + .chain((num_chunks % split_size != 0).then_some(num_chunks)) + .collect() +} #[derive(Debug, Error)] #[non_exhaustive] pub enum VirtualReferenceErrorKind { @@ -215,7 +290,7 @@ impl Manifest { } if array_manifests.is_empty() { - // empty manifet + // empty manifest return Ok(None); } @@ -448,3 +523,48 @@ static ROOT_OPTIONS: VerifierOptions = VerifierOptions { max_apparent_size: 1 << 31, // taken from the default ignore_missing_null_terminator: true, }; + +#[cfg(test)] +#[allow(clippy::unwrap_used, clippy::panic)] +mod tests { + use super::*; + use crate::strategies::{ShapeDim, shapes_and_dims}; + use icechunk_macros; + use proptest::prelude::*; + + 
proptest! { + #[icechunk_macros::test] + fn test_manifest_split_from_edges(shape_dim in shapes_and_dims(Some(5))) { + // Note: using the shape, chunks strategy to generate chunk_shape, split_shape + let ShapeDim { shape, .. } = shape_dim; + + let num_chunks = shape.iter().map(|x| x.array_length()).collect::>(); + let split_shape = shape.iter().map(|x| x.chunk_length()).collect::>(); + + let ndim = shape.len(); + let edges: Vec> = + (0usize..ndim).map(|axis| { + uniform_manifest_split_edges(num_chunks[axis] as u32, &(split_shape[axis] as u32)) + } + ).collect(); + + let splits = ManifestSplits::from_edges(edges.into_iter()); + for edge in splits.iter() { + // must be ndim ranges + prop_assert_eq!(edge.len(), ndim); + for range in edge.iter() { + prop_assert!(range.end > range.start); + } + } + + // when using from_edges, extents must not exactly overlap + for edges in splits.iter().combinations(2) { + let is_equal = std::iter::zip(edges[0].iter(), edges[1].iter()) + .all(|(range1, range2)| { + (range1.start == range2.start) && (range1.end == range2.end) + }); + prop_assert!(!is_equal); + } + } + } +} diff --git a/icechunk/src/format/snapshot.rs b/icechunk/src/format/snapshot.rs index 043b3c1bc..2262bbbbe 100644 --- a/icechunk/src/format/snapshot.rs +++ b/icechunk/src/format/snapshot.rs @@ -37,6 +37,25 @@ impl DimensionShape { pub struct ArrayShape(Vec); impl ArrayShape { + pub fn len(&self) -> usize { + self.0.len() + } + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn get(&self, ax: usize) -> Option { + if ax > self.len() - 1 { None } else { Some(self.0[ax].clone()) } + } + + pub fn iter(&self) -> impl Iterator { + self.0.iter() + } + + pub fn num_chunks(&self) -> impl Iterator { + self.max_chunk_indices_permitted().map(|x| x + 1) + } + pub fn new(it: I) -> Option where I: IntoIterator, @@ -102,6 +121,15 @@ impl From> for DimensionName { } } +impl From for Option { + fn from(value: DimensionName) -> Option { + match value { + DimensionName::NotSpecified => None, + DimensionName::Name(name) => Some(name), + } + } +} + impl From<&str> for DimensionName { fn from(value: &str) -> Self { if value.is_empty() { diff --git a/icechunk/src/repository.rs b/icechunk/src/repository.rs index 75ef2322d..d7652a815 100644 --- a/icechunk/src/repository.rs +++ b/icechunk/src/repository.rs @@ -147,6 +147,7 @@ impl Repository { storage: Arc, virtual_chunk_credentials: HashMap, ) -> RepositoryResult { + debug!("Creating Repository"); if !storage.can_write() { return Err(RepositoryErrorKind::ReadonlyStorage( "Cannot create repository".to_string(), @@ -228,6 +229,7 @@ impl Repository { storage: Arc, virtual_chunk_credentials: HashMap, ) -> RepositoryResult { + debug!("Opening Repository"); let storage_c = Arc::clone(&storage); let handle1 = tokio::spawn( async move { Self::fetch_config(storage_c.as_ref()).await }.in_current_span(), @@ -939,15 +941,37 @@ mod tests { use crate::{ Repository, Storage, config::{ - CachingConfig, ManifestConfig, ManifestPreloadConfig, RepositoryConfig, + CachingConfig, ManifestConfig, ManifestPreloadConfig, ManifestSplitCondition, + ManifestSplitDim, ManifestSplitDimCondition, ManifestSplittingConfig, + RepositoryConfig, + }, + format::{ + ByteRange, ChunkIndices, + manifest::{ChunkPayload, ManifestSplits}, + snapshot::{ArrayShape, DimensionName}, }, - format::{ChunkIndices, manifest::ChunkPayload, snapshot::ArrayShape}, new_local_filesystem_storage, + session::get_chunk, storage::new_in_memory_storage, }; use super::*; + async fn assert_manifest_count( + storage: 
&Arc, + total_manifests: usize, + ) { + assert_eq!( + storage + .list_manifests(&storage.default_settings()) + .await + .unwrap() + .count() + .await, + total_manifests + ); + } + #[tokio::test] async fn test_repository_persistent_config() -> Result<(), Box> { let storage: Arc = new_in_memory_storage().await?; @@ -1126,6 +1150,391 @@ mod tests { )); } + async fn create_repo_with_split_manifest_config( + path: &Path, + shape: &ArrayShape, + dimension_names: &Option>, + split_config: &ManifestSplittingConfig, + storage: Option>, + ) -> Result> { + let backend: Arc = + storage.unwrap_or(new_in_memory_storage().await?); + let storage = Arc::clone(&backend); + + let man_config = ManifestConfig { + preload: Some(ManifestPreloadConfig { + max_total_refs: None, + preload_if: None, + }), + splitting: Some(split_config.clone()), + }; + let config = RepositoryConfig { + manifest: Some(man_config), + ..RepositoryConfig::default() + }; + let repository = + Repository::create(Some(config), storage, HashMap::new()).await?; + + let mut session = repository.writable_session("main").await?; + + let def = Bytes::from_static(br#"{"this":"array"}"#); + session.add_group(Path::root(), def.clone()).await?; + session + .add_array(path.clone(), shape.clone(), dimension_names.clone(), def.clone()) + .await?; + session.commit("initialized", None).await?; + + Ok(repository) + } + + #[tokio::test] + async fn tests_manifest_splitting_simple() -> Result<(), Box> { + let dim_size = 25u32; + let chunk_size = 1u32; + let split_size = 3u32; + + let shape = + ArrayShape::new(vec![(dim_size.into(), chunk_size.into()), (2, 1), (1, 1)]) + .unwrap(); + let dimension_names = Some(vec!["t".into()]); + let temp_path: Path = "/temperature".try_into().unwrap(); + let split_config = ManifestSplittingConfig::with_size(split_size); + + let backend: Arc = new_in_memory_storage().await?; + let logging = Arc::new(LoggingStorage::new(Arc::clone(&backend))); + let storage: Arc = logging.clone(); + let repository = create_repo_with_split_manifest_config( + &temp_path, + &shape, + &dimension_names, + &split_config, + Some(Arc::clone(&storage)), + ) + .await?; + + let mut total_manifests = 0; + assert_manifest_count(&backend, total_manifests).await; + + logging.clear(); + let ops = logging.fetch_operations(); + assert!(ops.is_empty()); + let mut session = repository.writable_session("main").await?; + + // only add refs that will be packed in the first split. + for i in 0..2 { + session + .set_chunk_ref( + temp_path.clone(), + ChunkIndices(vec![i, 0, 0]), + Some(ChunkPayload::Inline(format!("{0}", i).into())), + ) + .await? 
+ } + session.commit("first split", None).await?; + total_manifests += 1; + assert_manifest_count(&storage, total_manifests).await; + + // now only last split + let last_chunk = dim_size - 1; + let mut session = repository.writable_session("main").await?; + session + .set_chunk_ref( + temp_path.clone(), + ChunkIndices(vec![last_chunk, 0, 0]), + Some(ChunkPayload::Inline(format!("{0}", last_chunk).into())), + ) + .await?; + session.commit("last split", None).await?; + total_manifests += 2; // FIXME: this should be +1 once writes are optimized + assert_manifest_count(&storage, total_manifests).await; + + // check that reads are optimized; we should only fetch the last split for this query + let logging2 = Arc::new(LoggingStorage::new(Arc::clone(&backend))); + let storage2: Arc = logging2.clone(); + let config = RepositoryConfig { + manifest: Some(ManifestConfig::empty()), + ..RepositoryConfig::default() + }; + let read_repo = Repository::open(Some(config), storage2, HashMap::new()).await?; + let session = read_repo + .readonly_session(&VersionInfo::BranchTipRef("main".to_string())) + .await?; + get_chunk( + session + .get_chunk_reader( + &temp_path, + &ChunkIndices(vec![last_chunk, 0, 0]), + &ByteRange::ALL, + ) + .await + .unwrap(), + ) + .await + .unwrap() + .unwrap(); + let ops = logging2.fetch_operations(); + assert_eq!( + ops.iter().filter(|(op, _)| op == "fetch_manifest_splitting").count(), + 1 + ); + + // fetching a chunk that wasn't written shouldn't fetch any more manifests + logging2.clear(); + get_chunk( + session + .get_chunk_reader( + &temp_path, + &ChunkIndices(vec![split_size + 1, 0, 0]), + &ByteRange::ALL, + ) + .await + .unwrap(), + ) + .await + .unwrap(); + let ops = logging2.fetch_operations(); + assert_eq!( + ops.iter().filter(|(op, _)| op == "fetch_manifest_splitting").count(), + 0 + ); + + // write one ref per split + let mut session = repository.writable_session("main").await?; + for i in (0..dim_size).step_by(split_size as usize) { + total_manifests += 1; + session + .set_chunk_ref( + temp_path.clone(), + ChunkIndices(vec![i, 0, 0]), + Some(ChunkPayload::Inline(format!("{0}", i).into())), + ) + .await? + } + session.commit("wrote all splits", None).await?; + assert_manifest_count(&storage, total_manifests).await; + + let mut session = repository.writable_session("main").await?; + for i in 0..dim_size { + session + .set_chunk_ref( + temp_path.clone(), + ChunkIndices(vec![i, 0, 0]), + Some(ChunkPayload::Inline(format!("{0}", i).into())), + ) + .await? + } + // We are counting total manifests in the `assert_manifest_count` helper function + // So we keep a running count of the total and update that at each step. 
+ total_manifests += dim_size.div_ceil(split_size) as usize; + session.commit("full overwrite", None).await?; + assert_manifest_count(&storage, total_manifests).await; + + // test reads + for i in 0..dim_size { + let val = get_chunk( + session + .get_chunk_reader( + &temp_path, + &ChunkIndices(vec![i, 0, 0]), + &ByteRange::ALL, + ) + .await + .unwrap(), + ) + .await + .unwrap() + .unwrap(); + assert_eq!(val, Bytes::copy_from_slice(format!("{0}", i).as_bytes())); + } + + Ok(()) + } + + #[tokio::test] + async fn test_manifest_splitting_complex_config() -> Result<(), Box> { + let shape = ArrayShape::new(vec![(25, 1), (10, 1), (3, 1), (4, 1)]).unwrap(); + let dimension_names = Some(vec!["t".into(), "z".into(), "y".into(), "x".into()]); + let temp_path: Path = "/temperature".try_into().unwrap(); + + let split_sizes = vec![ + ( + ManifestSplitCondition::PathMatches { regex: r".*".to_string() }, + vec![ManifestSplitDim { + condition: ManifestSplitDimCondition::DimensionName("t".to_string()), + num_chunks: 12, + }], + ), + ( + ManifestSplitCondition::PathMatches { regex: r".*".to_string() }, + vec![ManifestSplitDim { + condition: ManifestSplitDimCondition::Axis(2), + num_chunks: 2, + }], + ), + ( + ManifestSplitCondition::PathMatches { regex: r".*".to_string() }, + vec![ManifestSplitDim { + condition: ManifestSplitDimCondition::Any, + num_chunks: 9, + }], + ), + ]; + let split_config = ManifestSplittingConfig { split_sizes: Some(split_sizes) }; + let repo = create_repo_with_split_manifest_config( + &temp_path, + &shape, + &dimension_names, + &split_config, + None, + ) + .await?; + + let session = repo.writable_session("main").await?; + let actual = + split_config.get_split_sizes(&session.get_node(&temp_path).await?)?; + let expected = ManifestSplits::from_edges(vec![ + vec![0, 12, 24, 25], + vec![0, 9, 10], + vec![0, 2, 3], + vec![0, 4], + ]); + assert_eq!(actual, expected); + + let split_sizes = vec![( + ManifestSplitCondition::PathMatches { regex: r".*".to_string() }, + vec![ + ManifestSplitDim { + condition: ManifestSplitDimCondition::DimensionName("t".to_string()), + num_chunks: 12, + }, + ManifestSplitDim { + condition: ManifestSplitDimCondition::Axis(2), + num_chunks: 2, + }, + ManifestSplitDim { + condition: ManifestSplitDimCondition::Any, + num_chunks: 9, + }, + ], + )]; + let split_config = ManifestSplittingConfig { split_sizes: Some(split_sizes) }; + let repo = create_repo_with_split_manifest_config( + &temp_path, + &shape, + &dimension_names, + &split_config, + None, + ) + .await?; + + let session = repo.writable_session("main").await?; + let actual = + split_config.get_split_sizes(&session.get_node(&temp_path).await?)?; + assert_eq!(actual, expected); + + Ok(()) + } + + #[tokio::test] + async fn test_manifest_splitting_complex_writes() -> Result<(), Box> { + let t_split_size = 12u32; + let y_split_size = 2u32; + + let shape = ArrayShape::new(vec![(25, 1), (10, 1), (3, 1), (4, 1)]).unwrap(); + let dimension_names = Some(vec!["t".into(), "z".into(), "y".into(), "x".into()]); + let temp_path: Path = "/temperature".try_into().unwrap(); + + let split_sizes = vec![ + ( + ManifestSplitCondition::AnyArray, + vec![ManifestSplitDim { + condition: ManifestSplitDimCondition::DimensionName("t".to_string()), + num_chunks: t_split_size, + }], + ), + ( + ManifestSplitCondition::PathMatches { regex: r".*".to_string() }, + vec![ManifestSplitDim { + condition: ManifestSplitDimCondition::Axis(2), + num_chunks: y_split_size, + }], + ), + ( + ManifestSplitCondition::NameMatches { regex: r".*".to_string() }, 
+ vec![ManifestSplitDim { + condition: ManifestSplitDimCondition::Any, + num_chunks: 9, + }], + ), + ]; + + let expected_split_sizes = [t_split_size, 9, y_split_size, 9]; + + let split_config = ManifestSplittingConfig { split_sizes: Some(split_sizes) }; + let backend: Arc = new_in_memory_storage().await?; + let logging = Arc::new(LoggingStorage::new(Arc::clone(&backend))); + let logging_c: Arc = logging.clone(); + let repository = create_repo_with_split_manifest_config( + &temp_path, + &shape, + &dimension_names, + &split_config, + Some(logging_c), + ) + .await?; + + let mut total_manifests = 0; + assert_manifest_count(&backend, total_manifests).await; + + logging.clear(); + let ops = logging.fetch_operations(); + assert!(ops.is_empty()); + + let mut add = 0; + for ax in 0..shape.len() { + let mut session = repository.writable_session("main").await?; + let axis_size = shape.get(ax).unwrap().array_length(); + for i in 0..axis_size { + let mut index = vec![0u32, 0, 0, 0]; + index[ax] = i as u32; + session + .set_chunk_ref( + temp_path.clone(), + ChunkIndices(index), + Some(ChunkPayload::Inline(format!("{0}", i).into())), + ) + .await? + } + + add += (axis_size as u32).div_ceil(expected_split_sizes[ax]) as usize + - 1 * ((ax > 0) as usize); + dbg!(&ax, &add); + total_manifests += add; + session.commit(format!("finished axis {0}", ax).as_ref(), None).await?; + assert_manifest_count(&backend, total_manifests).await; + + for i in 0..shape.get(ax).unwrap().array_length() { + let mut index = vec![0u32, 0, 0, 0]; + index[ax] = i as u32; + let val = get_chunk( + session + .get_chunk_reader( + &temp_path, + &ChunkIndices(index), + &ByteRange::ALL, + ) + .await + .unwrap(), + ) + .await + .unwrap() + .unwrap(); + assert_eq!(val, Bytes::copy_from_slice(format!("{0}", i).as_bytes())); + } + } + Ok(()) + } + #[tokio::test] /// Writes four arrays to a repo arrays, checks preloading of the manifests /// @@ -1241,6 +1650,7 @@ mod tests { max_total_refs: Some(2), preload_if: None, }), + ..ManifestConfig::default() }; let config = RepositoryConfig { manifest: Some(man_config), diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index 1e3ddcd7d..8a4c257b2 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -13,7 +13,8 @@ use bytes::Bytes; use chrono::{DateTime, Utc}; use err_into::ErrorInto; use futures::{FutureExt, Stream, StreamExt, TryStreamExt, future::Either, stream}; -use itertools::Itertools as _; +use itertools::{Itertools as _, repeat_n}; +use regex::bytes::Regex; use serde::{Deserialize, Serialize}; use thiserror::Error; use tokio::task::JoinError; @@ -23,6 +24,7 @@ use crate::{ RepositoryConfig, Storage, StorageError, asset_manager::AssetManager, change_set::{ArrayData, ChangeSet}, + config::{ManifestSplitDim, ManifestSplitDimCondition, ManifestSplittingConfig}, conflicts::{Conflict, ConflictResolution, ConflictSolver}, error::ICError, format::{ @@ -30,8 +32,8 @@ use crate::{ IcechunkFormatErrorKind, ManifestId, NodeId, ObjectId, Path, SnapshotId, manifest::{ ChunkInfo, ChunkPayload, ChunkRef, Manifest, ManifestExtents, ManifestRef, - VirtualChunkLocation, VirtualChunkRef, VirtualReferenceError, - VirtualReferenceErrorKind, + ManifestSplits, VirtualChunkLocation, VirtualChunkRef, VirtualReferenceError, + VirtualReferenceErrorKind, uniform_manifest_split_edges, }, snapshot::{ ArrayShape, DimensionName, ManifestFileInfo, NodeData, NodeSnapshot, @@ -104,6 +106,8 @@ pub enum SessionErrorKind { "invalid chunk index: coordinates {coords:?} are not valid for array at {path}" )] 
     InvalidIndex { coords: ChunkIndices, path: Path },
+    #[error("invalid chunk index for splitting manifests: {coords:?}")]
+    InvalidIndexForSplitManifests { coords: ChunkIndices },
     #[error("`to` snapshot ancestry doesn't include `from`")]
     BadSnapshotChainForDiff,
 }
@@ -156,6 +160,34 @@ impl From<SessionErrorKind> for SessionError {
 
 pub type SessionResult<T> = Result<T, SessionError>;
 
+impl ManifestSplits {
+    // Returns the index of the split_range that includes ChunkIndices.
+    // This can be used at write time to split manifests based on the config,
+    // and at read time to choose which manifest to query for a chunk payload.
+    pub fn which(&self, coord: &ChunkIndices) -> SessionResult<usize> {
+        // split_range[i] must bound ChunkIndices
+        // 0 <= return value <= split_range.len()
+        // It is possible that split_range does not include a coord: say we have a 2x2 split grid
+        // but only split (0,0) and split (1,1) are populated with data.
+        // A coord located in (1, 0) should return Err.
+        // Since split_range need not form a regular grid, we must iterate through and find the first result.
+        // ManifestExtents in split_range MUST NOT overlap with each other. How do we ensure this?
+        // ndim must be the same
+        // debug_assert_eq!(coord.0.len(), split_range[0].len());
+        // FIXME: could optimize for the unbounded single-manifest case
+        // Note: I don't think we can distinguish between an out-of-bounds index for the array
+        // and an index that is part of a split that hasn't been written yet.
+        self.iter()
+            .enumerate()
+            .find(|(_, e)| e.contains(coord.0.as_slice()))
+            .map(|(i, _)| i)
+            .ok_or(
+                SessionErrorKind::InvalidIndexForSplitManifests { coords: coord.clone() }
+                    .into(),
+            )
+    }
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct Session {
     config: RepositoryConfig,
@@ -548,6 +580,8 @@ impl Session {
             }
             .into()),
             NodeData::Array { manifests, .. } => {
+                // Note: at this point, coords could be invalid for the array shape,
+                // but we just let that pass.
                 // check the chunks modified in this session first
                 // TODO: I hate rust forces me to clone to search in a hashmap. How to do better?
                 let session_chunk =
@@ -705,19 +739,33 @@ impl Session {
         manifests: &[ManifestRef],
         coords: &ChunkIndices,
     ) -> SessionResult<Option<ChunkPayload>> {
-        // FIXME: use manifest extents
-        for manifest in manifests {
-            let manifest = self.fetch_manifest(&manifest.object_id).await?;
-            match manifest.get_chunk_payload(&node, coords) {
-                Ok(payload) => {
-                    return Ok(Some(payload.clone()));
-                }
-                Err(IcechunkFormatError {
-                    kind: IcechunkFormatErrorKind::ChunkCoordinatesNotFound { .. },
-                    ..
-                }) => {}
-                Err(err) => return Err(err.into()),
+        if manifests.is_empty() {
+            // no chunks have been written, and the requested coords were not
+            // in the changeset; return None to Zarr.
+            return Ok(None);
+        }
+        let splits = ManifestSplits::from_extents(
+            manifests.iter().map(|m| m.extents.clone()).collect(),
+        );
+        let index = match splits.which(coords) {
+            Ok(index) => index,
+            // for an invalid coordinate, we bail.
+            // This happens in two cases:
+            // (1) "coords" is out of range for the array shape
+            // (2) "coords" belongs to a shard that hasn't been written yet.
+            Err(_) => return Ok(None),
+        };
+
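+        // Unlike the removed loop, which fetched and probed every manifest in
+        // turn, the split lookup above pins down the single manifest whose
+        // extents contain `coords`, so a read fetches at most one manifest.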
+        let manifest = self.fetch_manifest(&manifests[index].object_id).await?;
+        match manifest.get_chunk_payload(&node, coords) {
+            Ok(payload) => {
+                return Ok(Some(payload.clone()));
+            }
+            Err(IcechunkFormatError {
+                kind: IcechunkFormatErrorKind::ChunkCoordinatesNotFound { .. },
+                ..
+            }) => {}
+            Err(err) => return Err(err.into()),
         }
         Ok(None)
     }
@@ -836,6 +884,7 @@ impl Session {
                 branch_name,
                 &self.snapshot_id,
                 &self.change_set,
+                &self.config,
                 message,
                 Some(properties),
             )
@@ -858,6 +907,7 @@ impl Session {
                 branch_name,
                 &self.snapshot_id,
                 &self.change_set,
+                &self.config,
                 message,
                 Some(properties),
             )
@@ -1394,10 +1444,46 @@ pub fn construct_valid_byte_range(
     }
 }
 
+#[derive(Default, Debug)]
+struct SplitManifest {
+    from: Vec<u32>,
+    to: Vec<u32>,
+    chunks: Vec<ChunkInfo>,
+}
+
+impl SplitManifest {
+    fn update(&mut self, chunk: ChunkInfo) {
+        if self.from.is_empty() {
+            debug_assert!(self.to.is_empty());
+            debug_assert!(self.chunks.is_empty());
+            // important to remember that `to` is not inclusive, so we need +1
+            let mut coord = chunk.coord.0.clone();
+            self.to.extend(coord.iter().map(|n| *n + 1));
+            self.from.append(&mut coord);
+        } else {
+            for (existing, coord) in self.from.iter_mut().zip(chunk.coord.0.iter()) {
+                if coord < existing {
+                    *existing = *coord
+                }
+            }
+            for (existing, coord) in self.to.iter_mut().zip(chunk.coord.0.iter()) {
+                // important to remember that `to` is not inclusive, so we need +1
+                let range_value = coord + 1;
+                if range_value > *existing {
+                    *existing = range_value
+                }
+            }
+        }
+
+        self.chunks.push(chunk)
+    }
+}
+
 struct FlushProcess<'a> {
     asset_manager: Arc<AssetManager>,
     change_set: &'a ChangeSet,
     parent_id: &'a SnapshotId,
+    config: &'a RepositoryConfig,
     manifest_refs: HashMap<NodeId, Vec<ManifestRef>>,
     manifest_files: HashSet<ManifestFileInfo>,
 }
@@ -1407,52 +1493,81 @@ impl<'a> FlushProcess<'a> {
         asset_manager: Arc<AssetManager>,
         change_set: &'a ChangeSet,
         parent_id: &'a SnapshotId,
+        config: &'a RepositoryConfig,
     ) -> Self {
         Self {
             asset_manager,
             change_set,
             parent_id,
+            config,
             manifest_refs: Default::default(),
             manifest_files: Default::default(),
         }
     }
 
+    async fn write_manifests_from_iterator(
+        &mut self,
+        node_id: &NodeId,
+        chunks: impl Stream<Item = SessionResult<ChunkInfo>>,
+        splits: ManifestSplits,
+    ) -> SessionResult<()> {
+        // TODO: think about optimizing writes to manifests
+        // TODO: add benchmarks
+        let split_refs = chunks
+            .try_fold(
+                // TODO: have the changeset track this HashMap
+                HashMap::<usize, SplitManifest>::with_capacity(splits.len()),
+                |mut split_refs, chunk| async {
+                    let split_index = splits.which(&chunk.coord);
+                    split_index.map(|index| {
+                        split_refs.entry(index).or_default().update(chunk);
+                        split_refs
+                    })
+                },
+            )
+            .await?;
+
+        for (_, shard) in split_refs.into_iter() {
+            let shard_chunks =
+                stream::iter(shard.chunks.into_iter().map(Ok::<ChunkInfo, Infallible>));
+
+            if let Some(new_manifest) = Manifest::from_stream(shard_chunks).await.unwrap()
+            {
+                let new_manifest = Arc::new(new_manifest);
+                let new_manifest_size =
+                    self.asset_manager.write_manifest(Arc::clone(&new_manifest)).await?;
+
+                let file_info =
+                    ManifestFileInfo::new(new_manifest.as_ref(), new_manifest_size);
+                self.manifest_files.insert(file_info);
+
+                let new_ref = ManifestRef {
+                    object_id: new_manifest.id().clone(),
+                    extents: ManifestExtents::new(&shard.from, &shard.to),
+                };
+
+                self.manifest_refs
+                    .entry(node_id.clone())
+                    .and_modify(|v| v.push(new_ref.clone()))
+                    .or_insert_with(|| vec![new_ref]);
+            }
+        }
+
+        Ok(())
+    }
+
     /// Write a manifest for a node that was created in this session
     /// It doesn't need to look at previous manifests because the node is new
     async fn write_manifest_for_new_node(
         &mut self,
         node_id: &NodeId,
         node_path: &Path,
+        splits: ManifestSplits,
     ) -> SessionResult<()> {
-        let mut from = vec![];
-        let mut to = vec![];
         let chunks = stream::iter(
-            self.change_set
-                .new_array_chunk_iterator(node_id, node_path)
-                .map(Ok::<ChunkInfo, Infallible>),
+            self.change_set.new_array_chunk_iterator(node_id, node_path).map(Ok),
         );
-        let chunks = aggregate_extents(&mut from, &mut to, chunks, |ci| &ci.coord);
-
-        if let Some(new_manifest) = Manifest::from_stream(chunks).await.unwrap() {
-            let new_manifest = Arc::new(new_manifest);
-            let new_manifest_size =
-                self.asset_manager.write_manifest(Arc::clone(&new_manifest)).await?;
-
-            let file_info =
-                ManifestFileInfo::new(new_manifest.as_ref(), new_manifest_size);
-            self.manifest_files.insert(file_info);
-
-            let new_ref = ManifestRef {
-                object_id: new_manifest.id().clone(),
-                extents: ManifestExtents::new(&from, &to),
-            };
-
-            self.manifest_refs
-                .entry(node_id.clone())
-                .and_modify(|v| v.push(new_ref.clone()))
-                .or_insert_with(|| vec![new_ref]);
-        }
-        Ok(())
+        self.write_manifests_from_iterator(node_id, chunks, splits).await
     }
 
     /// Write a manifest for a node that was modified in this session
@@ -1461,39 +1576,19 @@ impl<'a> FlushProcess<'a> {
     async fn write_manifest_for_existing_node(
         &mut self,
         node: &NodeSnapshot,
+        splits: ManifestSplits,
     ) -> SessionResult<()> {
+        let asset_manager = Arc::clone(&self.asset_manager);
         let updated_chunks = updated_node_chunks_iterator(
-            self.asset_manager.as_ref(),
+            asset_manager.as_ref(),
             self.change_set,
             self.parent_id,
             node.clone(),
         )
         .await
        .map_ok(|(_path, chunk_info)| chunk_info);
-        let mut from = vec![];
-        let mut to = vec![];
-        let updated_chunks =
-            aggregate_extents(&mut from, &mut to, updated_chunks, |ci| &ci.coord);
-
-        if let Some(new_manifest) = Manifest::from_stream(updated_chunks).await? {
-            let new_manifest = Arc::new(new_manifest);
-            let new_manifest_size =
-                self.asset_manager.write_manifest(Arc::clone(&new_manifest)).await?;
-
-            let file_info =
-                ManifestFileInfo::new(new_manifest.as_ref(), new_manifest_size);
-            self.manifest_files.insert(file_info);
-
-            let new_ref = ManifestRef {
-                object_id: new_manifest.id().clone(),
-                extents: ManifestExtents::new(&from, &to),
-            };
-            self.manifest_refs
-                .entry(node.id.clone())
-                .and_modify(|v| v.push(new_ref.clone()))
-                .or_insert_with(|| vec![new_ref]);
-        }
-        Ok(())
+
+        self.write_manifests_from_iterator(&node.id, updated_chunks, splits).await
     }
 
     /// Record the previous manifests for an array that was not modified in the session
@@ -1524,6 +1619,89 @@ impl<'a> FlushProcess<'a> {
     }
 }
 
+impl ManifestSplitDimCondition {
+    fn matches(&self, axis: usize, dimname: Option<String>) -> bool {
+        match self {
+            ManifestSplitDimCondition::Axis(ax) => ax == &axis,
+            ManifestSplitDimCondition::DimensionName(regex) => dimname
+                .map(|name| {
+                    Regex::new(regex)
+                        .map(|regex| regex.is_match(name.as_bytes()))
+                        .unwrap_or(false)
+                })
+                .unwrap_or(false),
+            ManifestSplitDimCondition::Any => true,
+        }
+    }
+}
+
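+// Note: `Regex::is_match` is unanchored, so `DimensionName("time")` also
+// matches a dimension named "datetime"; anchor the pattern ("^time$") to
+// match exactly. An invalid regex matches nothing (`unwrap_or(false)`)
+// rather than raising an error.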
+impl ManifestSplittingConfig {
+    pub fn get_split_sizes(&self, node: &NodeSnapshot) -> SessionResult<ManifestSplits> {
+        match &node.node_data {
+            NodeData::Group => Err(SessionErrorKind::NotAnArray {
+                node: node.clone(),
+                message: "attempting to split manifest for group".to_string(),
+            }
+            .into()),
+            NodeData::Array { shape, dimension_names, .. } => {
+                let ndim = shape.len();
+                let num_chunks = shape.num_chunks().collect::<Vec<_>>();
+                let mut edges: Vec<Vec<u32>> =
+                    (0..ndim).map(|axis| vec![0, num_chunks[axis]]).collect();
+
+                // This is ugly but necessary to handle:
+                //   - path: *
+                //     manifest-split-size:
+                //       - t : 10
+                //   - path: *
+                //     manifest-split-size:
+                //       - y : 2
+                // which is now identical to:
+                //   - path: *
+                //     manifest-split-size:
+                //       - t : 10
+                //       - y : 2
+                let mut already_matched: HashSet<usize> = HashSet::new();
+
+                #[allow(clippy::expect_used)]
+                let split_sizes = self
+                    .split_sizes
+                    .clone()
+                    .or_else(|| Self::default().split_sizes)
+                    .expect("logic bug");
+
+                for (condition, dim_specs) in split_sizes.iter() {
+                    if condition.matches(&node.path) {
+                        let dimension_names = dimension_names.clone().unwrap_or(
+                            repeat_n(DimensionName::NotSpecified, ndim).collect(),
+                        );
+                        for (axis, dimname) in itertools::enumerate(dimension_names) {
+                            if already_matched.contains(&axis) {
+                                continue;
+                            }
+                            for ManifestSplitDim {
+                                condition: dim_condition,
+                                num_chunks: split_size,
+                            } in dim_specs.iter()
+                            {
+                                if dim_condition.matches(axis, dimname.clone().into()) {
+                                    edges[axis] = uniform_manifest_split_edges(
+                                        num_chunks[axis],
+                                        split_size,
+                                    );
+                                    already_matched.insert(axis);
+                                    break;
+                                };
+                            }
+                        }
+                    }
+                }
+                Ok(ManifestSplits::from_edges(edges))
+            }
+        }
+    }
+}
+
 async fn flush(
     mut flush_data: FlushProcess<'_>,
     message: &str,
@@ -1535,6 +1713,7 @@
 
     let old_snapshot =
         flush_data.asset_manager.fetch_snapshot(flush_data.parent_id).await?;
+    let splitting_config = flush_data.config.manifest().splitting();
 
     // We first go through all existing nodes to see if we need to rewrite any manifests
 
@@ -1552,7 +1731,16 @@
         if flush_data.change_set.has_chunk_changes(node_id) {
             trace!(path=%node.path, "Node has changes, writing a new manifest");
             // Array wasn't deleted and has changes in this session
-            flush_data.write_manifest_for_existing_node(&node).await?;
+            // get the new node to handle changes in size, e.g. appends.
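+            // An append can grow the array shape, which changes num_chunks()
+            // and therefore the split edges computed by get_split_sizes, so
+            // the splits must come from the post-change node rather than the
+            // node recorded in the parent snapshot.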
+            let new_node = get_existing_node(
+                flush_data.asset_manager.as_ref(),
+                flush_data.change_set,
+                flush_data.parent_id,
+                &node.path,
+            )
+            .await?;
+            let splits = splitting_config.get_split_sizes(&new_node)?;
+            flush_data.write_manifest_for_existing_node(&node, splits).await?;
         } else {
             trace!(path=%node.path, "Node has no changes, keeping the previous manifest");
             // Array wasn't deleted but has no changes in this session
@@ -1566,7 +1754,15 @@
 
     for (node_path, node_id) in flush_data.change_set.new_arrays() {
         trace!(path=%node_path, "New node, writing a manifest");
-        flush_data.write_manifest_for_new_node(node_id, node_path).await?;
+        let node = get_node(
+            &flush_data.asset_manager,
+            flush_data.change_set,
+            flush_data.parent_id,
+            node_path,
+        )
+        .await?;
+        let splits = splitting_config.get_split_sizes(&node)?;
+        flush_data.write_manifest_for_new_node(node_id, node_path, splits).await?;
     }
 
     trace!("Building new snapshot");
@@ -1679,13 +1875,14 @@ async fn do_commit(
     branch_name: &str,
     snapshot_id: &SnapshotId,
     change_set: &ChangeSet,
+    config: &RepositoryConfig,
     message: &str,
     properties: Option<SnapshotProperties>,
 ) -> SessionResult<SnapshotId> {
     info!(branch_name, old_snapshot_id=%snapshot_id, "Commit started");
     let parent_snapshot = snapshot_id.clone();
     let properties = properties.unwrap_or_default();
-    let flush_data = FlushProcess::new(asset_manager, change_set, snapshot_id);
+    let flush_data = FlushProcess::new(asset_manager, change_set, snapshot_id, config);
     let new_snapshot = flush(flush_data, message, properties).await?;
 
     debug!(branch_name, new_snapshot_id=%new_snapshot, "Updating branch");
@@ -1726,63 +1923,6 @@ async fn fetch_manifest(
     Ok(asset_manager.fetch_manifest(manifest_id, manifest_info.size_bytes).await?)
 }
 
-/// Map the iterator to accumulate the extents of the chunks traversed
-///
-/// As we are processing chunks to create a manifest, we need to keep track
-/// of the extents of the manifests. This means, for each coordinate, we need
-/// to record its minimum and maximum values.
-///
-/// This very ugly code does that, without having to traverse the iterator twice.
-/// It adapts the stream using [`StreamExt::map_ok`] and keeps a running min/max
-/// for each coordinate.
-///
-/// When the iterator is fully traversed, the min and max values will be
-/// available in `from` and `to` arguments.
-///
-/// Yes, this is horrible.
-fn aggregate_extents<'a, T: std::fmt::Debug, E>(
-    from: &'a mut Vec<u32>,
-    to: &'a mut Vec<u32>,
-    it: impl Stream<Item = Result<T, E>> + 'a,
-    extract_index: impl for<'b> Fn(&'b T) -> &'b ChunkIndices + 'a,
-) -> impl Stream<Item = Result<T, E>> + 'a {
-    // we initialize the destination with an empty array, because we don't know
-    // the dimensions of the array yet. On the first element we will re-initialize
-    *from = Vec::new();
-    *to = Vec::new();
-    it.map_ok(move |t| {
-        // these are the coordinates for the chunk
-        let idx = extract_index(&t);
-
-        // we need to initialize the mins/maxes the first time
-        // we initialize with the value of the first element
-        // this obviously doesn't work for empty streams
-        // but we never generate manifests for them
-        if from.is_empty() {
-            *from = idx.0.clone();
-            // important to remember that `to` is not inclusive, so we need +1
-            *to = idx.0.iter().map(|n| n + 1).collect();
-        } else {
-            // We need to iterate over coordinates, and update the
-            // minimum and maximum for each if needed
-            for (coord_idx, value) in idx.0.iter().enumerate() {
-                if let Some(from_current) = from.get_mut(coord_idx) {
-                    if value < from_current {
-                        *from_current = *value
-                    }
-                }
-                if let Some(to_current) = to.get_mut(coord_idx) {
-                    let range_value = value + 1;
-                    if range_value > *to_current {
-                        *to_current = range_value
-                    }
-                }
-            }
-        }
-        t
-    })
-}
-
 #[cfg(test)]
 #[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
 mod tests {
@@ -1798,13 +1938,11 @@ mod tests {
             basic_solver::{BasicConflictSolver, VersionSelection},
             detector::ConflictDetector,
         },
-        format::manifest::ManifestExtents,
+        format::manifest::{ManifestExtents, ManifestSplits},
         refs::{Ref, fetch_tag},
         repository::VersionInfo,
         storage::new_in_memory_storage,
-        strategies::{
-            ShapeDim, chunk_indices, empty_writable_session, node_paths, shapes_and_dims,
-        },
+        strategies::{ShapeDim, empty_writable_session, node_paths, shapes_and_dims},
     };
 
     use super::*;
@@ -1970,36 +2108,22 @@ mod tests {
         prop_assert!(session.delete_group(path.clone()).await.is_ok());
     }
 
-    #[proptest(async = "tokio")]
-    async fn test_aggregate_extents(
-        #[strategy(proptest::collection::vec(chunk_indices(3, 0..1_000_000), 1..50))]
-        indices: Vec<ChunkIndices>,
-    ) {
-        let mut from = vec![];
-        let mut to = vec![];
+    #[tokio::test]
+    async fn test_which_split() -> Result<(), Box<dyn std::error::Error>> {
+        let splits = ManifestSplits::from_edges(vec![vec![0, 10, 20]]);
 
-        let expected_from = vec![
-            indices.iter().map(|i| i.0[0]).min().unwrap(),
-            indices.iter().map(|i| i.0[1]).min().unwrap(),
-            indices.iter().map(|i| i.0[2]).min().unwrap(),
-        ];
-        let expected_to = vec![
-            indices.iter().map(|i| i.0[0]).max().unwrap() + 1,
-            indices.iter().map(|i| i.0[1]).max().unwrap() + 1,
-            indices.iter().map(|i| i.0[2]).max().unwrap() + 1,
-        ];
+        assert_eq!(splits.which(&ChunkIndices(vec![1])).unwrap(), 0);
+        assert_eq!(splits.which(&ChunkIndices(vec![11])).unwrap(), 1);
 
-        let _ = aggregate_extents(
-            &mut from,
-            &mut to,
-            stream::iter(indices.into_iter().map(Ok::<ChunkIndices, Infallible>)),
-            |idx| idx,
-        )
-        .count()
-        .await;
+        let edges = vec![vec![0, 10, 20], vec![0, 10, 20]];
+
+        let splits = ManifestSplits::from_edges(edges);
+        assert_eq!(splits.which(&ChunkIndices(vec![1, 1])).unwrap(), 0);
+        assert_eq!(splits.which(&ChunkIndices(vec![1, 10])).unwrap(), 1);
+        assert_eq!(splits.which(&ChunkIndices(vec![1, 11])).unwrap(), 1);
+        assert!(splits.which(&ChunkIndices(vec![21, 21])).is_err());
 
-        prop_assert_eq!(from, expected_from);
-        prop_assert_eq!(to, expected_to);
+        Ok(())
     }
 
     #[tokio_test]
diff --git a/icechunk/src/storage/logging.rs b/icechunk/src/storage/logging.rs
index 2f680fc94..60bbef059 100644
--- a/icechunk/src/storage/logging.rs
+++ b/icechunk/src/storage/logging.rs
@@ -37,6 +37,11 @@ impl LoggingStorage {
     pub fn fetch_operations(&self) -> Vec<(String, String)> {
         self.fetch_log.lock().expect("poison lock").clone()
     }
+
+    #[allow(clippy::expect_used)] // this implementation is intended for tests only
+    pub fn clear(&self) {
+        self.fetch_log.lock().expect("poison lock").clear();
+    }
 }
 
 impl fmt::Display for LoggingStorage {
diff --git a/icechunk/src/strategies.rs b/icechunk/src/strategies.rs
index 09274e4b4..2b3a20310 100644
--- a/icechunk/src/strategies.rs
+++ b/icechunk/src/strategies.rs
@@ -122,5 +122,4 @@ prop_compose! {
     pub fn chunk_indices(dim: usize, values_in: Range<u32>)(v in proptest::collection::vec(values_in, dim..(dim+1))) -> ChunkIndices {
         ChunkIndices(v)
     }
-
 }
diff --git a/icechunk/tests/test_concurrency.rs b/icechunk/tests/test_concurrency.rs
index a1552313b..aa5c02163 100644
--- a/icechunk/tests/test_concurrency.rs
+++ b/icechunk/tests/test_concurrency.rs
@@ -2,7 +2,8 @@
 use bytes::Bytes;
 use chrono::Utc;
 use icechunk::{
-    Repository, Storage,
+    Repository, RepositoryConfig, Storage,
+    config::{ManifestConfig, ManifestSplittingConfig},
     format::{ByteRange, ChunkIndices, Path, snapshot::ArrayShape},
     session::{Session, get_chunk},
     storage::new_in_memory_storage,
@@ -69,10 +70,19 @@ async fn test_concurrency_in_tigris() -> Result<(), Box<dyn std::error::Error>>
 async fn do_test_concurrency(
     storage: Arc<dyn Storage + Send + Sync>,
 ) -> Result<(), Box<dyn std::error::Error>> {
-    let repo = Repository::create(None, storage, HashMap::new()).await?;
+    let shape = ArrayShape::new(vec![(N as u64, 1), (N as u64, 1)]).unwrap();
+
+    let config = RepositoryConfig {
+        manifest: Some(ManifestConfig {
+            splitting: Some(ManifestSplittingConfig::with_size(2)),
+            ..Default::default()
+        }),
+        ..Default::default()
+    };
+    let repo = Repository::create(Some(config), storage, HashMap::new()).await?;
+
     let mut ds = repo.writable_session("main").await?;
 
-    let shape = ArrayShape::new(vec![(N as u64, 1), (N as u64, 1)]).unwrap();
     let dimension_names = Some(vec!["x".into(), "y".into()]);
     let user_data = Bytes::new();
     let new_array_path: Path = "/array".try_into().unwrap();
diff --git a/icechunk/tests/test_gc.rs b/icechunk/tests/test_gc.rs
index 05a4715b5..7e44f3a4c 100644
--- a/icechunk/tests/test_gc.rs
+++ b/icechunk/tests/test_gc.rs
@@ -8,6 +8,10 @@
 use futures::{StreamExt, TryStreamExt};
 use icechunk::{
     Repository, RepositoryConfig, Storage,
     asset_manager::AssetManager,
+    config::{
+        ManifestConfig, ManifestSplitCondition, ManifestSplitDim,
+        ManifestSplitDimCondition, ManifestSplittingConfig,
+    },
     format::{ByteRange, ChunkIndices, Path, snapshot::ArrayShape},
     new_in_memory_storage,
     ops::gc::{
@@ -61,9 +65,25 @@
 pub async fn do_test_gc(
     storage: Arc<dyn Storage + Send + Sync>,
 ) -> Result<(), Box<dyn std::error::Error>> {
     let storage_settings = storage.default_settings();
+
+    let shape = ArrayShape::new(vec![(1100, 1)]).unwrap();
+    let manifest_split_size = 10;
+    let split_sizes = Some(vec![(
+        ManifestSplitCondition::PathMatches { regex: r".*".to_string() },
+        vec![ManifestSplitDim {
+            condition: ManifestSplitDimCondition::Any,
+            num_chunks: manifest_split_size,
+        }],
+    )]);
+    let man_config = ManifestConfig {
+        splitting: Some(ManifestSplittingConfig { split_sizes }),
+        ..ManifestConfig::default()
+    };
+
     let repo = Repository::create(
         Some(RepositoryConfig {
             inline_chunk_threshold_bytes: Some(0),
+            manifest: Some(man_config),
             ..Default::default()
         }),
         Arc::clone(&storage),
@@ -76,8 +96,6 @@
     let user_data = Bytes::new();
     ds.add_group(Path::root(), user_data.clone()).await?;
 
-    let shape = ArrayShape::new(vec![(1100, 1)]).unwrap();
-
     let array_path: Path = "/array".try_into().unwrap();
     ds.add_array(array_path.clone(), shape, None, user_data.clone()).await?;
     // we write more than 1k chunks to go beyond the chunk size for object listing and delete
@@ -146,13 +164,13 @@ pub async fn do_test_gc(
     )
     .await?;
     assert_eq!(summary.chunks_deleted, 10);
-    assert_eq!(summary.manifests_deleted, 1);
+    assert_eq!(summary.manifests_deleted, 110);
     assert_eq!(summary.snapshots_deleted, 1);
     assert!(summary.bytes_deleted > summary.chunks_deleted);
 
     // 10 chunks should be dropped
     assert_eq!(storage.list_chunks(&storage_settings).await?.count().await, 1100);
-    assert_eq!(storage.list_manifests(&storage_settings).await?.count().await, 1);
+    assert_eq!(storage.list_manifests(&storage_settings).await?.count().await, 110);
     assert_eq!(storage.list_snapshots(&storage_settings).await?.count().await, 2);
 
     // Opening the repo on main should give the right data
diff --git a/icechunk/tests/test_large_manifests.rs b/icechunk/tests/test_large_manifests.rs
new file mode 100644
index 000000000..f37e3921c
--- /dev/null
+++ b/icechunk/tests/test_large_manifests.rs
@@ -0,0 +1,111 @@
+#![allow(clippy::expect_used, clippy::unwrap_used, dead_code)]
+use bytes::Bytes;
+use icechunk::{
+    Repository, RepositoryConfig, Store,
+    config::{ManifestConfig, ManifestSplittingConfig},
+    format::{
+        ChunkIndices,
+        manifest::{ChunkPayload, VirtualChunkLocation, VirtualChunkRef},
+    },
+    session::Session,
+    storage::new_in_memory_storage,
+};
+use std::{collections::HashMap, sync::Arc};
+use tokio::{
+    sync::{RwLock, Semaphore},
+    task::JoinSet,
+};
+
+const TOTAL_NUM_REFS: usize = 100_000_000;
+const MANIFEST_SPLIT_SIZE: u32 = 1_000_000;
+const CHUNK_SIZE: u32 = 1;
+const TASK_CHUNK_SIZE: usize = 1_000; // each task writes this many refs in serial
+const NUM_TASKS: usize = TOTAL_NUM_REFS / TASK_CHUNK_SIZE;
+const MAX_CONCURRENT_TASKS: usize = 100;
+
+// #[tokio::test]
+async fn test_write_large_number_of_refs() -> Result<(), Box<dyn std::error::Error>> {
+    let storage = new_in_memory_storage().await?;
+    let config = RepositoryConfig {
+        inline_chunk_threshold_bytes: Some(128),
+        manifest: Some(ManifestConfig {
+            splitting: Some(ManifestSplittingConfig::with_size(MANIFEST_SPLIT_SIZE)),
+            ..Default::default()
+        }),
+        ..Default::default()
+    };
+    let repo = Repository::create(Some(config), storage, HashMap::new()).await?;
+    let session = Arc::new(RwLock::new(repo.writable_session("main").await?));
+    let store = Store::from_session(Arc::clone(&session)).await;
+
+    store
+        .set(
+            "zarr.json",
+            Bytes::copy_from_slice(br#"{"zarr_format":3, "node_type":"group"}"#),
+        )
+        .await?;
+
+    let array_json = format!(
+        r#"{{
+    "zarr_format": 3,
+    "node_type": "array",
+    "attributes": {{"foo": 42}},
+    "shape": [{0}],
+    "data_type": "float32",
+    "chunk_grid": {{"name": "regular", "configuration": {{"chunk_shape": [{1}]}}}},
+    "chunk_key_encoding": {{"name": "default", "configuration": {{"separator": "/"}}}},
+    "fill_value": 0.0,
+    "codecs": [{{"name": "mycodec", "configuration": {{"foo": 42}}}}],
+    "storage_transformers": [{{"name": "mytransformer", "configuration": {{"bar": 43}}}}],
+    "dimension_names": ["x"]
+}}"#,
+        TOTAL_NUM_REFS, CHUNK_SIZE
+    );
+
+    let zarr_meta = Bytes::copy_from_slice(array_json.as_ref());
+    store.set("array/zarr.json", zarr_meta).await?;
+
+    let mut set = JoinSet::new();
+
+    let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_TASKS));
+
+    dbg!("num_tasks", &NUM_TASKS);
+    for i in 0..NUM_TASKS {
+        let semaphore = semaphore.clone();
+        let cloned_session = Arc::clone(&session);
+        set.spawn(async move {
+            let _permit = semaphore.acquire().await.unwrap();
+            write_chunk_refs_batch(cloned_session, i).await;
+        });
+    }
+
+    set.join_all().await;
+
+    session.write().await.commit("first", None).await?;
+
+    Ok(())
+}
+
+async fn write_chunk_refs_batch(session: Arc<RwLock<Session>>, batch: usize) {
+    // let mut guard = session.write().await;
+    dbg!("starting batch", &batch);
+    for i in batch * TASK_CHUNK_SIZE..(batch + 1) * TASK_CHUNK_SIZE {
+        let payload = ChunkPayload::Virtual(VirtualChunkRef {
+            location: VirtualChunkLocation(format!("s3://foo/bar/{}", i)),
+            offset: 0,
+            length: 1,
+            checksum: None,
+        });
+        session
+            .write()
+            .await
+            .set_chunk_ref(
+                "/array".try_into().unwrap(),
+                ChunkIndices(vec![i as u32]),
+                Some(payload),
+            )
+            .await
+            .expect("Failed to write chunk ref");
+    }
+    dbg!("finished batch", &batch);
+}
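A self-contained sketch of the split arithmetic these tests exercise. `uniform_edges` and `which_split` below are illustrative stand-ins for `uniform_manifest_split_edges` and the per-axis lookup inside `ManifestSplits::which` (whose real definitions live in `format/manifest.rs` and are not shown in this diff); the numbers mirror `do_test_gc`, where an 1100-chunk axis with a split size of 10 yields the 110 manifests asserted above.

```rust
/// Stand-in for `uniform_manifest_split_edges`: edges of half-open
/// per-axis intervals. 1100 chunks with split size 10 gives
/// [0, 10, ..., 1100], i.e. 110 splits.
fn uniform_edges(num_chunks: u32, split_size: u32) -> Vec<u32> {
    let mut edges: Vec<u32> =
        (0..=num_chunks).step_by(split_size as usize).collect();
    if *edges.last().unwrap() != num_chunks {
        edges.push(num_chunks); // ragged final split
    }
    edges
}

/// Stand-in for the per-axis lookup in `ManifestSplits::which`: the
/// split containing `coord` is the interval [edges[i], edges[i + 1]).
fn which_split(edges: &[u32], coord: u32) -> Option<usize> {
    edges.windows(2).position(|w| w[0] <= coord && coord < w[1])
}

fn main() {
    let edges = uniform_edges(1100, 10);
    assert_eq!(edges.len() - 1, 110); // one manifest per split, as in do_test_gc
    assert_eq!(which_split(&edges, 11), Some(1));
    assert_eq!(which_split(&edges, 1099), Some(109));
    // out of range: analogous to `which` returning Err, which reads map to None
    assert_eq!(which_split(&edges, 1100), None);
}
```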