Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 0 additions & 144 deletions icechunk-python/tests/test_zarr/test_stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
from collections.abc import Callable
from typing import Any, TypeVar, cast

import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
import numpy as np
import pytest
from hypothesis import assume, note, settings
from hypothesis.stateful import (
invariant,
precondition,
rule,
run_state_machine_as_test,
Expand All @@ -24,10 +22,8 @@
from zarr.core.buffer import default_buffer_prototype
from zarr.testing.stateful import ZarrHierarchyStateMachine
from zarr.testing.strategies import (
basic_indices,
node_names,
np_array_and_chunks,
orthogonal_indices,
)

PROTOTYPE = default_buffer_prototype()
Expand Down Expand Up @@ -236,126 +232,6 @@ def check_list_dir(self, data: st.DataObject) -> None:
# In Icechunk, we always return the `c` so ignore this inconsistency.
assert model_ls == store_ls, (model_ls, store_ls)

##### TODO: port everything below to zarr
@precondition(lambda self: bool(self.all_arrays))
@rule(data=st.data())
def check_array(self, data: st.DataObject) -> None:
    """Read one tracked array in full from both stores and compare contents."""
    array_path = data.draw(st.sampled_from(sorted(self.all_arrays)))
    from_store = zarr.open_array(self.store, path=array_path)
    from_model = zarr.open_array(self.model, path=array_path)
    np.testing.assert_equal(from_store[:], from_model[:])

@precondition(lambda self: bool(self.all_arrays))
@rule(data=st.data())
def overwrite_array_orthogonal_indexing(self, data: st.DataObject) -> None:
    """Draw an orthogonal index into one tracked array and write freshly
    drawn data through it, identically on the model and the store under test."""
    target = data.draw(st.sampled_from(sorted(self.all_arrays)))
    model_arr = zarr.open_array(path=target, store=self.model)
    store_arr = zarr.open_array(path=target, store=self.store)
    indexer, _ = data.draw(orthogonal_indices(shape=model_arr.shape))
    note(f"overwriting array orthogonal {indexer=}")
    # The selection's shape dictates the shape of the replacement data.
    selection_shape = model_arr.oindex[indexer].shape  # type: ignore[union-attr]
    fresh = data.draw(npst.arrays(shape=selection_shape, dtype=model_arr.dtype))
    model_arr.oindex[indexer] = fresh
    store_arr.oindex[indexer] = fresh

##### TODO: delete after next Zarr release (Jun 18, 2025)
@rule()
@with_frequency(0.25)
def clear(self) -> None:
# Occasionally (25% of eligible draws) wipe both stores, re-create the root
# group in each, and reset the tracked node sets so the state machine's
# bookkeeping matches the now-empty hierarchy.
note("clearing")
import zarr

self._sync(self.store.clear())
self._sync(self.model.clear())

# Both stores must report empty immediately after clearing.
assert self._sync(self.store.is_empty("/"))
assert self._sync(self.model.is_empty("/"))

# Reset our own bookkeeping of created nodes to mirror the cleared stores.
self.all_groups.clear()
self.all_arrays.clear()

# Re-create the root group so subsequent rules have a valid hierarchy root.
zarr.group(store=self.store)
zarr.group(store=self.model)

# Creating the root group writes metadata, so the store is non-empty again.
assert not self._sync(self.store.is_empty("/"))
# TODO: MemoryStore is broken?
# assert not self._sync(self.model.is_empty("/"))

def draw_directory(self, data: st.DataObject) -> str:
    """Draw a directory-like path: an existing group, an existing array, or a
    chunk directory underneath an existing array.

    Returns the drawn path; callers (e.g. delete_dir) treat it as a prefix.
    """
    group_st = (
        st.sampled_from(sorted(self.all_groups)) if self.all_groups else st.nothing()
    )
    array_st = (
        st.sampled_from(sorted(self.all_arrays)) if self.all_arrays else st.nothing()
    )
    array_or_group = data.draw(st.one_of(group_st, array_st))
    if data.draw(st.booleans()) and array_or_group in self.all_arrays:
        arr = zarr.open_array(path=array_or_group, store=self.model)
        path = data.draw(
            st.one_of(
                st.sampled_from([array_or_group]),
                # Bug fix: the drawn chunk path was previously discarded (the
                # lambda ignored its argument), so this branch always produced
                # the bare "<array>/c/" prefix. Append the drawn chunk path so
                # nested chunk directories are actually exercised.
                chunk_paths(ndim=arr.ndim, numblocks=arr.cdata_shape).map(
                    lambda x: f"{array_or_group}/c/{x}"
                ),
            )
        )
    else:
        path = array_or_group
    return path

@precondition(lambda self: bool(self.all_arrays))
@rule(data=st.data())
def delete_chunk(self, data: st.DataObject) -> None:
    """Pick one tracked array, draw a full chunk coordinate, and delete that
    chunk key from both the model and the store under test."""
    target = data.draw(st.sampled_from(sorted(self.all_arrays)))
    arr = zarr.open_array(path=target, store=self.model)
    # subset=False asks for a complete chunk coordinate, not a prefix of one.
    chunk_path = data.draw(
        chunk_paths(ndim=arr.ndim, numblocks=arr.cdata_shape, subset=False)
    )
    path = f"{target}/c/{chunk_path}"
    note(f"deleting chunk {path=!r}")
    self._sync(self.model.delete(path))
    self._sync(self.store.delete(path))

@precondition(lambda self: bool(self.all_arrays))
@rule(data=st.data())
def overwrite_array_basic_indexing(self, data: st.DataObject) -> None:
    """Overwrite a basic-indexed selection of one tracked array with freshly
    drawn data, applied identically to the model and the store under test."""
    target = data.draw(st.sampled_from(sorted(self.all_arrays)))
    model_arr = zarr.open_array(path=target, store=self.model)
    store_arr = zarr.open_array(path=target, store=self.store)
    slicer = data.draw(basic_indices(shape=model_arr.shape))
    note(f"overwriting array basic {slicer=}")
    # The selection's shape dictates the replacement array's shape.
    selection_shape = model_arr[slicer].shape  # type: ignore [union-attr]
    fresh = data.draw(npst.arrays(shape=selection_shape, dtype=model_arr.dtype))
    model_arr[slicer] = fresh
    store_arr[slicer] = fresh

@precondition(lambda self: bool(self.all_arrays))
@rule(data=st.data())
def resize_array(self, data: st.DataObject) -> None:
    """Resize one tracked array to a freshly drawn shape of the same rank,
    on both the model and the store under test."""
    target = data.draw(st.sampled_from(sorted(self.all_arrays)))
    model_array = zarr.open_array(path=target, store=self.model)
    store_array = zarr.open_array(path=target, store=self.store)
    rank = model_array.ndim
    # Same number of dimensions, but each side may grow or shrink (>= 1).
    new_shape = data.draw(
        npst.array_shapes(max_dims=rank, min_dims=rank, min_side=1)
    )
    note(f"resizing array from {model_array.shape} to {new_shape}")
    model_array.resize(new_shape)
    store_array.resize(new_shape)

@precondition(lambda self: bool(self.all_arrays) or bool(self.all_groups))
@rule(data=st.data())
def delete_dir(self, data: st.DataObject) -> None:
    """Delete a whole directory (group, array, or chunk subtree) from both
    stores, and drop the corresponding nodes from our bookkeeping."""
    path = self.draw_directory(data)
    note(f"delete_dir with {path=!r}")
    self._sync(self.model.delete_dir(path))
    self._sync(self.store.delete_dir(path))

    # Bug fix: a bare startswith(path) also matched sibling nodes that merely
    # share the prefix (deleting "foo" would untrack "foobar"). Only the node
    # itself and nodes nested under it are actually removed by delete_dir.
    matches = {
        node
        for node in self.all_groups | self.all_arrays
        if node == path or node.startswith(f"{path}/")
    }
    self.all_groups = self.all_groups - matches
    self.all_arrays = self.all_arrays - matches

@rule()
def pickle_objects(self) -> None:
if not self.store.session.has_uncommitted_changes:
Expand All @@ -364,26 +240,6 @@ def pickle_objects(self) -> None:

pickle.loads(pickle.dumps(self.repo))

@invariant()
def check_list_prefix_from_root(self) -> None:
    """Invariant: model and store agree on the full key listing, and every
    tracked node's zarr.json appears in both listings."""
    expected_keys = self._sync_iter(self.model.list_prefix(""))
    actual_keys = self._sync_iter(self.store.list_prefix(""))
    note(f"Checking {len(expected_keys)} expected keys vs {len(actual_keys)} actual keys")
    assert sorted(expected_keys) == sorted(actual_keys), (
        sorted(expected_keys),
        sorted(actual_keys),
    )

    # Our internal bookkeeping must match both listings: each tracked group
    # or array is identified by its metadata document.
    for node in self.all_groups | self.all_arrays:
        assert f"{node}/zarr.json" in expected_keys
        assert f"{node}/zarr.json" in actual_keys


def test_zarr_hierarchy() -> None:
def mk_test_instance_sync() -> ModifiedZarrHierarchyStateMachine:
Expand Down
Loading