Commit fa282d9
Further testing and fixing. WIP
martaiborra committed Sep 23, 2024
1 parent e3f128f commit fa282d9
Showing 11 changed files with 215 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/blosc2/core.py
@@ -1447,7 +1447,7 @@ def decompress2(src: object, dst: object | bytearray = None, **kwargs: dict) ->
Decompression parameters. The default values are in :class:`blosc2.DParams`.
Keyword arguments supported:
- cparams: :class:`blosc2.DParams`
+ dparams: :class:`blosc2.DParams`
All the decompression parameters that you want to use as
a :class:`blosc2.DParams` instance.
others: Any
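The docstring fix above renames the keyword from cparams to dparams. A minimal round-trip sketch (not part of the commit), assuming the DParams dataclass from this same refactor:

    import numpy as np
    import blosc2

    data = np.arange(1_000, dtype=np.int64)
    compressed = blosc2.compress2(data)
    # Decompression parameters are passed through the `dparams` keyword.
    raw = blosc2.decompress2(compressed, dparams=blosc2.DParams(nthreads=2))
    assert np.array_equal(np.frombuffer(raw, dtype=np.int64), data)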
42 changes: 31 additions & 11 deletions src/blosc2/ndarray.py
@@ -1289,13 +1289,15 @@ def copy(self, dtype: np.dtype = None, **kwargs: dict) -> NDArray:
"""
if dtype is None:
dtype = self.dtype
- kwargs["cparams"] = kwargs.get("cparams", asdict(self.schunk.cparams)).copy()
- kwargs["dparams"] = kwargs.get("dparams", asdict(self.schunk.dparams)).copy()
+ kwargs["cparams"] = kwargs.get("cparams").copy() if isinstance(kwargs.get("cparams"), dict) \
+     else asdict(self.schunk.cparams)
+ kwargs["dparams"] = kwargs.get("dparams").copy() if isinstance(kwargs.get("dparams"), dict) \
+     else asdict(self.schunk.dparams)
if "meta" not in kwargs:
# Copy metalayers as well
meta_dict = {meta: self.schunk.meta[meta] for meta in self.schunk.meta}
kwargs["meta"] = meta_dict
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)

return super().copy(dtype, **kwargs)

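With the change above, copy() only copies a cparams/dparams value when it is a plain dict and otherwise falls back to the source array's own parameters via asdict(). A hypothetical usage sketch:

    import numpy as np
    import blosc2

    a = blosc2.asarray(np.arange(1_000, dtype=np.int32))
    b = a.copy(cparams={"clevel": 9})  # dict form: the dict itself is copied
    c = a.copy()                       # no cparams/dparams: reuse a.schunk.cparams / a.schunk.dparams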
@@ -1370,7 +1372,7 @@ def slice(self, key: int | slice | Sequence[slice], **kwargs: dict) -> NDArray:
>>> print(type(c))
<class 'blosc2.ndarray.NDArray'>
"""
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
key, mask = process_key(key, self.shape)
start, stop, step = get_ndarray_start_stop(self.ndim, key, self.shape)
key = (start, stop)
@@ -2329,7 +2331,7 @@ def empty(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict)
dtype('int32')
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2362,7 +2364,7 @@ def uninit(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict
dtype('float64')
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2395,7 +2397,7 @@ def nans(shape: int | tuple | list, dtype: np.dtype = np.float64, **kwargs: dict
dtype('float64')
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2434,7 +2436,7 @@ def zeros(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict)
dtype('float64')
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2487,7 +2489,7 @@ def full(shape: int | tuple | list, fill_value: bytes | int | float | bool, dtyp
if dtype is None:
dtype = np.dtype(type(fill_value))
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2534,7 +2536,7 @@ def frombuffer(
>>> a = blosc2.frombuffer(buffer, shape, chunks=chunks, dtype=dtype)
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2597,7 +2599,7 @@ def asarray(array: np.ndarray | blosc2.C2Array, **kwargs: dict | list) -> NDArra
>>> # Create a NDArray from a NumPy array
>>> nda = blosc2.asarray(a)
"""
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
# Use the chunks and blocks from the array if they are not passed
@@ -2648,6 +2650,20 @@ def asarray(array: np.ndarray | blosc2.C2Array, **kwargs: dict | list) -> NDArra


def _check_ndarray_kwargs(**kwargs):
+ if "storage" in kwargs:
+     for key in kwargs:
+         if key in list(blosc2.Storage.__annotations__):
+             raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage")
+     storage = kwargs.get("storage")
+     del kwargs["storage"]
+     kwargs = {**kwargs, **asdict(storage)}
+ else:
+     cparams = kwargs.get("cparams", {})
+     cparams = cparams if isinstance(cparams, dict) else asdict(cparams)
+     dparams = kwargs.get("dparams", {})
+     dparams = dparams if isinstance(dparams, dict) else asdict(dparams)
+     kwargs["cparams"] = cparams
+     kwargs["dparams"] = dparams
supported_keys = [
"chunks",
"blocks",
@@ -2659,17 +2675,21 @@ def _check_ndarray_kwargs(**kwargs):
"mode",
"mmap_mode",
"initial_mapping_size",
+ "storage",
]
for key in kwargs:
if key not in supported_keys:
raise KeyError(
f"Only {supported_keys} are supported as keyword arguments, and you passed '{key}'"
)

if "cparams" in kwargs and "chunks" in kwargs["cparams"]:
raise ValueError("You cannot pass chunks in cparams, use `chunks` argument instead")
if "cparams" in kwargs and "blocks" in kwargs["cparams"]:
raise ValueError("You cannot pass blocks in cparams, use `blocks` argument instead")

+ return kwargs


def get_slice_nchunks(schunk: blosc2.SChunk,
key: tuple[(int, int)] | int | slice | Sequence[slice]
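The new branch in _check_ndarray_kwargs accepts a single storage argument, unpacks it with asdict(), and raises AttributeError if any field that Storage already covers is also passed separately. A usage sketch (not from the commit; the Storage fields are the ones used in the tests below):

    import numpy as np
    import blosc2

    storage = blosc2.Storage(
        urlpath="example.b2nd",
        contiguous=True,
        cparams=blosc2.CParams(clevel=5),
        dparams=blosc2.DParams(nthreads=2),
    )
    a = blosc2.empty((100, 100), dtype=np.float64, storage=storage)

    # Mixing `storage` with a keyword it already covers is rejected:
    # blosc2.empty((10,), storage=storage, urlpath="other.b2nd")  # AttributeError

    blosc2.remove_urlpath("example.b2nd")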
12 changes: 8 additions & 4 deletions src/blosc2/storage.py
@@ -44,7 +44,7 @@ class CParams:
Parameters
----------
- codec: :class:`Codec`
+ codec: :class:`Codec` or int
The compressor code. Default is :py:obj:`Codec.ZSTD <Codec>`.
codec_meta: int
The metadata for the compressor code, 0 by default.
@@ -65,7 +65,7 @@ class CParams:
splitmode: :class:`SplitMode`
The split mode for the blocks.
The default value is :py:obj:`SplitMode.ALWAYS_SPLIT <SplitMode>`.
- filters: :class:`Filter` list
+ filters: :class:`Filter` or int list
The sequence of filters. Default: [:py:obj:`Filter.NOFILTER <Filter>`,
:py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`,
:py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.SHUFFLE <Filter>`].
@@ -74,15 +74,15 @@
tuner: :class:`Tuner`
The tuner to use. Default: :py:obj:`Tuner.STUNE <Tuner>`.
"""
- codec: blosc2.Codec = blosc2.Codec.ZSTD
+ codec: blosc2.Codec | int = blosc2.Codec.ZSTD
codec_meta: int = 0
clevel: int = 1
use_dict: bool = False
typesize: int = 8
nthreads: int = field(default_factory=default_nthreads)
blocksize: int = 0
splitmode: blosc2.SplitMode = blosc2.SplitMode.ALWAYS_SPLIT
- filters: list[blosc2.Filter] = field(default_factory=default_filters)
+ filters: list[blosc2.Filter | int] = field(default_factory=default_filters)
filters_meta: list[int] = field(default_factory=default_filters_meta)
tuner: blosc2.Tuner = blosc2.Tuner.STUNE

@@ -95,6 +95,10 @@ def __post_init__(self):
if len(self.filters) > len(self.filters_meta):
raise ValueError("Number of filters cannot exceed number of filters meta")

+ for i in range(len(self.filters)):
+     if self.filters_meta[i] == 0 and self.filters[i] == blosc2.Filter.BYTEDELTA:
+         self.filters_meta[i] = self.typesize


@dataclass
class DParams:
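The loop added to __post_init__ fills in the filter metadata for BYTEDELTA: any filters_meta entry left at 0 for a BYTEDELTA filter is replaced with the typesize, and the relaxed annotations also allow plain integer codes for codec and filters. A small sketch (not part of the commit):

    import blosc2

    cp = blosc2.CParams(
        typesize=4,
        filters=[blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA],
        filters_meta=[0, 0],
    )
    # Only the BYTEDELTA slot picks up the typesize; the SHUFFLE slot stays at 0.
    assert cp.filters_meta == [0, 4]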
6 changes: 2 additions & 4 deletions tests/ndarray/test_c2array_udf.py
@@ -95,9 +95,7 @@ def test_getitem(chunks, blocks, slices, urlpath, contiguous, chunked_eval, c2su
chunked_eval=chunked_eval,
chunks=chunks,
blocks=blocks,
- urlpath=urlpath,
- contiguous=contiguous,
- dparams=dparams,
+ storage=blosc2.Storage(urlpath=urlpath, contiguous=contiguous, dparams=dparams),
)
lazy_eval = expr[slices]
np.testing.assert_allclose(lazy_eval, npc[slices])
@@ -107,6 +105,6 @@ def test_getitem(chunks, blocks, slices, urlpath, contiguous, chunked_eval, c2su
assert res.schunk.urlpath is None
assert res.schunk.contiguous == contiguous
# Check dparams after a getitem and an eval
- assert res.schunk.dparams["nthreads"] == dparams["nthreads"]
+ assert res.schunk.dparams.nthreads == dparams["nthreads"]

blosc2.remove_urlpath(urlpath)
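The rewritten assertion above reflects that schunk.cparams and schunk.dparams are now dataclass instances rather than dicts, so their fields are read as attributes. A hypothetical sketch:

    import numpy as np
    import blosc2

    a = blosc2.asarray(np.arange(10), dparams={"nthreads": 2})
    assert a.schunk.dparams.nthreads == 2
    print(a.schunk.cparams.clevel)  # attribute access instead of ["clevel"]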
4 changes: 2 additions & 2 deletions tests/ndarray/test_copy.py
@@ -27,7 +27,7 @@ def test_copy(shape, chunks1, blocks1, chunks2, blocks2, dtype):
typesize = dtype.itemsize
size = int(np.prod(shape))
buffer = bytes(size * typesize)
- cparams1 = {"clevel": 2}
+ cparams1 = blosc2.CParams(clevel=2)
a = blosc2.frombuffer(buffer, shape, dtype=dtype, chunks=chunks1, blocks=blocks1, cparams=cparams1)
cparams2 = {"clevel": 5, "filters": [blosc2.Filter.BITSHUFFLE], "filters_meta": [0]}
b = a.copy(chunks=chunks2, blocks=blocks2, cparams=cparams2)
@@ -63,7 +63,7 @@ def test_copy_numpy(shape, chunks1, blocks1, chunks2, blocks2, dtype):
else:
nparray = np.arange(size, dtype=dtype).reshape(shape)
a = blosc2.asarray(nparray, chunks=chunks1, blocks=blocks1)
- cparams = {"clevel": 5, "filters": [blosc2.Filter.BITSHUFFLE], "filters_meta": [0]}
+ cparams = blosc2.CParams(clevel=5, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])
b = a.copy(chunks=chunks2, blocks=blocks2, cparams=cparams)
assert b.dtype == nparray.dtype
if dtype.kind == "V":
11 changes: 6 additions & 5 deletions tests/ndarray/test_empty.py
@@ -65,16 +65,17 @@
def test_empty(shape, chunks, blocks, dtype, cparams, urlpath, contiguous):
blosc2.remove_urlpath(urlpath)
filters = cparams["filters"]
cparams["filters_meta"] = [0] * len(filters)
+ storage = blosc2.Storage(cparams=blosc2.CParams(**cparams),
+     dparams={"nthreads": 2},
+     urlpath=urlpath,
+     contiguous=contiguous,
+     )
a = blosc2.empty(
shape,
chunks=chunks,
blocks=blocks,
dtype=dtype,
- cparams=cparams,
- dparams={"nthreads": 2},
- urlpath=urlpath,
- contiguous=contiguous,
+ storage=storage,
)

dtype = np.dtype(dtype)
6 changes: 3 additions & 3 deletions tests/ndarray/test_full.py
@@ -33,7 +33,7 @@
(10, 10),
b"sun",
None,
- {"codec": blosc2.Codec.LZ4HC, "clevel": 8, "use_dict": False, "nthreads": 2},
+ blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2),
{"nthreads": 2},
"full.b2nd",
True,
@@ -55,7 +55,7 @@
(11, 11),
123456789,
None,
- {"codec": blosc2.Codec.LZ4HC, "clevel": 8, "use_dict": False, "nthreads": 2},
+ blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2),
{"nthreads": 2},
None,
True,
@@ -71,7 +71,7 @@ def test_full(shape, chunks, blocks, fill_value, cparams, dparams, dtype, urlpat
blocks=blocks,
dtype=dtype,
cparams=cparams,
- dparams=dparams,
+ dparams=blosc2.DParams(**dparams),
urlpath=urlpath,
contiguous=contiguous,
)
8 changes: 4 additions & 4 deletions tests/ndarray/test_lazyexpr.py
@@ -448,14 +448,14 @@ def test_params(array_fixture):

urlpath = "eval_expr.b2nd"
blosc2.remove_urlpath(urlpath)
- cparams = {"nthreads": 2}
+ cparams = blosc2.CParams(nthreads=2)
dparams = {"nthreads": 4}
chunks = tuple(i // 2 for i in nres.shape)
blocks = tuple(i // 4 for i in nres.shape)
res = expr.eval(urlpath=urlpath, cparams=cparams, dparams=dparams, chunks=chunks, blocks=blocks)
np.testing.assert_allclose(res[:], nres)
assert res.schunk.urlpath == urlpath
- assert res.schunk.cparams.nthreads == cparams["nthreads"]
+ assert res.schunk.cparams.nthreads == cparams.nthreads
assert res.schunk.dparams.nthreads == dparams["nthreads"]
assert res.chunks == chunks
assert res.blocks == blocks
@@ -493,8 +493,8 @@ def test_save():
chunks = tuple(i // 2 for i in nres.shape)
blocks = tuple(i // 4 for i in nres.shape)
urlpath_eval = "eval_expr.b2nd"
- res = expr.eval(
-     urlpath=urlpath_eval, cparams=cparams, dparams=dparams, mode="w", chunks=chunks, blocks=blocks
+ res = expr.eval(storage=blosc2.Storage(urlpath=urlpath_eval, cparams=cparams, dparams=dparams, mode="w"),
+     chunks=chunks, blocks=blocks
)
np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol)

