Commit fa282d9
Further testing and fixing. WIP
martaiborra committed Sep 23, 2024
1 parent e3f128f commit fa282d9
Showing 11 changed files with 215 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/blosc2/core.py
@@ -1447,7 +1447,7 @@ def decompress2(src: object, dst: object | bytearray = None, **kwargs: dict) ->
Decompression parameters. The default values are in :class:`blosc2.DParams`.
Keyword arguments supported:
- cparams: :class:`blosc2.DParams`
+ dparams: :class:`blosc2.DParams`
All the decompression parameters that you want to use as
a :class:`blosc2.DParams` instance.
others: Any
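The docstring fix above renames the keyword from cparams to dparams. A minimal round-trip sketch (not part of the commit), assuming the DParams dataclass from this same refactor:

    import numpy as np
    import blosc2

    data = np.arange(1_000, dtype=np.int64)
    compressed = blosc2.compress2(data)
    # Decompression parameters are passed through the `dparams` keyword.
    raw = blosc2.decompress2(compressed, dparams=blosc2.DParams(nthreads=2))
    assert np.array_equal(np.frombuffer(raw, dtype=np.int64), data)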
42 changes: 31 additions & 11 deletions src/blosc2/ndarray.py
@@ -1289,13 +1289,15 @@ def copy(self, dtype: np.dtype = None, **kwargs: dict) -> NDArray:
"""
if dtype is None:
dtype = self.dtype
- kwargs["cparams"] = kwargs.get("cparams", asdict(self.schunk.cparams)).copy()
- kwargs["dparams"] = kwargs.get("dparams", asdict(self.schunk.dparams)).copy()
+ kwargs["cparams"] = kwargs.get("cparams").copy() if isinstance(kwargs.get("cparams"), dict) \
+     else asdict(self.schunk.cparams)
+ kwargs["dparams"] = kwargs.get("dparams").copy() if isinstance(kwargs.get("dparams"), dict) \
+     else asdict(self.schunk.dparams)
if "meta" not in kwargs:
# Copy metalayers as well
meta_dict = {meta: self.schunk.meta[meta] for meta in self.schunk.meta}
kwargs["meta"] = meta_dict
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)

return super().copy(dtype, **kwargs)

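With the change above, copy() only copies a cparams/dparams value when it is a plain dict and otherwise falls back to the source array's own parameters via asdict(). A hypothetical usage sketch:

    import numpy as np
    import blosc2

    a = blosc2.asarray(np.arange(1_000, dtype=np.int32))
    b = a.copy(cparams={"clevel": 9})  # dict form: the dict itself is copied
    c = a.copy()                       # no cparams/dparams: reuse a.schunk.cparams / a.schunk.dparams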
@@ -1370,7 +1372,7 @@ def slice(self, key: int | slice | Sequence[slice], **kwargs: dict) -> NDArray:
>>> print(type(c))
<class 'blosc2.ndarray.NDArray'>
"""
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
key, mask = process_key(key, self.shape)
start, stop, step = get_ndarray_start_stop(self.ndim, key, self.shape)
key = (start, stop)
@@ -2329,7 +2331,7 @@ def empty(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict)
dtype('int32')
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2362,7 +2364,7 @@ def uninit(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict
dtype('float64')
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2395,7 +2397,7 @@ def nans(shape: int | tuple | list, dtype: np.dtype = np.float64, **kwargs: dict
dtype('float64')
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2434,7 +2436,7 @@ def zeros(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict)
dtype('float64')
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2487,7 +2489,7 @@ def full(shape: int | tuple | list, fill_value: bytes | int | float | bool, dtyp
if dtype is None:
dtype = np.dtype(type(fill_value))
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2534,7 +2536,7 @@ def frombuffer(
>>> a = blosc2.frombuffer(buffer, shape, chunks=chunks, dtype=dtype)
"""
shape = _check_shape(shape)
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
chunks, blocks = compute_chunks_blocks(shape, chunks, blocks, dtype, **kwargs)
@@ -2597,7 +2599,7 @@ def asarray(array: np.ndarray | blosc2.C2Array, **kwargs: dict | list) -> NDArra
>>> # Create a NDArray from a NumPy array
>>> nda = blosc2.asarray(a)
"""
- _check_ndarray_kwargs(**kwargs)
+ kwargs = _check_ndarray_kwargs(**kwargs)
chunks = kwargs.pop("chunks", None)
blocks = kwargs.pop("blocks", None)
# Use the chunks and blocks from the array if they are not passed
@@ -2648,6 +2650,20 @@ def asarray(array: np.ndarray | blosc2.C2Array, **kwargs: dict | list) -> NDArra


def _check_ndarray_kwargs(**kwargs):
+ if "storage" in kwargs:
+     for key in kwargs:
+         if key in list(blosc2.Storage.__annotations__):
+             raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage")
+     storage = kwargs.get("storage")
+     del kwargs["storage"]
+     kwargs = {**kwargs, **asdict(storage)}
+ else:
+     cparams = kwargs.get("cparams", {})
+     cparams = cparams if isinstance(cparams, dict) else asdict(cparams)
+     dparams = kwargs.get("dparams", {})
+     dparams = dparams if isinstance(dparams, dict) else asdict(dparams)
+     kwargs["cparams"] = cparams
+     kwargs["dparams"] = dparams
supported_keys = [
"chunks",
"blocks",
@@ -2659,17 +2675,21 @@ def _check_ndarray_kwargs(**kwargs):
"mode",
"mmap_mode",
"initial_mapping_size",
+ "storage",
]
for key in kwargs:
if key not in supported_keys:
raise KeyError(
f"Only {supported_keys} are supported as keyword arguments, and you passed '{key}'"
)

if "cparams" in kwargs and "chunks" in kwargs["cparams"]:
raise ValueError("You cannot pass chunks in cparams, use `chunks` argument instead")
if "cparams" in kwargs and "blocks" in kwargs["cparams"]:
raise ValueError("You cannot pass blocks in cparams, use `blocks` argument instead")

+ return kwargs


def get_slice_nchunks(schunk: blosc2.SChunk,
key: tuple[(int, int)] | int | slice | Sequence[slice]
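The new branch in _check_ndarray_kwargs accepts a single storage argument, unpacks it with asdict(), and raises AttributeError if any field that Storage already covers is also passed separately. A usage sketch (not from the commit; the Storage fields are the ones used in the tests below):

    import numpy as np
    import blosc2

    storage = blosc2.Storage(
        urlpath="example.b2nd",
        contiguous=True,
        cparams=blosc2.CParams(clevel=5),
        dparams=blosc2.DParams(nthreads=2),
    )
    a = blosc2.empty((100, 100), dtype=np.float64, storage=storage)

    # Mixing `storage` with a keyword it already covers is rejected:
    # blosc2.empty((10,), storage=storage, urlpath="other.b2nd")  # AttributeError

    blosc2.remove_urlpath("example.b2nd")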
12 changes: 8 additions & 4 deletions src/blosc2/storage.py
@@ -44,7 +44,7 @@ class CParams:
Parameters
----------
- codec: :class:`Codec`
+ codec: :class:`Codec` or int
The compressor code. Default is :py:obj:`Codec.ZSTD <Codec>`.
codec_meta: int
The metadata for the compressor code, 0 by default.
@@ -65,7 +65,7 @@ class CParams:
splitmode: :class:`SplitMode`
The split mode for the blocks.
The default value is :py:obj:`SplitMode.ALWAYS_SPLIT <SplitMode>`.
- filters: :class:`Filter` list
+ filters: :class:`Filter` or int list
The sequence of filters. Default: [:py:obj:`Filter.NOFILTER <Filter>`,
:py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.NOFILTER <Filter>`,
:py:obj:`Filter.NOFILTER <Filter>`, :py:obj:`Filter.SHUFFLE <Filter>`].
@@ -74,15 +74,15 @@
tuner: :class:`Tuner`
The tuner to use. Default: :py:obj:`Tuner.STUNE <Tuner>`.
"""
- codec: blosc2.Codec = blosc2.Codec.ZSTD
+ codec: blosc2.Codec | int = blosc2.Codec.ZSTD
codec_meta: int = 0
clevel: int = 1
use_dict: bool = False
typesize: int = 8
nthreads: int = field(default_factory=default_nthreads)
blocksize: int = 0
splitmode: blosc2.SplitMode = blosc2.SplitMode.ALWAYS_SPLIT
- filters: list[blosc2.Filter] = field(default_factory=default_filters)
+ filters: list[blosc2.Filter | int] = field(default_factory=default_filters)
filters_meta: list[int] = field(default_factory=default_filters_meta)
tuner: blosc2.Tuner = blosc2.Tuner.STUNE

@@ -95,6 +95,10 @@ def __post_init__(self):
if len(self.filters) > len(self.filters_meta):
raise ValueError("Number of filters cannot exceed number of filters meta")

+ for i in range(len(self.filters)):
+     if self.filters_meta[i] == 0 and self.filters[i] == blosc2.Filter.BYTEDELTA:
+         self.filters_meta[i] = self.typesize


@dataclass
class DParams:
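The loop added to __post_init__ fills in the filter metadata for BYTEDELTA: any filters_meta entry left at 0 for a BYTEDELTA filter is replaced with the typesize, and the relaxed annotations also allow plain integer codes for codec and filters. A small sketch (not part of the commit):

    import blosc2

    cp = blosc2.CParams(
        typesize=4,
        filters=[blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA],
        filters_meta=[0, 0],
    )
    # Only the BYTEDELTA slot picks up the typesize; the SHUFFLE slot stays at 0.
    assert cp.filters_meta == [0, 4]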
6 changes: 2 additions & 4 deletions tests/ndarray/test_c2array_udf.py
@@ -95,9 +95,7 @@ def test_getitem(chunks, blocks, slices, urlpath, contiguous, chunked_eval, c2su
chunked_eval=chunked_eval,
chunks=chunks,
blocks=blocks,
- urlpath=urlpath,
- contiguous=contiguous,
- dparams=dparams,
+ storage=blosc2.Storage(urlpath=urlpath, contiguous=contiguous, dparams=dparams),
)
lazy_eval = expr[slices]
np.testing.assert_allclose(lazy_eval, npc[slices])
@@ -107,6 +105,6 @@ def test_getitem(chunks, blocks, slices, urlpath, contiguous, chunked_eval, c2su
assert res.schunk.urlpath is None
assert res.schunk.contiguous == contiguous
# Check dparams after a getitem and an eval
- assert res.schunk.dparams["nthreads"] == dparams["nthreads"]
+ assert res.schunk.dparams.nthreads == dparams["nthreads"]

blosc2.remove_urlpath(urlpath)
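The rewritten assertion above reflects that schunk.cparams and schunk.dparams are now dataclass instances rather than dicts, so their fields are read as attributes. A hypothetical sketch:

    import numpy as np
    import blosc2

    a = blosc2.asarray(np.arange(10), dparams={"nthreads": 2})
    assert a.schunk.dparams.nthreads == 2
    print(a.schunk.cparams.clevel)  # attribute access instead of ["clevel"]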
4 changes: 2 additions & 2 deletions tests/ndarray/test_copy.py
@@ -27,7 +27,7 @@ def test_copy(shape, chunks1, blocks1, chunks2, blocks2, dtype):
typesize = dtype.itemsize
size = int(np.prod(shape))
buffer = bytes(size * typesize)
- cparams1 = {"clevel": 2}
+ cparams1 = blosc2.CParams(clevel=2)
a = blosc2.frombuffer(buffer, shape, dtype=dtype, chunks=chunks1, blocks=blocks1, cparams=cparams1)
cparams2 = {"clevel": 5, "filters": [blosc2.Filter.BITSHUFFLE], "filters_meta": [0]}
b = a.copy(chunks=chunks2, blocks=blocks2, cparams=cparams2)
@@ -63,7 +63,7 @@ def test_copy_numpy(shape, chunks1, blocks1, chunks2, blocks2, dtype):
else:
nparray = np.arange(size, dtype=dtype).reshape(shape)
a = blosc2.asarray(nparray, chunks=chunks1, blocks=blocks1)
- cparams = {"clevel": 5, "filters": [blosc2.Filter.BITSHUFFLE], "filters_meta": [0]}
+ cparams = blosc2.CParams(clevel=5, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])
b = a.copy(chunks=chunks2, blocks=blocks2, cparams=cparams)
assert b.dtype == nparray.dtype
if dtype.kind == "V":
11 changes: 6 additions & 5 deletions tests/ndarray/test_empty.py
@@ -65,16 +65,17 @@
def test_empty(shape, chunks, blocks, dtype, cparams, urlpath, contiguous):
blosc2.remove_urlpath(urlpath)
filters = cparams["filters"]
cparams["filters_meta"] = [0] * len(filters)
+ storage = blosc2.Storage(cparams=blosc2.CParams(**cparams),
+     dparams={"nthreads": 2},
+     urlpath=urlpath,
+     contiguous=contiguous,
+     )
a = blosc2.empty(
shape,
chunks=chunks,
blocks=blocks,
dtype=dtype,
- cparams=cparams,
- dparams={"nthreads": 2},
- urlpath=urlpath,
- contiguous=contiguous,
+ storage=storage,
)

dtype = np.dtype(dtype)
6 changes: 3 additions & 3 deletions tests/ndarray/test_full.py
@@ -33,7 +33,7 @@
(10, 10),
b"sun",
None,
- {"codec": blosc2.Codec.LZ4HC, "clevel": 8, "use_dict": False, "nthreads": 2},
+ blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2),
{"nthreads": 2},
"full.b2nd",
True,
@@ -55,7 +55,7 @@
(11, 11),
123456789,
None,
- {"codec": blosc2.Codec.LZ4HC, "clevel": 8, "use_dict": False, "nthreads": 2},
+ blosc2.CParams(codec=blosc2.Codec.LZ4HC, clevel=8, use_dict=False, nthreads=2),
{"nthreads": 2},
None,
True,
@@ -71,7 +71,7 @@ def test_full(shape, chunks, blocks, fill_value, cparams, dparams, dtype, urlpat
blocks=blocks,
dtype=dtype,
cparams=cparams,
- dparams=dparams,
+ dparams=blosc2.DParams(**dparams),
urlpath=urlpath,
contiguous=contiguous,
)
8 changes: 4 additions & 4 deletions tests/ndarray/test_lazyexpr.py
@@ -448,14 +448,14 @@ def test_params(array_fixture):

urlpath = "eval_expr.b2nd"
blosc2.remove_urlpath(urlpath)
- cparams = {"nthreads": 2}
+ cparams = blosc2.CParams(nthreads=2)
dparams = {"nthreads": 4}
chunks = tuple(i // 2 for i in nres.shape)
blocks = tuple(i // 4 for i in nres.shape)
res = expr.eval(urlpath=urlpath, cparams=cparams, dparams=dparams, chunks=chunks, blocks=blocks)
np.testing.assert_allclose(res[:], nres)
assert res.schunk.urlpath == urlpath
- assert res.schunk.cparams.nthreads == cparams["nthreads"]
+ assert res.schunk.cparams.nthreads == cparams.nthreads
assert res.schunk.dparams.nthreads == dparams["nthreads"]
assert res.chunks == chunks
assert res.blocks == blocks
@@ -493,8 +493,8 @@ def test_save():
chunks = tuple(i // 2 for i in nres.shape)
blocks = tuple(i // 4 for i in nres.shape)
urlpath_eval = "eval_expr.b2nd"
- res = expr.eval(
-     urlpath=urlpath_eval, cparams=cparams, dparams=dparams, mode="w", chunks=chunks, blocks=blocks
+ res = expr.eval(storage=blosc2.Storage(urlpath=urlpath_eval, cparams=cparams, dparams=dparams, mode="w"),
+     chunks=chunks, blocks=blocks
)
np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol)

