From f0603be526a9080b65475b7ab04568c96b27ded7 Mon Sep 17 00:00:00 2001 From: oumaima-ech-chdig Date: Mon, 7 Oct 2024 13:59:54 +0200 Subject: [PATCH 1/4] Update of the Initialization of CParams, DParams, and Storage in Blosc2 --- examples/btune.py | 8 ++++---- examples/filler.py | 2 +- examples/pack_tensor.py | 7 +------ examples/postfilter1.py | 8 ++++---- examples/postfilter2.py | 8 ++++---- examples/postfilter3.py | 8 ++++---- examples/prefilter.py | 8 ++++---- examples/schunk.py | 10 +++++----- 8 files changed, 27 insertions(+), 32 deletions(-) diff --git a/examples/btune.py b/examples/btune.py index 6f2b414c..8b7bc44a 100644 --- a/examples/btune.py +++ b/examples/btune.py @@ -18,12 +18,12 @@ nchunks = 10 # Set the compression and decompression parameters, use BTUNE tuner -cparams = {"codec": blosc2.Codec.LZ4HC, "typesize": 4, "tuner": blosc2.Tuner.BTUNE} -dparams = {} +cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4, tuner=blosc2.Tuner.BTUNE) +dparams = blosc2.DParams() contiguous = True urlpath = "filename" -storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} +storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode='a') blosc2.remove_urlpath(urlpath) # Set the Btune configuration to use @@ -32,7 +32,7 @@ # Create the SChunk data = np.arange(200 * 1000 * nchunks) -schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, **storage) +schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, cparams=cparams, dparams=dparams, storage=storage) # Check data can be retrieved correctly data2 = np.empty(data.shape, dtype=data.dtype) diff --git a/examples/filler.py b/examples/filler.py index 7f8b1e82..bfd1c1a2 100644 --- a/examples/filler.py +++ b/examples/filler.py @@ -17,7 +17,7 @@ schunk_dtype = np.dtype(np.float64) # Set the compression parameters. We need nthreads=1 for this example. 
-cparams = {"typesize": schunk_dtype.itemsize, "nthreads": 1} +cparams = blosc2.CParams(typesize=schunk_dtype.itemsize, nthreads=1) # Create empty SChunk schunk = blosc2.SChunk(chunksize=chunk_len * schunk_dtype.itemsize, cparams=cparams) diff --git a/examples/pack_tensor.py b/examples/pack_tensor.py index f2b2ced9..82ec1e44 100644 --- a/examples/pack_tensor.py +++ b/examples/pack_tensor.py @@ -15,12 +15,7 @@ a = np.arange(1_000_000) -cparams = { - "codec": blosc2.Codec.ZSTD, - "clevel": 9, - "filters": [blosc2.Filter.BITSHUFFLE], - "filters_meta": [0], -} +cparams = blosc2.CParams(codec=blosc2.Codec.ZSTD, clevel=9, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0]) cframe = blosc2.pack_tensor(a, cparams=cparams) print("Length of packed array in bytes:", len(cframe)) diff --git a/examples/postfilter1.py b/examples/postfilter1.py index a734284e..a69a2385 100644 --- a/examples/postfilter1.py +++ b/examples/postfilter1.py @@ -15,16 +15,16 @@ output_dtype = np.dtype(np.float32) # Set the compression and decompression parameters -cparams = {"codec": blosc2.Codec.LZ4, "typesize": 4} -dparams = {"nthreads": 1} +cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=4) +dparams = blosc2.DParams(nthreads=1) contiguous = True urlpath = None -storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} +storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode='a') # Remove previous SChunk blosc2.remove_urlpath(urlpath) # Create and set data data = np.arange(200 * 1000 * nchunks, dtype=input_dtype) -schunk = blosc2.SChunk(chunksize=200 * 1000 * input_dtype.itemsize, data=data, **storage) +schunk = blosc2.SChunk(chunksize=200 * 1000 * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage) out1 = np.empty(200 * 1000 * nchunks, dtype=input_dtype) schunk.get_slice(0, 200 * 1000 * nchunks, out=out1) diff --git a/examples/postfilter2.py b/examples/postfilter2.py index c0069689..088cd95f 100644 --- 
a/examples/postfilter2.py +++ b/examples/postfilter2.py @@ -15,17 +15,17 @@ output_dtype = np.int64 # output dtype has to be of the same size as input # Set the compression and decompression parameters -cparams = {"codec": blosc2.Codec.LZ4, "typesize": input_dtype.itemsize} -dparams = {"nthreads": 1} +cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=input_dtype.itemsize) +dparams = blosc2.DParams(nthreads=1) contiguous = True urlpath = "filename" -storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} +storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode='a') # Remove previous SChunk blosc2.remove_urlpath(urlpath) # Create and set data chunkshape = 200 * 1000 data = np.arange(0, chunkshape * nchunks, dtype=input_dtype) -schunk = blosc2.SChunk(chunksize=chunkshape * input_dtype.itemsize, data=data, **storage) +schunk = blosc2.SChunk(chunksize=chunkshape * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage) out1 = np.empty(chunkshape * nchunks, dtype=input_dtype) schunk.get_slice(0, chunkshape * nchunks, out=out1) diff --git a/examples/postfilter3.py b/examples/postfilter3.py index 8bbce1ec..53280fb6 100644 --- a/examples/postfilter3.py +++ b/examples/postfilter3.py @@ -14,17 +14,17 @@ input_dtype = np.dtype(np.int64) # Set the compression and decompression parameters -cparams = {"codec": blosc2.Codec.LZ4, "typesize": input_dtype.itemsize} -dparams = {"nthreads": 1} +cparams = blosc2.CParams(codec=blosc2.Codec.LZ4, typesize=input_dtype.itemsize) +dparams = blosc2.DParams(nthreads=1) contiguous = False urlpath = None -storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} +storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode='a') # Remove previous SChunk blosc2.remove_urlpath(urlpath) # Create and set data chunkshape = 20_000 data = np.zeros(chunkshape * nchunks, dtype=input_dtype) -schunk = 
blosc2.SChunk(chunksize=chunkshape * input_dtype.itemsize, data=data, **storage) +schunk = blosc2.SChunk(chunksize=chunkshape * input_dtype.itemsize, data=data, cparams=cparams, dparams=dparams, storage=storage) out1 = np.empty(chunkshape * nchunks, dtype=input_dtype) schunk.get_slice(0, chunkshape * nchunks, out=out1) diff --git a/examples/prefilter.py b/examples/prefilter.py index e181d72b..b9507f6f 100644 --- a/examples/prefilter.py +++ b/examples/prefilter.py @@ -17,11 +17,11 @@ output_dtype = np.dtype(np.float32) # Set the compression and decompression parameters -cparams = {"typesize": 4, "nthreads": 1} -dparams = {"nthreads": 4} -storage = {"cparams": cparams, "dparams": dparams} +cparams = blosc2.CParams(typesize=4, nthreads=1) +dparams = blosc2.DParams(nthreads=4) +storage = blosc2.Storage(mode='a') # Create empty schunk -schunk = blosc2.SChunk(chunksize=200 * 1000 * input_dtype.itemsize, **storage) +schunk = blosc2.SChunk(chunksize=200 * 1000 * input_dtype.itemsize, cparams=cparams, dparams=dparams, storage=storage) # Set prefilter with decorator diff --git a/examples/schunk.py b/examples/schunk.py index 0bd458cf..2487285d 100644 --- a/examples/schunk.py +++ b/examples/schunk.py @@ -12,19 +12,19 @@ nchunks = 10 # Set the compression and decompression parameters -cparams = {"codec": blosc2.Codec.LZ4HC, "typesize": 4} -dparams = {} +cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4) +dparams = blosc2.DParams() contiguous = True urlpath = "filename" -storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} +storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode='a') blosc2.remove_urlpath(urlpath) numpy_meta = {b"dtype": str(np.dtype("int32"))} test_meta = {b"lorem": 1234} meta = {"numpy": numpy_meta, "test": test_meta} # Create the empty SChunk -schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, meta=meta, **storage) +schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, meta=meta, cparams=cparams, 
dparams=dparams) # Append some chunks for i in range(nchunks): buffer = i * np.arange(200 * 1000, dtype="int32") @@ -54,7 +54,7 @@ # Update a chunk compressing the data first buffer = 11 * np.arange(200 * 1000, dtype="int32") -chunk = blosc2.compress2(buffer, **cparams) +chunk = blosc2.compress2(buffer, cparams=cparams) schunk.update_chunk(7, chunk) # Delete the 4th chunk From 5552be221f78aa07c714ec87e5456a2fb73317d1 Mon Sep 17 00:00:00 2001 From: oumaima-ech-chdig Date: Tue, 8 Oct 2024 10:33:02 +0200 Subject: [PATCH 2/4] More updated examples --- .../tutorials/00.schunk-basics.ipynb | 203 ++++---- .../01.schunk-slicing_and_beyond.ipynb | 135 +++--- .../tutorials/02.ndarray-basics.ipynb | 446 +++++++----------- .../tutorials/03.lazyarray-expressions.ipynb | 60 +-- .../tutorials/04.reductions.ipynb | 122 ++--- .../tutorials/10.ucodecs-ufilters.ipynb | 53 ++- examples/ndarray/bytedelta_filter.py | 4 +- examples/ndarray/empty_.py | 14 +- examples/ndarray/ndmean.py | 2 +- examples/ndarray/zfp_codec.py | 2 +- examples/schunk_roundtrip.py | 10 +- examples/ucodecs.py | 12 +- examples/ufilters.py | 11 +- 13 files changed, 510 insertions(+), 564 deletions(-) diff --git a/doc/getting_started/tutorials/00.schunk-basics.ipynb b/doc/getting_started/tutorials/00.schunk-basics.ipynb index c8fd71b5..b6a3a5a1 100644 --- a/doc/getting_started/tutorials/00.schunk-basics.ipynb +++ b/doc/getting_started/tutorials/00.schunk-basics.ipynb @@ -14,8 +14,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:56:06.847395Z", - "start_time": "2023-06-20T08:56:04.891386Z" + "end_time": "2024-10-08T07:48:22.080821Z", + "start_time": "2024-10-08T07:48:20.212845Z" } }, "outputs": [], @@ -36,32 +36,30 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 18, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:56:35.213472Z", - "start_time": "2023-06-20T08:56:35.185848Z" + "end_time": "2024-10-08T07:50:53.469230Z", + "start_time": 
"2024-10-08T07:50:53.461168Z" } }, "outputs": [], "source": [ - "cparams = {\n", - " \"codec\": blosc2.Codec.BLOSCLZ,\n", - " \"typesize\": 4,\n", - " \"nthreads\": 8,\n", - "}\n", + "cparams = blosc2.CParams(\n", + " codec=blosc2.Codec.BLOSCLZ,\n", + " typesize=4,\n", + " nthreads=8,\n", + ")\n", "\n", - "dparams = {\n", - " \"nthreads\": 16,\n", - "}\n", + "dparams = blosc2.DParams(\n", + " nthreads=16,\n", + ")\n", "\n", - "storage = {\n", - " \"contiguous\": True,\n", - " \"urlpath\": \"myfile.b2frame\",\n", - " \"mode\": \"w\", # create a new file\n", - " \"cparams\": cparams,\n", - " \"dparams\": dparams,\n", - "}" + "storage = blosc2.Storage(\n", + " contiguous=True,\n", + " urlpath=\"myfile.b2frame\",\n", + " mode=\"w\", # create a new file\n", + ")" ] }, { @@ -73,25 +71,27 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 19, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:56:39.138571Z", - "start_time": "2023-06-20T08:56:39.094692Z" + "end_time": "2024-10-08T07:50:56.622362Z", + "start_time": "2024-10-08T07:50:56.597445Z" } }, "outputs": [ { "data": { - "text/plain": "" + "text/plain": [ + "" + ] }, - "execution_count": 3, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "schunk = blosc2.SChunk(chunksize=10_000_000, **storage)\n", + "schunk = blosc2.SChunk(chunksize=10_000_000, cparams=cparams, dparams=dparams, storage=storage)\n", "schunk" ] }, @@ -113,11 +113,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 20, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:12.225714Z", - "start_time": "2023-06-20T08:57:11.198762Z" + "end_time": "2024-10-08T07:51:03.809479Z", + "start_time": "2024-10-08T07:51:02.468183Z" } }, "outputs": [], @@ -127,11 +127,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 21, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:13.599134Z", - "start_time": 
"2023-06-20T08:57:12.984832Z" + "end_time": "2024-10-08T07:51:08.644653Z", + "start_time": "2024-10-08T07:51:07.997097Z" } }, "outputs": [ @@ -139,8 +139,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 737 ms, sys: 223 ms, total: 959 ms\n", - "Wall time: 680 ms\n" + "CPU times: user 774 ms, sys: 289 ms, total: 1.06 s\n", + "Wall time: 639 ms\n" ] } ], @@ -153,11 +153,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 22, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:15.168245Z", - "start_time": "2023-06-20T08:57:15.005858Z" + "end_time": "2024-10-08T07:51:10.979618Z", + "start_time": "2024-10-08T07:51:10.824076Z" } }, "outputs": [ @@ -165,7 +165,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "-rw-r--r-- 1 martaiborra staff 53M Jun 20 10:57 myfile.b2frame\r\n" + "-rw-r--r-- 1 oma staff 54M Oct 8 09:51 myfile.b2frame\r\n" ] } ], @@ -184,11 +184,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 23, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:30.996015Z", - "start_time": "2023-06-20T08:57:30.983554Z" + "end_time": "2024-10-08T07:51:17.141694Z", + "start_time": "2024-10-08T07:51:17.136224Z" } }, "outputs": [], @@ -198,11 +198,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 24, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:32.836926Z", - "start_time": "2023-06-20T08:57:32.587284Z" + "end_time": "2024-10-08T07:51:18.278099Z", + "start_time": "2024-10-08T07:51:17.990015Z" } }, "outputs": [ @@ -210,8 +210,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 325 ms, sys: 268 ms, total: 594 ms\n", - "Wall time: 277 ms\n" + "CPU times: user 379 ms, sys: 333 ms, total: 711 ms\n", + "Wall time: 282 ms\n" ] } ], @@ -223,11 +223,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 25, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:35.151951Z", 
- "start_time": "2023-06-20T08:57:34.608295Z" + "end_time": "2024-10-08T07:51:49.498739Z", + "start_time": "2024-10-08T07:51:49.431546Z" } }, "outputs": [], @@ -247,11 +247,11 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 26, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:38.686069Z", - "start_time": "2023-06-20T08:57:38.659828Z" + "end_time": "2024-10-08T07:51:53.107201Z", + "start_time": "2024-10-08T07:51:53.089631Z" } }, "outputs": [], @@ -262,11 +262,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 27, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:40.609473Z", - "start_time": "2023-06-20T08:57:40.573103Z" + "end_time": "2024-10-08T07:51:54.399484Z", + "start_time": "2024-10-08T07:51:54.385346Z" } }, "outputs": [ @@ -274,15 +274,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 305 µs, sys: 1.14 ms, total: 1.45 ms\n", - "Wall time: 1.36 ms\n" + "CPU times: user 305 µs, sys: 1.13 ms, total: 1.43 ms\n", + "Wall time: 1.62 ms\n" ] }, { "data": { - "text/plain": "100" + "text/plain": [ + "100" + ] }, - "execution_count": 11, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -301,11 +303,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 28, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:57:43.390240Z", - "start_time": "2023-06-20T08:57:43.342470Z" + "end_time": "2024-10-08T07:51:56.639465Z", + "start_time": "2024-10-08T07:51:56.621955Z" } }, "outputs": [ @@ -313,15 +315,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 255 µs, sys: 1.13 ms, total: 1.38 ms\n", - "Wall time: 2.54 ms\n" + "CPU times: user 269 µs, sys: 1.05 ms, total: 1.32 ms\n", + "Wall time: 2.48 ms\n" ] }, { "data": { - "text/plain": "101" + "text/plain": [ + "101" + ] }, - "execution_count": 12, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -350,20 +354,22 
@@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 29, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T09:33:56.509580Z", - "start_time": "2023-06-20T09:33:56.486725Z" + "end_time": "2024-10-08T07:51:58.887183Z", + "start_time": "2024-10-08T07:51:58.879102Z" }, "collapsed": false }, "outputs": [ { "data": { - "text/plain": "['meta1']" + "text/plain": [ + "['meta1']" + ] }, - "execution_count": 17, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -375,20 +381,22 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 30, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T09:34:36.117828Z", - "start_time": "2023-06-20T09:34:36.108260Z" + "end_time": "2024-10-08T07:52:02.039689Z", + "start_time": "2024-10-08T07:52:02.024043Z" }, "collapsed": false }, "outputs": [ { "data": { - "text/plain": "234" + "text/plain": [ + "234" + ] }, - "execution_count": 19, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -399,20 +407,22 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 31, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T09:35:03.927788Z", - "start_time": "2023-06-20T09:35:03.917223Z" + "end_time": "2024-10-08T07:52:02.899323Z", + "start_time": "2024-10-08T07:52:02.892155Z" }, "collapsed": false }, "outputs": [ { "data": { - "text/plain": "235" + "text/plain": [ + "235" + ] }, - "execution_count": 20, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -435,19 +445,21 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 32, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:58:10.944357Z", - "start_time": "2023-06-20T08:58:10.899490Z" + "end_time": "2024-10-08T07:52:06.505484Z", + "start_time": "2024-10-08T07:52:06.496675Z" } }, "outputs": [ { "data": { - "text/plain": "{b'info1': 'This is an example', b'info2': 'of user meta handling'}" + "text/plain": [ + "{b'info1': 
'This is an example', b'info2': 'of user meta handling'}" + ] }, - "execution_count": 13, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -467,19 +479,21 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 33, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T08:58:19.331481Z", - "start_time": "2023-06-20T08:58:19.295516Z" + "end_time": "2024-10-08T07:52:08.528185Z", + "start_time": "2024-10-08T07:52:08.522120Z" } }, "outputs": [ { "data": { - "text/plain": "{b'info2': 'of user meta handling'}" + "text/plain": [ + "{b'info2': 'of user meta handling'}" + ] }, - "execution_count": 14, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -495,6 +509,13 @@ "source": [ "That's all for now. There are more examples in the [examples directory of the git repository](https://github.com/Blosc/python-blosc2/tree/main/examples) for you to explore. Enjoy!" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/doc/getting_started/tutorials/01.schunk-slicing_and_beyond.ipynb b/doc/getting_started/tutorials/01.schunk-slicing_and_beyond.ipynb index 718c9f48..4ff4e928 100644 --- a/doc/getting_started/tutorials/01.schunk-slicing_and_beyond.ipynb +++ b/doc/getting_started/tutorials/01.schunk-slicing_and_beyond.ipynb @@ -11,14 +11,12 @@ }, { "cell_type": "code", - "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:35.507212Z", - "start_time": "2023-06-20T10:20:35.228091Z" + "end_time": "2024-10-08T07:53:25.108040Z", + "start_time": "2024-10-08T07:53:25.079638Z" } }, - "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -26,9 +24,11 @@ "\n", "nchunks = 10\n", "data = np.arange(200 * 1000 * nchunks, dtype=np.int32)\n", - "cparams = {\"typesize\": 4}\n", + "cparams = blosc2.CParams(typesize=4)\n", "schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, cparams=cparams)" - 
] + ], + "outputs": [], + "execution_count": 11 }, { "cell_type": "markdown", @@ -43,19 +43,19 @@ }, { "cell_type": "code", - "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:35.511148Z", - "start_time": "2023-06-20T10:20:35.270231Z" + "end_time": "2024-10-08T07:53:27.343758Z", + "start_time": "2024-10-08T07:53:27.332204Z" } }, - "outputs": [], "source": [ "out = np.empty(200 * 1000 * nchunks, dtype=np.int32)\n", "for i in range(nchunks):\n", " schunk.decompress_chunk(i, out[200 * 1000 * i : 200 * 1000 * (i + 1)])" - ] + ], + "outputs": [], + "execution_count": 12 }, { "cell_type": "markdown", @@ -66,27 +66,29 @@ }, { "cell_type": "code", - "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:35.519990Z", - "start_time": "2023-06-20T10:20:35.304350Z" + "end_time": "2024-10-08T07:53:30.596Z", + "start_time": "2024-10-08T07:53:30.569975Z" } }, + "source": [ + "out_slice = schunk[:]\n", + "type(out_slice)" + ], "outputs": [ { "data": { - "text/plain": "bytes" + "text/plain": [ + "bytes" + ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "out_slice = schunk[:]\n", - "type(out_slice)" - ] + "execution_count": 13 }, { "cell_type": "markdown", @@ -97,13 +99,18 @@ }, { "cell_type": "code", - "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:35.525236Z", - "start_time": "2023-06-20T10:20:35.336814Z" + "end_time": "2024-10-08T07:53:35.334366Z", + "start_time": "2024-10-08T07:53:35.300689Z" } }, + "source": [ + "out_slice = np.empty(200 * 1000 * nchunks, dtype=np.int32)\n", + "schunk.get_slice(out=out_slice)\n", + "np.array_equal(out, out_slice)\n", + "print(out_slice[:4])" + ], "outputs": [ { "name": "stdout", @@ -113,12 +120,7 @@ ] } ], - "source": [ - "out_slice = np.empty(200 * 1000 * nchunks, dtype=np.int32)\n", - "schunk.get_slice(out=out_slice)\n", - "np.array_equal(out, out_slice)\n", - "print(out_slice[:4])" - ] + 
"execution_count": 14 }, { "cell_type": "markdown", @@ -133,20 +135,20 @@ }, { "cell_type": "code", - "execution_count": 15, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:35.527561Z", - "start_time": "2023-06-20T10:20:35.353345Z" + "end_time": "2024-10-08T07:53:38.365602Z", + "start_time": "2024-10-08T07:53:38.353159Z" } }, - "outputs": [], "source": [ "start = 34\n", "stop = 1000 * 200 * 4\n", "new_value = np.ones(stop - start, dtype=np.int32)\n", "schunk[start:stop] = new_value" - ] + ], + "outputs": [], + "execution_count": 15 }, { "cell_type": "markdown", @@ -157,14 +159,12 @@ }, { "cell_type": "code", - "execution_count": 16, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:35.527871Z", - "start_time": "2023-06-20T10:20:35.368583Z" + "end_time": "2024-10-08T07:53:41.529220Z", + "start_time": "2024-10-08T07:53:41.518617Z" } }, - "outputs": [], "source": [ "schunk_nelems = 1000 * 200 * nchunks\n", "\n", @@ -172,7 +172,9 @@ "start = schunk_nelems - 123\n", "new_nitems = start + new_value.size\n", "schunk[start:new_nitems] = new_value" - ] + ], + "outputs": [], + "execution_count": 16 }, { "cell_type": "markdown", @@ -194,17 +196,17 @@ }, { "cell_type": "code", - "execution_count": 17, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:35.529947Z", - "start_time": "2023-06-20T10:20:35.379228Z" + "end_time": "2024-10-08T07:53:44.648399Z", + "start_time": "2024-10-08T07:53:44.639895Z" } }, - "outputs": [], "source": [ "buf = schunk.to_cframe()" - ] + ], + "outputs": [], + "execution_count": 17 }, { "cell_type": "markdown", @@ -215,17 +217,17 @@ }, { "cell_type": "code", - "execution_count": 18, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:35.542962Z", - "start_time": "2023-06-20T10:20:35.388249Z" + "end_time": "2024-10-08T07:53:47.319573Z", + "start_time": "2024-10-08T07:53:47.315552Z" } }, - "outputs": [], "source": [ "schunk2 = blosc2.schunk_from_cframe(cframe=buf, copy=True)" - ] + ], + "outputs": [], + 
"execution_count": 18 }, { "cell_type": "markdown", @@ -245,20 +247,20 @@ }, { "cell_type": "code", - "execution_count": 19, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:20:54.023391Z", - "start_time": "2023-06-20T10:20:35.396904Z" + "end_time": "2024-10-08T07:54:08.304326Z", + "start_time": "2024-10-08T07:53:51.028111Z" } }, - "outputs": [], "source": [ "np_array = np.arange(2**30, dtype=np.int32) # 4 GB array\n", "\n", "packed_arr2 = blosc2.pack_tensor(np_array)\n", "unpacked_arr2 = blosc2.unpack_tensor(packed_arr2)" - ] + ], + "outputs": [], + "execution_count": 19 }, { "cell_type": "markdown", @@ -271,28 +273,30 @@ }, { "cell_type": "code", - "execution_count": 20, "metadata": { "ExecuteTime": { - "end_time": "2023-06-20T10:21:15.080447Z", - "start_time": "2023-06-20T10:20:54.892944Z" + "end_time": "2024-10-08T07:54:32.551242Z", + "start_time": "2024-10-08T07:54:10.547445Z" } }, + "source": [ + "blosc2.save_tensor(np_array, urlpath=\"ondisk_array.b2frame\", mode=\"w\")\n", + "np_array2 = blosc2.load_tensor(\"ondisk_array.b2frame\")\n", + "np.array_equal(np_array, np_array2)" + ], "outputs": [ { "data": { - "text/plain": "True" + "text/plain": [ + "True" + ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "blosc2.save_tensor(np_array, urlpath=\"ondisk_array.b2frame\", mode=\"w\")\n", - "np_array2 = blosc2.load_tensor(\"ondisk_array.b2frame\")\n", - "np.array_equal(np_array, np_array2)" - ] + "execution_count": 20 }, { "cell_type": "markdown", @@ -302,6 +306,13 @@ "\n", "Now python-blosc2 offers an easy, yet fast way of creating, getting, setting and expanding data via the `SChunk` class. 
Moreover, you can get a contiguous compressed representation (aka [cframe](https://github.com/Blosc/c-blosc2/blob/main/README_CFRAME_FORMAT.rst)) of it and re-create it again later with no sweat.\n" ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "" } ], "metadata": { diff --git a/doc/getting_started/tutorials/02.ndarray-basics.ipynb b/doc/getting_started/tutorials/02.ndarray-basics.ipynb index 2012a14f..04544204 100644 --- a/doc/getting_started/tutorials/02.ndarray-basics.ipynb +++ b/doc/getting_started/tutorials/02.ndarray-basics.ipynb @@ -12,19 +12,19 @@ }, { "cell_type": "code", - "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.426033Z", - "start_time": "2024-02-01T13:48:39.730441Z" + "end_time": "2024-10-08T07:56:33.037210Z", + "start_time": "2024-10-08T07:56:33.032553Z" } }, - "outputs": [], "source": [ "import numpy as np\n", "\n", "import blosc2" - ] + ], + "outputs": [], + "execution_count": 26 }, { "cell_type": "markdown", @@ -36,13 +36,16 @@ }, { "cell_type": "code", - "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.445534Z", - "start_time": "2024-02-01T13:48:41.438272Z" + "end_time": "2024-10-08T07:56:34.647278Z", + "start_time": "2024-10-08T07:56:34.623875Z" } }, + "source": [ + "array = blosc2.zeros((10000, 10000), dtype=np.int32)\n", + "print(array.info)" + ], "outputs": [ { "name": "stdout", @@ -50,11 +53,11 @@ "text": [ "type : NDArray\n", "shape : (10000, 10000)\n", - "chunks : (512, 1024)\n", - "blocks : (128, 256)\n", + "chunks : (25, 10000)\n", + "blocks : (2, 10000)\n", "dtype : int32\n", - "cratio : 65536.00\n", - "cparams : {'blocksize': 131072,\n", + "cratio : 32500.00\n", + "cparams : {'blocksize': 80000,\n", " 'clevel': 1,\n", " 'codec': ,\n", " 'codec_meta': 0,\n", @@ -65,18 +68,16 @@ " ,\n", " ],\n", " 'filters_meta': [0, 0, 0, 0, 0, 0],\n", - " 'nthreads': 6,\n", + " 'nthreads': 4,\n", " 
'splitmode': ,\n", " 'typesize': 4,\n", " 'use_dict': 0}\n", - "dparams : {'nthreads': 6}\n" + "dparams : {'nthreads': 4}\n", + "\n" ] } ], - "source": [ - "array = blosc2.zeros((10000, 10000), dtype=np.int32)\n", - "print(array.info)" - ] + "execution_count": 27 }, { "cell_type": "markdown", @@ -90,87 +91,93 @@ }, { "cell_type": "code", - "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.497932Z", - "start_time": "2024-02-01T13:48:41.459032Z" + "end_time": "2024-10-08T07:56:36.157687Z", + "start_time": "2024-10-08T07:56:36.031592Z" } }, - "outputs": [], "source": [ "array[0, :] = np.arange(10000, dtype=array.dtype)\n", "array[:, 0] = np.arange(10000, dtype=array.dtype)" - ] + ], + "outputs": [], + "execution_count": 28 }, { "cell_type": "code", - "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.499948Z", - "start_time": "2024-02-01T13:48:41.473018Z" + "end_time": "2024-10-08T07:56:36.493800Z", + "start_time": "2024-10-08T07:56:36.479202Z" } }, + "source": [ + "array[0, 0]" + ], "outputs": [ { "data": { - "text/plain": "array(0, dtype=int32)" + "text/plain": [ + "array(0, dtype=int32)" + ] }, - "execution_count": 4, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "array[0, 0]" - ] + "execution_count": 29 }, { "cell_type": "code", - "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.540871Z", - "start_time": "2024-02-01T13:48:41.484602Z" + "end_time": "2024-10-08T07:56:37.078878Z", + "start_time": "2024-10-08T07:56:37.069792Z" } }, + "source": [ + "array[0, :]" + ], "outputs": [ { "data": { - "text/plain": "array([ 0, 1, 2, ..., 9997, 9998, 9999], dtype=int32)" + "text/plain": [ + "array([ 0, 1, 2, ..., 9997, 9998, 9999], dtype=int32)" + ] }, - "execution_count": 5, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "array[0, :]" - ] + "execution_count": 30 }, { 
"cell_type": "code", - "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.541970Z", - "start_time": "2024-02-01T13:48:41.494320Z" + "end_time": "2024-10-08T07:56:37.794476Z", + "start_time": "2024-10-08T07:56:37.731823Z" } }, + "source": [ + "array[:, 0]" + ], "outputs": [ { "data": { - "text/plain": "array([ 0, 1, 2, ..., 9997, 9998, 9999], dtype=int32)" + "text/plain": [ + "array([ 0, 1, 2, ..., 9997, 9998, 9999], dtype=int32)" + ] }, - "execution_count": 6, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "array[:, 0]" - ] + "execution_count": 31 }, { "cell_type": "markdown", @@ -183,13 +190,23 @@ }, { "cell_type": "code", - "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.592610Z", - "start_time": "2024-02-01T13:48:41.581711Z" + "end_time": "2024-10-08T07:56:40.158664Z", + "start_time": "2024-10-08T07:56:40.023997Z" } }, + "source": [ + "array = blosc2.full(\n", + " (1000, 1000),\n", + " fill_value=b\"pepe\",\n", + " chunks=(100, 100),\n", + " blocks=(50, 50),\n", + " urlpath=\"ndarray_tutorial.b2nd\",\n", + " mode=\"w\",\n", + ")\n", + "print(array.info)" + ], "outputs": [ { "name": "stdout", @@ -212,25 +229,16 @@ " ,\n", " ],\n", " 'filters_meta': [0, 0, 0, 0, 0, 0],\n", - " 'nthreads': 6,\n", + " 'nthreads': 4,\n", " 'splitmode': ,\n", " 'typesize': 4,\n", " 'use_dict': 0}\n", - "dparams : {'nthreads': 6}\n" + "dparams : {'nthreads': 4}\n", + "\n" ] } ], - "source": [ - "array = blosc2.full(\n", - " (1000, 1000),\n", - " fill_value=b\"pepe\",\n", - " chunks=(100, 100),\n", - " blocks=(50, 50),\n", - " urlpath=\"ndarray_tutorial.b2nd\",\n", - " mode=\"w\",\n", - ")\n", - "print(array.info)" - ] + "execution_count": 32 }, { "cell_type": "markdown", @@ -241,13 +249,16 @@ }, { "cell_type": "code", - "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.593734Z", - "start_time": "2024-02-01T13:48:41.586251Z" + 
"end_time": "2024-10-08T07:56:42.314419Z", + "start_time": "2024-10-08T07:56:42.308506Z" } }, + "source": [ + "array2 = blosc2.open(\"ndarray_tutorial.b2nd\")\n", + "print(array2.info)" + ], "outputs": [ { "name": "stdout", @@ -270,18 +281,16 @@ " ,\n", " ],\n", " 'filters_meta': [0, 0, 0, 0, 0, 0],\n", - " 'nthreads': 6,\n", + " 'nthreads': 1,\n", " 'splitmode': ,\n", " 'typesize': 4,\n", " 'use_dict': 0}\n", - "dparams : {'nthreads': 6}\n" + "dparams : {'nthreads': 1}\n", + "\n" ] } ], - "source": [ - "array2 = blosc2.open(\"ndarray_tutorial.b2nd\")\n", - "print(array2.info)" - ] + "execution_count": 33 }, { "cell_type": "markdown", @@ -293,13 +302,17 @@ }, { "cell_type": "code", - "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:41.679224Z", - "start_time": "2024-02-01T13:48:41.591484Z" + "end_time": "2024-10-08T08:01:21.290860Z", + "start_time": "2024-10-08T08:01:21.216404Z" } }, + "source": [ + "b = np.arange(1000000).tobytes()\n", + "array1 = blosc2.frombuffer(b, shape=(1000, 1000), dtype=np.int64, chunks=(500, 10), blocks=(50, 10))\n", + "print(array1.info)" + ], "outputs": [ { "name": "stdout", @@ -322,70 +335,52 @@ " ,\n", " ],\n", " 'filters_meta': [0, 0, 0, 0, 0, 0],\n", - " 'nthreads': 6,\n", + " 'nthreads': 4,\n", " 'splitmode': ,\n", " 'typesize': 8,\n", " 'use_dict': 0}\n", - "dparams : {'nthreads': 6}\n" + "dparams : {'nthreads': 4}\n", + "\n" ] } ], - "source": [ - "b = np.arange(1000000).tobytes()\n", - "array1 = blosc2.frombuffer(b, shape=(1000, 1000), dtype=np.int64, chunks=(500, 10), blocks=(50, 10))\n", - "print(array1.info)" - ] + "execution_count": 38 }, { "cell_type": "code", - "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:42.618064Z", - "start_time": "2024-02-01T13:48:42.598309Z" + "end_time": "2024-10-08T08:01:22.190912Z", + "start_time": "2024-10-08T08:01:22.132568Z" } }, + "source": [ + "cparams = blosc2.CParams(\n", + " codec=blosc2.Codec.ZSTD,\n", + " 
clevel=9,\n", + " filters=[blosc2.Filter.BITSHUFFLE],\n", + " filters_meta=[0],\n", + ")\n", + "\n", + "array2 = array1.copy(chunks=(500, 10), blocks=(50, 10), cparams=cparams)\n", + "print(array2.info)" + ], "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : NDArray\n", - "shape : (1000, 1000)\n", - "chunks : (500, 10)\n", - "blocks : (50, 10)\n", - "dtype : int64\n", - "cratio : 13.94\n", - "cparams : {'blocksize': 4000,\n", - " 'clevel': 9,\n", - " 'codec': ,\n", - " 'codec_meta': 0,\n", - " 'filters': [,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ],\n", - " 'filters_meta': [0, 0, 0, 0, 0, 0],\n", - " 'nthreads': 6,\n", - " 'splitmode': ,\n", - " 'typesize': 8,\n", - " 'use_dict': 0}\n", - "dparams : {'nthreads': 6}\n" + "ename": "TypeError", + "evalue": "asdict() should be called on dataclass instances", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[39], line 8\u001B[0m\n\u001B[1;32m 1\u001B[0m cparams \u001B[38;5;241m=\u001B[39m blosc2\u001B[38;5;241m.\u001B[39mCParams(\n\u001B[1;32m 2\u001B[0m codec\u001B[38;5;241m=\u001B[39mblosc2\u001B[38;5;241m.\u001B[39mCodec\u001B[38;5;241m.\u001B[39mZSTD,\n\u001B[1;32m 3\u001B[0m clevel\u001B[38;5;241m=\u001B[39m\u001B[38;5;241m9\u001B[39m,\n\u001B[1;32m 4\u001B[0m filters\u001B[38;5;241m=\u001B[39m[blosc2\u001B[38;5;241m.\u001B[39mFilter\u001B[38;5;241m.\u001B[39mBITSHUFFLE],\n\u001B[1;32m 5\u001B[0m filters_meta\u001B[38;5;241m=\u001B[39m[\u001B[38;5;241m0\u001B[39m],\n\u001B[1;32m 6\u001B[0m )\n\u001B[0;32m----> 8\u001B[0m array2 \u001B[38;5;241m=\u001B[39m array1\u001B[38;5;241m.\u001B[39mcopy(chunks\u001B[38;5;241m=\u001B[39m(\u001B[38;5;241m500\u001B[39m, \u001B[38;5;241m10\u001B[39m), blocks\u001B[38;5;241m=\u001B[39m(\u001B[38;5;241m50\u001B[39m, \u001B[38;5;241m10\u001B[39m), 
cparams\u001B[38;5;241m=\u001B[39mcparams)\n\u001B[1;32m 9\u001B[0m \u001B[38;5;28mprint\u001B[39m(array2\u001B[38;5;241m.\u001B[39minfo)\n", + "File \u001B[0;32m~/blosc/python-blosc2/src/blosc2/ndarray.py:1356\u001B[0m, in \u001B[0;36mNDArray.copy\u001B[0;34m(self, dtype, **kwargs)\u001B[0m\n\u001B[1;32m 1351\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m dtype \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 1352\u001B[0m dtype \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mdtype\n\u001B[1;32m 1353\u001B[0m kwargs[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcparams\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m (\n\u001B[1;32m 1354\u001B[0m kwargs\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcparams\u001B[39m\u001B[38;5;124m\"\u001B[39m)\u001B[38;5;241m.\u001B[39mcopy()\n\u001B[1;32m 1355\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(kwargs\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcparams\u001B[39m\u001B[38;5;124m\"\u001B[39m), \u001B[38;5;28mdict\u001B[39m)\n\u001B[0;32m-> 1356\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m asdict(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mschunk\u001B[38;5;241m.\u001B[39mcparams)\n\u001B[1;32m 1357\u001B[0m )\n\u001B[1;32m 1358\u001B[0m kwargs[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdparams\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m (\n\u001B[1;32m 1359\u001B[0m kwargs\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdparams\u001B[39m\u001B[38;5;124m\"\u001B[39m)\u001B[38;5;241m.\u001B[39mcopy()\n\u001B[1;32m 1360\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(kwargs\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mdparams\u001B[39m\u001B[38;5;124m\"\u001B[39m), \u001B[38;5;28mdict\u001B[39m)\n\u001B[1;32m 1361\u001B[0m 
\u001B[38;5;28;01melse\u001B[39;00m asdict(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mschunk\u001B[38;5;241m.\u001B[39mdparams)\n\u001B[1;32m 1362\u001B[0m )\n\u001B[1;32m 1363\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmeta\u001B[39m\u001B[38;5;124m\"\u001B[39m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m kwargs:\n\u001B[1;32m 1364\u001B[0m \u001B[38;5;66;03m# Copy metalayers as well\u001B[39;00m\n", + "File \u001B[0;32m~/opt/miniconda3/lib/python3.12/dataclasses.py:1319\u001B[0m, in \u001B[0;36masdict\u001B[0;34m(obj, dict_factory)\u001B[0m\n\u001B[1;32m 1300\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"Return the fields of a dataclass instance as a new dictionary mapping\u001B[39;00m\n\u001B[1;32m 1301\u001B[0m \u001B[38;5;124;03mfield names to field values.\u001B[39;00m\n\u001B[1;32m 1302\u001B[0m \n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 1316\u001B[0m \u001B[38;5;124;03mtuples, lists, and dicts. 
Other objects are copied with 'copy.deepcopy()'.\u001B[39;00m\n\u001B[1;32m 1317\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 1318\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m _is_dataclass_instance(obj):\n\u001B[0;32m-> 1319\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124masdict() should be called on dataclass instances\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 1320\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m _asdict_inner(obj, dict_factory)\n", + "\u001B[0;31mTypeError\u001B[0m: asdict() should be called on dataclass instances" ] } ], - "source": [ - "cparams = {\n", - " \"codec\": blosc2.Codec.ZSTD,\n", - " \"clevel\": 9,\n", - " \"filters\": [blosc2.Filter.BITSHUFFLE],\n", - " \"filters_meta\": [0],\n", - "}\n", - "\n", - "array2 = array1.copy(chunks=(500, 10), blocks=(50, 10), cparams=cparams)\n", - "print(array2.info)" - ] + "execution_count": 39 }, { "cell_type": "markdown", @@ -398,18 +393,18 @@ }, { "cell_type": "code", - "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:42.619267Z", - "start_time": "2024-02-01T13:48:42.608433Z" + "end_time": "2024-10-08T07:55:16.569975Z", + "start_time": "2024-10-08T07:55:16.569707Z" } }, - "outputs": [], "source": [ "meta = {\"dtype\": \"i8\", \"coords\": [5.14, 23.0]}\n", "array = blosc2.zeros((1000, 1000), dtype=np.int16, chunks=(100, 100), blocks=(50, 50), meta=meta)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -420,49 +415,37 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-01T13:48:42.690125Z", - "start_time": "2024-02-01T13:48:42.617894Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "" - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": {}, "source": [ 
"array.schunk.meta" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:42.691496Z", - "start_time": "2024-02-01T13:48:42.627564Z" + "end_time": "2024-10-08T07:55:16.594249Z", + "start_time": "2024-10-08T07:55:16.588761Z" } }, + "source": [ + "array.schunk.meta.keys()" + ], "outputs": [ { "data": { - "text/plain": "['b2nd', 'dtype', 'coords']" + "text/plain": [ + "['b2nd']" + ] }, - "execution_count": 13, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "array.schunk.meta.keys()" - ] + "execution_count": 23 }, { "cell_type": "markdown", @@ -473,49 +456,55 @@ }, { "cell_type": "code", - "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:42.694278Z", - "start_time": "2024-02-01T13:48:42.635275Z" + "end_time": "2024-10-08T07:55:16.642139Z", + "start_time": "2024-10-08T07:55:16.634598Z" } }, + "source": [ + "array.schunk.meta[\"b2nd\"]" + ], "outputs": [ { "data": { - "text/plain": "[0, 2, [1000, 1000], [100, 100], [50, 50], 0, ' 1\u001B[0m array\u001B[38;5;241m.\u001B[39mschunk\u001B[38;5;241m.\u001B[39mmeta[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mcoords\u001B[39m\u001B[38;5;124m\"\u001B[39m]\n", + "File \u001B[0;32m~/blosc/python-blosc2/src/blosc2/schunk.py:122\u001B[0m, in \u001B[0;36mMeta.__getitem__\u001B[0;34m(self, item)\u001B[0m\n\u001B[1;32m 117\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m unpackb(\n\u001B[1;32m 118\u001B[0m blosc2_ext\u001B[38;5;241m.\u001B[39mmeta__getitem__(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mschunk, item),\n\u001B[1;32m 119\u001B[0m list_hook\u001B[38;5;241m=\u001B[39mblosc2_ext\u001B[38;5;241m.\u001B[39mdecode_tuple,\n\u001B[1;32m 120\u001B[0m )\n\u001B[1;32m 121\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m--> 122\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m 
\u001B[38;5;167;01mKeyError\u001B[39;00m(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mitem\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m not found\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n", + "\u001B[0;31mKeyError\u001B[0m: 'coords not found'" + ] } ], - "source": [ - "array.schunk.meta[\"coords\"]" - ] + "execution_count": 25 }, { "cell_type": "markdown", @@ -526,36 +515,20 @@ }, { "cell_type": "code", - "execution_count": 16, "metadata": { "ExecuteTime": { - "end_time": "2024-02-01T13:48:42.802674Z", - "start_time": "2024-02-01T13:48:42.649615Z" + "end_time": "2024-10-08T07:55:16.848001Z", + "start_time": "2024-10-08T07:55:16.847803Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{}\n" - ] - }, - { - "data": { - "text/plain": "{b'info1': 'This is an example', b'info2': 'of user meta handling'}" - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "print(array.schunk.vlmeta.getall())\n", "array.schunk.vlmeta[\"info1\"] = \"This is an example\"\n", "array.schunk.vlmeta[\"info2\"] = \"of user meta handling\"\n", "array.schunk.vlmeta.getall()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -566,27 +539,13 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "ExecuteTime": { - "end_time": "2024-02-01T13:48:42.803564Z", - "start_time": "2024-02-01T13:48:42.654680Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": "{b'info1': 'This is a larger example', b'info2': 'of user meta handling'}" - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "metadata": {}, "source": [ "array.schunk.vlmeta[\"info1\"] = \"This is a larger example\"\n", "array.schunk.vlmeta.getall()" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -601,51 +560,18 @@ }, { "cell_type": "code", - "execution_count": 18, "metadata": { - 
"ExecuteTime": { - "end_time": "2024-02-01T13:48:42.821726Z", - "start_time": "2024-02-01T13:48:42.659936Z" - }, "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "type : NDArray\n", - "shape : (100, 100, 100)\n", - "chunks : (64, 64, 100)\n", - "blocks : (32, 32, 32)\n", - "dtype : float64\n", - "cratio : 15.99\n", - "cparams : {'blocksize': 262144,\n", - " 'clevel': 1,\n", - " 'codec': ,\n", - " 'codec_meta': 0,\n", - " 'filters': [,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ],\n", - " 'filters_meta': [0, 0, 0, 0, 0, 0],\n", - " 'nthreads': 6,\n", - " 'splitmode': ,\n", - " 'typesize': 8,\n", - " 'use_dict': 0}\n", - "dparams : {'nthreads': 6}\n" - ] - } - ], "source": [ "shape = (100, 100, 100)\n", "dtype = np.float64\n", "nparray = np.linspace(0, 100, np.prod(shape), dtype=dtype).reshape(shape)\n", "b2ndarray = blosc2.asarray(nparray)\n", "print(b2ndarray.info)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -660,38 +586,18 @@ }, { "cell_type": "code", - "execution_count": 19, "metadata": { - "ExecuteTime": { - "end_time": "2024-02-01T13:48:42.826284Z", - "start_time": "2024-02-01T13:48:42.704044Z" - }, "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Compression ratio: 2.346534664543712\n" - ] - }, - { - "data": { - "text/plain": "array([[[ 5.42196142e+00, 2.73411248e-01, -8.16705224e-01,\n 1.37387920e+00, 4.67745267e+00],\n [-9.74870871e+00, 5.84935129e+00, 9.58553390e+00,\n -2.83529450e-01, 7.53172473e+00],\n [ 1.49656577e+00, 5.17716640e+00, 7.88381029e+00,\n 6.98547347e-01, -4.22113557e+00],\n [ 3.26899881e+00, -1.82539905e+00, -6.64803980e+00,\n 2.26750920e+00, -8.04904893e+00],\n [ 1.25639643e+01, 6.13877785e+00, 8.36071977e-01,\n 4.61057570e+00, 1.48929362e+01]],\n\n [[ 3.35584136e+00, 1.99526803e-01, -1.83173110e+01,\n -9.23138847e+00, -1.16172733e+00],\n [-5.03933967e+00, -1.12041458e+01, 
4.03284196e+00,\n 1.00896486e+01, 1.66993503e+00],\n [-1.18575679e+01, -4.75050150e+00, 2.18309491e+00,\n 7.96693815e+00, -1.08675195e+01],\n [-8.88867651e+00, 2.61614522e+00, -1.21496391e+00,\n -1.07405006e+01, -1.62225644e+01],\n [-8.06054293e+00, 1.41019810e+01, 3.73009613e+00,\n 1.94280930e+00, -4.03920319e-01]],\n\n [[ 7.11325574e+00, 1.81344216e+00, -1.31212523e+01,\n 7.53794442e+00, 6.05015875e+00],\n [-3.72363480e+00, 1.51570884e+01, -2.04563128e-01,\n 2.48303234e+00, -2.40123746e+00],\n [-6.54960604e+00, -9.95287318e+00, -5.29298162e+00,\n 8.24236836e+00, 7.44135682e+00],\n [ 2.93987926e+00, -6.38440848e+00, -1.14590714e+00,\n 2.02831822e+00, 2.50627016e-03],\n [ 4.39693638e+00, 7.14526714e+00, -1.83301102e+00,\n 8.41598861e+00, -4.57312873e+00]],\n\n [[ 1.72690846e+01, 6.30828920e+00, -5.30917037e+00,\n 7.52455436e+00, 1.19643440e+01],\n [ 9.12355405e+00, 1.67975018e+00, 2.93640941e+00,\n -7.64215452e+00, 1.62410350e+00],\n [-1.55437404e+00, 9.49132288e-01, 8.92834289e+00,\n -1.37456729e+01, 9.86778010e+00],\n [-7.21853497e+00, -4.47973496e+00, 3.25376041e+00,\n -6.51526389e+00, 8.59162340e+00],\n [-9.83341081e+00, 9.25969121e+00, -1.36367239e+01,\n 8.07390571e+00, 6.14360462e-01]],\n\n [[ 4.65602528e+00, -1.48217159e+01, 7.67247150e+00,\n -1.41809697e+01, 8.29187072e+00],\n [-2.09188110e+01, -1.21744141e+01, -1.23980307e+00,\n -1.67901253e+01, -1.11255548e+01],\n [-1.71639719e+00, 8.41005260e+00, -9.16336234e+00,\n -9.91380613e+00, -9.34633040e-01],\n [-7.14082014e+00, -3.63309930e+00, 5.40634385e+00,\n -1.65522254e+00, 5.61551645e+00],\n [-6.91584808e-01, 1.46205820e+01, -6.56466213e+00,\n -3.79375418e+00, 6.82807996e+00]]])" - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "rng = np.random.default_rng()\n", "buffer = bytes(rng.normal(size=np.prod(shape)) * 8)\n", "b2ndarray = blosc2.frombuffer(buffer, shape, dtype=dtype)\n", "print(\"Compression ratio:\", b2ndarray.schunk.cratio)\n", "b2ndarray[:5, 
:5, :5]" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", diff --git a/doc/getting_started/tutorials/03.lazyarray-expressions.ipynb b/doc/getting_started/tutorials/03.lazyarray-expressions.ipynb index c358e63b..03d0dd2f 100644 --- a/doc/getting_started/tutorials/03.lazyarray-expressions.ipynb +++ b/doc/getting_started/tutorials/03.lazyarray-expressions.ipynb @@ -14,8 +14,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.570244Z", - "start_time": "2024-06-18T11:28:42.958292Z" + "end_time": "2024-10-08T08:02:44.264534Z", + "start_time": "2024-10-08T08:02:42.250807Z" } }, "outputs": [], @@ -38,8 +38,8 @@ "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.612950Z", - "start_time": "2024-06-18T11:28:44.575724Z" + "end_time": "2024-10-08T08:02:44.318974Z", + "start_time": "2024-10-08T08:02:44.270378Z" } }, "outputs": [], @@ -62,8 +62,8 @@ "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.623774Z", - "start_time": "2024-06-18T11:28:44.615967Z" + "end_time": "2024-10-08T08:02:44.335524Z", + "start_time": "2024-10-08T08:02:44.321803Z" } }, "outputs": [ @@ -101,8 +101,8 @@ "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.641281Z", - "start_time": "2024-06-18T11:28:44.626435Z" + "end_time": "2024-10-08T08:02:44.374777Z", + "start_time": "2024-10-08T08:02:44.339465Z" } }, "outputs": [ @@ -128,11 +128,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.837647Z", - "start_time": "2024-06-18T11:28:44.643376Z" + "end_time": "2024-10-08T08:03:35.576040Z", + "start_time": "2024-10-08T08:03:34.587292Z" } }, "outputs": [ @@ -140,12 +140,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Compression ratio: 2.10x\n" + "Compression ratio: 2.08x\n" ] } ], "source": [ - "cparams = {\"codec\": 
blosc2.Codec.ZSTD, \"filters\": [blosc2.Filter.BITSHUFFLE], \"clevel\": 9}\n", + "cparams = blosc2.CParams(\n", + " codec=blosc2.Codec.ZSTD, filters=[blosc2.Filter.BITSHUFFLE], clevel=9, filters_meta=[0]\n", + ")\n", "d = c.eval(cparams=cparams)\n", "print(f\"Compression ratio: {d.schunk.cratio:.2f}x\")" ] @@ -157,11 +159,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.848909Z", - "start_time": "2024-06-18T11:28:44.839555Z" + "end_time": "2024-10-08T08:03:38.778346Z", + "start_time": "2024-10-08T08:03:38.766508Z" } }, "outputs": [ @@ -190,11 +192,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.866877Z", - "start_time": "2024-06-18T11:28:44.850580Z" + "end_time": "2024-10-08T08:03:42.435049Z", + "start_time": "2024-10-08T08:03:42.424601Z" } }, "outputs": [], @@ -210,11 +212,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.877079Z", - "start_time": "2024-06-18T11:28:44.868540Z" + "end_time": "2024-10-08T08:03:44.143482Z", + "start_time": "2024-10-08T08:03:44.131993Z" } }, "outputs": [ @@ -243,11 +245,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.897246Z", - "start_time": "2024-06-18T11:28:44.880903Z" + "end_time": "2024-10-08T08:03:46.697195Z", + "start_time": "2024-10-08T08:03:46.656874Z" } }, "outputs": [ @@ -275,11 +277,11 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2024-06-18T11:28:44.916136Z", - "start_time": "2024-06-18T11:28:44.902534Z" + "end_time": "2024-10-08T08:03:47.858807Z", + "start_time": "2024-10-08T08:03:47.835261Z" } }, "outputs": [ @@ -289,7 +291,7 @@ "999999.9999999471" ] }, - 
"execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } diff --git a/doc/getting_started/tutorials/04.reductions.ipynb b/doc/getting_started/tutorials/04.reductions.ipynb index f81ffc82..a4103df8 100644 --- a/doc/getting_started/tutorials/04.reductions.ipynb +++ b/doc/getting_started/tutorials/04.reductions.ipynb @@ -26,15 +26,13 @@ }, { "cell_type": "code", - "execution_count": 1, "id": "initial_id", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:38.283613Z", - "start_time": "2024-08-23T08:35:37.744567Z" + "end_time": "2024-10-08T08:09:33.940958Z", + "start_time": "2024-10-08T08:09:31.388787Z" } }, - "outputs": [], "source": [ "from time import time\n", "\n", @@ -42,26 +40,28 @@ "import numpy as np\n", "\n", "import blosc2" - ] + ], + "outputs": [], + "execution_count": 6 }, { "cell_type": "code", - "execution_count": 2, "id": "94a5fa3aad0a9d8b", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:38.461093Z", - "start_time": "2024-08-23T08:35:38.284710Z" + "end_time": "2024-10-08T08:09:40.296786Z", + "start_time": "2024-10-08T08:09:33.943533Z" } }, - "outputs": [], "source": [ "# Create a 3D array of type float64 (8 GB)\n", "dtype = np.float64\n", "shape = (1000, 1000, 1000)\n", "size = np.prod(shape)\n", "a = np.linspace(0, 1000, num=size, dtype=dtype).reshape(shape)" - ] + ], + "outputs": [], + "execution_count": 7 }, { "cell_type": "markdown", @@ -75,15 +75,13 @@ }, { "cell_type": "code", - "execution_count": 3, "id": "bbbd00951e2b16f6", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:38.579018Z", - "start_time": "2024-08-23T08:35:38.461714Z" + "end_time": "2024-10-08T08:09:43.197736Z", + "start_time": "2024-10-08T08:09:40.299433Z" } }, - "outputs": [], "source": [ "axes = (\"X\", \"Y\", \"Z\", \"all\")\n", "meas_np = {\"sum\": {}, \"time\": {}}\n", @@ -93,7 +91,9 @@ " meas_np[\"sum\"][axis] = np.sum(a, axis=n)\n", " t = time() - t0\n", " meas_np[\"time\"][axis] = time() - t0" 
- ] + ], + "outputs": [], + "execution_count": 8 }, { "cell_type": "markdown", @@ -107,20 +107,20 @@ }, { "cell_type": "code", - "execution_count": 4, "id": "21b3c02e2b03e1d8", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:38.581397Z", - "start_time": "2024-08-23T08:35:38.579700Z" + "end_time": "2024-10-08T08:09:43.206113Z", + "start_time": "2024-10-08T08:09:43.201684Z" } }, - "outputs": [], "source": [ "# Params for Blosc2\n", "clevels = (0, 5)\n", "codecs = (blosc2.Codec.LZ4, blosc2.Codec.ZSTD)" - ] + ], + "outputs": [], + "execution_count": 9 }, { "cell_type": "markdown", @@ -132,15 +132,13 @@ }, { "cell_type": "code", - "execution_count": 5, "id": "92217680c72e2ae4", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:38.585750Z", - "start_time": "2024-08-23T08:35:38.582786Z" + "end_time": "2024-10-08T08:09:43.219653Z", + "start_time": "2024-10-08T08:09:43.210405Z" } }, - "outputs": [], "source": [ "# Create a 3D array of type float64\n", "def measure_blosc2(chunks):\n", @@ -149,7 +147,7 @@ " meas[codec] = {}\n", " for clevel in clevels:\n", " meas[codec][clevel] = {\"sum\": {}, \"time\": {}}\n", - " cparams = {\"clevel\": clevel, \"codec\": codec}\n", + " cparams = blosc2.CParams(clevel=clevel, codec=codec)\n", " a1 = blosc2.asarray(a, chunks=chunks, cparams=cparams)\n", " if clevel > 0:\n", " print(f\"cratio for {codec.name} + SHUFFLE: {a1.schunk.cratio:.1f}x\")\n", @@ -165,7 +163,9 @@ " # np.testing.assert_allclose(meas[codec][clevel][\"sum\"][axis],\n", " # meas_np[\"sum\"][axis])\n", " return meas" - ] + ], + "outputs": [], + "execution_count": 10 }, { "cell_type": "markdown", @@ -177,15 +177,13 @@ }, { "cell_type": "code", - "execution_count": 6, "id": "fb0ce45807353475", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:38.590013Z", - "start_time": "2024-08-23T08:35:38.586424Z" + "end_time": "2024-10-08T08:09:43.231879Z", + "start_time": "2024-10-08T08:09:43.222961Z" } }, - "outputs": [], "source": [ "def 
plot_meas(meas_np, meas, chunks):\n", " _fig, ax = plt.subplots()\n", @@ -228,44 +226,46 @@ "\n", " plt.tight_layout()\n", " plt.show()" - ] + ], + "outputs": [], + "execution_count": 11 }, { "cell_type": "code", - "execution_count": 7, "id": "9314c555-f759-43dd-95dd-08772b2bfd3a", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:42.849758Z", - "start_time": "2024-08-23T08:35:38.590621Z" + "end_time": "2024-10-08T08:14:56.328625Z", + "start_time": "2024-10-08T08:09:43.235144Z" } }, + "source": [ + "# Automatic chunking: (1, 1000, 1000) for Intel 13900K\n", + "chunks = None\n", + "meas = measure_blosc2(chunks)\n", + "plot_meas(meas_np, meas, chunks)" + ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "cratio for LZ4 + SHUFFLE: 16.7x\n", - "cratio for ZSTD + SHUFFLE: 63.6x\n" + "cratio for LZ4 + SHUFFLE: 11.3x\n", + "cratio for ZSTD + SHUFFLE: 39.3x\n" ] }, { "data": { - "image/png": "", "text/plain": [ "
" - ] + ], + "image/png": "" }, "metadata": {}, "output_type": "display_data" } ], - "source": [ - "# Automatic chunking: (1, 1000, 1000) for Intel 13900K\n", - "chunks = None\n", - "meas = measure_blosc2(chunks)\n", - "plot_meas(meas_np, meas, chunks)" - ] + "execution_count": 12 }, { "cell_type": "markdown", @@ -279,40 +279,40 @@ }, { "cell_type": "code", - "execution_count": 8, "id": "e0070348-b3e5-4936-93ab-11dbe70db445", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:48.018075Z", - "start_time": "2024-08-23T08:35:42.861338Z" + "end_time": "2024-10-08T08:20:35.190410Z", + "start_time": "2024-10-08T08:14:56.333112Z" } }, + "source": [ + "# Manual chunking\n", + "chunks = (100, 100, 100)\n", + "meas = measure_blosc2(chunks)\n", + "plot_meas(meas_np, meas, chunks)" + ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "cratio for LZ4 + SHUFFLE: 9.8x\n", - "cratio for ZSTD + SHUFFLE: 33.8x\n" + "cratio for LZ4 + SHUFFLE: 6.2x\n", + "cratio for ZSTD + SHUFFLE: 13.5x\n" ] }, { "data": { - "image/png": "", "text/plain": [ "
" - ] + ], + "image/png": "" }, "metadata": {}, "output_type": "display_data" } ], - "source": [ - "# Manual chunking\n", - "chunks = (100, 100, 100)\n", - "meas = measure_blosc2(chunks)\n", - "plot_meas(meas_np, meas, chunks)" - ] + "execution_count": 13 }, { "cell_type": "markdown", @@ -394,16 +394,16 @@ }, { "cell_type": "code", - "execution_count": null, "id": "4995f5eaeff0a11a", "metadata": { "ExecuteTime": { - "end_time": "2024-08-23T08:35:54.614095Z", - "start_time": "2024-08-23T08:35:54.612202Z" + "end_time": "2024-10-08T08:20:35.205614Z", + "start_time": "2024-10-08T08:20:35.197712Z" } }, + "source": [], "outputs": [], - "source": [] + "execution_count": 13 } ], "metadata": { diff --git a/doc/getting_started/tutorials/10.ucodecs-ufilters.ipynb b/doc/getting_started/tutorials/10.ucodecs-ufilters.ipynb index 07173afe..f886f6af 100644 --- a/doc/getting_started/tutorials/10.ucodecs-ufilters.ipynb +++ b/doc/getting_started/tutorials/10.ucodecs-ufilters.ipynb @@ -31,8 +31,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2023-06-21T08:11:27.952152Z", - "start_time": "2023-06-21T08:11:26.405204Z" + "end_time": "2024-10-08T08:11:35.927616Z", + "start_time": "2024-10-08T08:11:33.929462Z" }, "pycharm": { "name": "#%%\n" @@ -47,11 +47,8 @@ "import blosc2\n", "\n", "dtype = np.dtype(np.int32)\n", - "cparams = {\n", - " \"nthreads\": 1,\n", - " \"typesize\": dtype.itemsize,\n", - "}\n", - "dparams = {\"nthreads\": 1}\n", + "cparams = blosc2.CParams(nthreads=1, typesize=dtype.itemsize)\n", + "dparams = blosc2.DParams(nthreads=1)\n", "\n", "chunk_len = 10_000\n", "schunk = blosc2.SChunk(chunksize=chunk_len * dtype.itemsize, cparams=cparams)" @@ -75,8 +72,8 @@ "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2023-06-21T08:11:27.962250Z", - "start_time": "2023-06-21T08:11:27.956854Z" + "end_time": "2024-10-08T08:11:41.200886Z", + "start_time": "2024-10-08T08:11:41.188272Z" }, "pycharm": { "name": "#%%\n" @@ -117,8 +114,8 @@ 
"execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2023-06-21T08:11:27.970548Z", - "start_time": "2023-06-21T08:11:27.966942Z" + "end_time": "2024-10-08T08:11:43.417236Z", + "start_time": "2024-10-08T08:11:43.412230Z" }, "pycharm": { "name": "#%%\n" @@ -166,8 +163,8 @@ "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2023-06-21T08:11:28.003212Z", - "start_time": "2023-06-21T08:11:27.972522Z" + "end_time": "2024-10-08T08:11:51.481622Z", + "start_time": "2024-10-08T08:11:51.474227Z" }, "pycharm": { "name": "#%%\n" @@ -195,11 +192,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2023-06-21T08:11:28.004154Z", - "start_time": "2023-06-21T08:11:27.988511Z" + "end_time": "2024-10-08T08:16:59.500145Z", + "start_time": "2024-10-08T08:16:59.382269Z" }, "pycharm": { "name": "#%%\n" @@ -208,9 +205,25 @@ "outputs": [ { "data": { - "text/plain": "{'codec': 160,\n 'codec_meta': 0,\n 'clevel': 1,\n 'use_dict': 0,\n 'typesize': 4,\n 'nthreads': 1,\n 'blocksize': 0,\n 'splitmode': ,\n 'filters': [,\n ,\n ,\n ,\n ,\n ],\n 'filters_meta': [0, 0, 0, 0, 0, 0]}" + "text/plain": [ + "{'codec': 160,\n", + " 'codec_meta': 0,\n", + " 'clevel': 1,\n", + " 'use_dict': 0,\n", + " 'typesize': 4,\n", + " 'nthreads': 1,\n", + " 'blocksize': 0,\n", + " 'splitmode': ,\n", + " 'filters': [,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " 'filters_meta': [0, 0, 0, 0, 0, 0]}" + ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -233,9 +246,7 @@ "name": "#%% md\n" } }, - "source": [ - "Now we can check that our codec works well by appending and recovering some data:" - ] + "source": "Now we can check that our codec works well by appending and recovering some data:" }, { "cell_type": "code", diff --git a/examples/ndarray/bytedelta_filter.py b/examples/ndarray/bytedelta_filter.py index 34fcb601..17fa2848 100644 --- 
a/examples/ndarray/bytedelta_filter.py +++ b/examples/ndarray/bytedelta_filter.py @@ -21,14 +21,14 @@ nparray = np.linspace(0, 1000, math.prod(shape)).reshape(shape) # Compress with and without bytedelta -cparams = {"filters": [blosc2.Filter.SHUFFLE]} +cparams = blosc2.CParams(filters=[blosc2.Filter.SHUFFLE], filters_meta=[0]) a = blosc2.asarray(nparray, cparams=cparams) print( f"Compression ratio with shuffle: {a.schunk.cratio:.2f} x", ) # Now with bytedelta -cparams = {"filters": [blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA]} +cparams = blosc2.CParams(filters=[blosc2.Filter.SHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0, 0]) a = blosc2.asarray(nparray, cparams=cparams) print( f"Compression ratio with shuffle + bytedelta: {a.schunk.cratio:.2f} x", diff --git a/examples/ndarray/empty_.py b/examples/ndarray/empty_.py index 52dbeb81..ecc79185 100644 --- a/examples/ndarray/empty_.py +++ b/examples/ndarray/empty_.py @@ -10,13 +10,13 @@ import blosc2 -cparams = { - "codec": blosc2.Codec.LZ4, - "clevel": 5, - "nthreads": 4, - "filters": [blosc2.Filter.DELTA, blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - "filters_meta": [0, 3, 0], # keep just 3 bits in mantissa -} +cparams = blosc2.CParams( + codec=blosc2.Codec.LZ4, + clevel=5, + nthreads=4, + filters=[blosc2.Filter.DELTA, blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], + filters_meta=[0, 3, 0], # keep just 3 bits in mantissa +) a = blosc2.empty(shape=(40, 401), blocks=(6, 26), dtype="f8", cparams=cparams) a[...] 
= 222 diff --git a/examples/ndarray/ndmean.py b/examples/ndarray/ndmean.py index 55d8a1bb..0f486506 100644 --- a/examples/ndarray/ndmean.py +++ b/examples/ndarray/ndmean.py @@ -20,6 +20,6 @@ random = np.random.default_rng() array = random.normal(0, 1, np.prod(shape)).reshape(shape) # Use NDMEAN filter -cparams = {"filters": [blosc2.Filter.NDMEAN], "filters_meta": [4]} +cparams = blosc2.CParams(filters=[blosc2.Filter.NDMEAN], filters_meta=[4]) a = blosc2.asarray(array, chunks=chunks, cparams=cparams) print("compression ratio:", a.schunk.cratio) diff --git a/examples/ndarray/zfp_codec.py b/examples/ndarray/zfp_codec.py index a58780a3..2a7825eb 100644 --- a/examples/ndarray/zfp_codec.py +++ b/examples/ndarray/zfp_codec.py @@ -20,6 +20,6 @@ random = np.random.default_rng() array = random.normal(0, 1, np.prod(shape)).reshape(shape) # Use ZFP_RATE codec -cparams = {"codec": blosc2.Codec.ZFP_RATE, "codec_meta": 37} +cparams = blosc2.CParams(codec=blosc2.Codec.ZFP_RATE, codec_meta=37) a = blosc2.asarray(array, chunks=chunks, cparams=cparams) print("compression ratio:", a.schunk.cratio) diff --git a/examples/schunk_roundtrip.py b/examples/schunk_roundtrip.py index f2e3732e..e89d1d83 100644 --- a/examples/schunk_roundtrip.py +++ b/examples/schunk_roundtrip.py @@ -12,17 +12,19 @@ nchunks = 10 # Set the compression and decompression parameters -cparams = {"codec": blosc2.Codec.LZ4HC, "typesize": 4} -dparams = {} +cparams = blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4) +dparams = blosc2.DParams() contiguous = True urlpath = "filename" -storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} +storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) blosc2.remove_urlpath(urlpath) # Create the SChunk data = np.arange(200 * 1000 * nchunks) -schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, **storage) +schunk = blosc2.SChunk( + chunksize=200 * 1000 * 4, data=data, cparams=cparams, dparams=dparams, storage=storage +) 
cframe = schunk.to_cframe() diff --git a/examples/ucodecs.py b/examples/ucodecs.py index 6cf5a2f6..663ee2ca 100644 --- a/examples/ucodecs.py +++ b/examples/ucodecs.py @@ -51,14 +51,10 @@ def decoder1(input, output, meta, schunk): blosc2.register_codec(codec_name, id, encoder1, decoder1) # Set the compression and decompression parameters -cparams = { - "typesize": dtype.itemsize, - "nthreads": 1, - "filters": [blosc2.Filter.NOFILTER], - "filters_meta": [0], -} -dparams = {"nthreads": 1} -cparams["codec"] = id +cparams = blosc2.CParams( + typesize=dtype.itemsize, codec=id, nthreads=1, filters=[blosc2.Filter.NOFILTER], filters_meta=[0] +) +dparams = blosc2.DParams(nthreads=1) # Create SChunk and fill it with data data = np.arange(0, chunk_len * nchunks, 1, dtype=dtype) diff --git a/examples/ufilters.py b/examples/ufilters.py index 2b42fb0e..9285bcb4 100644 --- a/examples/ufilters.py +++ b/examples/ufilters.py @@ -37,13 +37,10 @@ def backward(input, output, meta, schunk): blosc2.register_filter(id, forward, backward) # Set the compression and decompression parameters -cparams = { - "typesize": dtype.itemsize, - "nthreads": 1, - "filters": [blosc2.Filter.NOFILTER, id], - "filters_meta": [0, 0], -} -dparams = {"nthreads": 1} +cparams = blosc2.CParams( + typesize=dtype.itemsize, nthreads=1, filters=[blosc2.Filter.NOFILTER, id], filters_meta=[0, 0] +) +dparams = blosc2.DParams(nthreads=1) # Create SChunk and fill it with data data = np.arange(0, chunk_len * nchunks, 1, dtype=dtype) From 7621087bed7d6485337a0bfe616e6b11c02db539 Mon Sep 17 00:00:00 2001 From: oumaima-ech-chdig Date: Tue, 8 Oct 2024 14:01:34 +0200 Subject: [PATCH 3/4] More updated examples of CParams, DParams and Storage --- src/blosc2/core.py | 3 +- src/blosc2/schunk.py | 92 ++++++++++++++++++++++++++++---------------- 2 files changed, 61 insertions(+), 34 deletions(-) diff --git a/src/blosc2/core.py b/src/blosc2/core.py index 1a4c4df3..e521f221 100644 --- a/src/blosc2/core.py +++ b/src/blosc2/core.py @@ 
-1557,7 +1557,8 @@ def schunk_from_cframe(cframe: bytes | str, copy: bool = False) -> blosc2.SChunk >>> nchunks = 4 >>> chunk_size = 200 * 1000 * 4 >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) >>> serialized_schunk = schunk.to_cframe() >>> print(f"Serialized SChunk length: {len(serialized_schunk)} bytes") Serialized SChunk length: 14129 bytes diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py index b77d68df..3d44c876 100644 --- a/src/blosc2/schunk.py +++ b/src/blosc2/schunk.py @@ -175,8 +175,14 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): Examples -------- >>> import blosc2 - >>> storage = {"contiguous": True, "cparams": {}, "dparams": {}} - >>> schunk = blosc2.SChunk(**storage) + >>> import numpy as np + >>> import os.path + >>> import shutil + >>> import tempfile + >>> cparams = blosc2.CParams() + >>> dparams = blosc2.DParams() + >>> storage = blosc2.Storage(contiguous=True) + >>> schunk = blosc2.SChunk(cparams=cparams, dparams=dparams, storage=storage) In the following, we will write and read a super-chunk to and from disk via memory-mapped files. @@ -184,7 +190,8 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): >>> a = np.arange(3, dtype=np.int64) >>> chunksize = a.size * a.itemsize >>> n_chunks = 2 - >>> urlpath = getfixture('tmp_path') / "schunk.b2frame" + >>> tmpdirname = tempfile.mkdtemp() + >>> urlpath = os.path.join(tmpdirname, 'schunk.b2frame') Optional: we intend to write 2 chunks of 24 bytes each, and we expect the compressed size to be smaller than the original size. 
Hence, we @@ -214,6 +221,7 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): [0, 1, 2] >>> np.frombuffer(schunk_mmap.decompress_chunk(1), dtype=np.int64).tolist() [0, 2, 4] + >>> shutil.rmtree(tmpdirname) """ # Check only allowed kwarg are passed allowed_kwargs = [ @@ -452,13 +460,15 @@ def fill_special( >>> # Measure the time to create SChunk from a NumPy array >>> t0 = time.time() >>> data = np.full(nitems, np.pi, dtype) - >>> schunk = blosc2.SChunk(data=data, cparams={"typesize": dtype.itemsize}) + >>> cparams = blosc2.CParams(typesize=dtype.itemsize) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) >>> t = (time.time() - t0) * 1000. >>> f"Time creating a schunk with a numpy array: {t:10.3f} ms" Time creating a schunk with a numpy array: 710.273 ms >>> # Measure the time to create SChunk using fill_special >>> t0 = time.time() - >>> schunk = blosc2.SChunk(cparams={"typesize": dtype.itemsize}) + >>> cparams = blosc2.CParams(typesize=dtype.itemsize) + >>> schunk = blosc2.SChunk(cparams=cparams) >>> schunk.fill_special(nitems, blosc2.SpecialValue.VALUE, np.pi) >>> t = (time.time() - t0) * 1000. 
>>> f"Time passing directly the value to `fill_special`: {t:10.3f} ms" @@ -503,7 +513,8 @@ def decompress_chunk(self, nchunk: int, dst: object = None) -> str | bytes: Examples -------- >>> import blosc2 - >>> schunk = blosc2.SChunk(cparams={'typesize': 1}) + >>> cparams = blosc2.CParams(typesize=1) + >>> schunk = blosc2.SChunk(cparams=cparams) >>> buffer = b"wermqeoir23" >>> schunk.append_data(buffer) 1 @@ -542,7 +553,8 @@ def get_chunk(self, nchunk: int) -> bytes: >>> # Create an SChunk with 3 chunks >>> nchunks = 3 >>> data = np.arange(200 * 1000 * nchunks, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) >>> # Retrieve the first chunk (index 0) >>> chunk = schunk.get_chunk(0) >>> # Check the type and length of the compressed chunk @@ -578,7 +590,8 @@ def delete_chunk(self, nchunk: int) -> int: >>> # Create an SChunk with 3 chunks >>> nchunks = 3 >>> data = np.arange(200 * 1000 * nchunks, dtype=np.int32) - >>> schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, cparams=cparams) >>> # Check the number of chunks before deletion >>> schunk.nchunks 3 @@ -617,7 +630,8 @@ def insert_chunk(self, nchunk: int, chunk: bytes) -> int: >>> import numpy as np >>> # Create an SChunk with 2 chunks >>> data = np.arange(400 * 1000, dtype=np.int32) - >>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams=cparams) >>> # Get a compressed chunk from the SChunk >>> chunk = schunk.get_chunk(0) >>> # Insert a chunk in the second position (index 1)" @@ -657,7 +671,8 @@ def insert_data(self, nchunk: int, data: object, copy: bool) -> int: >>> import numpy as np >>> # Create an 
SChunk with 2 chunks >>> data = np.arange(400 * 1000, dtype=np.int32) - >>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=200*1000*4, data=data, cparams=cparams) >>> # Create a new array to insert into the second chunk of the SChunk >>> new_data = np.arange(200 * 1000, dtype=np.int32) >>> # Insert the new data at position 1, compressing it @@ -696,7 +711,8 @@ def update_chunk(self, nchunk: int, chunk: bytes) -> int: >>> nchunks = 5 >>> chunk_size = 200 * 1000 * 4 >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) >>> f"Initial number of chunks: {schunk.nchunks}" Initial number of chunks: 5 >>> c_index = 1 @@ -739,7 +755,8 @@ def update_data(self, nchunk: int, data: object, copy: bool) -> int: >>> nchunks = 4 >>> chunk_size = 200 * 1000 * 4 >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) >>> f"Initial number of chunks: {schunk.nchunks}" Initial number of chunks: 4 >>> c_index = 1 # Update the 2nd chunk (index 1) @@ -796,7 +813,8 @@ def get_slice(self, start: int = 0, stop: int = None, out: object = None) -> str >>> nchunks = 4 >>> chunk_size = 200 * 1000 * 4 >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) >>> # Define the slice parameters >>> start_index = 200 * 1000 >>> stop_index = 2 * 200 * 1000 @@ -851,7 +869,8 @@ def 
__getitem__(self, item: int | slice) -> str | bytes: >>> nchunks = 4 >>> chunk_size = 200 * 1000 * 4 >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(chunksize=chunk_size, data=data, cparams=cparams) >>> # Use __getitem__ to retrieve the same slice of data from the SChunk >>> res = schunk[150:155] >>> f"Slice data: {np.frombuffer(res, dtype=np.int32)}" @@ -905,7 +924,8 @@ def __setitem__(self, key: int | slice, value: object) -> None: >>> nchunks = 4 >>> chunk_size = 200 * 1000 * 4 >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) >>> # Create a new array of values to update the slice (values from 1000 to 1999 multiplied by 2) >>> start_ = 1000 >>> stop = 2000 @@ -942,7 +962,8 @@ def to_cframe(self) -> bytes: >>> nchunks = 4 >>> chunk_size = 200 * 1000 * 4 >>> data = np.arange(nchunks * chunk_size // 4, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) >>> # Serialize the SChunk instance to a bytes object >>> serialized_schunk = schunk.to_cframe() >>> f"Serialized SChunk length: {len(serialized_schunk)} bytes" @@ -981,7 +1002,8 @@ def iterchunks(self, dtype: np.dtype) -> Iterator[np.ndarray]: >>> import numpy as np >>> # Create sample data and an SChunk >>> data = np.arange(400 * 1000, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) >>> # Iterate over chunks using the iterchunks method >>> for chunk in schunk.iterchunks(dtype=np.int32): >>> f"Chunk shape: 
{chunk.shape} " @@ -1031,7 +1053,8 @@ def iterchunks_info( >>> import numpy as np >>> # Create sample data and an SChunk >>> data = np.arange(400 * 1000, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, cparams={"typesize": 4}) + >>> cparams = blosc2.CParams(typesize=4) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams) >>> # Iterate over chunks and print detailed information >>> for chunk_info in schunk.iterchunks_info(): >>> f"Chunk index: {chunk_info.nchunk}" @@ -1099,10 +1122,9 @@ def postfilter(self, input_dtype: np.dtype, output_dtype: np.dtype = None) -> No # Create SChunk input_dtype = np.dtype(np.int64) - cparams = {"typesize": input_dtype.itemsize} - dparams = {"nthreads": 1} - storage = {"cparams": cparams, "dparams": dparams} - schunk = blosc2.SChunk(chunksize=20_000 * input_dtype.itemsize, **storage) + cparams = blosc2.CParams(typesize=input_dtype.itemsize) + dparams = blosc2.DParams(nthreads=1) + schunk = blosc2.SChunk(chunksize=20_000 * input_dtype.itemsize, cparams=cparams, dparams=dparams) # Create postfilter and associate it to the schunk @schunk.postfilter(input_dtype) @@ -1137,9 +1159,10 @@ def remove_postfilter(self, func_name: str, _new_ctx: bool = True) -> None: >>> import blosc2 >>> import numpy as np >>> dtype = np.dtype(np.int32) - >>> storage = {"cparams": {"typesize": dtype.itemsize}, "dparams": {"nthreads": 1}} + >>> cparams = blosc2.CParams(typesize=dtype.itemsize) + >>> dparams = blosc2.DParams(nthreads=1) >>> data = np.arange(500, dtype=np.int32) - >>> schunk = blosc2.SChunk(data=data, **storage) + >>> schunk = blosc2.SChunk(data=data, cparams=cparams, dparams=dparams) >>> # Define the postfilter function >>> @schunk.postfilter(dtype) >>> def postfilter(input, output, offset): @@ -1197,10 +1220,9 @@ def filler(self, inputs_tuple: tuple[tuple], schunk_dtype: np.dtype, nelem: int # Set the compression and decompression parameters schunk_dtype = np.dtype(np.float64) - cparams = {"typesize": schunk_dtype.itemsize, 
"nthreads": 1} - storage = {"cparams": cparams} + cparams = blosc2.CParams(typesize=schunk_dtype.itemsize, nthreads=1) # Create empty SChunk - schunk = blosc2.SChunk(chunksize=20_000 * schunk_dtype.itemsize, **storage) + schunk = blosc2.SChunk(chunksize=20_000 * schunk_dtype.itemsize, cparams=cparams) # Create operands op_dtype = np.dtype(np.int32) @@ -1278,7 +1300,7 @@ def prefilter(self, input_dtype: np.dtype, output_dtype: np.dtype = None) -> Non # Set the compression and decompression parameters input_dtype = np.dtype(np.int32) output_dtype = np.dtype(np.float32) - cparams = {"typesize": output_dtype.itemsize, "nthreads": 1} + cparams = blosc2.CParams(typesize=output_dtype.itemsize, nthreads=1) # Create schunk schunk = blosc2.SChunk(chunksize=200 * 1000 * input_dtype.itemsize, cparams=cparams) @@ -1316,7 +1338,7 @@ def remove_prefilter(self, func_name: str, _new_ctx: bool = True) -> None: >>> import blosc2 >>> import numpy as np >>> dtype = np.dtype(np.int32) - >>> cparams = {"typesize": dtype.itemsize, "nthreads": 1} + >>> cparams = blosc2.CParams(typesize=dtype.itemsize, nthreads=1) >>> data = np.arange(1000, dtype=np.int32) >>> output_dtype = np.float32 >>> schunk = blosc2.SChunk(cparams=cparams) @@ -1401,15 +1423,19 @@ def open( -------- >>> import blosc2 >>> import numpy as np - >>> storage = {"contiguous": True, "urlpath": getfixture('tmp_path') / "b2frame", "mode": "w"} + >>> import os + >>> import tempfile + >>> tmpdirname = tempfile.mkdtemp() + >>> urlpath = os.path.join(tmpdirname, 'b2frame') + >>> storage = blosc2.Storage(contiguous=True, urlpath=urlpath, mode="w") >>> nelem = 20 * 1000 >>> nchunks = 5 >>> chunksize = nelem * 4 // nchunks >>> data = np.arange(nelem, dtype="int32") >>> # Create SChunk and append data - >>> schunk = blosc2.SChunk(chunksize=chunksize, data=data.tobytes(), **storage) + >>> schunk = blosc2.SChunk(chunksize=chunksize, data=data.tobytes(), storage=storage) >>> # Open SChunk - >>> sc_open = 
blosc2.open(urlpath=storage["urlpath"]) + >>> sc_open = blosc2.open(urlpath=urlpath) >>> for i in range(nchunks): ... dest = np.empty(nelem // nchunks, dtype=data.dtype) ... schunk.decompress_chunk(i, dest) @@ -1424,7 +1450,7 @@ def open( To open the same schunk memory-mapped, we simply need to pass the `mmap_mode` parameter: - >>> sc_open_mmap = blosc2.open(urlpath=storage["urlpath"], mmap_mode="r") + >>> sc_open_mmap = blosc2.open(urlpath=urlpath, mmap_mode="r") >>> sc_open.nchunks == sc_open_mmap.nchunks True >>> all(sc_open.decompress_chunk(i, dest1) == sc_open_mmap.decompress_chunk(i, dest1) for i in range(nchunks)) From c71f8c6b60accaf52a9a2969af6f44a0098a9bcd Mon Sep 17 00:00:00 2001 From: oumaima-ech-chdig Date: Thu, 10 Oct 2024 13:45:15 +0200 Subject: [PATCH 4/4] All the examples are up to date --- bench/get_slice.py | 8 ++++---- bench/ndarray/compare_getslice.py | 16 ++++++++-------- bench/ndarray/copy_postfilter.py | 3 ++- bench/ndarray/transcode_data.py | 6 +++--- bench/set_slice.py | 8 ++++---- bench/sum_postfilter.py | 9 ++++----- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/bench/get_slice.py b/bench/get_slice.py index 2c483014..db1f2485 100644 --- a/bench/get_slice.py +++ b/bench/get_slice.py @@ -22,8 +22,8 @@ nchunks = shape // chunksize # Set the compression and decompression parameters -cparams = {"codec": blosc2.Codec.BLOSCLZ, "typesize": 8, "blocksize": blocksize * 8} -dparams = {} +cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, typesize=8, blocksize=blocksize * 8) +dparams = blosc2.DParams() contiguous = True persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False @@ -32,11 +32,11 @@ else: urlpath = None -storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} +storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) blosc2.remove_urlpath(urlpath) # Create the empty SChunk -schunk = blosc2.SChunk(chunksize=chunksize * cparams["typesize"], **storage) +schunk = 
blosc2.SChunk(chunksize=chunksize * cparams.typesize, storage=storage, cparams=cparams, dparams=dparams) # Append some chunks for i in range(nchunks): diff --git a/bench/ndarray/compare_getslice.py b/bench/ndarray/compare_getslice.py index 7438e035..e3d09fee 100644 --- a/bench/ndarray/compare_getslice.py +++ b/bench/ndarray/compare_getslice.py @@ -62,14 +62,14 @@ cname = "zstd" nthreads = 8 filter = blosc2.Filter.SHUFFLE -cparams = { - "codec": blosc2.Codec.ZSTD, - "clevel": clevel, - "filters": [filter], - "filters_meta": [0], - "nthreads": nthreads, -} -dparams = {"nthreads": nthreads} +cparams = blosc2.CParams( + codec=blosc2.Codec.ZSTD, + clevel=clevel, + filters=[filter], + filters_meta=[0], + nthreads=nthreads, +) +dparams = blosc2.DParams(nthreads=nthreads) zfilter = numcodecs.Blosc.SHUFFLE blocksize = int(np.prod(blocks)) if blocks else 0 diff --git a/bench/ndarray/copy_postfilter.py b/bench/ndarray/copy_postfilter.py index fb30aeb6..236291ec 100644 --- a/bench/ndarray/copy_postfilter.py +++ b/bench/ndarray/copy_postfilter.py @@ -18,11 +18,12 @@ dtype = np.dtype(np.int32) # Set the compression and decompression parameters -dparams = {"nthreads": 1} +dparams = {"nthreads" : 1} # Create array arr = blosc2.empty(shape=(nchunks * chunkshape,), chunks=(chunkshape,), dtype=dtype, dparams=dparams) data = np.arange(chunkshape, dtype=dtype) + t0 = time() for i in range(nchunks): arr[i * chunkshape : (i + 1) * chunkshape] = data diff --git a/bench/ndarray/transcode_data.py b/bench/ndarray/transcode_data.py index 3a02a79d..f4c62bd8 100644 --- a/bench/ndarray/transcode_data.py +++ b/bench/ndarray/transcode_data.py @@ -69,9 +69,9 @@ }, } -dparams = { - "nthreads": nthreads_decomp, -} +dparams = blosc2.DParams( + nthreads=nthreads_decomp, +) dir_path = Path(dir_path) if not dir_path.is_dir(): diff --git a/bench/set_slice.py b/bench/set_slice.py index 95519cc8..65b3ad7f 100644 --- a/bench/set_slice.py +++ b/bench/set_slice.py @@ -22,8 +22,8 @@ nchunks = shape // chunksize 
# Set the compression and decompression parameters -cparams = {"codec": blosc2.Codec.BLOSCLZ, "typesize": 8, "blocksize": blocksize * 8} -dparams = {} +cparams = blosc2.CParams(codec=blosc2.Codec.BLOSCLZ, typesize=8, blocksize=blocksize * 8) +dparams = blosc2.DParams() contiguous = True persistent = bool(sys.argv[1]) if len(sys.argv) > 1 else False @@ -32,11 +32,11 @@ else: urlpath = None -storage = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams, "dparams": dparams} +storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) blosc2.remove_urlpath(urlpath) # Create the empty SChunk -schunk = blosc2.SChunk(chunksize=chunksize * cparams["typesize"], **storage) +schunk = blosc2.SChunk(chunksize=chunksize * cparams.typesize, storage=storage, cparams=cparams, dparams=dparams) # Append some chunks for i in range(nchunks): diff --git a/bench/sum_postfilter.py b/bench/sum_postfilter.py index 8a892c08..afca4397 100644 --- a/bench/sum_postfilter.py +++ b/bench/sum_postfilter.py @@ -19,13 +19,12 @@ chunksize = chunkshape * dtype.itemsize # Set the compression and decompression parameters -cparams = {"typesize": 4, "nthreads": 1} -dparams = {"nthreads": 1} -storage = {"cparams": cparams, "dparams": dparams} +cparams = blosc2.CParams(typesize=4, nthreads=1) +dparams = blosc2.DParams(nthreads=1) # Create super-chunks -schunk0 = blosc2.SChunk(chunksize=chunksize, **storage) -schunk = blosc2.SChunk(chunksize=chunksize, **storage) +schunk0 = blosc2.SChunk(chunksize=chunksize, cparams=cparams, dparams=dparams) +schunk = blosc2.SChunk(chunksize=chunksize, cparams=cparams, dparams=dparams) data = np.arange(chunkshape, dtype=dtype) t0 = time()