Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LazyArray examples #257

Merged
merged 5 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions src/blosc2/lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,27 @@ def eval(self, item, **kwargs):
* If self is a LazyArray from a UDF, the kwargs used to store the resulting
array will be the ones passed to the constructor in :func:`lazyudf` (except the
`urlpath`) updated with the kwargs passed when calling this method.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future, you can change these kinds of lines to `size = np.prod(shape)`.

>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> # Convert numpy arrays to Blosc2 arrays
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Perform the mathematical operation
>>> expr = a1 + b1
>>> output = expr.eval()
>>> f"Result of a + b (lazy evaluation): {output[:]}"
Result of a + b (lazy evaluation):
[[ 0. 1.25 2.5 ]
[ 3.75 5. 6.25]
[ 7.5 8.75 10. ]]
"""
pass

Expand All @@ -103,6 +124,26 @@ def __getitem__(self, item):
-------
out: np.ndarray
An array with the data containing the slice evaluated.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [30, 4]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> # Convert numpy arrays to Blosc2 arrays
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Perform the mathematical operation
>>> expr = a1 + b1 # LazyExpr expression
>>> expr[3]
[2.01680672 2.18487395 2.35294118 2.5210084 ]
>>> expr[2:4]
[[1.34453782 1.51260504 1.68067227 1.8487395 ]
[2.01680672 2.18487395 2.35294118 2.5210084 ]]
"""
pass

Expand All @@ -128,6 +169,28 @@ def save(self, **kwargs):
if its source is a :ref:`SChunk`, :ref:`NDArray` or a :ref:`C2Array` (see :func:`blosc2.open` notes
section for more info).
* This is currently only supported for :ref:`LazyExpr`.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> # Define file paths for storing the arrays
>>> a1 = blosc2.asarray(a, urlpath='a_array.b2nd', mode='w')
>>> b1 = blosc2.asarray(b, urlpath='b_array.b2nd', mode='w')
>>> # Perform the mathematical operation to create a LazyExpr expression
>>> expr = a1 + b1
>>> # Save the LazyExpr to disk
>>> expr.save(urlpath='lazy_array.b2nd', mode='w')
>>> # Open and load the LazyExpr from disk
>>> disk_expr = blosc2.open('lazy_array.b2nd')
>>> disk_expr[:2]
[[0. 1.25 2.5 ]
[3.75 5. 6.25]]
"""
pass

Expand Down Expand Up @@ -1904,6 +1967,30 @@ def lazyudf(func, inputs, dtype, chunked_eval=True, **kwargs):
out: :ref:`LazyUDF`
A :ref:`LazyUDF` is returned.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(10, 20, num=size, dtype=dtype).reshape(shape)
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Define a user-defined function that will be applied to each block of data
>>> def my_function(inputs_tuple, output, offset):
...     a, b = inputs_tuple
...     output[:] = a + b
>>> # Create a LazyUDF object using the user-defined function
>>> lazy_udf = blosc2.lazyudf(my_function, [a1, b1], dtype)
>>> type(lazy_udf)
<class 'blosc2.lazyexpr.LazyUDF'>
>>> f"Result of LazyUDF evaluation: {lazy_udf[:]}"
Result of LazyUDF evaluation:
[[10. 12.5 15. ]
[17.5 20. 22.5]
[25. 27.5 30. ]]
"""
return LazyUDF(func, inputs, dtype, chunked_eval, **kwargs)

Expand Down Expand Up @@ -1933,6 +2020,30 @@ def lazyexpr(expression, operands=None, out=None, where=None):
out: :ref:`LazyExpr`
A :ref:`LazyExpr` is returned.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> a1 = blosc2.asarray(a)
>>> a1[:]
[[0. 0.625 1.25 ]
[1.875 2.5 3.125]
[3.75 4.375 5. ]]
>>> b1 = blosc2.asarray(b)
>>> expr = 'a1 * b1 + 2'
>>> operands = {'a1': a1, 'b1': b1}
>>> lazy_expr = blosc2.lazyexpr(expr, operands=operands)
>>> f"Lazy expression created: {lazy_expr}"
Lazy expression created: a1 * b1 + 2
>>> lazy_expr[:]
[[ 2. 2.390625 3.5625 ]
[ 5.515625 8.25 11.765625]
[16.0625 21.140625 27. ]]
"""
if isinstance(expression, LazyExpr):
if operands is not None:
Expand Down
39 changes: 39 additions & 0 deletions src/blosc2/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,25 @@ def fetch(self, item=None):
-------
out: :ref:`NDArray` or :ref:`SChunk`
The local container used to cache the already requested data.

Examples
--------
>>> import numpy as np
>>> import blosc2
>>> data = np.arange(20).reshape(10, 2)
>>> ndarray = blosc2.asarray(data)
>>> proxy = blosc2.Proxy(ndarray)
>>> full_data = proxy.fetch()
>>> f"Full data cache: {full_data[:]}"
Full data cache:
[[ 0 1]
[ 2 3]
[ 4 5]
[ 6 7]
[ 8 9]
[10 11]
[12 13]
[14 15]
[16 17]
[18 19]]
>>> slice_data = proxy[0:2, :]
>>> f"Slice data cache: {slice_data}"
Slice data cache:
[[0 1]
[2 3]]
"""
if item is None:
# Full realization
Expand Down Expand Up @@ -198,6 +217,26 @@ def __getitem__(self, item):
-------
out: numpy.ndarray
An array with the data slice.

Examples
--------
>>> import numpy as np
>>> import blosc2
>>> data = np.arange(100).reshape(10, 10)
>>> ndarray = blosc2.asarray(data)
>>> proxy = blosc2.Proxy(ndarray)
>>> slice_1 = proxy[0:3, 0:3]
>>> f"Slice 1: {slice_1}"
Slice 1:
[[ 0 1 2]
[10 11 12]
[20 21 22]]
>>> slice_2 = proxy[5:8, 2:5]
>>> f"Slice 2: {slice_2}"
Slice 2:
[[52 53 54]
[62 63 64]
[72 73 74]]
"""
# Populate the cache
self.fetch(item)
Expand Down
Loading