Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LazyArray examples #257

Merged
merged 5 commits into from
Sep 17, 2024
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions src/blosc2/lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,27 @@ def eval(self, item, **kwargs):
* If self is a LazyArray from an udf, the kwargs used to store the resulting
array will be the ones passed to the constructor in :func:`lazyudf` (except the
`urlpath`) updated with the kwargs passed when calling this method.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future, you can replace lines like this one with `size = np.prod(shape)`.

>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> # Convert numpy arrays to Blosc2 arrays
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Perform the mathematical operation
>>> expr = a1 + b1
>>> output = expr.eval()
>>> f"Result of a + b (lazy evaluation): {output[:]}"
Result of a + b (lazy evaluation):
[[ 0. 1.25 2.5 ]
[ 3.75 5. 6.25]
[ 7.5 8.75 10. ]]
"""
pass

Expand All @@ -103,6 +124,26 @@ def __getitem__(self, item):
-------
out: np.ndarray
An array with the data containing the slice evaluated.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [30, 4]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> # Convert numpy arrays to Blosc2 arrays
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Perform the mathematical operation
>>> expr = a1 + b1 # LazyExpr expression
>>> expr[3]
[2.01680672 2.18487395 2.35294118 2.5210084 ]
>>> expr[2:4]
[[1.34453782 1.51260504 1.68067227 1.8487395 ]
[2.01680672 2.18487395 2.35294118 2.5210084 ]]
"""
pass

Expand All @@ -128,6 +169,28 @@ def save(self, **kwargs):
if its source is a :ref:`SChunk`, :ref:`NDArray` or a :ref:`C2Array` (see :func:`blosc2.open` notes
section for more info).
* This is currently only supported for :ref:`LazyExpr`.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> # Define file paths for storing the arrays
>>> a1 = blosc2.asarray(a, urlpath='a_array.b2nd', mode='w')
>>> b1 = blosc2.asarray(b, urlpath='b_array.b2nd', mode='w')
>>> # Perform the mathematical operation to create a LazyExpr expression
>>> expr = a1 + b1
>>> # Save the LazyExpr to disk
>>> expr.save(urlpath='lazy_array.b2nd', mode='w')
>>> # Open and load the LazyExpr from disk
>>> disk_expr = blosc2.open('lazy_array.b2nd')
>>> disk_expr[:2]
[[0. 1.25 2.5 ]
[3.75 5. 6.25]]
"""
pass

Expand Down Expand Up @@ -1904,6 +1967,30 @@ def lazyudf(func, inputs, dtype, chunked_eval=True, **kwargs):
out: :ref:`LazyUDF`
A :ref:`LazyUDF` is returned.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 10, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(10, 20, num=size, dtype=dtype).reshape(shape)
>>> a1 = blosc2.asarray(a)
>>> b1 = blosc2.asarray(b)
>>> # Define a user-defined function that will be applied to each block of data
>>> def my_function(inputs_tuple, output, offset):
...     a, b = inputs_tuple
...     output[:] = a + b
>>> # Create a LazyUDF object using the user-defined function
>>> lazy_udf = blosc2.lazyudf(my_function, [a1, b1], dtype)
>>> type(lazy_udf)
<class 'blosc2.lazyexpr.LazyUDF'>
>>> f"Result of LazyUDF evaluation: {lazy_udf[:]}"
Result of LazyUDF evaluation:
[[10. 12.5 15. ]
[17.5 20. 22.5]
[25. 27.5 30. ]]
"""
return LazyUDF(func, inputs, dtype, chunked_eval, **kwargs)

Expand Down Expand Up @@ -1933,6 +2020,31 @@ def lazyexpr(expression, operands=None, out=None, where=None):
out: :ref:`LazyExpr`
A :ref:`LazyExpr` is returned.

Examples
--------
>>> import blosc2
>>> import numpy as np
>>> dtype = np.float64
>>> shape = [3, 3]
>>> size = shape[0] * shape[1]
>>> a = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> b = np.linspace(0, 5, num=size, dtype=dtype).reshape(shape)
>>> a1 = blosc2.asarray(a)
>>> a1[:]
[[0. 0.625 1.25 ]
[1.875 2.5 3.125]
[3.75 4.375 5. ]]
>>> b1 = blosc2.asarray(b)
>>> expr = 'a1 * b1 + 2'
>>> operands = { 'a1': a1, 'b1': b1 }
>>> lazy_expr = blosc2.lazyexpr(expr, operands=operands)
>>> f"Lazy expression created: {lazy_expr}"
Lazy expression created: a1 * b1 + 2
>>> expr_ = a1 * b1 + 2
>>> expr_[:]
[[ 2. 2.390625 3.5625 ]
[ 5.515625 8.25 11.765625]
[16.0625 21.140625 27. ]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be better to show the result of evaluating `lazy_expr` (the object created with `blosc2.lazyexpr`) instead of building the same expression a second time (`expr_`) and showing that.

"""
if isinstance(expression, LazyExpr):
if operands is not None:
Expand Down
Loading