Skip to content

Commit

Permalink
feat: expose simplification from ak.from_buffers (#2713)
Browse files Browse the repository at this point in the history
* feat: expose simplification from `ak.from_buffers`

* chore: rename test
  • Loading branch information
agoose77 authored Sep 18, 2023
1 parent 48bb0a1 commit 9ffbe0c
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/awkward/highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2515,16 +2515,16 @@ def snapshot(self):
form = ak.forms.from_json(formstr)

with ak._errors.OperationErrorContext("ak.ArrayBuilder.snapshot", [], {}):
return ak.operations.ak_from_buffers._impl(
return ak.operations.from_buffers(
form,
length,
container,
buffer_key="{form_key}-{attribute}",
backend="cpu",
byteorder=ak._util.native_byteorder,
allow_noncanonical_form=True,
highlevel=True,
behavior=self._behavior,
simplify=True,
)

def null(self):
Expand Down
19 changes: 18 additions & 1 deletion src/awkward/operations/ak_from_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def from_buffers(
*,
backend="cpu",
byteorder="<",
allow_noncanonical_form=False,
highlevel=True,
behavior=None,
):
Expand All @@ -49,6 +50,9 @@ def from_buffers(
byteorder (`"<"`, `">"`): Endianness of buffers read from `container`.
If the byteorder does not match the current system byteorder, the
arrays will be copied.
allow_noncanonical_form (bool): If True, non-canonical forms will be
simplified to produce arrays with canonical layouts; otherwise,
an exception will be thrown for such forms.
highlevel (bool): If True, return an #ak.Array; otherwise, return
a low-level #ak.contents.Content subclass.
behavior (None or dict): Custom #ak.behavior for the output array, if
Expand All @@ -73,6 +77,19 @@ def from_buffers(
The `buffer_key` should be the same as the one used in #ak.to_buffers.
When `allow_noncanonical_form` is set to True, this function readily accepts
non-simplified forms, i.e. forms which will be simplified by Awkward Array
into "canonical" representations, e.g. `option[option[...]]` → `option[...]`.
Such forms can be produced by the low-level ArrayBuilder `snapshot()` method.
Given that Awkward Arrays must have canonical layouts, it follows that
invoking this function with `allow_noncanonical_form=True` may produce arrays
whose forms differ from the input form.
In order for a non-simplified form to be considered valid, it should be one
that the #ak.contents.Content layout classes could produce if the
simplification rules were removed.
See #ak.to_buffers for examples.
"""
return _impl(
Expand All @@ -84,7 +101,7 @@ def from_buffers(
byteorder,
highlevel,
behavior,
False,
allow_noncanonical_form,
)


Expand Down
126 changes: 126 additions & 0 deletions tests/test_2713_from_buffers_allow_noncanonical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

import numpy as np
import pytest # noqa: F401

import awkward as ak


def test_union_simplification():
    """Round-trip a projected union form through `ak.from_buffers`.

    A union of two record arrays (`{x}` and `{x, y}`) is exported with
    `ak.to_buffers`. The form is then rewritten by hand so that both
    branches only carry the `x` field, the buffer belonging to `y` is
    dropped from the container, and the array is rebuilt with
    `allow_noncanonical_form=True`. The rebuilt layout is expected to be
    an IndexedArray over a single record, and to equal `array[["x"]]`.
    """

    def numpy_form(primitive, form_key):
        # Form dictionary of a one-dimensional NumpyArray node.
        return {
            "class": "NumpyArray",
            "primitive": primitive,
            "inner_shape": [],
            "parameters": {},
            "form_key": form_key,
        }

    def record_form(fields, contents, form_key):
        # Form dictionary of a RecordArray node without parameters.
        return {
            "class": "RecordArray",
            "fields": fields,
            "contents": contents,
            "parameters": {},
            "form_key": form_key,
        }

    def union_form(contents):
        # Form dictionary of the top-level UnionArray node ("node0").
        return {
            "class": "UnionArray",
            "tags": "i8",
            "index": "i64",
            "contents": contents,
            "parameters": {},
            "form_key": "node0",
        }

    # Tags alternate 0, 1, 0, 1, ...; each index value is used twice.
    tags = ak.index.Index8(np.arange(64, dtype=np.int8) % 2)
    index = ak.index.Index64(np.arange(64, dtype=np.int64) // 2)
    record_x = ak.contents.RecordArray(
        [ak.contents.NumpyArray(np.arange(64, dtype=np.int64))], ["x"]
    )
    record_xy = ak.contents.RecordArray(
        [
            ak.contents.NumpyArray(np.arange(64, dtype=np.int64)),
            ak.contents.NumpyArray(np.arange(64, dtype=np.int8)),
        ],
        ["x", "y"],
    )
    array = ak.Array(ak.contents.UnionArray(tags, index, [record_x, record_xy]))

    form, length, container = ak.to_buffers(array)

    # The exported form mirrors the layout built above, node for node.
    expected_form = union_form(
        [
            record_form(["x"], [numpy_form("int64", "node2")], "node1"),
            record_form(
                ["x", "y"],
                [numpy_form("int64", "node4"), numpy_form("int8", "node5")],
                "node3",
            ),
        ]
    )
    assert form.to_dict() == expected_form

    # Same form with the "y" field (node5) removed from the second record.
    projected_form = union_form(
        [
            record_form(["x"], [numpy_form("int64", "node2")], "node1"),
            record_form(["x"], [numpy_form("int64", "node4")], "node3"),
        ]
    )
    # The projected form no longer references node5, so drop its buffer too.
    container.pop("node5-data")

    projected = ak.from_buffers(
        projected_form, length, container, allow_noncanonical_form=True
    )

    simplified_form = {
        "class": "IndexedArray",
        "index": "i64",
        "content": {"class": "RecordArray", "fields": ["x"], "contents": ["int64"]},
    }
    assert projected.layout.form.to_dict(verbose=False) == simplified_form
    assert ak.almost_equal(array[["x"]], projected)

0 comments on commit 9ffbe0c

Please sign in to comment.