From 9ffbe0c00d6fa75075f9b7815a8e50e92872f40b Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Mon, 18 Sep 2023 21:15:05 +0200 Subject: [PATCH] feat: expose simplification from `ak.from_buffers` (#2713) * feat: expose simplification from `ak.from_buffers` * chore: rename test --- src/awkward/highlevel.py | 4 +- src/awkward/operations/ak_from_buffers.py | 19 ++- ...st_2713_from_buffers_allow_noncanonical.py | 126 ++++++++++++++++++ 3 files changed, 146 insertions(+), 3 deletions(-) create mode 100644 tests/test_2713_from_buffers_allow_noncanonical.py diff --git a/src/awkward/highlevel.py b/src/awkward/highlevel.py index 7834e00c3d..4cde8e2b17 100644 --- a/src/awkward/highlevel.py +++ b/src/awkward/highlevel.py @@ -2515,16 +2515,16 @@ def snapshot(self): form = ak.forms.from_json(formstr) with ak._errors.OperationErrorContext("ak.ArrayBuilder.snapshot", [], {}): - return ak.operations.ak_from_buffers._impl( + return ak.operations.from_buffers( form, length, container, buffer_key="{form_key}-{attribute}", backend="cpu", byteorder=ak._util.native_byteorder, + allow_noncanonical_form=True, highlevel=True, behavior=self._behavior, - simplify=True, ) def null(self): diff --git a/src/awkward/operations/ak_from_buffers.py b/src/awkward/operations/ak_from_buffers.py index 3711e844e4..730b4127d3 100644 --- a/src/awkward/operations/ak_from_buffers.py +++ b/src/awkward/operations/ak_from_buffers.py @@ -26,6 +26,7 @@ def from_buffers( *, backend="cpu", byteorder="<", + allow_noncanonical_form=False, highlevel=True, behavior=None, ): @@ -49,6 +50,9 @@ def from_buffers( byteorder (`"<"`, `">"`): Endianness of buffers read from `container`. If the byteorder does not match the current system byteorder, the arrays will be copied. + allow_noncanonical_form (bool): If True, non-canonical forms will be + simplified to produce arrays with canonical layouts; otherwise, + an exception will be thrown for such forms. 
highlevel (bool): If True, return an #ak.Array; otherwise, return a low-level #ak.contents.Content subclass. behavior (None or dict): Custom #ak.behavior for the output array, if @@ -73,6 +77,19 @@ def from_buffers( The `buffer_key` should be the same as the one used in #ak.to_buffers. + When `allow_noncanonical_form` is set to True, this function readily accepts + non-simplified forms, i.e. forms which will be simplified by Awkward Array + into "canonical" representations, e.g. `option[option[...]]` → `option[...]`. + Such forms can be produced by the low-level ArrayBuilder `snapshot()` method. + Given that Awkward Arrays must have canonical layouts, it follows that + invoking this function with `allow_noncanonical_form` may produce arrays + whose forms differ from the input form. + + In order for a non-simplified form to be considered valid, it should be one + that the #ak.contents.Content layout classes could produce if the + simplification rules were removed. + + See #ak.to_buffers for examples. 
""" return _impl( @@ -84,7 +101,7 @@ def from_buffers( byteorder, highlevel, behavior, - False, + allow_noncanonical_form, ) diff --git a/tests/test_2713_from_buffers_allow_noncanonical.py b/tests/test_2713_from_buffers_allow_noncanonical.py new file mode 100644 index 0000000000..6d7c44f820 --- /dev/null +++ b/tests/test_2713_from_buffers_allow_noncanonical.py @@ -0,0 +1,126 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import numpy as np +import pytest # noqa: F401 + +import awkward as ak + + +def test_union_simplification(): + array = ak.Array( + ak.contents.UnionArray( + ak.index.Index8(np.arange(64, dtype=np.int8) % 2), + ak.index.Index64(np.arange(64, dtype=np.int64) // 2), + [ + ak.contents.RecordArray( + [ak.contents.NumpyArray(np.arange(64, dtype=np.int64))], ["x"] + ), + ak.contents.RecordArray( + [ + ak.contents.NumpyArray(np.arange(64, dtype=np.int64)), + ak.contents.NumpyArray(np.arange(64, dtype=np.int8)), + ], + ["x", "y"], + ), + ], + ) + ) + + form, length, container = ak.to_buffers(array) + + assert form.to_dict() == { + "class": "UnionArray", + "tags": "i8", + "index": "i64", + "contents": [ + { + "class": "RecordArray", + "fields": ["x"], + "contents": [ + { + "class": "NumpyArray", + "primitive": "int64", + "inner_shape": [], + "parameters": {}, + "form_key": "node2", + } + ], + "parameters": {}, + "form_key": "node1", + }, + { + "class": "RecordArray", + "fields": ["x", "y"], + "contents": [ + { + "class": "NumpyArray", + "primitive": "int64", + "inner_shape": [], + "parameters": {}, + "form_key": "node4", + }, + { + "class": "NumpyArray", + "primitive": "int8", + "inner_shape": [], + "parameters": {}, + "form_key": "node5", + }, + ], + "parameters": {}, + "form_key": "node3", + }, + ], + "parameters": {}, + "form_key": "node0", + } + + projected_form = { + "class": "UnionArray", + "tags": "i8", + "index": "i64", + "contents": [ + { + "class": "RecordArray", + "fields": ["x"], + "contents": [ + { 
+ "class": "NumpyArray", + "primitive": "int64", + "inner_shape": [], + "parameters": {}, + "form_key": "node2", + } + ], + "parameters": {}, + "form_key": "node1", + }, + { + "class": "RecordArray", + "fields": ["x"], + "contents": [ + { + "class": "NumpyArray", + "primitive": "int64", + "inner_shape": [], + "parameters": {}, + "form_key": "node4", + } + ], + "parameters": {}, + "form_key": "node3", + }, + ], + "parameters": {}, + "form_key": "node0", + } + container.pop("node5-data") + projected = ak.from_buffers( + projected_form, length, container, allow_noncanonical_form=True + ) + assert projected.layout.form.to_dict(verbose=False) == { + "class": "IndexedArray", + "index": "i64", + "content": {"class": "RecordArray", "fields": ["x"], "contents": ["int64"]}, + } + assert ak.almost_equal(array[["x"]], projected)