Skip to content

Commit

Permalink
fix: support placeholders in from_buffers (#2714)
Browse files Browse the repository at this point in the history
* fix: support unknown lengths in NumpyArray

* refactor: don't handle placeholder arrays in `nplike.frombuffer`

* fix: ensure that slicing enforces typetracer invariant

* fix: placeholders are 0-bytes

* fix: support unknown lengths in `from_buffers`

* chore: improve comment

* fix: always reshape buffers

* refactor: appease pylint

* test: add simple tests

* fix: ensure unions handle placeholders in either branch
  • Loading branch information
agoose77 authored Sep 18, 2023
1 parent 9ffbe0c commit 4124889
Show file tree
Hide file tree
Showing 6 changed files with 552 additions and 48 deletions.
10 changes: 3 additions & 7 deletions src/awkward/_nplikes/array_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,11 @@ def ascontiguousarray(self, x: ArrayLike) -> ArrayLike:
return self._module.ascontiguousarray(x)

def frombuffer(
self, buffer, *, dtype: np.dtype | None = None, count: int = -1
self, buffer, *, dtype: np.dtype | None = None, count: ShapeItem = -1
) -> ArrayLike:
if isinstance(buffer, PlaceholderArray):
if count == -1:
return self.asarray(buffer)
else:
return self.asarray(buffer[:count])
else:
return self._module.frombuffer(buffer, dtype=dtype, count=count)
raise TypeError("placeholder arrays are not supported in `frombuffer`")
return self._module.frombuffer(buffer, dtype=dtype, count=count)

def from_dlpack(self, x: Any) -> ArrayLike:
return self._module.from_dlpack(x)
Expand Down
30 changes: 23 additions & 7 deletions src/awkward/_nplikes/placeholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(
):
self._nplike = nplike
self._shape = shape
self._dtype = dtype
self._dtype = np.dtype(dtype)

@property
def dtype(self) -> np.dtype:
Expand All @@ -37,7 +37,7 @@ def size(self) -> int:

@property
def nbytes(self) -> int:
return self.size * self._dtype.itemsize
return 0

@property
def strides(self) -> tuple[int, ...]:
Expand Down Expand Up @@ -67,13 +67,29 @@ def view(self, dtype: dtype) -> Self:
return type(self)(self._nplike, shape, dtype)

def __getitem__(self, index):
# Typetracers permit slices that don't touch data or shapes
if isinstance(index, slice):
if self._shape[0] is unknown_length:
return type(self)(self._nplike, self._shape, self._dtype)
length = self._shape[0]

# Unknown-length placeholders should not be sliced (as their shapes would be touched)
if length is unknown_length:
raise AssertionError(
"placeholder arrays that are sliced should have known shapes"
)
# Known-length placeholders *always* need a known shape
elif (
index.start is unknown_length
or index.stop is unknown_length
or index.step is unknown_length
):
raise AssertionError(
"known-length placeholders should never encounter unknown lengths in slices"
)
else:
start, stop, step = index.indices(self._shape[0])
new_shape = ((stop - start) // step,)
return type(self)(self._nplike, new_shape, self._dtype)
start, stop, step = index.indices(length)
new_length = (stop - start) // step

return type(self)(self._nplike, (new_length,), self._dtype)
else:
raise TypeError(
f"{type(self).__name__} supports only trivial slices, not {type(index).__name__}"
Expand Down
5 changes: 4 additions & 1 deletion src/awkward/forms/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ def from_dict(input: Mapping) -> Form:

if input["class"] == "NumpyArray":
primitive = input["primitive"]
inner_shape = input.get("inner_shape", [])
inner_shape = tuple(
unknown_length if item is None else item
for item in input.get("inner_shape", [])
)
return ak.forms.NumpyForm(
primitive, inner_shape, parameters=parameters, form_key=form_key
)
Expand Down
6 changes: 5 additions & 1 deletion src/awkward/forms/numpyform.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import awkward as ak
from awkward._errors import deprecate
from awkward._nplikes.numpylike import NumpyMetadata
from awkward._nplikes.shape import unknown_length
from awkward._parameters import type_parameters_equal
from awkward._typing import JSONSerializable, Self, final
from awkward._util import UNSET
Expand Down Expand Up @@ -139,7 +140,10 @@ def _to_dict_part(self, verbose, toplevel):
"primitive": self._primitive,
}
if verbose or len(self._inner_shape) > 0:
out["inner_shape"] = list(self._inner_shape)
out["inner_shape"] = [
None if item is unknown_length else item
for item in self._inner_shape
]
return self._to_dict_extra(out, verbose)

@property
Expand Down
99 changes: 67 additions & 32 deletions src/awkward/operations/ak_from_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from awkward._dispatch import high_level_function
from awkward._layout import wrap_layout
from awkward._nplikes.numpy import Numpy
from awkward._nplikes.numpylike import NumpyMetadata
from awkward._nplikes.numpylike import ArrayLike, NumpyLike, NumpyMetadata
from awkward._nplikes.placeholder import PlaceholderArray
from awkward._nplikes.shape import ShapeItem, unknown_length
from awkward._regularize import is_integer
from awkward.forms.form import index_to_dtype, regularize_buffer_key

Expand Down Expand Up @@ -140,11 +142,27 @@ def _impl(
return wrap_layout(out, behavior, highlevel)


def _from_buffer(nplike, buffer, dtype, count, byteorder):
if nplike.is_own_array(buffer):
def _from_buffer(
nplike: NumpyLike, buffer, dtype: np.dtype, count: ShapeItem, byteorder: str
) -> ArrayLike:
# Unknown-length information implies that we didn't load shape-buffers (offsets, etc)
# for the parent of this node. Thus, this node and its children *must* only
# contain placeholders
if count is unknown_length:
if not isinstance(buffer, PlaceholderArray):
raise AssertionError("Encountered unknown length for concrete buffer")
return PlaceholderArray(nplike, (unknown_length,), dtype)
# Known-length information implies that we should have known-length buffers here.
# Therefore, placeholders without shape information are not permitted.
elif isinstance(buffer, PlaceholderArray) or nplike.is_own_array(buffer):
# Require 1D buffers
array = nplike.reshape(buffer.view(dtype), shape=(-1,), copy=False)

# Require 1D
# Raise if the buffer we encountered isn't definitely-sized
if array.size is unknown_length:
raise AssertionError(
"Encountered unknown length for placeholder in context where length should be known"
)
if array.size < count:
raise TypeError(
f"size of array ({array.size}) is less than size of form ({count})"
Expand All @@ -168,9 +186,7 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
elif isinstance(form, ak.forms.NumpyForm):
dtype = ak.types.numpytype.primitive_to_dtype(form.primitive)
raw_array = container[getkey(form, "data")]
real_length = length
for x in form.inner_shape:
real_length *= x
real_length = length * math.prod(form.inner_shape)
data = _from_buffer(
backend.nplike,
raw_array,
Expand All @@ -179,10 +195,8 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
byteorder=byteorder,
)
if form.inner_shape != ():
if len(data) == 0:
data = backend.nplike.reshape(data, (length, *form.inner_shape))
else:
data = backend.nplike.reshape(data, (-1, *form.inner_shape))
data = backend.nplike.reshape(data, (length, *form.inner_shape))

return ak.contents.NumpyArray(
data, parameters=form._parameters, backend=backend
)
Expand All @@ -199,12 +213,15 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)

elif isinstance(form, ak.forms.BitMaskedForm):
raw_array = container[getkey(form, "mask")]
excess_length = int(math.ceil(length / 8.0))
if length is unknown_length:
next_length = unknown_length
else:
next_length = int(math.ceil(length / 8.0))
mask = _from_buffer(
backend.index_nplike,
raw_array,
dtype=index_to_dtype[form.mask],
count=excess_length,
count=next_length,
byteorder=byteorder,
)
content = _reconstitute(
Expand Down Expand Up @@ -255,9 +272,12 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
count=length,
byteorder=byteorder,
)
next_length = (
0 if len(index) == 0 else max(0, backend.index_nplike.max(index) + 1)
)
if isinstance(index, PlaceholderArray):
next_length = unknown_length
else:
next_length = (
0 if len(index) == 0 else max(0, backend.index_nplike.max(index) + 1)
)
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
)
Expand All @@ -280,13 +300,16 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
count=length,
byteorder=byteorder,
)
next_length = (
0
if len(index) == 0
else backend.index_nplike.index_as_shape_item(
backend.index_nplike.max(index) + 1
if isinstance(index, PlaceholderArray):
next_length = unknown_length
else:
next_length = (
0
if len(index) == 0
else backend.index_nplike.index_as_shape_item(
backend.index_nplike.max(index) + 1
)
)
)
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
)
Expand Down Expand Up @@ -317,8 +340,13 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
count=length,
byteorder=byteorder,
)
reduced_stops = stops[starts != stops]
next_length = 0 if len(starts) == 0 else backend.index_nplike.max(reduced_stops)
if isinstance(stops, PlaceholderArray):
next_length = unknown_length
else:
reduced_stops = stops[starts != stops]
next_length = (
0 if len(starts) == 0 else backend.index_nplike.max(reduced_stops)
)
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
)
Expand All @@ -338,7 +366,11 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
count=length + 1,
byteorder=byteorder,
)
next_length = 0 if len(offsets) == 1 else offsets[-1]

if isinstance(offsets, PlaceholderArray):
next_length = unknown_length
else:
next_length = 0 if len(offsets) == 1 else offsets[-1]
content = _reconstitute(
form.content, next_length, container, getkey, backend, byteorder, simplify
)
Expand Down Expand Up @@ -391,13 +423,16 @@ def _reconstitute(form, length, container, getkey, backend, byteorder, simplify)
count=length,
byteorder=byteorder,
)
lengths = []
for tag in range(len(form.contents)):
selected_index = index[tags == tag]
if len(selected_index) == 0:
lengths.append(0)
else:
lengths.append(backend.index_nplike.max(selected_index) + 1)
if isinstance(index, PlaceholderArray) or isinstance(tags, PlaceholderArray):
lengths = [unknown_length] * len(form.contents)
else:
lengths = []
for tag in range(len(form.contents)):
selected_index = index[tags == tag]
if len(selected_index) == 0:
lengths.append(0)
else:
lengths.append(backend.index_nplike.max(selected_index) + 1)
contents = [
_reconstitute(
content, lengths[i], container, getkey, backend, byteorder, simplify
Expand Down
Loading

0 comments on commit 4124889

Please sign in to comment.