Skip to content

Commit a6d0f80

Browse files
committed
unsafe_zip -> zip_no_broadcast; check equal offsets at runtime with actual data
1 parent 406de57 commit a6d0f80

File tree

3 files changed

+42
-22
lines changed

3 files changed

+42
-22
lines changed

src/awkward/operations/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@
109109
from awkward.operations.ak_transform import *
110110
from awkward.operations.ak_type import *
111111
from awkward.operations.ak_unflatten import *
112-
from awkward.operations.ak_unsafe_zip import *
113112
from awkward.operations.ak_unzip import *
114113
from awkward.operations.ak_validity_error import *
115114
from awkward.operations.ak_values_astype import *
@@ -124,3 +123,4 @@
124123
from awkward.operations.ak_without_parameters import *
125124
from awkward.operations.ak_zeros_like import *
126125
from awkward.operations.ak_zip import *
126+
from awkward.operations.ak_zip_no_broadcast import *

src/awkward/operations/ak_unsafe_zip.py renamed to src/awkward/operations/ak_zip_no_broadcast.py

+31-11
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@
1111
from awkward._namedaxis import _get_named_axis, _unify_named_axis
1212
from awkward._nplikes.numpy_like import NumpyMetadata
1313

14-
__all__ = ("unsafe_zip",)
14+
__all__ = ("zip_no_broadcast",)
1515

1616
np = NumpyMetadata.instance()
1717

1818

1919
@high_level_function()
20-
def unsafe_zip(
20+
def zip_no_broadcast(
2121
arrays,
2222
*,
2323
parameters=None,
@@ -46,7 +46,7 @@ def unsafe_zip(
4646
of records or the slots of a collection of tuples.
4747
4848
Caution: unlike #ak.zip this function will _not_ broadcast the arrays together.
49-
It assumes that the given arrays have already the same layouts and lengths.
49+
During typetracing, it assumes that the given arrays already have the same layouts and lengths.
5050
5151
This operation may be thought of as the opposite of projection in
5252
#ak.Array.__getitem__, which extracts fields one at a time, or
@@ -60,7 +60,7 @@ def unsafe_zip(
6060
Zipping them together using a dict creates a collection of records with
6161
the same nesting structure as `one` and `two`.
6262
63-
>>> ak.unsafe_zip({"x": one, "y": two}).show()
63+
>>> ak.zip_no_broadcast({"x": one, "y": two}).show()
6464
[[{x: 1.1, y: 'a'}, {x: 2.2, y: 'b'}, {x: 3.3, y: 'c'}],
6565
[],
6666
[{x: 4.4, y: 'd'}],
@@ -153,7 +153,6 @@ def _impl(
153153
parameters["__record__"] = with_name
154154

155155
# only allow inputs that are either all NumpyArrays or all ListOffsetArrays
156-
# maybe this could be done recursively, but for now just check the top level. This is also how ak.zip works.
157156
if all(isinstance(layout, ak.contents.NumpyArray) for layout in layouts):
158157
length = _check_equal_lengths(layouts)
159158
out = ak.contents.RecordArray(
@@ -162,14 +161,35 @@ def _impl(
162161
elif all(isinstance(layout, ak.contents.ListOffsetArray) for layout in layouts):
163162
contents = []
164163
for layout in layouts:
164+
# get the content of the ListOffsetArray
165165
if not isinstance(layout.content, ak.contents.NumpyArray):
166166
raise ValueError(
167167
"can not (unsafe) zip ListOffsetArrays with non-NumpyArray contents"
168168
)
169169
contents.append(layout.content)
170-
# just get from the first one
171-
offsets = layouts[0].offsets
172-
length = _check_equal_lengths([layout.content for layout in layouts])
170+
171+
if backend.name == "typetracer":
172+
# just get from the first one
173+
# we're in typetracer mode, so we can't check the offsets (see else branch)
174+
offsets = layouts[0].offsets
175+
else:
176+
# this is at 'runtime' with actual data, that means we can check the offsets,
177+
# but only those that have actual data, i.e. no PlaceholderArrays
178+
# so first, let's filter out any PlaceholderArrays
179+
comparable_offsets = filter(
180+
lambda o: not isinstance(o, ak._nplikes.placeholder.PlaceholderArray),
181+
(layout.offsets for layout in layouts),
182+
)
183+
# check that offsets are the same
184+
first = next(comparable_offsets)
185+
if not all(
186+
first.nplike.all(offsets.data == first.data)
187+
for offsets in comparable_offsets
188+
):
189+
raise ValueError("all ListOffsetArrays must have the same offsets")
190+
offsets = first
191+
192+
length = _check_equal_lengths(contents)
173193
out = ak.contents.ListOffsetArray(
174194
offsets=offsets,
175195
content=ak.contents.RecordArray(
@@ -193,10 +213,10 @@ def _impl(
193213

194214

195215
def _check_equal_lengths(
196-
layouts: ak.contents.Content,
216+
contents: ak.contents.Content,
197217
) -> int | ak._nplikes.shape.UnknownLength:
198-
length = layouts[0].length
199-
for layout in layouts:
218+
length = contents[0].length
219+
for layout in contents:
200220
if layout.length != length:
201221
raise ValueError("all arrays must have the same length")
202222
return length

tests/test_3390_ak_unsafe_zip.py renamed to tests/test_3390_ak_zip_no_broadcast.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,31 @@
66
import awkward as ak
77

88

9-
def test_ak_unsafe_zip_NumpyArray_dict():
9+
def test_ak_zip_no_broadcast_NumpyArray_dict():
1010
a = ak.Array([1])
1111
b = ak.Array([2])
12-
c = ak.unsafe_zip({"a": a, "b": b})
12+
c = ak.zip_no_broadcast({"a": a, "b": b})
1313
assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b}))
1414

1515

16-
def test_ak_unsafe_zip_ListOffsetArray_dict():
16+
def test_ak_zip_no_broadcast_ListOffsetArray_dict():
1717
a = ak.Array([[1], []])
1818
b = ak.Array([[2], []])
19-
c = ak.unsafe_zip({"a": a, "b": b})
19+
c = ak.zip_no_broadcast({"a": a, "b": b})
2020
assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b}))
2121

2222

23-
def test_ak_unsafe_zip_NumpyArray_list():
23+
def test_ak_zip_no_broadcast_NumpyArray_list():
2424
a = ak.Array([1])
2525
b = ak.Array([2])
26-
c = ak.unsafe_zip([a, b])
26+
c = ak.zip_no_broadcast([a, b])
2727
assert ak.to_list(c) == ak.to_list(ak.zip([a, b]))
2828

2929

30-
def test_ak_unsafe_zip_ListOffsetArray_list():
30+
def test_ak_zip_no_broadcast_ListOffsetArray_list():
3131
a = ak.Array([[1], []])
3232
b = ak.Array([[2], []])
33-
c = ak.unsafe_zip([a, b])
33+
c = ak.zip_no_broadcast([a, b])
3434
assert ak.to_list(c) == ak.to_list(ak.zip([a, b]))
3535

3636

@@ -41,7 +41,7 @@ def test_typetracer_NumpyArray_non_touching():
4141
tracer.layout.form_with_key(), highlevel=True
4242
)
4343

44-
_ = ak.unsafe_zip({"foo": tracer, "bar": tracer})
44+
_ = ak.zip_no_broadcast({"foo": tracer, "bar": tracer})
4545
assert len(report.shape_touched) == 1
4646
assert len(report.data_touched) == 0
4747

@@ -53,6 +53,6 @@ def test_typetracer_ListOffsetArray_non_touching():
5353
tracer.layout.form_with_key(), highlevel=True
5454
)
5555

56-
_ = ak.unsafe_zip({"foo": tracer, "bar": tracer})
56+
_ = ak.zip_no_broadcast({"foo": tracer, "bar": tracer})
5757
assert len(report.shape_touched) == 1
5858
assert len(report.data_touched) == 0

0 commit comments

Comments
 (0)