From e497e2f462584246390ddc59b1f958bc931ce8c2 Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Tue, 28 Jan 2025 16:31:14 -0500
Subject: [PATCH 1/6] feat: add a non-touching ak.zip, called 'ak.unsafe_zip'

---
 src/awkward/operations/__init__.py      |   1 +
 src/awkward/operations/ak_unsafe_zip.py | 192 ++++++++++++++++++++++++
 2 files changed, 193 insertions(+)
 create mode 100644 src/awkward/operations/ak_unsafe_zip.py

diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py
index 91ebc9c184..49964b4066 100644
--- a/src/awkward/operations/__init__.py
+++ b/src/awkward/operations/__init__.py
@@ -109,6 +109,7 @@
 from awkward.operations.ak_transform import *
 from awkward.operations.ak_type import *
 from awkward.operations.ak_unflatten import *
+from awkward.operations.ak_unsafe_zip import *
 from awkward.operations.ak_unzip import *
 from awkward.operations.ak_validity_error import *
 from awkward.operations.ak_values_astype import *
diff --git a/src/awkward/operations/ak_unsafe_zip.py b/src/awkward/operations/ak_unsafe_zip.py
new file mode 100644
index 0000000000..3094e85e92
--- /dev/null
+++ b/src/awkward/operations/ak_unsafe_zip.py
@@ -0,0 +1,192 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from functools import reduce
+
+import awkward as ak
+from awkward._dispatch import high_level_function
+from awkward._layout import HighLevelContext, ensure_same_backend
+from awkward._namedaxis import _get_named_axis, _unify_named_axis
+from awkward._nplikes.numpy_like import NumpyMetadata
+
+__all__ = ("unsafe_zip",)
+
+np = NumpyMetadata.instance()
+
+
+@high_level_function()
+def unsafe_zip(
+    arrays,
+    *,
+    parameters=None,
+    with_name=None,
+    highlevel=True,
+    behavior=None,
+    attrs=None,
+):
+    """
+    Args:
+        arrays (mapping or sequence of arrays): Each value in this mapping or
+            sequence can be any array-like data that #ak.to_layout recognizes.
+        parameters (None or dict): Parameters for the new
+            #ak.contents.RecordArray node that is created by this operation.
+        with_name (None or str): Assigns a `"__record__"` name to the new
+            #ak.contents.RecordArray node that is created by this operation
+            (overriding `parameters`, if necessary).
+        highlevel (bool): If True, return an #ak.Array; otherwise, return
+            a low-level #ak.contents.Content subclass.
+        behavior (None or dict): Custom #ak.behavior for the output array, if
+            high-level.
+        attrs (None or dict): Custom attributes for the output array, if
+            high-level.
+
+    Combines `arrays` into a single structure as the fields of a collection
+    of records or the slots of a collection of tuples.
+
+    Caution: unlike #ak.zip this function will _not_ broadcast the arrays together.
+    It assumes that the given arrays have already the same layouts and lengths.
+
+    This operation may be thought of as the opposite of projection in
+    #ak.Array.__getitem__, which extracts fields one at a time, or
+    #ak.unzip, which extracts them all in one call.
+
+    Consider the following arrays, `one` and `two`.
+
+        >>> one = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5], [6.6]])
+        >>> two = ak.Array([["a", "b", "c"], [], ["d", "e"], ["f"]])
+
+    Zipping them together using a dict creates a collection of records with
+    the same nesting structure as `one` and `two`.
+
+        >>> ak.unsafe_zip({"x": one, "y": two}).show()
+        [[{x: 1.1, y: 'a'}, {x: 2.2, y: 'b'}, {x: 3.3, y: 'c'}],
+         [],
+         [{x: 4.4, y: 'd'}],
+         []]
+
+    Doing so with a list creates tuples, whose fields are not named.
+
+        >>> ak.zip([one, two]).show()
+        [[(1.1, 'a'), (2.2, 'b'), (3.3, 'c')],
+         [],
+         [(4.4, 'd')],
+         []]
+
+    See also #ak.zip and #ak.unzip.
+    """
+    # Dispatch
+    if isinstance(arrays, Mapping):
+        yield arrays.values()
+    else:
+        yield arrays
+
+    # Implementation
+    return _impl(
+        arrays,
+        parameters,
+        with_name,
+        highlevel,
+        behavior,
+        attrs,
+    )
+
+
+def _impl(
+    arrays,
+    parameters,
+    with_name,
+    highlevel,
+    behavior,
+    attrs,
+):
+    with HighLevelContext(behavior=behavior, attrs=attrs) as ctx:
+        if isinstance(arrays, Mapping):
+            layouts = ensure_same_backend(
+                *(
+                    ctx.unwrap(
+                        x,
+                        allow_record=False,
+                        allow_unknown=False,
+                        none_policy="pass-through",
+                        primitive_policy="pass-through",
+                    )
+                    for x in arrays.values()
+                )
+            )
+            fields = list(arrays.keys())
+
+            # propagate named axis from input to output,
+            #   use strategy "unify" (see: awkward._namedaxis)
+            out_named_axis = reduce(
+                _unify_named_axis, map(_get_named_axis, arrays.values())
+            )
+
+        else:
+            layouts = ensure_same_backend(
+                *(
+                    ctx.unwrap(
+                        x,
+                        allow_record=False,
+                        allow_unknown=False,
+                        none_policy="pass-through",
+                        primitive_policy="pass-through",
+                    )
+                    for x in arrays
+                )
+            )
+            fields = None
+
+            # propagate named axis from input to output,
+            #   use strategy "unify" (see: awkward._namedaxis)
+            out_named_axis = reduce(_unify_named_axis, map(_get_named_axis, arrays))
+
+    # determine backend
+    backend = next((b.backend for b in layouts if hasattr(b, "backend")), "cpu")
+
+    if with_name is not None:
+        if parameters is None:
+            parameters = {}
+        else:
+            parameters = dict(parameters)
+        parameters["__record__"] = with_name
+
+    # only allow all NumpyArrays and ListOffsetArrays
+    # maybe this could be done recursively, but for now just check the top level. This is also how ak.zip works.
+    if all(isinstance(layout, ak.contents.NumpyArray) for layout in layouts):
+        length = layouts[0].length
+        out = ak.contents.RecordArray(
+            layouts, fields, length=length, parameters=parameters, backend=backend
+        )
+    elif all(isinstance(layout, ak.contents.ListOffsetArray) for layout in layouts):
+        contents = []
+        for layout in layouts:
+            if not isinstance(layout.content, ak.contents.NumpyArray):
+                raise ValueError(
+                    "can not (unsafe) zip ListOffsetArrays with non-NumpyArray contents"
+                )
+            contents.append(layout.content)
+        # just get from the first one
+        offsets = layouts[0].offsets
+        length = layouts[0].length
+        out = ak.contents.ListOffsetArray(
+            offsets=offsets,
+            content=ak.contents.RecordArray(
+                contents, fields, length=length, parameters=parameters, backend=backend
+            ),
+        )
+    else:
+        raise ValueError(
+            "all array layouts must be either NumpyArrays or ListOffsetArrays"
+        )
+
+    # Wrap the result and attach the named axes unified from the inputs (no broadcasting happens here)
+    wrapped_out = ctx.wrap(out, highlevel=highlevel)
+    return ak.operations.ak_with_named_axis._impl(
+        wrapped_out,
+        named_axis=out_named_axis,
+        highlevel=highlevel,
+        behavior=ctx.behavior,
+        attrs=ctx.attrs,
+    )

From eb4d4f80250a7e5edc37825fa95f1bbcbc6f9ae3 Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Tue, 28 Jan 2025 17:03:28 -0500
Subject: [PATCH 2/6] fix: take the RecordArray length from the list content

---
 src/awkward/operations/ak_unsafe_zip.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/awkward/operations/ak_unsafe_zip.py b/src/awkward/operations/ak_unsafe_zip.py
index 3094e85e92..78529300c2 100644
--- a/src/awkward/operations/ak_unsafe_zip.py
+++ b/src/awkward/operations/ak_unsafe_zip.py
@@ -169,7 +169,7 @@ def _impl(
             contents.append(layout.content)
         # just get from the first one
         offsets = layouts[0].offsets
-        length = layouts[0].length
+        length = layouts[0].content.length
         out = ak.contents.ListOffsetArray(
             offsets=offsets,
             content=ak.contents.RecordArray(

From f4f1e0d47fa009a5b8ca5090824cdbc03d738c44 Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Tue, 28 Jan 2025 17:04:13 -0500
Subject: [PATCH 3/6] add tests

---
 tests/test_3390_ak_unsafe_zip.py | 58 ++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 tests/test_3390_ak_unsafe_zip.py

diff --git a/tests/test_3390_ak_unsafe_zip.py b/tests/test_3390_ak_unsafe_zip.py
new file mode 100644
index 0000000000..c98faf854d
--- /dev/null
+++ b/tests/test_3390_ak_unsafe_zip.py
@@ -0,0 +1,58 @@
+# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE
+# ruff: noqa: E402
+
+from __future__ import annotations
+
+import awkward as ak
+
+
+def test_ak_unsafe_zip_NumpyArray_dict():
+    a = ak.Array([1])
+    b = ak.Array([2])
+    c = ak.unsafe_zip({"a": a, "b": b})
+    assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b}))
+
+
+def test_ak_unsafe_zip_ListOffsetArray_dict():
+    a = ak.Array([[1], []])
+    b = ak.Array([[2], []])
+    c = ak.unsafe_zip({"a": a, "b": b})
+    assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b}))
+
+
+def test_ak_unsafe_zip_NumpyArray_list():
+    a = ak.Array([1])
+    b = ak.Array([2])
+    c = ak.unsafe_zip([a, b])
+    assert ak.to_list(c) == ak.to_list(ak.zip([a, b]))
+
+
+def test_ak_unsafe_zip_ListOffsetArray_list():
+    a = ak.Array([[1], []])
+    b = ak.Array([[2], []])
+    c = ak.unsafe_zip([a, b])
+    assert ak.to_list(c) == ak.to_list(ak.zip([a, b]))
+
+
+def test_typetracer_NumpyArray_non_touching():
+    tracer = ak.Array([1], backend="typetracer")
+
+    tracer, report = ak.typetracer.typetracer_with_report(
+        tracer.layout.form_with_key(), highlevel=True
+    )
+
+    _ = ak.unsafe_zip({"foo": tracer, "bar": tracer})
+    assert len(report.shape_touched) == 1
+    assert len(report.data_touched) == 0
+
+
+def test_typetracer_ListOffsetArray_non_touching():
+    tracer = ak.Array([[1], [], [2, 3]], backend="typetracer")
+
+    tracer, report = ak.typetracer.typetracer_with_report(
+        tracer.layout.form_with_key(), highlevel=True
+    )
+
+    _ = ak.unsafe_zip({"foo": tracer, "bar": tracer})
+    assert len(report.shape_touched) == 1
+    assert len(report.data_touched) == 0

From 406de57acc236d574ed01b6648c51a030986346d Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Tue, 28 Jan 2025 17:37:38 -0500
Subject: [PATCH 4/6] check that all arrays have the same length (safer)

---
 src/awkward/operations/ak_unsafe_zip.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/awkward/operations/ak_unsafe_zip.py b/src/awkward/operations/ak_unsafe_zip.py
index 78529300c2..1395490e6a 100644
--- a/src/awkward/operations/ak_unsafe_zip.py
+++ b/src/awkward/operations/ak_unsafe_zip.py
@@ -155,7 +155,7 @@ def _impl(
     # only allow all NumpyArrays and ListOffsetArrays
     # maybe this could be done recursively, but for now just check the top level. This is also how ak.zip works.
     if all(isinstance(layout, ak.contents.NumpyArray) for layout in layouts):
-        length = layouts[0].length
+        length = _check_equal_lengths(layouts)
         out = ak.contents.RecordArray(
             layouts, fields, length=length, parameters=parameters, backend=backend
         )
@@ -169,7 +169,7 @@ def _impl(
             contents.append(layout.content)
         # just get from the first one
         offsets = layouts[0].offsets
-        length = layouts[0].content.length
+        length = _check_equal_lengths([layout.content for layout in layouts])
         out = ak.contents.ListOffsetArray(
             offsets=offsets,
             content=ak.contents.RecordArray(
@@ -190,3 +190,13 @@ def _impl(
         behavior=ctx.behavior,
         attrs=ctx.attrs,
     )
+
+
+def _check_equal_lengths(
+    layouts: ak.contents.Content,
+) -> int | ak._nplikes.shape.UnknownLength:
+    length = layouts[0].length
+    for layout in layouts:
+        if layout.length != length:
+            raise ValueError("all arrays must have the same length")
+    return length

From a6d0f8074e04fa7c92ce1b3c38233d72c6de7dee Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Wed, 29 Jan 2025 13:50:30 -0500
Subject: [PATCH 5/6] unsafe_zip -> zip_no_broadcast; check equal offsets at
 runtime with actual data

---
 src/awkward/operations/__init__.py            |  2 +-
 ...k_unsafe_zip.py => ak_zip_no_broadcast.py} | 42 ++++++++++++++-----
 ...ip.py => test_3390_ak_zip_no_broadcast.py} | 20 ++++-----
 3 files changed, 42 insertions(+), 22 deletions(-)
 rename src/awkward/operations/{ak_unsafe_zip.py => ak_zip_no_broadcast.py} (81%)
 rename tests/{test_3390_ak_unsafe_zip.py => test_3390_ak_zip_no_broadcast.py} (71%)

diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py
index 49964b4066..306a49bb48 100644
--- a/src/awkward/operations/__init__.py
+++ b/src/awkward/operations/__init__.py
@@ -109,7 +109,6 @@
 from awkward.operations.ak_transform import *
 from awkward.operations.ak_type import *
 from awkward.operations.ak_unflatten import *
-from awkward.operations.ak_unsafe_zip import *
 from awkward.operations.ak_unzip import *
 from awkward.operations.ak_validity_error import *
 from awkward.operations.ak_values_astype import *
@@ -124,3 +123,4 @@
 from awkward.operations.ak_without_parameters import *
 from awkward.operations.ak_zeros_like import *
 from awkward.operations.ak_zip import *
+from awkward.operations.ak_zip_no_broadcast import *
diff --git a/src/awkward/operations/ak_unsafe_zip.py b/src/awkward/operations/ak_zip_no_broadcast.py
similarity index 81%
rename from src/awkward/operations/ak_unsafe_zip.py
rename to src/awkward/operations/ak_zip_no_broadcast.py
index 1395490e6a..729157a23c 100644
--- a/src/awkward/operations/ak_unsafe_zip.py
+++ b/src/awkward/operations/ak_zip_no_broadcast.py
@@ -11,13 +11,13 @@
 from awkward._namedaxis import _get_named_axis, _unify_named_axis
 from awkward._nplikes.numpy_like import NumpyMetadata
 
-__all__ = ("unsafe_zip",)
+__all__ = ("zip_no_broadcast",)
 
 np = NumpyMetadata.instance()
 
 
 @high_level_function()
-def unsafe_zip(
+def zip_no_broadcast(
     arrays,
     *,
     parameters=None,
@@ -46,7 +46,7 @@ def unsafe_zip(
     of records or the slots of a collection of tuples.
 
     Caution: unlike #ak.zip this function will _not_ broadcast the arrays together.
-    It assumes that the given arrays have already the same layouts and lengths.
+    During typetracing, it assumes that the given arrays already have the same layouts and lengths.
 
     This operation may be thought of as the opposite of projection in
     #ak.Array.__getitem__, which extracts fields one at a time, or
@@ -60,7 +60,7 @@ def unsafe_zip(
     Zipping them together using a dict creates a collection of records with
     the same nesting structure as `one` and `two`.
 
-        >>> ak.unsafe_zip({"x": one, "y": two}).show()
+        >>> ak.zip_no_broadcast({"x": one, "y": two}).show()
         [[{x: 1.1, y: 'a'}, {x: 2.2, y: 'b'}, {x: 3.3, y: 'c'}],
          [],
          [{x: 4.4, y: 'd'}],
@@ -153,7 +153,6 @@ def _impl(
         parameters["__record__"] = with_name
 
     # only allow all NumpyArrays and ListOffsetArrays
-    # maybe this could be done recursively, but for now just check the top level. This is also how ak.zip works.
     if all(isinstance(layout, ak.contents.NumpyArray) for layout in layouts):
         length = _check_equal_lengths(layouts)
         out = ak.contents.RecordArray(
@@ -162,14 +161,35 @@ def _impl(
     elif all(isinstance(layout, ak.contents.ListOffsetArray) for layout in layouts):
         contents = []
         for layout in layouts:
+            # get the content of the ListOffsetArray
             if not isinstance(layout.content, ak.contents.NumpyArray):
                 raise ValueError(
                     "can not (unsafe) zip ListOffsetArrays with non-NumpyArray contents"
                 )
             contents.append(layout.content)
-        # just get from the first one
-        offsets = layouts[0].offsets
-        length = _check_equal_lengths([layout.content for layout in layouts])
+
+        if backend.name == "typetracer":
+            # just get from the first one
+            # we're in typetracer mode, so we can't check the offsets (see else branch)
+            offsets = layouts[0].offsets
+        else:
+            # this is at 'runtime' with actual data, that means we can check the offsets,
+            # but only those that have actual data, i.e. no PlaceholderArrays
+            # so first, let's filter out any PlaceholderArrays
+            comparable_offsets = filter(
+                lambda o: not isinstance(o, ak._nplikes.placeholder.PlaceholderArray),
+                (layout.offsets for layout in layouts),
+            )
+            # check that offsets are the same
+            first = next(comparable_offsets)
+            if not all(
+                first.nplike.all(offsets.data == first.data)
+                for offsets in comparable_offsets
+            ):
+                raise ValueError("all ListOffsetArrays must have the same offsets")
+            offsets = first
+
+        length = _check_equal_lengths(contents)
         out = ak.contents.ListOffsetArray(
             offsets=offsets,
             content=ak.contents.RecordArray(
@@ -193,10 +213,10 @@ def _impl(
 
 
 def _check_equal_lengths(
-    layouts: ak.contents.Content,
+    contents: ak.contents.Content,
 ) -> int | ak._nplikes.shape.UnknownLength:
-    length = layouts[0].length
-    for layout in layouts:
+    length = contents[0].length
+    for layout in contents:
         if layout.length != length:
             raise ValueError("all arrays must have the same length")
     return length
diff --git a/tests/test_3390_ak_unsafe_zip.py b/tests/test_3390_ak_zip_no_broadcast.py
similarity index 71%
rename from tests/test_3390_ak_unsafe_zip.py
rename to tests/test_3390_ak_zip_no_broadcast.py
index c98faf854d..fe327ce4ec 100644
--- a/tests/test_3390_ak_unsafe_zip.py
+++ b/tests/test_3390_ak_zip_no_broadcast.py
@@ -6,31 +6,31 @@
 import awkward as ak
 
 
-def test_ak_unsafe_zip_NumpyArray_dict():
+def test_ak_zip_no_broadcast_NumpyArray_dict():
     a = ak.Array([1])
     b = ak.Array([2])
-    c = ak.unsafe_zip({"a": a, "b": b})
+    c = ak.zip_no_broadcast({"a": a, "b": b})
     assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b}))
 
 
-def test_ak_unsafe_zip_ListOffsetArray_dict():
+def test_ak_zip_no_broadcast_ListOffsetArray_dict():
     a = ak.Array([[1], []])
     b = ak.Array([[2], []])
-    c = ak.unsafe_zip({"a": a, "b": b})
+    c = ak.zip_no_broadcast({"a": a, "b": b})
     assert ak.to_list(c) == ak.to_list(ak.zip({"a": a, "b": b}))
 
 
-def test_ak_unsafe_zip_NumpyArray_list():
+def test_ak_zip_no_broadcast_NumpyArray_list():
     a = ak.Array([1])
     b = ak.Array([2])
-    c = ak.unsafe_zip([a, b])
+    c = ak.zip_no_broadcast([a, b])
     assert ak.to_list(c) == ak.to_list(ak.zip([a, b]))
 
 
-def test_ak_unsafe_zip_ListOffsetArray_list():
+def test_ak_zip_no_broadcast_ListOffsetArray_list():
     a = ak.Array([[1], []])
     b = ak.Array([[2], []])
-    c = ak.unsafe_zip([a, b])
+    c = ak.zip_no_broadcast([a, b])
     assert ak.to_list(c) == ak.to_list(ak.zip([a, b]))
 
 
@@ -41,7 +41,7 @@ def test_typetracer_NumpyArray_non_touching():
         tracer.layout.form_with_key(), highlevel=True
     )
 
-    _ = ak.unsafe_zip({"foo": tracer, "bar": tracer})
+    _ = ak.zip_no_broadcast({"foo": tracer, "bar": tracer})
     assert len(report.shape_touched) == 1
     assert len(report.data_touched) == 0
 
@@ -53,6 +53,6 @@ def test_typetracer_ListOffsetArray_non_touching():
         tracer.layout.form_with_key(), highlevel=True
     )
 
-    _ = ak.unsafe_zip({"foo": tracer, "bar": tracer})
+    _ = ak.zip_no_broadcast({"foo": tracer, "bar": tracer})
     assert len(report.shape_touched) == 1
     assert len(report.data_touched) == 0

From 143214b20fd42225ac392dd5d8081132f14abde5 Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Wed, 29 Jan 2025 13:53:37 -0500
Subject: [PATCH 6/6] fix typo in doc string

---
 src/awkward/operations/ak_zip_no_broadcast.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/awkward/operations/ak_zip_no_broadcast.py b/src/awkward/operations/ak_zip_no_broadcast.py
index 729157a23c..87e7ccb1b2 100644
--- a/src/awkward/operations/ak_zip_no_broadcast.py
+++ b/src/awkward/operations/ak_zip_no_broadcast.py
@@ -68,7 +68,7 @@ def zip_no_broadcast(
 
     Doing so with a list creates tuples, whose fields are not named.
 
-        >>> ak.zip([one, two]).show()
+        >>> ak.zip_no_broadcast([one, two]).show()
         [[(1.1, 'a'), (2.2, 'b'), (3.3, 'c')],
          [],
          [(4.4, 'd')],