Skip to content

Commit

Permalink
fix: Drop timezone in from_arrow (#3392)
Browse files Browse the repository at this point in the history
* Drop timezone in from_arrow

* Add doc and test
  • Loading branch information
martindurant authored Jan 31, 2025
1 parent 3d14f2e commit 7cbca98
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/awkward/_connect/pyarrow/conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,8 @@ def popbuffers(paarray, awkwardarrow_type, storage_type, buffers, generate_bitma
if to64:
data = numpy.astype(numpy.frombuffer(data, dtype=np.int32), dtype=np.int64)
if dt is None:
if getattr(storage_type, "tz", None) is not None:
storage_type = pyarrow.lib.timestamp(storage_type.unit)
dt = storage_type.to_pandas_dtype()

out = ak.contents.NumpyArray(
Expand Down Expand Up @@ -670,6 +672,8 @@ def form_popbuffers(awkwardarrow_type, storage_type):
elif isinstance(storage_type, pyarrow.lib.DataType):
_, dt = _pyarrow_to_numpy_dtype.get(str(storage_type), (False, None))
if dt is None:
if getattr(storage_type, "tz", None) is not None:
storage_type = pyarrow.lib.timestamp(storage_type.unit)
dt = np.dtype(storage_type.to_pandas_dtype())

out = ak.forms.NumpyForm(
Expand Down
4 changes: 4 additions & 0 deletions src/awkward/operations/ak_from_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def from_arrow(
low-level #ak.forms.Form), even through Parquet, making Parquet a good way to save
Awkward Arrays for later use.
Because Awkward Array uses NumPy's dtype system, its timestamp types cannot carry timezones.
If the input Arrow data contains timestamp types with timezones, the timezones
(not the timestamps themselves) are silently dropped.
See also #ak.to_arrow, #ak.to_arrow_table, #ak.from_parquet, #ak.from_arrow_schema.
"""
return _impl(array, generate_bitmasks, highlevel, behavior, attrs)
Expand Down
4 changes: 4 additions & 0 deletions src/awkward/operations/ak_from_arrow_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ def from_arrow_schema(schema):
Converts an Apache Arrow schema into an Awkward Form.
Because Awkward Array uses NumPy's dtype system, its timestamp types cannot carry timezones.
If the input Arrow schema contains timestamp types with timezones, the timezones
are silently dropped from the resulting Form.
See also #ak.to_arrow, #ak.to_arrow_table, #ak.from_arrow, #ak.to_parquet, #ak.from_parquet.
"""
return _impl(schema)
Expand Down
25 changes: 25 additions & 0 deletions tests/test_3392_from_arrow_tz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from __future__ import annotations

import pytest

import awkward as ak

pa = pytest.importorskip("pyarrow")


def test_tz_is_dropped():
    """Check that timezone-aware Arrow timestamps convert without a timezone.

    Builds a one-column Arrow table of ``timestamp("ns", tz="UTC")`` values and
    converts both the table (``ak.from_arrow``) and its schema
    (``ak.from_arrow_schema``). Beyond "does not raise", this verifies that the
    column survives with all of its rows and that the resulting type is the
    timezone-naive ``datetime64[ns]``.
    """
    data = pa.Table.from_arrays(
        [
            pa.array(
                [
                    1,
                    2,
                    3,
                ],
                type=pa.timestamp("ns", tz="UTC"),
            )
        ],
        names=["a"],
    )

    # Data path: the column must still be present, complete, and typed as a
    # plain (timezone-free) nanosecond datetime.
    array = ak.from_arrow(data)
    assert array.fields == ["a"]
    assert len(array) == 3
    assert "datetime64[ns]" in str(array.type)

    # Schema path: the Form derived from the schema alone must describe the
    # same timezone-free datetime column.
    form = ak.from_arrow_schema(data.schema)
    assert form.fields == ["a"]
    assert "datetime64[ns]" in str(form.type)

0 comments on commit 7cbca98

Please sign in to comment.