diff --git a/src/awkward/_connect/pyarrow/conversions.py b/src/awkward/_connect/pyarrow/conversions.py index ac3f8ca990..60c4be19ea 100644 --- a/src/awkward/_connect/pyarrow/conversions.py +++ b/src/awkward/_connect/pyarrow/conversions.py @@ -471,6 +471,8 @@ def popbuffers(paarray, awkwardarrow_type, storage_type, buffers, generate_bitma if to64: data = numpy.astype(numpy.frombuffer(data, dtype=np.int32), dtype=np.int64) if dt is None: + if getattr(storage_type, "tz", None) is not None: + storage_type = pyarrow.lib.timestamp(storage_type.unit) dt = storage_type.to_pandas_dtype() out = ak.contents.NumpyArray( @@ -670,6 +672,8 @@ def form_popbuffers(awkwardarrow_type, storage_type): elif isinstance(storage_type, pyarrow.lib.DataType): _, dt = _pyarrow_to_numpy_dtype.get(str(storage_type), (False, None)) if dt is None: + if getattr(storage_type, "tz", None) is not None: + storage_type = pyarrow.lib.timestamp(storage_type.unit) dt = np.dtype(storage_type.to_pandas_dtype()) out = ak.forms.NumpyForm( diff --git a/src/awkward/operations/ak_from_arrow.py b/src/awkward/operations/ak_from_arrow.py index 460f09f5e4..4e15638f79 100644 --- a/src/awkward/operations/ak_from_arrow.py +++ b/src/awkward/operations/ak_from_arrow.py @@ -40,6 +40,10 @@ def from_arrow( low-level #ak.forms.Form), even through Parquet, making Parquet a good way to save Awkward Arrays for later use. + Because Awkward Array uses NumPy's dtype system, its timestamp types cannot carry a + timezone. If the input Arrow data contains timestamps with a timezone, the timezone + is silently dropped and the values are read as timezone-naive timestamps. + See also #ak.to_arrow, #ak.to_arrow_table, #ak.from_parquet, #ak.from_arrow_schema. 
""" return _impl(array, generate_bitmasks, highlevel, behavior, attrs) diff --git a/src/awkward/operations/ak_from_arrow_schema.py b/src/awkward/operations/ak_from_arrow_schema.py index 29fa073b50..88b5a9133a 100644 --- a/src/awkward/operations/ak_from_arrow_schema.py +++ b/src/awkward/operations/ak_from_arrow_schema.py @@ -18,6 +18,10 @@ def from_arrow_schema(schema): Converts an Apache Arrow schema into an Awkward Form. + Because Awkward Array uses NumPy's dtype system, its timestamp types cannot carry a + timezone. If the schema contains timestamp types with a timezone, the timezone is + silently dropped and the resulting Form describes timezone-naive timestamps. + See also #ak.to_arrow, #ak.to_arrow_table, #ak.from_arrow, #ak.to_parquet, #ak.from_parquet. """ return _impl(schema) diff --git a/tests/test_3392_from_arrow_tz.py b/tests/test_3392_from_arrow_tz.py new file mode 100644 index 0000000000..9c2b11739d --- /dev/null +++ b/tests/test_3392_from_arrow_tz.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +import pytest + +import awkward as ak + +pa = pytest.importorskip("pyarrow") + + +def test_tz_is_dropped(): + """Timezone-aware Arrow timestamps must convert to timezone-naive datetime64.""" + data = pa.Table.from_arrays( + [ + pa.array( + [ + 1, + 2, + 3, + ], + type=pa.timestamp("ns", tz="UTC"), + ) + ], + names=["a"], + ) + array = ak.from_arrow(data) + form = ak.from_arrow_schema(data.schema) + assert "datetime64[ns]" in str(array.type) + assert "datetime64[ns]" in str(form.type)