Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,7 @@ Conversion
- Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`)
- Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
- Bug in :meth:`to_datetime` and :meth:`to_timedelta` with input ``None`` returning ``None`` instead of ``NaT``, inconsistent with other conversion methods (:issue:`23055`)
- Bug in :meth:`ArrowDtype._get_common_dtype` and :meth:`ExtensionDtype._get_common_dtype` when using ``date32[pyarrow]`` and ``date64[pyarrow]`` types (:issue:`62343`)

Strings
^^^^^^^
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,11 @@ def pandas_dtype(dtype) -> DtypeObj:
result = result()
return result

# try a pyarrow dtype
from pandas.core.dtypes.dtypes import ArrowDtype
if isinstance(dtype, ArrowDtype):
return ArrowDtype(dtype)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we ever get here? Seems like these cases would be caught above

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, removed this


# try a numpy dtype
# raise a consistent TypeError if failed
try:
Expand Down
18 changes: 18 additions & 0 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2277,6 +2277,12 @@ def name(self) -> str: # type: ignore[override]
@cache_readonly
def numpy_dtype(self) -> np.dtype:
"""Return an instance of the related numpy dtype"""
if pa.types.is_date32(self.pyarrow_dtype) or pa.types.is_date64(
self.pyarrow_dtype
):
# date32 and date64 are pyarrow date types (not timestamps) but do not have a
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick they are not "pyarrow timestamps"

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed comment and also replaced code

# corresponding numpy dtype.
return np.dtype(object)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

np.datetime64[D] might be more reasonable?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replaced date32 with np.datetime64[D], good catch!

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel
Should date64 also be cast to np.datetime64[D], or should it be kept as a timestamp[ms]?

if pa.types.is_timestamp(self.pyarrow_dtype):
# pa.timestamp(unit).to_pandas_dtype() returns ns units
# regardless of the pyarrow timestamp units.
Expand Down Expand Up @@ -2453,6 +2459,18 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:

null_dtype = type(self)(pa.null())

# Cover cases where numpy does not have a corresponding dtype, but
# only one non-null dtype is received, or all dtypes are null.
single_dtype = {
dtype
for dtype in dtypes
if dtype != null_dtype
}
if len(single_dtype) == 0:
return null_dtype
if len(single_dtype) == 1:
return single_dtype.pop()

new_dtype = find_common_type(
[
dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/dtypes/cast/test_find_common_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,19 @@ def test_interval_dtype_with_categorical(dtype):

result = find_common_type([dtype, cat.dtype])
assert result == dtype


@pytest.mark.parametrize(
    "dtypes,expected",
    [
        (
            ["date32[pyarrow]", "null[pyarrow]"],
            "date32[day][pyarrow]",
        ),
    ],
)
def test_pyarrow_dtypes(dtypes, expected):
    """Check common-type resolution for pyarrow dtypes lacking a numpy analogue."""
    # Turn the string aliases into dtype objects so the comparison below is
    # dtype-to-dtype rather than dtype-to-string.
    resolved = list(map(pandas_dtype, dtypes))
    want = pandas_dtype(expected)
    assert find_common_type(resolved) == want
Loading