diff --git a/LICENSE b/LICENSE index a0cc369f725b8..d4e49a140f1cb 100644 --- a/LICENSE +++ b/LICENSE @@ -3,7 +3,7 @@ BSD 3-Clause License Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team All rights reserved. -Copyright (c) 2011-2021, Open source contributors. +Copyright (c) 2011-2022, Open source contributors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0dfe3345b38e6..0ad8273e0767a 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -378,7 +378,11 @@ def makePeriodIndex(k: int = 10, name=None, **kwargs) -> PeriodIndex: def makeMultiIndex(k=10, names=None, **kwargs): - return MultiIndex.from_product((("foo", "bar"), (1, 2)), names=names, **kwargs) + N = (k // 2) + 1 + rng = range(N) + mi = MultiIndex.from_product([("foo", "bar"), rng], names=names, **kwargs) + assert len(mi) >= k # GH#38795 + return mi[:k] _names = [ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d51f878a1c85a..27b4539488e40 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -200,7 +200,7 @@ def maybe_box_native(value: Scalar) -> Scalar: return value -def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: +def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: """ Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting into a numpy array. Failing to unbox would risk dropping nanoseconds. @@ -851,7 +851,7 @@ def infer_dtype_from_array( return arr.dtype, arr -def maybe_infer_dtype_type(element): +def _maybe_infer_dtype_type(element): """ Try to infer an object's dtype, for use in arithmetic ops. @@ -873,7 +873,7 @@ def maybe_infer_dtype_type(element): -------- >>> from collections import namedtuple >>> Foo = namedtuple("Foo", "dtype") - >>> maybe_infer_dtype_type(Foo(np.dtype("i8"))) + >>> _maybe_infer_dtype_type(Foo(np.dtype("i8"))) dtype('int64') """ tipo = None @@ -1495,7 +1495,7 @@ def construct_2d_arraylike_from_scalar( shape = (length, width) if dtype.kind in ["m", "M"]: - value = maybe_unbox_datetimelike_tz_deprecation(value, dtype) + value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype) elif dtype == _dtype_obj: if isinstance(value, (np.timedelta64, np.datetime64)): # calling np.array below would cast to pytimedelta/pydatetime @@ -1558,7 +1558,7 @@ def construct_1d_arraylike_from_scalar( if not isna(value): value = ensure_str(value) elif dtype.kind in ["M", "m"]: - value = maybe_unbox_datetimelike_tz_deprecation(value, dtype) + value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype) subarr = np.empty(length, dtype=dtype) subarr.fill(value) @@ -1566,9 +1566,9 @@ def construct_1d_arraylike_from_scalar( return subarr -def maybe_unbox_datetimelike_tz_deprecation(value: Scalar, dtype: DtypeObj): +def _maybe_unbox_datetimelike_tz_deprecation(value: Scalar, dtype: DtypeObj): """ - Wrap maybe_unbox_datetimelike with a check for a timezone-aware Timestamp + Wrap _maybe_unbox_datetimelike with a check for a timezone-aware Timestamp along with a timezone-naive datetime64 dtype, which is deprecated. """ # Caller is responsible for checking dtype.kind in ["m", "M"] @@ -1578,7 +1578,7 @@ def maybe_unbox_datetimelike_tz_deprecation(value: Scalar, dtype: DtypeObj): value = maybe_box_datetimelike(value, dtype) try: - value = maybe_unbox_datetimelike(value, dtype) + value = _maybe_unbox_datetimelike(value, dtype) except TypeError: if ( isinstance(value, Timestamp) @@ -1598,7 +1598,7 @@ def maybe_unbox_datetimelike_tz_deprecation(value: Scalar, dtype: DtypeObj): stacklevel=find_stack_level(), ) new_value = value.tz_localize(None) - return maybe_unbox_datetimelike(new_value, dtype) + return _maybe_unbox_datetimelike(new_value, dtype) else: raise return value @@ -1748,7 +1748,7 @@ def convert_scalar_for_putitemlike(scalar: Scalar, dtype: np.dtype) -> Scalar: """ if dtype.kind in ["m", "M"]: scalar = maybe_box_datetimelike(scalar, dtype) - return maybe_unbox_datetimelike(scalar, dtype) + return _maybe_unbox_datetimelike(scalar, dtype) else: _validate_numeric_casting(dtype, scalar) return scalar @@ -1849,11 +1849,9 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if dtype == _dtype_obj: return element - tipo = maybe_infer_dtype_type(element) + tipo = _maybe_infer_dtype_type(element) if dtype.kind in ["i", "u"]: - info = np.iinfo(dtype) - if isinstance(element, range): if _dtype_can_hold_range(element, dtype): return element @@ -1863,6 +1861,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # e.g. test_setitem_series_int8 if we have a python int 1 # tipo may be np.int32, despite the fact that it will fit # in smaller int dtypes. + info = np.iinfo(dtype) if info.min <= element <= info.max: return element raise ValueError @@ -1964,7 +1963,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: """ - maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), + _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), but in many cases a range can be held by a smaller integer dtype. Check if this is one of those cases. """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0ced984bbc568..ec3b9261dd1f5 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -352,7 +352,7 @@ def getitem_block_columns( return type(self)(new_values, new_mgr_locs, self.ndim) - # NB: this cannot be made cache_readonly because in libreduction we pin + # NB: this cannot be made cache_readonly because in mgr.set_values we pin # new .values that can have different shape GH#42631 @property def shape(self) -> Shape: diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 400846cc4ca1d..1162748370f3f 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -82,7 +82,7 @@ def test_where_raises(self, other): ser.where([True, False, True], other=other) def test_shift(self): - # https://github.com/pandas-dev/pandas/issues/31495 + # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502 a = IntervalArray.from_breaks([1, 2, 3]) result = a.shift() # int -> float @@ -90,6 +90,7 @@ def test_shift(self): tm.assert_interval_array_equal(result, expected) def test_shift_datetime(self): + # GH#31502, GH#31504 a = IntervalArray.from_breaks(date_range("2000", periods=4)) result = a.shift(2) expected = a.take([-1, -1, 0], allow_fill=True) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c96e2fb49e397..f5a18037d97eb 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -382,6 +382,7 @@ def test_shift_empty_array(self, data, periods): self.assert_extension_array_equal(result, expected) def test_shift_zero_copies(self, data): + # GH#31502 result = data.shift(0) assert result is not data diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 2ee777cf53d29..90b348f401437 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -758,7 +758,7 @@ def test_where_try_cast_deprecated(frame_or_series): "the deprecated path, and also up-cast to int64 instead of int32 " "(for now)." ) -def test_where_int_downcasting_deprecated(using_array_manager, request): +def test_where_int_downcasting_deprecated(using_array_manager): # GH#44597 arr = np.arange(6).astype(np.int16).reshape(3, 2) df = DataFrame(arr) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index b2ea989f35e8c..7705ec9050aed 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -489,36 +489,20 @@ def test_where_series_bool(self, fill_val, exp_dtype): ], ids=["datetime64", "datetime64tz"], ) - def test_where_series_datetime64(self, fill_val, exp_dtype): - klass = pd.Series - - obj = klass(pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None)) - assert obj.dtype == "datetime64[ns]" - self._run_test(obj, fill_val, klass, exp_dtype) + def test_where_datetime64(self, index_or_series, fill_val, exp_dtype): + klass = index_or_series - @pytest.mark.parametrize( - "fill_val", - [ - pd.Timestamp("2012-01-01"), - pd.Timestamp("2012-01-01").to_datetime64(), - pd.Timestamp("2012-01-01").to_pydatetime(), - ], - ) - def test_where_index_datetime(self, fill_val): - exp_dtype = "datetime64[ns]" - klass = pd.Index obj = klass(pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None)) assert obj.dtype == "datetime64[ns]" - self._run_test(obj, fill_val, klass, exp_dtype) - - def test_where_index_datetime64tz(self): - klass = pd.Index - fill_val = pd.Timestamp("2012-01-01", tz="US/Eastern") - exp_dtype = object - obj = klass(pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None)) - assert obj.dtype == "datetime64[ns]" - self._run_test(obj, fill_val, klass, exp_dtype) + fv = fill_val + # do the check with each of the available datetime scalars + if exp_dtype == "datetime64[ns]": + for scalar in [fv, fv.to_pydatetime(), fv.to_datetime64()]: + self._run_test(obj, scalar, klass, exp_dtype) + else: + for scalar in [fv, fv.to_pydatetime()]: + self._run_test(obj, fill_val, klass, exp_dtype) @pytest.mark.xfail(reason="Test not implemented") def test_where_index_complex128(self): @@ -640,11 +624,16 @@ def test_fillna_float64(self, index_or_series, fill_val, fill_dtype): (True, object), ], ) - def test_fillna_series_complex128(self, fill_val, fill_dtype): - obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j]) + def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype, request): + klass = index_or_series + if klass is pd.Index: + mark = pytest.mark.xfail(reason="No Index[complex]") + request.node.add_marker(mark) + + obj = klass([1 + 1j, np.nan, 3 + 3j, 4 + 4j], dtype=np.complex128) assert obj.dtype == np.complex128 - exp = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j]) + exp = klass([1 + 1j, fill_val, 3 + 3j, 4 + 4j], dtype=fill_dtype) self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) @pytest.mark.parametrize( diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 3671ddee60b6f..b82fa1b7f23c1 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -321,7 +321,7 @@ def test_frozenset_index(): assert s[idx1] == 3 -class TestDepreactedIndexers: +class TestDeprecatedIndexers: @pytest.mark.parametrize("key", [{1}, {1: 1}]) def test_getitem_dict_and_set_deprecated(self, key): # GH#42825 diff --git a/pandas/tests/series/methods/test_drop.py b/pandas/tests/series/methods/test_drop.py index a566f8f62d72e..59a60019bb1c1 100644 --- a/pandas/tests/series/methods/test_drop.py +++ b/pandas/tests/series/methods/test_drop.py @@ -21,8 +21,8 @@ def test_drop_unique_and_non_unique_index( data, index, axis, drop_labels, expected_data, expected_index ): - s = Series(data=data, index=index) - result = s.drop(drop_labels, axis=axis) + ser = Series(data=data, index=index) + result = ser.drop(drop_labels, axis=axis) expected = Series(data=expected_data, index=expected_index) tm.assert_series_equal(result, expected) @@ -45,17 +45,17 @@ def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error def test_drop_with_ignore_errors(): # errors='ignore' - s = Series(range(3), index=list("abc")) - result = s.drop("bc", errors="ignore") - tm.assert_series_equal(result, s) - result = s.drop(["a", "d"], errors="ignore") - expected = s.iloc[1:] + ser = Series(range(3), index=list("abc")) + result = ser.drop("bc", errors="ignore") + tm.assert_series_equal(result, ser) + result = ser.drop(["a", "d"], errors="ignore") + expected = ser.iloc[1:] tm.assert_series_equal(result, expected) # GH 8522 - s = Series([2, 3], index=[True, False]) - assert s.index.is_object() - result = s.drop(True) + ser = Series([2, 3], index=[True, False]) + assert ser.index.is_object() + result = ser.drop(True) expected = Series([3], index=[False]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/util/test_make_objects.py b/pandas/tests/util/test_make_objects.py new file mode 100644 index 0000000000000..6f5f2d3924e1f --- /dev/null +++ b/pandas/tests/util/test_make_objects.py @@ -0,0 +1,15 @@ +""" +Tests for tm.makeFoo functions. +""" + + +import numpy as np + +import pandas._testing as tm + + +def test_make_multiindex_respects_k(): + # GH#38795 respect 'k' arg + N = np.random.randint(0, 100) + mi = tm.makeMultiIndex(k=N) + assert len(mi) == N