Skip to content

Commit ff86177

Browse files
authored
ENH: Reflect changes from numpy namespace refactor Part 3 (pandas-dev#54579)
* ENH: Reflect changes from numpy namespace refactor part 3 * ENH: Move to fixtures for dtype access
1 parent b6333e6 commit ff86177

File tree

102 files changed

+280
-283
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+280
-283
lines changed

asv_bench/benchmarks/algos/isin.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def setup(self, series_type, vals_type):
247247
elif series_type == "long":
248248
ser_vals = np.arange(N_many)
249249
elif series_type == "long_floats":
250-
ser_vals = np.arange(N_many, dtype=np.float_)
250+
ser_vals = np.arange(N_many, dtype=np.float64)
251251

252252
self.series = Series(ser_vals).astype(object)
253253

@@ -258,7 +258,7 @@ def setup(self, series_type, vals_type):
258258
elif vals_type == "long":
259259
values = np.arange(N_many)
260260
elif vals_type == "long_floats":
261-
values = np.arange(N_many, dtype=np.float_)
261+
values = np.arange(N_many, dtype=np.float64)
262262

263263
self.values = values.astype(object)
264264

doc/source/getting_started/comparison/comparison_with_sql.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ methods.
107107
.. ipython:: python
108108
109109
frame = pd.DataFrame(
110-
{"col1": ["A", "B", np.NaN, "C", "D"], "col2": ["F", np.NaN, "G", "H", "I"]}
110+
{"col1": ["A", "B", np.nan, "C", "D"], "col2": ["F", np.nan, "G", "H", "I"]}
111111
)
112112
frame
113113

doc/source/user_guide/enhancingperf.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ can be improved by passing an ``np.ndarray``.
183183
...: return s * dx
184184
...: cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b,
185185
...: np.ndarray col_N):
186-
...: assert (col_a.dtype == np.float_
187-
...: and col_b.dtype == np.float_ and col_N.dtype == np.int_)
186+
...: assert (col_a.dtype == np.float64
187+
...: and col_b.dtype == np.float64 and col_N.dtype == np.int_)
188188
...: cdef Py_ssize_t i, n = len(col_N)
189189
...: assert (len(col_a) == len(col_b) == n)
190190
...: cdef np.ndarray[double] res = np.empty(n)

doc/source/user_guide/gotchas.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ present in the more domain-specific statistical programming language `R
327327
``numpy.unsignedinteger`` | ``uint8, uint16, uint32, uint64``
328328
``numpy.object_`` | ``object_``
329329
``numpy.bool_`` | ``bool_``
330-
``numpy.character`` | ``string_, unicode_``
330+
``numpy.character`` | ``bytes_, str_``
331331

332332
The R language, by contrast, only has a handful of built-in data types:
333333
``integer``, ``numeric`` (floating-point), ``character``, and

doc/source/user_guide/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -4881,7 +4881,7 @@ unspecified columns of the given DataFrame. The argument ``selector``
48814881
defines which table is the selector table (which you can make queries from).
48824882
The argument ``dropna`` will drop rows from the input ``DataFrame`` to ensure
48834883
tables are synchronized. This means that if a row for one of the tables
4884-
being written to is entirely ``np.NaN``, that row will be dropped from all tables.
4884+
being written to is entirely ``np.nan``, that row will be dropped from all tables.
48854885

48864886
If ``dropna`` is False, **THE USER IS RESPONSIBLE FOR SYNCHRONIZING THE TABLES**.
48874887
Remember that entirely ``np.nan`` rows are not written to the HDFStore, so if

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ You must pass in the ``line_terminator`` explicitly, even in this case.
556556
557557
.. _whatsnew_0240.bug_fixes.nan_with_str_dtype:
558558

559-
Proper handling of ``np.NaN`` in a string data-typed column with the Python engine
559+
Proper handling of ``np.nan`` in a string data-typed column with the Python engine
560560
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
561561

562562
There was a bug in :func:`read_excel` and :func:`read_csv` with the Python

pandas/_libs/algos.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ from pandas._libs.util cimport get_nat
5959

6060
cdef:
6161
float64_t FP_ERR = 1e-13
62-
float64_t NaN = <float64_t>np.NaN
62+
float64_t NaN = <float64_t>np.nan
6363
int64_t NPY_NAT = get_nat()
6464

6565

pandas/_libs/groupby.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ from pandas._libs.missing cimport checknull
5252

5353
cdef int64_t NPY_NAT = util.get_nat()
5454

55-
cdef float64_t NaN = <float64_t>np.NaN
55+
cdef float64_t NaN = <float64_t>np.nan
5656

5757
cdef enum InterpolationEnumType:
5858
INTERPOLATION_LINEAR,

pandas/_libs/lib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ cdef:
144144
object oINT64_MIN = <int64_t>INT64_MIN
145145
object oUINT64_MAX = <uint64_t>UINT64_MAX
146146

147-
float64_t NaN = <float64_t>np.NaN
147+
float64_t NaN = <float64_t>np.nan
148148

149149
# python-visible
150150
i8max = <int64_t>INT64_MAX

pandas/_libs/tslibs/util.pxd

+2-2
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ cdef inline bint is_integer_object(object obj) noexcept nogil:
7575

7676
cdef inline bint is_float_object(object obj) noexcept nogil:
7777
"""
78-
Cython equivalent of `isinstance(val, (float, np.float_))`
78+
Cython equivalent of `isinstance(val, (float, np.float64))`
7979
8080
Parameters
8181
----------
@@ -91,7 +91,7 @@ cdef inline bint is_float_object(object obj) noexcept nogil:
9191

9292
cdef inline bint is_complex_object(object obj) noexcept nogil:
9393
"""
94-
Cython equivalent of `isinstance(val, (complex, np.complex_))`
94+
Cython equivalent of `isinstance(val, (complex, np.complex128))`
9595
9696
Parameters
9797
----------

pandas/_libs/window/aggregations.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ cdef:
5757
float32_t MAXfloat32 = np.inf
5858
float64_t MAXfloat64 = np.inf
5959

60-
float64_t NaN = <float64_t>np.NaN
60+
float64_t NaN = <float64_t>np.nan
6161

6262
cdef bint is_monotonic_increasing_start_end_bounds(
6363
ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end

pandas/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -777,7 +777,7 @@ def series_with_multilevel_index() -> Series:
777777
index = MultiIndex.from_tuples(tuples)
778778
data = np.random.default_rng(2).standard_normal(8)
779779
ser = Series(data, index=index)
780-
ser.iloc[3] = np.NaN
780+
ser.iloc[3] = np.nan
781781
return ser
782782

783783

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2109,7 +2109,7 @@ def _codes(self) -> np.ndarray:
21092109

21102110
def _box_func(self, i: int):
21112111
if i == -1:
2112-
return np.NaN
2112+
return np.nan
21132113
return self.categories[i]
21142114

21152115
def _unbox_scalar(self, key) -> int:

pandas/core/computation/ops.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -537,8 +537,8 @@ def __init__(self, lhs, rhs) -> None:
537537
)
538538

539539
# do not upcast float32s to float64 un-necessarily
540-
acceptable_dtypes = [np.float32, np.float_]
541-
_cast_inplace(com.flatten(self), acceptable_dtypes, np.float_)
540+
acceptable_dtypes = [np.float32, np.float64]
541+
_cast_inplace(com.flatten(self), acceptable_dtypes, np.float64)
542542

543543

544544
UNARY_OPS_SYMS = ("+", "-", "~", "not")

pandas/core/dtypes/cast.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
850850
dtype = np.dtype(np.float64)
851851

852852
elif is_complex(val):
853-
dtype = np.dtype(np.complex_)
853+
dtype = np.dtype(np.complex128)
854854

855855
if lib.is_period(val):
856856
dtype = PeriodDtype(freq=val.freq)

pandas/core/dtypes/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1351,7 +1351,7 @@ def is_complex_dtype(arr_or_dtype) -> bool:
13511351
False
13521352
>>> is_complex_dtype(int)
13531353
False
1354-
>>> is_complex_dtype(np.complex_)
1354+
>>> is_complex_dtype(np.complex128)
13551355
True
13561356
>>> is_complex_dtype(np.array(['a', 'b']))
13571357
False

pandas/core/generic.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -5307,7 +5307,7 @@ def reindex(
53075307
level : int or name
53085308
Broadcast across a level, matching Index values on the
53095309
passed MultiIndex level.
5310-
fill_value : scalar, default np.NaN
5310+
fill_value : scalar, default np.nan
53115311
Value to use for missing values. Defaults to NaN, but can be any
53125312
"compatible" value.
53135313
limit : int, default None
@@ -7376,7 +7376,7 @@ def ffill(
73767376
2 3.0 4.0 NaN 1.0
73777377
3 3.0 3.0 NaN 4.0
73787378
7379-
>>> ser = pd.Series([1, np.NaN, 2, 3])
7379+
>>> ser = pd.Series([1, np.nan, 2, 3])
73807380
>>> ser.ffill()
73817381
0 1.0
73827382
1 1.0
@@ -8375,7 +8375,7 @@ def isna(self) -> Self:
83758375
--------
83768376
Show which entries in a DataFrame are NA.
83778377
8378-
>>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
8378+
>>> df = pd.DataFrame(dict(age=[5, 6, np.nan],
83798379
... born=[pd.NaT, pd.Timestamp('1939-05-27'),
83808380
... pd.Timestamp('1940-04-25')],
83818381
... name=['Alfred', 'Batman', ''],
@@ -8394,7 +8394,7 @@ def isna(self) -> Self:
83948394
83958395
Show which entries in a Series are NA.
83968396
8397-
>>> ser = pd.Series([5, 6, np.NaN])
8397+
>>> ser = pd.Series([5, 6, np.nan])
83988398
>>> ser
83998399
0 5.0
84008400
1 6.0
@@ -8442,7 +8442,7 @@ def notna(self) -> Self:
84428442
--------
84438443
Show which entries in a DataFrame are not NA.
84448444
8445-
>>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
8445+
>>> df = pd.DataFrame(dict(age=[5, 6, np.nan],
84468446
... born=[pd.NaT, pd.Timestamp('1939-05-27'),
84478447
... pd.Timestamp('1940-04-25')],
84488448
... name=['Alfred', 'Batman', ''],
@@ -8461,7 +8461,7 @@ def notna(self) -> Self:
84618461
84628462
Show which entries in a Series are not NA.
84638463
8464-
>>> ser = pd.Series([5, 6, np.NaN])
8464+
>>> ser = pd.Series([5, 6, np.nan])
84658465
>>> ser
84668466
0 5.0
84678467
1 6.0
@@ -8628,7 +8628,7 @@ def clip(
86288628
86298629
Clips using specific lower threshold per column element, with missing values:
86308630
8631-
>>> t = pd.Series([2, -4, np.NaN, 6, 3])
8631+
>>> t = pd.Series([2, -4, np.nan, 6, 3])
86328632
>>> t
86338633
0 2.0
86348634
1 -4.0
@@ -9828,7 +9828,7 @@ def align(
98289828
copy : bool, default True
98299829
Always returns new objects. If copy=False and no reindexing is
98309830
required then original objects are returned.
9831-
fill_value : scalar, default np.NaN
9831+
fill_value : scalar, default np.nan
98329832
Value to use for missing values. Defaults to NaN, but can be any
98339833
"compatible" value.
98349834
method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None

pandas/core/groupby/groupby.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -5418,7 +5418,7 @@ def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT:
54185418
def _reindex_output(
54195419
self,
54205420
output: OutputFrameOrSeries,
5421-
fill_value: Scalar = np.NaN,
5421+
fill_value: Scalar = np.nan,
54225422
qs: npt.NDArray[np.float64] | None = None,
54235423
) -> OutputFrameOrSeries:
54245424
"""
@@ -5436,7 +5436,7 @@ def _reindex_output(
54365436
----------
54375437
output : Series or DataFrame
54385438
Object resulting from grouping and applying an operation.
5439-
fill_value : scalar, default np.NaN
5439+
fill_value : scalar, default np.nan
54405440
Value to use for unobserved categories if self.observed is False.
54415441
qs : np.ndarray[float64] or None, default None
54425442
quantile values, only relevant for quantile.

pandas/core/indexes/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2837,7 +2837,7 @@ def isna(self) -> npt.NDArray[np.bool_]:
28372837
Show which entries in a pandas.Index are NA. The result is an
28382838
array.
28392839
2840-
>>> idx = pd.Index([5.2, 6.0, np.NaN])
2840+
>>> idx = pd.Index([5.2, 6.0, np.nan])
28412841
>>> idx
28422842
Index([5.2, 6.0, nan], dtype='float64')
28432843
>>> idx.isna()
@@ -2893,7 +2893,7 @@ def notna(self) -> npt.NDArray[np.bool_]:
28932893
Show which entries in an Index are not NA. The result is an
28942894
array.
28952895
2896-
>>> idx = pd.Index([5.2, 6.0, np.NaN])
2896+
>>> idx = pd.Index([5.2, 6.0, np.nan])
28972897
>>> idx
28982898
Index([5.2, 6.0, nan], dtype='float64')
28992899
>>> idx.notna()

pandas/core/indexes/interval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _get_next_label(label):
125125
elif is_integer_dtype(dtype):
126126
return label + 1
127127
elif is_float_dtype(dtype):
128-
return np.nextafter(label, np.infty)
128+
return np.nextafter(label, np.inf)
129129
else:
130130
raise TypeError(f"cannot determine next label for type {repr(type(label))}")
131131

@@ -142,7 +142,7 @@ def _get_prev_label(label):
142142
elif is_integer_dtype(dtype):
143143
return label - 1
144144
elif is_float_dtype(dtype):
145-
return np.nextafter(label, -np.infty)
145+
return np.nextafter(label, -np.inf)
146146
else:
147147
raise TypeError(f"cannot determine next label for type {repr(type(label))}")
148148

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5586,7 +5586,7 @@ def dropna(
55865586
Empty strings are not considered NA values. ``None`` is considered an
55875587
NA value.
55885588
5589-
>>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
5589+
>>> ser = pd.Series([np.nan, 2, pd.NaT, '', None, 'I stay'])
55905590
>>> ser
55915591
0 NaN
55925592
1 2

pandas/core/strings/accessor.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1215,7 +1215,7 @@ def contains(
12151215
--------
12161216
Returning a Series of booleans using only a literal pattern.
12171217
1218-
>>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN])
1218+
>>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan])
12191219
>>> s1.str.contains('og', regex=False)
12201220
0 False
12211221
1 True
@@ -1226,7 +1226,7 @@ def contains(
12261226
12271227
Returning an Index of booleans using only a literal pattern.
12281228
1229-
>>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN])
1229+
>>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.nan])
12301230
>>> ind.str.contains('23', regex=False)
12311231
Index([False, False, False, True, nan], dtype='object')
12321232
@@ -3500,7 +3500,7 @@ def str_extractall(arr, pat, flags: int = 0) -> DataFrame:
35003500
for match_i, match_tuple in enumerate(regex.findall(subject)):
35013501
if isinstance(match_tuple, str):
35023502
match_tuple = (match_tuple,)
3503-
na_tuple = [np.NaN if group == "" else group for group in match_tuple]
3503+
na_tuple = [np.nan if group == "" else group for group in match_tuple]
35043504
match_list.append(na_tuple)
35053505
result_key = tuple(subject_key + (match_i,))
35063506
index_list.append(result_key)

pandas/io/formats/format.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1715,7 +1715,7 @@ def format_percentiles(
17151715
"""
17161716
percentiles = np.asarray(percentiles)
17171717

1718-
# It checks for np.NaN as well
1718+
# It checks for np.nan as well
17191719
if (
17201720
not is_numeric_dtype(percentiles)
17211721
or not np.all(percentiles >= 0)

pandas/tests/apply/test_frame_apply.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -637,15 +637,15 @@ def test_apply_with_byte_string():
637637
tm.assert_frame_equal(result, expected)
638638

639639

640-
@pytest.mark.parametrize("val", ["asd", 12, None, np.NaN])
640+
@pytest.mark.parametrize("val", ["asd", 12, None, np.nan])
641641
def test_apply_category_equalness(val):
642642
# Check if categorical comparisons on apply, GH 21239
643-
df_values = ["asd", None, 12, "asd", "cde", np.NaN]
643+
df_values = ["asd", None, 12, "asd", "cde", np.nan]
644644
df = DataFrame({"a": df_values}, dtype="category")
645645

646646
result = df.a.apply(lambda x: x == val)
647647
expected = Series(
648-
[np.NaN if pd.isnull(x) else x == val for x in df_values], name="a"
648+
[np.nan if pd.isnull(x) else x == val for x in df_values], name="a"
649649
)
650650
tm.assert_series_equal(result, expected)
651651

pandas/tests/apply/test_series_apply.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def test_apply_categorical(by_row):
242242
assert result.dtype == object
243243

244244

245-
@pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]])
245+
@pytest.mark.parametrize("series", [["1-1", "1-1", np.nan], ["1-1", "1-2", np.nan]])
246246
def test_apply_categorical_with_nan_values(series, by_row):
247247
# GH 20714 bug fixed in: GH 24275
248248
s = Series(series, dtype="category")
@@ -254,7 +254,7 @@ def test_apply_categorical_with_nan_values(series, by_row):
254254

255255
result = s.apply(lambda x: x.split("-")[0], by_row=by_row)
256256
result = result.astype(object)
257-
expected = Series(["1", "1", np.NaN], dtype="category")
257+
expected = Series(["1", "1", np.nan], dtype="category")
258258
expected = expected.astype(object)
259259
tm.assert_series_equal(result, expected)
260260

pandas/tests/arrays/categorical/test_analytics.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ def test_min_max_reduce(self):
7373
@pytest.mark.parametrize(
7474
"categories,expected",
7575
[
76-
(list("ABC"), np.NaN),
77-
([1, 2, 3], np.NaN),
76+
(list("ABC"), np.nan),
77+
([1, 2, 3], np.nan),
7878
pytest.param(
7979
Series(date_range("2020-01-01", periods=3), dtype="category"),
8080
NaT,

pandas/tests/arrays/interval/test_interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def test_set_na(self, left_right_dtypes):
129129
# GH#45484 TypeError, not ValueError, matches what we get with
130130
# non-NA un-holdable value.
131131
with pytest.raises(TypeError, match=msg):
132-
result[0] = np.NaN
132+
result[0] = np.nan
133133
return
134134

135135
result[0] = np.nan

0 commit comments

Comments
 (0)