Skip to content

Commit db419bf

Browse files
gfyoung authored and jreback committed
MAINT: Drop has_index_names input from read_excel (pandas-dev#16522)
1 parent fc4408b commit db419bf

File tree

4 files changed

+52
-57
lines changed

4 files changed

+52
-57
lines changed

doc/source/io.rst

-5
Original file line numberDiff line numberDiff line change
@@ -2739,11 +2739,6 @@ should be passed to ``index_col`` and ``header``
27392739
import os
27402740
os.remove('path_to_file.xlsx')
27412741
2742-
.. warning::
2743-
2744-
Excel files saved in version 0.16.2 or prior that had index names will still be able to be read in,
2745-
but the ``has_index_names`` argument must be specified as ``True``.
2746-
27472742
27482743
Parsing Specific Columns
27492744
++++++++++++++++++++++++

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ Deprecations
7070
Removal of prior version deprecations/changes
7171
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
7272

73+
- ``pd.read_excel()`` has dropped the ``has_index_names`` parameter (:issue:`10967`)
7374

7475

7576
.. _whatsnew_0210.performance:

pandas/io/excel.py

+13-27
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,6 @@
141141
convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
142142
data will be read in as floats: Excel stores all numbers as floats
143143
internally
144-
has_index_names : boolean, default None
145-
DEPRECATED: for version 0.17+ index names will be automatically
146-
inferred based on index_col. To read Excel output from 0.16.2 and
147-
prior that had saved index names, use True.
148144
149145
Returns
150146
-------
@@ -198,8 +194,8 @@ def get_writer(engine_name):
198194
def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
199195
index_col=None, names=None, parse_cols=None, parse_dates=False,
200196
date_parser=None, na_values=None, thousands=None,
201-
convert_float=True, has_index_names=None, converters=None,
202-
dtype=None, true_values=None, false_values=None, engine=None,
197+
convert_float=True, converters=None, dtype=None,
198+
true_values=None, false_values=None, engine=None,
203199
squeeze=False, **kwds):
204200

205201
# Can't use _deprecate_kwarg since sheetname=None has a special meaning
@@ -218,10 +214,9 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
218214
sheetname=sheet_name, header=header, skiprows=skiprows, names=names,
219215
index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates,
220216
date_parser=date_parser, na_values=na_values, thousands=thousands,
221-
convert_float=convert_float, has_index_names=has_index_names,
222-
skip_footer=skip_footer, converters=converters, dtype=dtype,
223-
true_values=true_values, false_values=false_values, squeeze=squeeze,
224-
**kwds)
217+
convert_float=convert_float, skip_footer=skip_footer,
218+
converters=converters, dtype=dtype, true_values=true_values,
219+
false_values=false_values, squeeze=squeeze, **kwds)
225220

226221

227222
class ExcelFile(object):
@@ -283,9 +278,8 @@ def __fspath__(self):
283278
def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
284279
names=None, index_col=None, parse_cols=None, parse_dates=False,
285280
date_parser=None, na_values=None, thousands=None,
286-
convert_float=True, has_index_names=None,
287-
converters=None, true_values=None, false_values=None,
288-
squeeze=False, **kwds):
281+
convert_float=True, converters=None, true_values=None,
282+
false_values=None, squeeze=False, **kwds):
289283
"""
290284
Parse specified sheet(s) into a DataFrame
291285
@@ -296,7 +290,6 @@ def parse(self, sheet_name=0, header=0, skiprows=None, skip_footer=0,
296290
return self._parse_excel(sheetname=sheet_name, header=header,
297291
skiprows=skiprows, names=names,
298292
index_col=index_col,
299-
has_index_names=has_index_names,
300293
parse_cols=parse_cols,
301294
parse_dates=parse_dates,
302295
date_parser=date_parser, na_values=na_values,
@@ -343,23 +336,17 @@ def _excel2num(x):
343336
return i in parse_cols
344337

345338
def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
346-
skip_footer=0, index_col=None, has_index_names=None,
347-
parse_cols=None, parse_dates=False, date_parser=None,
348-
na_values=None, thousands=None, convert_float=True,
349-
true_values=None, false_values=None, verbose=False,
350-
dtype=None, squeeze=False, **kwds):
339+
skip_footer=0, index_col=None, parse_cols=None,
340+
parse_dates=False, date_parser=None, na_values=None,
341+
thousands=None, convert_float=True, true_values=None,
342+
false_values=None, verbose=False, dtype=None,
343+
squeeze=False, **kwds):
351344

352345
skipfooter = kwds.pop('skipfooter', None)
353346
if skipfooter is not None:
354347
skip_footer = skipfooter
355348

356349
_validate_header_arg(header)
357-
if has_index_names is not None:
358-
warn("\nThe has_index_names argument is deprecated; index names "
359-
"will be automatically inferred based on index_col.\n"
360-
"This argmument is still necessary if reading Excel output "
361-
"from 0.16.2 or prior with index names.", FutureWarning,
362-
stacklevel=3)
363350

364351
if 'chunksize' in kwds:
365352
raise NotImplementedError("chunksize keyword of read_excel "
@@ -511,8 +498,7 @@ def _parse_cell(cell_contents, cell_typ):
511498
else:
512499
last = data[row][col]
513500

514-
if is_list_like(header) and len(header) > 1:
515-
has_index_names = True
501+
has_index_names = is_list_like(header) and len(header) > 1
516502

517503
# GH 12292 : error when read one empty column from excel file
518504
try:

pandas/tests/io/test_excel.py

+38-25
Original file line numberDiff line numberDiff line change
@@ -881,8 +881,42 @@ def test_excel_multindex_roundtrip(self):
881881
tm.assert_frame_equal(
882882
df, act, check_names=check_names)
883883

884-
def test_excel_oldindex_format(self):
885-
# GH 4679
884+
def test_excel_old_index_format(self):
885+
# see gh-4679
886+
filename = 'test_index_name_pre17' + self.ext
887+
in_file = os.path.join(self.dirpath, filename)
888+
889+
# We detect headers to determine if index names exist, so
890+
# the index-name row in the "names" version of the data will
891+
# now be interpreted as a data row that contains null values.
892+
data = np.array([[None, None, None, None, None],
893+
['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
894+
['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
895+
['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
896+
['R3C0', 'R3C1', 'R3C2', 'R3C3', 'R3C4'],
897+
['R4C0', 'R4C1', 'R4C2', 'R4C3', 'R4C4']])
898+
columns = ['C_l0_g0', 'C_l0_g1', 'C_l0_g2', 'C_l0_g3', 'C_l0_g4']
899+
mi = MultiIndex(levels=[['R0', 'R_l0_g0', 'R_l0_g1',
900+
'R_l0_g2', 'R_l0_g3', 'R_l0_g4'],
901+
['R1', 'R_l1_g0', 'R_l1_g1',
902+
'R_l1_g2', 'R_l1_g3', 'R_l1_g4']],
903+
labels=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]],
904+
names=[None, None])
905+
si = Index(['R0', 'R_l0_g0', 'R_l0_g1', 'R_l0_g2',
906+
'R_l0_g3', 'R_l0_g4'], name=None)
907+
908+
expected = pd.DataFrame(data, index=si, columns=columns)
909+
910+
actual = pd.read_excel(in_file, 'single_names')
911+
tm.assert_frame_equal(actual, expected)
912+
913+
expected.index = mi
914+
915+
actual = pd.read_excel(in_file, 'multi_names')
916+
tm.assert_frame_equal(actual, expected)
917+
918+
# The analogous versions of the "names" version data
919+
# where there are explicitly no names for the indices.
886920
data = np.array([['R0C0', 'R0C1', 'R0C2', 'R0C3', 'R0C4'],
887921
['R1C0', 'R1C1', 'R1C2', 'R1C3', 'R1C4'],
888922
['R2C0', 'R2C1', 'R2C2', 'R2C3', 'R2C4'],
@@ -894,40 +928,19 @@ def test_excel_oldindex_format(self):
894928
['R_l1_g0', 'R_l1_g1', 'R_l1_g2',
895929
'R_l1_g3', 'R_l1_g4']],
896930
labels=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]],
897-
names=['R0', 'R1'])
931+
names=[None, None])
898932
si = Index(['R_l0_g0', 'R_l0_g1', 'R_l0_g2',
899-
'R_l0_g3', 'R_l0_g4'], name='R0')
900-
901-
in_file = os.path.join(
902-
self.dirpath, 'test_index_name_pre17' + self.ext)
933+
'R_l0_g3', 'R_l0_g4'], name=None)
903934

904935
expected = pd.DataFrame(data, index=si, columns=columns)
905-
with tm.assert_produces_warning(FutureWarning):
906-
actual = pd.read_excel(
907-
in_file, 'single_names', has_index_names=True)
908-
tm.assert_frame_equal(actual, expected)
909936

910-
expected.index.name = None
911937
actual = pd.read_excel(in_file, 'single_no_names')
912938
tm.assert_frame_equal(actual, expected)
913-
with tm.assert_produces_warning(FutureWarning):
914-
actual = pd.read_excel(
915-
in_file, 'single_no_names', has_index_names=False)
916-
tm.assert_frame_equal(actual, expected)
917939

918940
expected.index = mi
919-
with tm.assert_produces_warning(FutureWarning):
920-
actual = pd.read_excel(
921-
in_file, 'multi_names', has_index_names=True)
922-
tm.assert_frame_equal(actual, expected)
923941

924-
expected.index.names = [None, None]
925942
actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1])
926943
tm.assert_frame_equal(actual, expected, check_names=False)
927-
with tm.assert_produces_warning(FutureWarning):
928-
actual = pd.read_excel(in_file, 'multi_no_names', index_col=[0, 1],
929-
has_index_names=False)
930-
tm.assert_frame_equal(actual, expected, check_names=False)
931944

932945
def test_read_excel_bool_header_arg(self):
933946
# GH 6114

0 commit comments

Comments
 (0)