Skip to content

Commit 644f232

Browse files
author
Lev Maximov
committed
mi_ and co_
1 parent cef3c63 commit 644f232

File tree

5 files changed

+215
-15
lines changed

5 files changed

+215
-15
lines changed

Diff for: changelog.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
0.3 added np.array support in pdi.find
22
0.4 rename level by position, 100% coverage
3-
0.5 set_level inplace=True by default, added minfo()
3+
0.5 set_level inplace=True by default, added minfo()
4+
0.6 mi_[] and co_[]

Diff for: pdi/__init__.py

+112-7
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pandas as pd
55
from typing import Hashable, Sequence
66

7-
__version__ = '0.5'
7+
__version__ = "0.6"
88

99
from pandas._typing import (
1010
AnyArrayLike,
@@ -79,6 +79,7 @@
7979
"join_levels",
8080
"split_level",
8181
"rename_level",
82+
"minfo",
8283
]
8384

8485

@@ -547,21 +548,34 @@ def join(dfs, on=None, how="left", suffixes=None):
547548

548549

549550
class Mi:
550-
def __init__(self, df):
551+
def __init__(self, df, drop_level=False):
551552
self.df = df
553+
self.drop_level = drop_level
552554

553555
def __repr__(self):
554556
return "Row indexer"
555557

556558
def __getitem__(self, args):
557-
return self.df.loc[args, :]
559+
res = self.df.loc[args, :]
560+
if self.drop_level:
561+
to_drop = []
562+
slices_present = False
563+
for i, arg in enumerate(args):
564+
if isinstance(arg, slice):
565+
slices_present = True
566+
elif pd.api.types.is_scalar(arg):
567+
to_drop.append(i)
568+
if slices_present and to_drop and len(to_drop) != self.df.index.nlevels:
569+
for level in reversed(to_drop):
570+
drop_level(res.index, level, inplace=True)
571+
return res
558572

559573
def __setitem__(self, k, v):
560574
self.df.loc[k, :] = v
561575

562576
def __call__(self, *args, **kwargs):
563577
levels, keys = tuple(kwargs.keys()), tuple(kwargs.values())
564-
return self.df.xs(keys, level=levels, drop_level=False)
578+
return self.df.xs(keys, level=levels, drop_level=self.drop_level)
565579

566580

567581
@property
@@ -601,22 +615,72 @@ def get_mi(self):
601615
return Mi(self)
602616

603617

618+
@property
619+
def get_mi_(self):
620+
"""
621+
Helps with indexing a MultiIndex in the rows (read and write access).
622+
Follows the same policy for keeping and removing the filtered levels as `.loc`.
623+
e.g. df.mi_[:, 'd', :] returns all rows that have 'd' in the second level:
624+
625+
>>> df
626+
A B
627+
k l m
628+
a d g 1 2
629+
b e h 3 4
630+
c d i 5 6
631+
632+
>>> df.mi_[:, 'd', :]
633+
A B
634+
k m
635+
a g 1 2
636+
c i 5 6
637+
638+
>>> df.mi_[:, 'd', :] = 0
639+
>>> df
640+
A B
641+
k l m
642+
a d g 0 0
643+
b e h 3 4
644+
c d i 0 0
645+
646+
Careful: once the result is created, it becomes a copy, so its changes
647+
are not propagated to the original dataframe.
648+
649+
Also you can use df.mi_(k='a'), which removes the selected levels from the result (df.mi(k='a') keeps them). Not writable.
650+
If you don't need some levels, you can drop them with pdi.drop_level()
651+
"""
652+
return Mi(self, drop_level=True)
653+
654+
604655
class Co:
605-
def __init__(self, df):
656+
def __init__(self, df, drop_level=False):
606657
self.df = df
658+
self.drop_level = drop_level
607659

608660
def __repr__(self):
609661
return "Column indexer"
610662

611663
def __getitem__(self, args):
612-
return self.df.loc[:, args]
664+
res = self.df.loc[:, args]
665+
if self.drop_level:
666+
to_drop = []
667+
slices_present = False
668+
for i, arg in enumerate(args):
669+
if isinstance(arg, slice):
670+
slices_present = True
671+
elif pd.api.types.is_scalar(arg):
672+
to_drop.append(i)
673+
if slices_present and to_drop and len(to_drop) != self.df.columns.nlevels:
674+
for level in reversed(to_drop):
675+
drop_level(res.columns, level, inplace=True)
676+
return res
613677

614678
def __setitem__(self, k, v):
615679
self.df.loc[:, k] = v
616680

617681
def __call__(self, *args, **kwargs):
618682
levels, keys = tuple(kwargs.keys()), tuple(kwargs.values())
619-
return self.df.xs(keys, level=levels, drop_level=False, axis=1)
683+
return self.df.xs(keys, level=levels, drop_level=self.drop_level, axis=1)
620684

621685

622686
@property
@@ -657,10 +721,51 @@ def get_co(self):
657721
return Co(self)
658722

659723

724+
@property
725+
def get_co_(self):
726+
"""
727+
Helps with indexing a MultiIndex in the columns (read and write access).
727+
Follows the same policy for keeping and removing the filtered levels as `.loc`.
728+
e.g. df.co_[:, 'C', :] returns all columns that have 'C' in the second level:
730+
731+
>>> df
732+
K A B
733+
L C D C D
734+
M E F E F E F E F
735+
a 1 2 3 4 5 6 7 8
736+
b 9 10 11 12 13 14 15 16
737+
738+
>>> df.co_[:, 'C', :]
739+
K A B
740+
M E F E F
741+
a 1 2 5 6
742+
b 9 10 13 14
744+
745+
>>> df.co_[:, 'C', :] = 0
746+
>>> df
747+
K A B
748+
L C D C D
749+
M E F E F E F E F
750+
a 0 0 3 4 0 0 7 8
751+
b 0 0 11 12 0 0 15 16
752+
753+
Careful: once the result is created, it becomes a copy, so its changes
754+
are not propagated to the original dataframe.
755+
756+
Also you can use df.co_(K='A'), which removes the selected levels from the result (df.co(K='A') keeps them). Not writable.
757+
If you don't need some levels, you can drop them with pdi.drop_level()
758+
"""
759+
return Co(self, drop_level=True)
760+
761+
660762
def patch_mi_co():
661763
pd.DataFrame.mi = get_mi
764+
pd.DataFrame.mi_ = get_mi_
662765
pd.DataFrame.co = get_co
766+
pd.DataFrame.co_ = get_co_
663767
pd.Series.mi = get_mi
768+
pd.Series.mi_ = get_mi_
664769

665770

666771
def from_dict(d):

Diff for: pdi/levels.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -598,12 +598,16 @@ def rename_level(obj, mapping, level_id=None, axis=None, inplace=False):
598598

599599
def minfo(obj, prefix=''):
600600
if isinstance(obj, pd.DataFrame):
601+
print('Index:')
602+
minfo(obj.index, ' -')
601603
print('Columns:')
602-
minfo(obj.columns, ' - ')
604+
minfo(obj.columns, ' -')
605+
elif isinstance(obj, pd.Series):
603606
print('Index:')
604-
minfo(obj.index, ' - ')
607+
minfo(obj.index, ' -')
605608
else:
606609
for i in range(obj.nlevels):
607610
level = get_level(obj, i)
608-
print(prefix + f'{level.name}:', level.nunique(), 'values from', level[0], 'to', level[-1], end='')
611+
level_name = f' {level.name}:' if level.name is not None else ''
612+
print(prefix + level_name, level.nunique(), 'values from', level[0], 'to', level[-1], end='')
609613
print(f', dtype={level.dtype}')

Diff for: setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
setuptools.setup(
99
name='pandas-illustrated',
10-
version='0.5',
10+
version='0.6',
1111
author='Lev Maximov',
1212
author_email='[email protected]',
1313
url='https://github.com/axil/pandas-illustrated',

Diff for: tests/test_mi_co.py

+93-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pandas as pd
44

55
from pdi import patch_mi_co
6-
from pdi.testing import gen_df, vic
6+
from pdi.testing import gen_df, vic, vin, vicn
77
import pdi
88

99

@@ -98,7 +98,6 @@ def test_patch_series():
9898

9999
def test_assignments():
100100
df = gen_df(1, 3)
101-
df
102101
df.co[:, "C", :] = 0
103102
assert vic(df) == (
104103
[[0, 0, 3, 4, 0, 0, 7, 8], [0, 0, 11, 12, 0, 0, 15, 16]],
@@ -116,7 +115,6 @@ def test_assignments():
116115
)
117116

118117
df = gen_df(3, 1)
119-
df
120118
df.mi[:, "d", :] = 0
121119
assert vic(df) == (
122120
[[1, 2], [3, 4], [0, 0], [0, 0], [9, 10], [11, 12], [0, 0], [0, 0]],
@@ -139,5 +137,97 @@ def test_from_not_dict():
139137
pdi.from_dict("hmm")
140138

141139

140+
def test_mi_():
141+
patch_mi_co()
142+
df = gen_df(3, 1)
143+
144+
assert vicn(df.mi_["a"]) == (
145+
[[1, 2], [3, 4], [5, 6], [7, 8]],
146+
[("c", "e"), ("c", "f"), ("d", "e"), ("d", "f")],
147+
["A", "B"],
148+
[["l", "m"], ["K"]],
149+
)
150+
151+
assert vicn(df.mi_["a", "c"]) == (
152+
[[1, 2], [3, 4]],
153+
["e", "f"],
154+
["A", "B"],
155+
[["m"], ["K"]],
156+
)
157+
158+
assert isinstance(df.mi_["a", "c", "e"], pd.Series)
159+
160+
assert vin(df.mi_["a", "c", "e"]) == ([1, 2], ["A", "B"], ("a", "c", "e"))
161+
162+
assert vicn(df.mi_[:, "c", :]) == (
163+
[[1, 2], [3, 4], [9, 10], [11, 12]],
164+
[("a", "e"), ("a", "f"), ("b", "e"), ("b", "f")],
165+
["A", "B"],
166+
[["k", "m"], ["K"]],
167+
)
168+
169+
assert vicn(df.mi_[:, "c", "a":"z"]) == (
170+
[[1, 2], [3, 4], [9, 10], [11, 12]],
171+
[("a", "e"), ("a", "f"), ("b", "e"), ("b", "f")],
172+
["A", "B"],
173+
[["k", "m"], ["K"]],
174+
)
175+
176+
assert vicn(df.mi_[:, "c", "a":"e"]) == (
177+
[[1, 2], [9, 10]],
178+
[("a", "e"), ("b", "e")],
179+
["A", "B"],
180+
[["k", "m"], ["K"]],
181+
)
182+
183+
assert vicn(df.mi_[:, :, :]) == vicn(df)
184+
185+
186+
def test_co_():
187+
patch_mi_co()
188+
df = gen_df(1, 3)
189+
190+
assert vicn(df.co_["A"]) == (
191+
[[1, 2, 3, 4], [9, 10, 11, 12]],
192+
["a", "b"],
193+
[("C", "E"), ("C", "F"), ("D", "E"), ("D", "F")],
194+
[["k"], ["L", "M"]],
195+
)
196+
197+
assert vicn(df.co_["A", "C"]) == (
198+
[[1, 2], [9, 10]],
199+
["a", "b"],
200+
["E", "F"],
201+
[["k"], ["M"]],
202+
)
203+
204+
assert isinstance(df.co_["A", "C", "E"], pd.Series)
205+
206+
assert vin(df.co_["A", "C", "E"]) == ([1, 9], ["a", "b"], ("A", "C", "E"))
207+
208+
assert vicn(df.co_[:, "C", :]) == (
209+
[[1, 2, 5, 6], [9, 10, 13, 14]],
210+
["a", "b"],
211+
[("A", "E"), ("A", "F"), ("B", "E"), ("B", "F")],
212+
[["k"], ["K", "M"]],
213+
)
214+
215+
assert vicn(df.co_[:, "C", "A":"Z"]) == (
216+
[[1, 2, 5, 6], [9, 10, 13, 14]],
217+
["a", "b"],
218+
[("A", "E"), ("A", "F"), ("B", "E"), ("B", "F")],
219+
[["k"], ["K", "M"]],
220+
)
221+
222+
assert vicn(df.co_[:, "C", "A":"E"]) == (
223+
[[1, 5], [9, 13]],
224+
["a", "b"],
225+
[("A", "E"), ("B", "E")],
226+
[["k"], ["K", "M"]],
227+
)
228+
229+
assert vicn(df.co_[:, :, :]) == vicn(df)
230+
231+
142232
if __name__ == "__main__":
143233
pytest.main(["-s", __file__]) # + '::test7'])

0 commit comments

Comments
 (0)