@@ -48,23 +48,23 @@ class _Unstacker(object):
48
48
>>> import pandas as pd
49
49
>>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
50
50
... ('two', 'a'), ('two', 'b')])
51
- >>> s = pd.Series(np.arange(1.0 , 5.0 ), index=index)
51
+ >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)
52
52
>>> s
53
- one a 1
54
- b 2
55
- two a 3
56
- b 4
57
- dtype: float64
53
+ one a 1
54
+ b 2
55
+ two a 3
56
+ b 4
57
+ dtype: int64
58
58
59
59
>>> s.unstack(level=-1)
60
- a b
60
+ a b
61
61
one 1 2
62
62
two 3 4
63
63
64
64
>>> s.unstack(level=0)
65
65
one two
66
- a 1 2
67
- b 3 4
66
+ a 1 3
67
+ b 2 4
68
68
69
69
Returns
70
70
-------
@@ -789,18 +789,18 @@ def lreshape(data, groups, dropna=True, label=None):
789
789
>>> import pandas as pd
790
790
>>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
791
791
... 'team': ['Red Sox', 'Yankees'],
792
- ... 'year1': [2007, 2008 ], 'year2': [2008, 2008]})
792
+ ... 'year1': [2007, 2007], 'year2': [2008, 2008]})
793
793
>>> data
794
794
hr1 hr2 team year1 year2
795
795
0 514 545 Red Sox 2007 2008
796
796
1 573 526 Yankees 2007 2008
797
797
798
798
>>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
799
- team hr year
800
- 0 Red Sox 514 2007
801
- 1 Yankees 573 2007
802
- 2 Red Sox 545 2008
803
- 3 Yankees 526 2008
799
+ team year hr
800
+ 0 Red Sox 2007 514
801
+ 1 Yankees 2007 573
802
+ 2 Red Sox 2008 545
803
+ 3 Yankees 2008 526
804
804
805
805
Returns
806
806
-------
@@ -905,11 +905,12 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
905
905
... })
906
906
>>> df["id"] = df.index
907
907
>>> df
908
- A1970 A1980 B1970 B1980 X id
908
+ A1970 A1980 B1970 B1980 X id
909
909
0 a d 2.5 3.2 -1.085631 0
910
910
1 b e 1.2 1.3 0.997345 1
911
911
2 c f 0.7 0.1 0.282978 2
912
912
>>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
913
+ ... # doctest: +NORMALIZE_WHITESPACE
913
914
X A B
914
915
id year
915
916
0 1970 -1.085631 a 2.5
@@ -940,6 +941,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
940
941
8 3 3 2.1 2.9
941
942
>>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
942
943
>>> l
944
+ ... # doctest: +NORMALIZE_WHITESPACE
943
945
ht
944
946
famid birth age
945
947
1 1 1 2.8
@@ -979,41 +981,44 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
979
981
980
982
Unwieldy column names are also handled
981
983
984
+ >>> np.random.seed(0)
982
985
>>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3),
983
986
... 'A(quarterly)-2011': np.random.rand(3),
984
987
... 'B(quarterly)-2010': np.random.rand(3),
985
988
... 'B(quarterly)-2011': np.random.rand(3),
986
989
... 'X' : np.random.randint(3, size=3)})
987
990
>>> df['id'] = df.index
988
- >>> df
989
- A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011
990
- 0 0.531828 0.724455 0.322959 0.293714
991
- 1 0.634401 0.611024 0.361789 0.630976
992
- 2 0.849432 0.722443 0.228263 0.092105
993
- \
991
+ >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
992
+ A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ...
993
+ 0 0.548814 0.544883 0.437587 ...
994
+ 1 0.715189 0.423655 0.891773 ...
995
+ 2 0.602763 0.645894 0.963663 ...
994
996
X id
995
997
0 0 0
996
998
1 1 1
997
- 2 2 2
998
- >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'],
999
- i='id', j='year', sep='-')
1000
- X A(quarterly) B(quarterly)
999
+ 2 1 2
1000
+
1001
+ >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id',
1002
+ ... j='year', sep='-')
1003
+ ... # doctest: +NORMALIZE_WHITESPACE
1004
+ X A(quarterly) B(quarterly)
1001
1005
id year
1002
- 0 2010 0 0.531828 0.322959
1003
- 1 2010 2 0.634401 0.361789
1004
- 2 2010 2 0.849432 0.228263
1005
- 0 2011 0 0.724455 0.293714
1006
- 1 2011 2 0.611024 0.630976
1007
- 2 2011 2 0.722443 0.092105
1006
+ 0 2010 0 0.548814 0.437587
1007
+ 1 2010 1 0.715189 0.891773
1008
+ 2 2010 1 0.602763 0.963663
1009
+ 0 2011 0 0.544883 0.383442
1010
+ 1 2011 1 0.423655 0.791725
1011
+ 2 2011 1 0.645894 0.528895
1008
1012
1009
1013
If we have many columns, we could also use a regex to find our
1010
1014
stubnames and pass that list on to wide_to_long
1011
1015
1012
- >>> stubnames = set([match[0] for match in
1013
- df.columns.str.findall('[A-B]\(.*\)').values
1014
- if match != [] ])
1016
+ >>> stubnames = sorted(
1017
+ ... set([match[0] for match in df.columns.str.findall(
1018
+ ... r'[A-B]\(.*\)').values if match != [] ])
1019
+ ... )
1015
1020
>>> list(stubnames)
1016
- ['B (quarterly)', 'A (quarterly)']
1021
+ ['A(quarterly)', 'B(quarterly)']
1017
1022
1018
1023
Notes
1019
1024
-----
@@ -1133,7 +1138,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
1133
1138
2 0 0 1
1134
1139
1135
1140
>>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
1136
- 'C': [1, 2, 3]})
1141
+ ... 'C': [1, 2, 3]})
1137
1142
1138
1143
>>> pd.get_dummies(df, prefix=['col1', 'col2'])
1139
1144
C col1_a col1_b col2_a col2_b col2_c
@@ -1149,7 +1154,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
1149
1154
3 1 0 0
1150
1155
4 1 0 0
1151
1156
1152
- >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True))
1157
+ >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
1153
1158
b c
1154
1159
0 0 0
1155
1160
1 1 0
0 commit comments