
Commit d4f80b0

ProsperousHeart authored and jorisvandenbossche committed on May 31, 2017
DOC: correct docstring examples (pandas-dev#3439) (pandas-dev#16432)
1 parent fbdae2d, commit d4f80b0

File tree: 5 files changed, +108 −80 lines

 

ci/build_docs.sh (+9)

@@ -59,6 +59,15 @@ if [ "$DOC" ]; then
     git remote -v

     git push origin gh-pages -f
+
+    echo "Running doctests"
+    cd "$TRAVIS_BUILD_DIR"
+    pytest --doctest-modules \
+        pandas/core/reshape/concat.py \
+        pandas/core/reshape/pivot.py \
+        pandas/core/reshape/reshape.py \
+        pandas/core/reshape/tile.py
+
 fi

 exit 0
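
The new CI step runs the reshape docstrings through pytest's doctest collector. A roughly equivalent local check, driven from pytest's Python API instead of the shell, might look like the sketch below (an assumption-laden sketch: pytest is installed and the working directory is the root of a pandas checkout).

# A minimal local equivalent of the CI step above (assumption: run from the
# root of a pandas checkout with pytest installed).
import sys

import pytest

MODULES = [
    "pandas/core/reshape/concat.py",
    "pandas/core/reshape/pivot.py",
    "pandas/core/reshape/reshape.py",
    "pandas/core/reshape/tile.py",
]

# pytest.main accepts the same arguments as the command line and returns an
# exit code; 0 means every collected doctest passed.
sys.exit(pytest.main(["--doctest-modules", *MODULES]))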

pandas/core/reshape/concat.py (+2)

@@ -197,6 +197,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
        0
     a  2
     >>> pd.concat([df5, df6], verify_integrity=True)
+    Traceback (most recent call last):
+        ...
     ValueError: Indexes have overlapping values: ['a']
     """
     op = _Concatenator(objs, axis=axis, join_axes=join_axes,
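
The two added lines follow doctest's convention for expected exceptions: the block starts with the Traceback (most recent call last): header, the traceback body is elided with ..., and the final line names the exception type and message. A minimal self-contained sketch of the same pattern, using a hypothetical overlapping() helper rather than pandas itself:

import doctest


def overlapping():
    """Raise the same error the concat example documents.

    >>> overlapping()
    Traceback (most recent call last):
        ...
    ValueError: Indexes have overlapping values: ['a']
    """
    raise ValueError("Indexes have overlapping values: ['a']")


if __name__ == "__main__":
    # Fails loudly if the expected exception type or message does not match.
    doctest.testmod()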

pandas/core/reshape/pivot.py (+41 −31)

@@ -50,26 +50,36 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',

     Examples
     --------
+    >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
+    ...                          "bar", "bar", "bar", "bar"],
+    ...                    "B": ["one", "one", "one", "two", "two",
+    ...                          "one", "one", "two", "two"],
+    ...                    "C": ["small", "large", "large", "small",
+    ...                          "small", "large", "small", "small",
+    ...                          "large"],
+    ...                    "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
     >>> df
-       A   B      C  D
-    0  foo one  small  1
-    1  foo one  large  2
-    2  foo one  large  2
-    3  foo two  small  3
-    4  foo two  small  3
-    5  bar one  large  4
-    6  bar one  small  5
-    7  bar two  small  6
-    8  bar two  large  7
+         A    B      C  D
+    0  foo  one  small  1
+    1  foo  one  large  2
+    2  foo  one  large  2
+    3  foo  two  small  3
+    4  foo  two  small  3
+    5  bar  one  large  4
+    6  bar  one  small  5
+    7  bar  two  small  6
+    8  bar  two  large  7

     >>> table = pivot_table(df, values='D', index=['A', 'B'],
     ...                     columns=['C'], aggfunc=np.sum)
     >>> table
-              small  large
-    foo  one  1      4
-         two  6      NaN
-    bar  one  5      4
-         two  6      7
+    ... # doctest: +NORMALIZE_WHITESPACE
+    C        large  small
+    A   B
+    bar one    4.0    5.0
+        two    7.0    6.0
+    foo one    4.0    1.0
+        two    NaN    6.0

     Returns
     -------
@@ -445,27 +455,27 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,

     Examples
     --------
-    >>> a
-    array([foo, foo, foo, foo, bar, bar,
-           bar, bar, foo, foo, foo], dtype=object)
-    >>> b
-    array([one, one, one, two, one, one,
-           one, two, two, two, one], dtype=object)
-    >>> c
-    array([dull, dull, shiny, dull, dull, shiny,
-           shiny, dull, shiny, shiny, shiny], dtype=object)
-
-    >>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
-    b    one          two
-    c    dull  shiny  dull  shiny
+    >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar",
+    ...               "bar", "bar", "foo", "foo", "foo"], dtype=object)
+    >>> b = np.array(["one", "one", "one", "two", "one", "one",
+    ...               "one", "two", "two", "two", "one"], dtype=object)
+    >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny",
+    ...               "shiny", "dull", "shiny", "shiny", "shiny"],
+    ...              dtype=object)
+
+    >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c'])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    b   one        two
+    c   dull shiny dull shiny
     a
-    bar  1     2      1     0
-    foo  2     2      1     2
+    bar    1     2    1     0
+    foo    2     2    1     2

     >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'])
     >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f'])
     >>> crosstab(foo, bar)  # 'c' and 'f' are not represented in the data,
-                            # but they still will be counted in the output
+    ...                     # but they still will be counted in the output
+    ... # doctest: +SKIP
     col_0  d  e  f
     row_0
     a      1  0  0
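
The # doctest: +NORMALIZE_WHITESPACE directives added above keep the expected tables readable while letting the comparison collapse runs of whitespace. A standalone sketch of what the directive does, with plain strings instead of pandas output:

import doctest


def aligned():
    """Show how NORMALIZE_WHITESPACE relaxes the comparison.

    The actual output below has extra padding, but the directive collapses
    every run of whitespace to a single space before comparing, so the
    example passes.

    >>> print("b     one        two")  # doctest: +NORMALIZE_WHITESPACE
    b one two
    """


if __name__ == "__main__":
    doctest.testmod()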

pandas/core/reshape/reshape.py (+43 −38)

@@ -48,23 +48,23 @@ class _Unstacker(object):
     >>> import pandas as pd
     >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
     ...                                    ('two', 'a'), ('two', 'b')])
-    >>> s = pd.Series(np.arange(1.0, 5.0), index=index)
+    >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)
     >>> s
-    one  a   1
-         b   2
-    two  a   3
-         b   4
-    dtype: float64
+    one  a    1
+         b    2
+    two  a    3
+         b    4
+    dtype: int64

     >>> s.unstack(level=-1)
-         a   b
+         a  b
     one  1  2
     two  3  4

     >>> s.unstack(level=0)
        one  two
-    a  1   2
-    b  3   4
+    a    1    3
+    b    2    4

     Returns
     -------
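
The corrected unstack(level=0) values can be confirmed with a short script built from the same example (a local check, not part of the diff):

import numpy as np
import pandas as pd

# Rebuild the docstring's Series to confirm the corrected outputs:
# unstack(level=-1) pivots the inner level ('a'/'b') into columns, while
# unstack(level=0) pivots the outer level ('one'/'two'), which is why
# column 'a' holds 1 and 3 rather than 1 and 2.
index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
                                   ('two', 'a'), ('two', 'b')])
s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)

print(s.unstack(level=-1))
print(s.unstack(level=0))
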
@@ -789,18 +789,18 @@ def lreshape(data, groups, dropna=True, label=None):
     >>> import pandas as pd
     >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
     ...                      'team': ['Red Sox', 'Yankees'],
-    ...                      'year1': [2007, 2008], 'year2': [2008, 2008]})
+    ...                      'year1': [2007, 2007], 'year2': [2008, 2008]})
     >>> data
        hr1  hr2     team  year1  year2
     0  514  545  Red Sox   2007   2008
     1  573  526  Yankees   2007   2008

     >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
-          team   hr  year
-    0  Red Sox  514  2007
-    1  Yankees  573  2007
-    2  Red Sox  545  2008
-    3  Yankees  526  2008
+          team  year   hr
+    0  Red Sox  2007  514
+    1  Yankees  2007  573
+    2  Red Sox  2008  545
+    3  Yankees  2008  526

     Returns
     -------
@@ -905,11 +905,12 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
     ... })
     >>> df["id"] = df.index
     >>> df
-       A1970 A1980  B1970  B1980         X  id
+      A1970 A1980  B1970  B1980         X  id
     0     a     d    2.5    3.2 -1.085631   0
     1     b     e    1.2    1.3  0.997345   1
     2     c     f    0.7    0.1  0.282978   2
     >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
+    ... # doctest: +NORMALIZE_WHITESPACE
                     X  A    B
     id year
     0  1970 -1.085631  a  2.5
@@ -940,6 +941,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):
     8      3     3    2.1    2.9
     >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
     >>> l
+    ... # doctest: +NORMALIZE_WHITESPACE
                       ht
     famid birth age
     1     1     1    2.8
@@ -979,41 +981,44 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'):

     Less wieldy column names are also handled

+    >>> np.random.seed(0)
     >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3),
     ...                    'A(quarterly)-2011': np.random.rand(3),
     ...                    'B(quarterly)-2010': np.random.rand(3),
     ...                    'B(quarterly)-2011': np.random.rand(3),
     ...                    'X' : np.random.randint(3, size=3)})
     >>> df['id'] = df.index
-    >>> df
-       A(quarterly)-2010  A(quarterly)-2011  B(quarterly)-2010  B(quarterly)-2011
-    0           0.531828           0.724455           0.322959           0.293714
-    1           0.634401           0.611024           0.361789           0.630976
-    2           0.849432           0.722443           0.228263           0.092105
-    \
+    >>> df  # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
+       A(quarterly)-2010  A(quarterly)-2011  B(quarterly)-2010 ...
+    0           0.548814           0.544883           0.437587 ...
+    1           0.715189           0.423655           0.891773 ...
+    2           0.602763           0.645894           0.963663 ...
        X  id
     0  0   0
     1  1   1
-    2  2   2
-    >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'],
-                        i='id', j='year', sep='-')
-                 X  A(quarterly)  B(quarterly)
+    2  1   2
+
+    >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id',
+    ...                 j='year', sep='-')
+    ... # doctest: +NORMALIZE_WHITESPACE
+             X  A(quarterly)  B(quarterly)
     id year
-    0  2010  0      0.531828      0.322959
-    1  2010  2      0.634401      0.361789
-    2  2010  2      0.849432      0.228263
-    0  2011  0      0.724455      0.293714
-    1  2011  2      0.611024      0.630976
-    2  2011  2      0.722443      0.092105
+    0  2010  0      0.548814      0.437587
+    1  2010  1      0.715189      0.891773
+    2  2010  1      0.602763      0.963663
+    0  2011  0      0.544883      0.383442
+    1  2011  1      0.423655      0.791725
+    2  2011  1      0.645894      0.528895

     If we have many columns, we could also use a regex to find our
     stubnames and pass that list on to wide_to_long

-    >>> stubnames = set([match[0] for match in
-                    df.columns.str.findall('[A-B]\(.*\)').values
-                    if match != [] ])
+    >>> stubnames = sorted(
+    ...     set([match[0] for match in df.columns.str.findall(
+    ...         r'[A-B]\(.*\)').values if match != [] ])
+    ... )
     >>> list(stubnames)
-    ['B(quarterly)', 'A(quarterly)']
+    ['A(quarterly)', 'B(quarterly)']

     Notes
     -----
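
The added np.random.seed(0) call is what makes this example doctestable: the values shown (0.548814, 0.715189, 0.602763, ...) are simply the first draws from NumPy's global RandomState after seeding with 0, so they are stable across runs. A quick way to see that:

import numpy as np

# Seeding the legacy global RandomState pins the draws, so the docstring's
# expected output stays stable from run to run.
np.random.seed(0)
print(np.random.rand(3))  # first three draws; they match the 0.548814,
                          # 0.715189, 0.602763 column shown above
np.random.seed(0)
print(np.random.rand(3))  # reseeding reproduces exactly the same values
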
@@ -1133,7 +1138,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
     2  0  0  1

     >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
-                           'C': [1, 2, 3]})
+    ...                    'C': [1, 2, 3]})

     >>> pd.get_dummies(df, prefix=['col1', 'col2'])
        C  col1_a  col1_b  col2_a  col2_b  col2_c
@@ -1149,7 +1154,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
     3  1  0  0
     4  1  0  0

-    >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True))
+    >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
        b  c
     0  0  0
     1  1  0
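
With the stray closing parenthesis removed, the drop_first example runs as written; a copy-paste check:

import pandas as pd

# The corrected example: drop_first=True drops the first category level
# ('a'), leaving indicator columns only for 'b' and 'c'.
print(pd.get_dummies(pd.Series(list('abcaa')), drop_first=True))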

pandas/core/reshape/tile.py (+13 −11)

@@ -75,18 +75,18 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
     Examples
     --------
     >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True)
-    ([(0.191, 3.367], (0.191, 3.367], (0.191, 3.367], (3.367, 6.533],
-      (6.533, 9.7], (0.191, 3.367]]
-    Categories (3, object): [(0.191, 3.367] < (3.367, 6.533] < (6.533, 9.7]],
-    array([ 0.1905    ,  3.36666667,  6.53333333,  9.7       ]))
+    ... # doctest: +ELLIPSIS
+    ([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], ...
+    Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] ...

-    >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3,
-               labels=["good","medium","bad"])
+    >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]),
+    ...        3, labels=["good", "medium", "bad"])
+    ... # doctest: +SKIP
     [good, good, good, medium, bad, good]
     Categories (3, object): [good < medium < bad]

     >>> pd.cut(np.ones(5), 4, labels=False)
-    array([1, 1, 1, 1, 1], dtype=int64)
+    array([1, 1, 1, 1, 1])
     """
     # NOTE: this binning code is changed a bit from histogram for var(x) == 0

@@ -182,15 +182,17 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
     Examples
     --------
     >>> pd.qcut(range(5), 4)
-    [[0, 1], [0, 1], (1, 2], (2, 3], (3, 4]]
-    Categories (4, object): [[0, 1] < (1, 2] < (2, 3] < (3, 4]]
+    ... # doctest: +ELLIPSIS
+    [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]]
+    Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] ...

-    >>> pd.qcut(range(5), 3, labels=["good","medium","bad"])
+    >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"])
+    ... # doctest: +SKIP
     [good, good, medium, bad, bad]
     Categories (3, object): [good < medium < bad]

     >>> pd.qcut(range(5), 4, labels=False)
-    array([0, 0, 1, 2, 3], dtype=int64)
+    array([0, 0, 1, 2, 3])
     """
     x_is_series, series_index, name, x = _preprocess_for_cut(x)
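
The tile.py examples rely on two further directives: +ELLIPSIS, which lets ... in the expected output stand in for the long interval and Categorical reprs, and +SKIP, which leaves an example unexecuted. A minimal sketch of both, independent of pandas (the skipped call is a placeholder name and never runs):

import doctest


def directives():
    """ELLIPSIS and SKIP in one place.

    ELLIPSIS lets '...' in the expected output match any text, which is how
    the long interval reprs above are truncated.

    >>> list(range(20))  # doctest: +ELLIPSIS
    [0, 1, 2, ..., 19]

    SKIP leaves an example unexecuted; the placeholder call below never runs.

    >>> expensive_or_unstable_call()  # doctest: +SKIP
    'shown in the docs, never executed'
    """


if __name__ == "__main__":
    doctest.testmod()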
