From 5608ed93c681ffb078c2cadbbfd3cbfce0755589 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Mon, 22 May 2017 12:32:48 -0700 Subject: [PATCH 01/14] 1st update for issue 3439 --- pandas/core/reshape/reshape.py | 56 ++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index f944dfe22361a..0762f96417dcc 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -50,21 +50,21 @@ class _Unstacker(object): ... ('two', 'a'), ('two', 'b')]) >>> s = pd.Series(np.arange(1.0, 5.0), index=index) >>> s - one a 1 - b 2 - two a 3 - b 4 + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 dtype: float64 >>> s.unstack(level=-1) - a b - one 1 2 - two 3 4 + a b + one 1.0 2.0 + two 3.0 4.0 >>> s.unstack(level=0) one two - a 1 2 - b 3 4 + a 1.0 3.0 + b 2.0 4.0 Returns ------- @@ -689,7 +689,7 @@ def _convert_level_number(level_num, columns): new_labels = [np.arange(N).repeat(levsize)] new_names = [this.index.name] # something better? - new_levels.append(level_vals) + new_levels.append(frame.columns.levels[level_num]) new_labels.append(np.tile(level_labels, N)) new_names.append(frame.columns.names[level_num]) @@ -789,18 +789,18 @@ def lreshape(data, groups, dropna=True, label=None): >>> import pandas as pd >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], ... 'team': ['Red Sox', 'Yankees'], - ... 'year1': [2007, 2008], 'year2': [2008, 2008]}) + ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) >>> data hr1 hr2 team year1 year2 0 514 545 Red Sox 2007 2008 1 573 526 Yankees 2007 2008 >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) - team hr year - 0 Red Sox 514 2007 - 1 Yankees 573 2007 - 2 Red Sox 545 2008 - 3 Yankees 526 2008 + team year hr + 0 Red Sox 2007 514 + 1 Yankees 2007 573 + 2 Red Sox 2008 545 + 3 Yankees 2008 526 Returns ------- @@ -905,11 +905,12 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): ... }) >>> df["id"] = df.index >>> df - A1970 A1980 B1970 B1980 X id + A1970 A1980 B1970 B1980 X id 0 a d 2.5 3.2 -1.085631 0 1 b e 1.2 1.3 0.997345 1 2 c f 0.7 0.1 0.282978 2 >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") + ... # doctest: +NORMALIZE_WHITESPACE X A B id year 0 1970 -1.085631 a 2.5 @@ -940,6 +941,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): 8 3 3 2.1 2.9 >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') >>> l + ... # doctest: +NORMALIZE_WHITESPACE ht famid birth age 1 1 1 2.8 @@ -979,6 +981,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): Less wieldy column names are also handled + >>> np.random.seed(0) >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3), ... 'A(quarterly)-2011': np.random.rand(3), ... 'B(quarterly)-2010': np.random.rand(3), @@ -986,15 +989,17 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): ... 'X' : np.random.randint(3, size=3)}) >>> df['id'] = df.index >>> df - A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011 - 0 0.531828 0.724455 0.322959 0.293714 - 1 0.634401 0.611024 0.361789 0.630976 - 2 0.849432 0.722443 0.228263 0.092105 - \ + ... # doctest: +NORMALIZE_WHITESPACE + A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011 \ + 0 0.548814 0.544883 0.437587 0.383442 + 1 0.715189 0.423655 0.891773 0.791725 + 2 0.602763 0.645894 0.963663 0.528895 + X id 0 0 0 1 1 1 - 2 2 2 + 2 1 2 + >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', j='year', sep='-') X A(quarterly) B(quarterly) @@ -1132,8 +1137,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, 1 0 1 0 2 0 0 1 - >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], - 'C': [1, 2, 3]}) + >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], 'C': [1, 2, 3]}) >>> pd.get_dummies(df, prefix=['col1', 'col2']) C col1_a col1_b col2_a col2_b col2_c @@ -1149,7 +1153,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, 3 1 0 0 4 1 0 0 - >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)) + >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True) b c 0 0 0 1 1 0 From 10dda381408927baeae30079159ddd3a0d881365 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Mon, 22 May 2017 13:46:22 -0700 Subject: [PATCH 02/14] 2nd update for issue 3439 --- pandas/core/reshape/reshape.py | 50 ++++++++++++++++------------------ 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 0762f96417dcc..81a50d8f56981 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -48,23 +48,23 @@ class _Unstacker(object): >>> import pandas as pd >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ... ('two', 'a'), ('two', 'b')]) - >>> s = pd.Series(np.arange(1.0, 5.0), index=index) + >>> s = pd.Series(np.arange(1, 5), index=index) >>> s - one a 1.0 - b 2.0 - two a 3.0 - b 4.0 - dtype: float64 + one a 1 + b 2 + two a 3 + b 4 + dtype: int32 >>> s.unstack(level=-1) - a b - one 1.0 2.0 - two 3.0 4.0 + a b + one 1 2 + two 3 4 >>> s.unstack(level=0) one two - a 1.0 3.0 - b 2.0 4.0 + a 1 3 + b 2 4 Returns ------- @@ -994,31 +994,28 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): 0 0.548814 0.544883 0.437587 0.383442 1 0.715189 0.423655 0.891773 0.791725 2 0.602763 0.645894 0.963663 0.528895 - X id 0 0 0 1 1 1 2 1 2 - >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], - i='id', j='year', sep='-') - X A(quarterly) B(quarterly) + >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', j='year', sep='-') + ... # doctest: +NORMALIZE_WHITESPACE + X A(quarterly) B(quarterly) id year - 0 2010 0 0.531828 0.322959 - 1 2010 2 0.634401 0.361789 - 2 2010 2 0.849432 0.228263 - 0 2011 0 0.724455 0.293714 - 1 2011 2 0.611024 0.630976 - 2 2011 2 0.722443 0.092105 + 0 2010 0 0.548814 0.437587 + 1 2010 1 0.715189 0.891773 + 2 2010 1 0.602763 0.963663 + 0 2011 0 0.544883 0.383442 + 1 2011 1 0.423655 0.791725 + 2 2011 1 0.645894 0.528895 If we have many columns, we could also use a regex to find our stubnames and pass that list on to wide_to_long - >>> stubnames = set([match[0] for match in - df.columns.str.findall('[A-B]\(.*\)').values - if match != [] ]) + >>> stubnames = sorted(set([match[0] for match in df.columns.str.findall('[A-B]\(.*\)').values if match != [] ])) >>> list(stubnames) - ['B(quarterly)', 'A(quarterly)'] + ['A(quarterly)', 'B(quarterly)'] Notes ----- @@ -1137,7 +1134,8 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, 1 0 1 0 2 0 0 1 - >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], 'C': [1, 2, 3]}) + >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], + ... 'C': [1, 2, 3]}) >>> pd.get_dummies(df, prefix=['col1', 'col2']) C col1_a col1_b col2_a col2_b col2_c From cdf11971483d0fcc7938fa9364b159693ce36ca7 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Mon, 22 May 2017 13:51:47 -0700 Subject: [PATCH 03/14] 3rd update for issue 3439 --- pandas/core/reshape/reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 81a50d8f56981..0987757e0c77d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -689,7 +689,7 @@ def _convert_level_number(level_num, columns): new_labels = [np.arange(N).repeat(levsize)] new_names = [this.index.name] # something better? - new_levels.append(frame.columns.levels[level_num]) + new_levels.append(level_vals) new_labels.append(np.tile(level_labels, N)) new_names.append(frame.columns.names[level_num]) From 02e0fccc460ea193fc6b7c08c2e3d88911a19d5b Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Mon, 22 May 2017 14:10:54 -0700 Subject: [PATCH 04/14] 4th update for issue 3439 --- pandas/core/reshape/reshape.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 0987757e0c77d..71f13ea286a09 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -988,8 +988,7 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): ... 'B(quarterly)-2011': np.random.rand(3), ... 'X' : np.random.randint(3, size=3)}) >>> df['id'] = df.index - >>> df - ... # doctest: +NORMALIZE_WHITESPACE + >>> df # doctest: +NORMALIZE_WHITESPACE A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011 \ 0 0.548814 0.544883 0.437587 0.383442 1 0.715189 0.423655 0.891773 0.791725 From 20fe63a41dd2e68b9ea89c31cf94e84f7950cc70 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Mon, 22 May 2017 14:29:56 -0700 Subject: [PATCH 05/14] 5th update for issue 3439 --- pandas/core/reshape/concat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index af2eb734a02f6..96603b6adc3b0 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -197,6 +197,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, 0 a 2 >>> pd.concat([df5, df6], verify_integrity=True) + Traceback (most recent call last): + ... ValueError: Indexes have overlapping values: ['a'] """ op = _Concatenator(objs, axis=axis, join_axes=join_axes, From a19a729e066f3b85e211135049527767c42d2875 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Tue, 23 May 2017 09:55:21 -0700 Subject: [PATCH 06/14] 6th update for issue 3439 --- pandas/core/reshape/pivot.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 74dbbfc00cb11..114d834894d4b 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -438,27 +438,26 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, Examples -------- - >>> a - array([foo, foo, foo, foo, bar, bar, - bar, bar, foo, foo, foo], dtype=object) - >>> b - array([one, one, one, two, one, one, - one, two, two, two, one], dtype=object) - >>> c - array([dull, dull, shiny, dull, dull, shiny, - shiny, dull, shiny, shiny, shiny], dtype=object) + >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", + ... "bar", "bar", "foo", "foo", "foo"], dtype=object) + >>> b = np.array(["one", "one", "one", "two", "one", "one", + ... "one", "two", "two", "two", "one"], dtype=object) + >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", + ... "shiny", "dull", "shiny", "shiny", "shiny"], + ... dtype=object) >>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) - b one two - c dull shiny dull shiny + ... # doctest: +NORMALIZE_WHITESPACE + b one two + c dull shiny dull shiny a - bar 1 2 1 0 - foo 2 2 1 2 + bar 1 2 1 0 + foo 2 2 1 2 >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, - # but they still will be counted in the output + ... # but they still will be counted in the output col_0 d e f row_0 a 1 0 0 From 5d972ad46d9345ac40ef908d4f1c3a1acb7b08ed Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Tue, 23 May 2017 10:17:46 -0700 Subject: [PATCH 07/14] 7th update for issue 3439 --- pandas/core/reshape/pivot.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 114d834894d4b..b1ec3ac4709de 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -50,7 +50,14 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', Examples -------- - >>> df + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) A B C D 0 foo one small 1 1 foo one large 2 @@ -458,6 +465,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) >>> crosstab(foo, bar) # 'c' and 'f' are not represented in the data, ... # but they still will be counted in the output + ... # doctest: +SKIP col_0 d e f row_0 a 1 0 0 From 5379089a020a6bcdcf43a21f4ea979f8fd40e02d Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Tue, 23 May 2017 10:22:58 -0700 Subject: [PATCH 08/14] 8th update for issue 3439 --- pandas/core/reshape/tile.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 746742f47f2aa..6d7e5cc5cd58f 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -75,13 +75,11 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Examples -------- >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True) - ([(0.191, 3.367], (0.191, 3.367], (0.191, 3.367], (3.367, 6.533], - (6.533, 9.7], (0.191, 3.367]] - Categories (3, object): [(0.191, 3.367] < (3.367, 6.533] < (6.533, 9.7]], - array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ])) + ([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], (6.533, 9.7], (0.19, 3.367]] + Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] < (6.533, 9.7]], array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ])) - >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, - labels=["good","medium","bad"]) + >>> result, bins = pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), + ... 3, labels=["good","medium","bad"]) [good, good, good, medium, bad, good] Categories (3, object): [good < medium < bad] From f7f3289107d3ca07ccfc71202c7f547b3a300cc2 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Tue, 23 May 2017 10:43:14 -0700 Subject: [PATCH 09/14] 8th update for issue 3439 --- pandas/core/reshape/pivot.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b1ec3ac4709de..795cf74995d8b 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -58,16 +58,17 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', ... "small", "large", "small", "small", ... "large"], ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 + >>> df + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 >>> table = pivot_table(df, values='D', index=['A', 'B'], ... columns=['C'], aggfunc=np.sum) From 891d42bce299c9f1ca6f030e5dd807a4ae27cf32 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Tue, 23 May 2017 10:49:01 -0700 Subject: [PATCH 10/14] 9th update for issue 3439 --- pandas/core/reshape/pivot.py | 2 +- pandas/core/reshape/tile.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 795cf74995d8b..b04bc785db6e6 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -454,7 +454,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, ... "shiny", "dull", "shiny", "shiny", "shiny"], ... dtype=object) - >>> crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) + >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) ... # doctest: +NORMALIZE_WHITESPACE b one two c dull shiny dull shiny diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 6d7e5cc5cd58f..78d1bfaa9b498 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -78,8 +78,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, ([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], (6.533, 9.7], (0.19, 3.367]] Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] < (6.533, 9.7]], array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ])) - >>> result, bins = pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), + >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), ... 3, labels=["good","medium","bad"]) + ... # doctest: +SKIP [good, good, good, medium, bad, good] Categories (3, object): [good < medium < bad] From a701026fae30c97748e3dec66783379b0d6c98f6 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Tue, 23 May 2017 11:00:19 -0700 Subject: [PATCH 11/14] 9th update for issue 3439 --- pandas/core/reshape/pivot.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b04bc785db6e6..945916f97cff2 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -73,11 +73,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', >>> table = pivot_table(df, values='D', index=['A', 'B'], ... columns=['C'], aggfunc=np.sum) >>> table - small large - foo one 1 4 - two 6 NaN - bar one 5 4 - two 6 7 + ... # doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 Returns ------- From 7105f0a67b492b2ddff5059e9e5c658f43a891b7 Mon Sep 17 00:00:00 2001 From: Kassandra Keeton Date: Tue, 23 May 2017 11:45:30 -0700 Subject: [PATCH 12/14] 10th update for issue 3439 --- pandas/core/reshape/tile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 78d1bfaa9b498..ba3cd43d30a94 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -181,8 +181,8 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): Examples -------- >>> pd.qcut(range(5), 4) - [[0, 1], [0, 1], (1, 2], (2, 3], (3, 4]] - Categories (4, object): [[0, 1] < (1, 2] < (2, 3] < (3, 4]] + [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] + Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] < (2.0, 3.0] < (3.0, 4.0]] >>> pd.qcut(range(5), 3, labels=["good","medium","bad"]) [good, good, medium, bad, bad] From 24a0f1c6cfe626e38429fb28d089831f7aab1ff7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 26 May 2017 10:02:17 -0500 Subject: [PATCH 13/14] PEP8 fixes --- pandas/core/reshape/pivot.py | 10 +++++----- pandas/core/reshape/reshape.py | 26 +++++++++++++++----------- pandas/core/reshape/tile.py | 17 ++++++++++------- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 945916f97cff2..a9a5051965ece 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -50,11 +50,11 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', Examples -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", ... "one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", + ... "C": ["small", "large", "large", "small", ... "small", "large", "small", "small", ... "large"], ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) @@ -453,7 +453,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, >>> b = np.array(["one", "one", "one", "two", "one", "one", ... "one", "two", "two", "two", "one"], dtype=object) >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", - ... "shiny", "dull", "shiny", "shiny", "shiny"], + ... "shiny", "dull", "shiny", "shiny", "shiny"], ... dtype=object) >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 71f13ea286a09..dcb83d225699d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -48,13 +48,13 @@ class _Unstacker(object): >>> import pandas as pd >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ... ('two', 'a'), ('two', 'b')]) - >>> s = pd.Series(np.arange(1, 5), index=index) + >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index) >>> s one a 1 b 2 two a 3 b 4 - dtype: int32 + dtype: int64 >>> s.unstack(level=-1) a b @@ -988,17 +988,18 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): ... 'B(quarterly)-2011': np.random.rand(3), ... 'X' : np.random.randint(3, size=3)}) >>> df['id'] = df.index - >>> df # doctest: +NORMALIZE_WHITESPACE - A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 B(quarterly)-2011 \ - 0 0.548814 0.544883 0.437587 0.383442 - 1 0.715189 0.423655 0.891773 0.791725 - 2 0.602763 0.645894 0.963663 0.528895 + >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ... + 0 0.548814 0.544883 0.437587 ... + 1 0.715189 0.423655 0.891773 ... + 2 0.602763 0.645894 0.963663 ... X id 0 0 0 1 1 1 2 1 2 - - >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', j='year', sep='-') + + >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', + ... j='year', sep='-') ... # doctest: +NORMALIZE_WHITESPACE X A(quarterly) B(quarterly) id year @@ -1012,7 +1013,10 @@ def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): If we have many columns, we could also use a regex to find our stubnames and pass that list on to wide_to_long - >>> stubnames = sorted(set([match[0] for match in df.columns.str.findall('[A-B]\(.*\)').values if match != [] ])) + >>> stubnames = sorted( + ... set([match[0] for match in df.columns.str.findall( + ... r'[A-B]\(.*\)').values if match != [] ]) + ... ) >>> list(stubnames) ['A(quarterly)', 'B(quarterly)'] @@ -1133,7 +1137,7 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, 1 0 1 0 2 0 0 1 - >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], + >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], ... 'C': [1, 2, 3]}) >>> pd.get_dummies(df, prefix=['col1', 'col2']) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index ba3cd43d30a94..866f229bec418 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -75,17 +75,18 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, Examples -------- >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), 3, retbins=True) - ([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], (6.533, 9.7], (0.19, 3.367]] - Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] < (6.533, 9.7]], array([ 0.1905 , 3.36666667, 6.53333333, 9.7 ])) + ... # doctest: +ELLIPSIS + ([(0.19, 3.367], (0.19, 3.367], (0.19, 3.367], (3.367, 6.533], ... + Categories (3, interval[float64]): [(0.19, 3.367] < (3.367, 6.533] ... >>> pd.cut(np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]), - ... 3, labels=["good","medium","bad"]) + ... 3, labels=["good", "medium", "bad"]) ... # doctest: +SKIP [good, good, good, medium, bad, good] Categories (3, object): [good < medium < bad] >>> pd.cut(np.ones(5), 4, labels=False) - array([1, 1, 1, 1, 1], dtype=int64) + array([1, 1, 1, 1, 1]) """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 @@ -181,15 +182,17 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): Examples -------- >>> pd.qcut(range(5), 4) + ... # doctest: +ELLIPSIS [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] - Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] < (2.0, 3.0] < (3.0, 4.0]] + Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] ... - >>> pd.qcut(range(5), 3, labels=["good","medium","bad"]) + >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) + ... # doctest: +SKIP [good, good, medium, bad, bad] Categories (3, object): [good < medium < bad] >>> pd.qcut(range(5), 4, labels=False) - array([0, 0, 1, 2, 3], dtype=int64) + array([0, 0, 1, 2, 3]) """ x_is_series, series_index, name, x = _preprocess_for_cut(x) From ffd3e3ca05da04a9bd8c610e0b5e50274327fa26 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 26 May 2017 10:06:52 -0500 Subject: [PATCH 14/14] Enable doc build --- ci/build_docs.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 26917b8f9b792..a038304fe0f7a 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -59,6 +59,15 @@ if [ "$DOC" ]; then git remote -v git push origin gh-pages -f + + echo "Running doctests" + cd "$TRAVIS_BUILD_DIR" + pytest --doctest-modules \ + pandas/core/reshape/concat.py \ + pandas/core/reshape/pivot.py \ + pandas/core/reshape/reshape.py \ + pandas/core/reshape/tile.py + fi exit 0