From d5ca5ae302ceee5cd4139d0c05f7539812e45f62 Mon Sep 17 00:00:00 2001 From: Benjamin Zaitlen Date: Tue, 5 Mar 2019 18:17:50 -0800 Subject: [PATCH 1/4] add test for timeseries index --- dask_cudf/tests/test_core.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dask_cudf/tests/test_core.py b/dask_cudf/tests/test_core.py index e7febe3..301e1a7 100644 --- a/dask_cudf/tests/test_core.py +++ b/dask_cudf/tests/test_core.py @@ -43,6 +43,18 @@ def test_from_cudf_with_generic_idx(): assert isinstance(ddf.index.compute(), cudf.dataframe.index.GenericIndex) dd.assert_eq(ddf.loc[1:2, ["a"]], cdf.loc[1:2, ["a"]]) +def test_timeseries_index(): + + gdf = cudf.DataFrame() + gdf['date'] = pd.date_range('11/20/2018', periods=72, freq='D') + gdf['value'] = np.random.sample(len(gdf)) + + ddf = dgd.from_cudf(gdf, npartitions=2) + + ddf_ts_idx = gdf.set_index('date') + gdf_ts_idx = gdf.set_index('date') + dd.assert_eq(ddf_ts_idx, gdf_ts_idx) + def _fragmented_gdf(df, nsplit): n = len(df) From 8f2dc348cfd68c58605cb91941f6b026bd4e7ff1 Mon Sep 17 00:00:00 2001 From: Benjamin Zaitlen Date: Tue, 5 Mar 2019 18:49:48 -0800 Subject: [PATCH 2/4] fix test and lint --- dask_cudf/tests/test_core.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/dask_cudf/tests/test_core.py b/dask_cudf/tests/test_core.py index 301e1a7..a28e714 100644 --- a/dask_cudf/tests/test_core.py +++ b/dask_cudf/tests/test_core.py @@ -43,18 +43,6 @@ def test_from_cudf_with_generic_idx(): assert isinstance(ddf.index.compute(), cudf.dataframe.index.GenericIndex) dd.assert_eq(ddf.loc[1:2, ["a"]], cdf.loc[1:2, ["a"]]) -def test_timeseries_index(): - - gdf = cudf.DataFrame() - gdf['date'] = pd.date_range('11/20/2018', periods=72, freq='D') - gdf['value'] = np.random.sample(len(gdf)) - - ddf = dgd.from_cudf(gdf, npartitions=2) - - ddf_ts_idx = gdf.set_index('date') - gdf_ts_idx = gdf.set_index('date') - dd.assert_eq(ddf_ts_idx, gdf_ts_idx) - def _fragmented_gdf(df, nsplit): n = len(df) @@ -127,6 +115,23 @@ def test_set_index(nelem): dd.assert_eq(expect, got, check_index=False, check_divisions=False) +def test_timeseries_index(): + + df = pd.DataFrame() + df['date'] = pd.date_range('11/20/2018', periods=72, freq='D') + df['value'] = np.random.sample(len(df)) + + gdf = cudf.DataFrame.from_pandas(df) + ddf = dgd.from_cudf(gdf, npartitions=2) + + ddf_ts_idx = ddf.set_index('date') + + got = ddf_ts_idx.compute().to_pandas() + expect = df.set_index('date') + + dd.assert_eq(got, expect, check_index=False, check_divisions=False) + + def assert_frame_equal_by_index_group(expect, got): assert sorted(expect.columns) == sorted(got.columns) assert sorted(set(got.index)) == sorted(set(expect.index)) From 0eefafcb71e716f957075233baceb259a96686cf Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Tue, 14 May 2019 17:00:17 -0400 Subject: [PATCH 3/4] Use dask dataframe api and pass dask dataframe into assert_eq --- dask_cudf/tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dask_cudf/tests/test_core.py b/dask_cudf/tests/test_core.py index a28e714..aa3201d 100644 --- a/dask_cudf/tests/test_core.py +++ b/dask_cudf/tests/test_core.py @@ -122,11 +122,11 @@ def test_timeseries_index(): df['value'] = np.random.sample(len(df)) gdf = cudf.DataFrame.from_pandas(df) - ddf = dgd.from_cudf(gdf, npartitions=2) + ddf = dgd.from_pandas(gdf, npartitions=2) ddf_ts_idx = ddf.set_index('date') - got = ddf_ts_idx.compute().to_pandas() + got = ddf_ts_idx expect = df.set_index('date') dd.assert_eq(got, expect, check_index=False, check_divisions=False) From 69c3e42de6406ff1fdeff2e36cfd8bddbd2f5554 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Wed, 15 May 2019 12:31:55 -0400 Subject: [PATCH 4/4] Use dask.dataframe, not dask-cudf.dataframe --- dask_cudf/tests/test_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dask_cudf/tests/test_core.py b/dask_cudf/tests/test_core.py index aa3201d..2038222 100644 --- a/dask_cudf/tests/test_core.py +++ b/dask_cudf/tests/test_core.py @@ -122,7 +122,7 @@ def test_timeseries_index(): df['value'] = np.random.sample(len(df)) gdf = cudf.DataFrame.from_pandas(df) - ddf = dgd.from_pandas(gdf, npartitions=2) + ddf = dd.from_pandas(gdf, npartitions=2) ddf_ts_idx = ddf.set_index('date')