diff --git a/nvtabular/ops/categorify.py b/nvtabular/ops/categorify.py index cf5a486b54..4ebc878621 100644 --- a/nvtabular/ops/categorify.py +++ b/nvtabular/ops/categorify.py @@ -28,6 +28,7 @@ import pandas as pd import pyarrow as pa import pyarrow.dataset as pa_ds +from dask import config from dask.base import tokenize from dask.blockwise import BlockIndex from dask.core import flatten @@ -1251,7 +1252,8 @@ def _drop_first_row(part, index): if has_size: # Avoid using dask_cudf to calculate divisions # (since it may produce too-few partitions) - df = df.sort_values(name_size, ascending=False) + with config.set({"dataframe.shuffle.method": "tasks"}): + df = df.sort_values(name_size, ascending=False) unique_path = _save_encodings( df,