From 5d8d4606a523095e49ed4e8b04d205620f530fd6 Mon Sep 17 00:00:00 2001 From: Adam Laiacano Date: Wed, 21 Jun 2023 10:46:19 -0400 Subject: [PATCH] catch renaming error --- nvtabular/ops/categorify.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/nvtabular/ops/categorify.py b/nvtabular/ops/categorify.py index b3d0933137..ba63b72778 100644 --- a/nvtabular/ops/categorify.py +++ b/nvtabular/ops/categorify.py @@ -1644,7 +1644,10 @@ def _encode( cats_only=True, reader=read_pq_func, ) - if len(value) and value[tmp_label_column].iloc[0] < OOV_OFFSET + num_oov_buckets: + if ( + len(value) + and value[tmp_label_column].iloc[0] < OOV_OFFSET + num_oov_buckets + ): # See: https://github.com/rapidsai/cudf/issues/12837 value[tmp_label_column] += OOV_OFFSET + num_oov_buckets else: @@ -1770,16 +1773,16 @@ def _encode( np.full( len(codes), indistinct, - like=merged_df[tmp_column_name].values, + like=merged_df[tmp_label_column].values, ), ) - labels.iloc[merged_df["order"]] = merged_df[tmp_column_name] + labels.iloc[merged_df["order"]] = merged_df[tmp_label_column] else: - labels = merged_df.sort_values("order")[tmp_column_name].reset_index(drop=True) + labels = merged_df.sort_values("order")[tmp_label_column].reset_index(drop=True) else: labels = codes.merge( value, left_on=selection_l.names, right_on=selection_r.names, how="left" - ).sort_values("order")[tmp_column_name] + ).sort_values("order")[tmp_label_column] labels.fillna(indistinct, inplace=True) labels = labels.values else: