diff --git a/CHANGES.rst b/CHANGES.rst index fdcf05d6..5aa34524 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,9 +2,10 @@ Changelog ========= -Version 5.3.0 (2021-11-26) +Version 5.3.0 (2021-12-10) ========================== * Add Deprecation warnings and migration helpers in order to facilitate the Kartothek version 6.0.0 migration. +* Removed warning for distinct categoricals (#501) Version 5.2.0 (2021-11-22) diff --git a/kartothek/io/dask/_utils.py b/kartothek/io/dask/_utils.py index 9a8d2f6b..f94b2788 100644 --- a/kartothek/io/dask/_utils.py +++ b/kartothek/io/dask/_utils.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- -import warnings from functools import partial import pandas as pd @@ -12,8 +11,6 @@ except ImportError: pass -CATEGORICAL_EFFICIENCY_WARN_LIMIT = 100000 - def _identity(): def _id(x): @@ -49,12 +46,6 @@ def _cast_categorical_to_index_cat(df, categories): def _construct_categorical(column, dataset_metadata_factory): dataset_metadata = dataset_metadata_factory.load_index(column) values = dataset_metadata.indices[column].index_dct.keys() - if len(values) > CATEGORICAL_EFFICIENCY_WARN_LIMIT: - warnings.warn( - "Column {} has {} distinct values, reading as categorical may increase memory consumption.", - column, - len(values), - ) return pd.api.types.CategoricalDtype(values, ordered=False)