diff --git a/src/datasets/packaged_modules/text/text.py b/src/datasets/packaged_modules/text/text.py
index 3de987bd0c3..0f88475203c 100644
--- a/src/datasets/packaged_modules/text/text.py
+++ b/src/datasets/packaged_modules/text/text.py
@@ -1,6 +1,6 @@
 import itertools
 import warnings
-from dataclasses import dataclass
+from dataclasses import InitVar, dataclass
 from io import StringIO
 from typing import Optional
 
@@ -20,20 +20,20 @@ class TextConfig(datasets.BuilderConfig):
 
     features: Optional[datasets.Features] = None
     encoding: str = "utf-8"
-    errors = "deprecated"
+    errors: InitVar[Optional[str]] = "deprecated"
     encoding_errors: Optional[str] = None
     chunksize: int = 10 << 20  # 10MB
     keep_linebreaks: bool = False
     sample_by: str = "line"
 
-    def __post_init__(self):
-        if self.errors != "deprecated":
+    def __post_init__(self, errors):
+        if errors != "deprecated":
             warnings.warn(
-                "'errors' was deprecated in favor of 'encoding_erros' in version 2.14.0 and will be removed in 3.0.0.\n"
-                f"You can remove this warning by passing 'encoding_errors={self.errors}' instead.",
+                "'errors' was deprecated in favor of 'encoding_errors' in version 2.14.0 and will be removed in 3.0.0.\n"
+                f"You can remove this warning by passing 'encoding_errors={errors}' instead.",
                 FutureWarning,
             )
-            self.encoding_errors = self.errors
+            self.encoding_errors = errors
 
 
 class Text(datasets.ArrowBasedBuilder):