ruff 2

tvdboom · Dec 5, 2023 · d7a225e · d7a225e
1 parent 1813e96
commit d7a225e
Show file tree

Hide file tree

Showing 58 changed files with 2,167 additions and 1,761 deletions.
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
@@ -77,7 +77,7 @@ maybe an issue for your problem already exists, and the discussion
 might inform you of workarounds readily available.
 
 We want to fix all the issues as soon as possible, but before fixing a
-bug we need to reproduce and confirm it. In order to reproduce bugs we
+bug, we need to reproduce and confirm it. In order to reproduce bugs, we
 will systematically ask you to provide a minimal reproduction scenario
 using the custom issue template.
 
@@ -90,15 +90,14 @@ and accept your changes.
 
 * Update the documentation so all of your changes are reflected there.
 * Adhere to [PEP 8](https://peps.python.org/pep-0008/) standards.
-* Use a maximum of 91 characters per line. Try to keep docstrings below
+* Use a maximum of 99 characters per line. Try to keep docstrings below
   74 characters.
 * Update the project unit tests to test your code changes as thoroughly
   as possible.
 * Make sure that your code is properly commented with docstrings and
   comments explaining your rationale behind non-obvious coding practices.
 * Run [isort](https://pycqa.github.io/isort/): `isort atom tests`.
-* Run [flake8](https://github.com/pycqa/flake8): `flake8 --show-source --statistics atom tests`.
-* Run [pydocstyle](https://www.pydocstyle.org/en/stable/): `pydocstyle atom tests`.
+* Run [ruff](https://docs.astral.sh/ruff/): `ruff check --fix atom tests`.
 * Run [mypy](https://www.mypy-lang.org/): `mypy atom tests`.
 
 If your contribution requires a new library dependency:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,27 +1,25 @@
-ci:
-  autoupdate_schedule: monthly
-
 repos:
   - repo: https://github.com/pycqa/isort
-    rev: 5.11.4
+    rev: 5.12.0
     hooks:
       - id: isort
         files: ^atom/.*\.py$|tests/.*\.py$
 
-  - repo: https://github.com/pycqa/flake8
-    rev: 6.0.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.1.7
     hooks:
-      - id: flake8
-        additional_dependencies: [flake8-pyproject]
-        files: ^atom/.*\.py$|tests/.*\.py$
-        args: ["--show-source", "--statistics"]
+    - id: ruff
+      types_or: [ python, pyi, jupyter ]
+      args: ["--fix"]
+      files: ^atom/.*\.py$|tests/.*\.py$
 
-  - repo: https://github.com/pycqa/pydocstyle
-    rev: 6.3.0
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
     hooks:
-      - id: pydocstyle
-        additional_dependencies: [tomli]
-        files: ^atom/.*\.py$|tests/.*\.py$
+    - id: check-yaml
+    - id: end-of-file-fixer
+    - id: mixed-line-ending
+    - id: check-merge-conflict
 
   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v1.7.1

diff --git a/atom/__init__.py b/atom/__init__.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """Automated Tool for Optimized Modeling (ATOM).
 
 Author: Mavs

diff --git a/atom/api.py b/atom/api.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """Automated Tool for Optimized Modeling (ATOM).
 
 Author: Mavs
@@ -244,21 +242,22 @@ class ATOMClassifier(ATOM):
         `#!python device="gpu"` to use the GPU. Read more in the
         [user guide][gpu-acceleration].
 
-    engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+    engine: dict or None, default=None
         Execution engine to use for [data][data-acceleration] and
         [estimators][estimator-acceleration]. The value should be a
         dictionary with keys `data` and/or `estimator`, with their
-        corresponding choice as values. Choose from:
+        corresponding choice as values. If None, the default values
+        are used. Choose from:
 
         - "data":
 
-            - "numpy"
+            - "numpy" (default)
             - "pyarrow"
             - "modin"
 
         - "estimator":
 
-            - "sklearn"
+            - "sklearn" (default)
             - "sklearnex"
             - "cuml"
 
@@ -356,7 +355,7 @@ def __init__(
         holdout_size: Scalar | None = None,
         n_jobs: NJobs = 1,
         device: str = "cpu",
-        engine: Engine = {"data": "numpy", "estimator": "sklearn"},
+        engine: Engine | None = None,
         backend: Backend = "loky",
         memory: Bool | str | Path | Memory = False,
         verbose: Verbose = 0,
@@ -480,21 +479,22 @@ class ATOMForecaster(ATOM):
         `#!python device="gpu"` to use the GPU. Read more in the
         [user guide][gpu-acceleration].
 
-    engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+    engine: dict or None, default=None
         Execution engine to use for [data][data-acceleration] and
         [estimators][estimator-acceleration]. The value should be a
         dictionary with keys `data` and/or `estimator`, with their
-        corresponding choice as values. Choose from:
+        corresponding choice as values. If None, the default values
+        are used. Choose from:
 
         - "data":
 
-            - "numpy"
+            - "numpy" (default)
             - "pyarrow"
             - "modin"
 
         - "estimator":
 
-            - "sklearn"
+            - "sklearn" (default)
             - "sklearnex"
             - "cuml"
 
@@ -585,7 +585,7 @@ def __init__(
         holdout_size: Scalar | None = None,
         n_jobs: NJobs = 1,
         device: str = "cpu",
-        engine: Engine = {"data": "numpy", "estimator": "sklearn"},
+        engine: Engine | None = None,
         backend: Backend = "loky",
         memory: Bool | str | Path | Memory = False,
         verbose: Verbose = 0,
@@ -719,21 +719,22 @@ class ATOMRegressor(ATOM):
         `#!python device="gpu"` to use the GPU. Read more in the
         [user guide][gpu-acceleration].
 
-    engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+    engine: dict or None, default=None
         Execution engine to use for [data][data-acceleration] and
         [estimators][estimator-acceleration]. The value should be a
         dictionary with keys `data` and/or `estimator`, with their
-        corresponding choice as values. Choose from:
+        corresponding choice as values. If None, the default values
+        are used. Choose from:
 
         - "data":
 
-            - "numpy"
+            - "numpy" (default)
             - "pyarrow"
             - "modin"
 
         - "estimator":
 
-            - "sklearn"
+            - "sklearn" (default)
             - "sklearnex"
             - "cuml"
 
@@ -830,7 +831,7 @@ def __init__(
         holdout_size: Scalar | None = None,
         n_jobs: NJobs = 1,
         device: str = "cpu",
-        engine: Engine = {"data": "numpy", "estimator": "sklearn"},
+        engine: Engine | None = None,
         backend: Backend = "loky",
         memory: Bool | str | Path | Memory = False,
         verbose: Verbose = 0,

diff --git a/atom/atom.py b/atom/atom.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """Automated Tool for Optimized Modeling (ATOM).
 
 Author: Mavs
@@ -88,7 +86,8 @@ class ATOM(BaseRunner, ATOMPlot, metaclass=ABCMeta):
 
     @property
     @abstractmethod
-    def _goal(self) -> Goal: ...
+    def _goal(self) -> Goal:
+        ...
 
     def __init__(
         self,
@@ -103,7 +102,7 @@ def __init__(
         holdout_size: Scalar | None = None,
         n_jobs: NJobs = 1,
         device: str = "cpu",
-        engine: Engine = {"data": "numpy", "estimator": "sklearn"},
+        engine: Engine | None = None,
         backend: Backend = "loky",
         memory: Bool | str | Path | Memory = False,
         verbose: Verbose = 0,
@@ -152,7 +151,9 @@ def __init__(
         elif self.backend != "loky":
             self._log(
                 "Leaving n_jobs=1 ignores all parallelization. Set n_jobs>1 to make use "
-                f"of the {self.backend} parallelization backend.", 1, severity="warning"
+                f"of the {self.backend} parallelization backend.",
+                1,
+                severity="warning",
             )
         if "cpu" not in self.device.lower():
             self._log(f"Device: {self.device}", 1)
@@ -265,8 +266,8 @@ def branch(self):
         self._branches.branches.remove(current)
         self._branches.current = self._branches[0].name
         self._log(
-            f"Branch {current} successfully deleted. "
-            f"Switched to branch {self.branch.name}.", 1
+            f"Branch {current} successfully deleted. Switched to branch {self.branch.name}.",
+            1,
         )
 
     @property
@@ -357,7 +358,7 @@ def outliers(self) -> pd.Series:
         """
         if not is_sparse(self.X):
             data = self.branch.train.select_dtypes(include=["number"])
-            z_scores = (np.abs(stats.zscore(data.to_numpy(float, na_value=np.nan))) > 3)
+            z_scores = np.abs(stats.zscore(data.to_numpy(float, na_value=np.nan))) > 3
             z_scores = pd.Series(z_scores.sum(axis=0), index=data.columns)
             return z_scores[z_scores > 0]
 
@@ -372,7 +373,7 @@ def n_outliers(self) -> Int:
         """
         if not is_sparse(self.X):
             data = self.branch.train.select_dtypes(include=["number"])
-            z_scores = (np.abs(stats.zscore(data.to_numpy(float, na_value=np.nan))) > 3)
+            z_scores = np.abs(stats.zscore(data.to_numpy(float, na_value=np.nan))) > 3
             return z_scores.any(axis=1).sum()
 
         raise AttributeError("This property is unavailable for sparse datasets.")
@@ -495,8 +496,8 @@ def distribution(
                 stat = stats.kstest(X, dist, args=param)
 
                 # Add as column to the dataframe
-                df.at[(dist, "score"), col] = round(stat[0], 4)
-                df.at[(dist, "p_value"), col] = round(stat[1], 4)
+                df.loc[(dist, "score"), col] = round(stat[0], 4)
+                df.loc[(dist, "p_value"), col] = round(stat[1], 4)
 
         return df
 
@@ -687,7 +688,8 @@ def load(cls, filename: str | Path, data: tuple[Any, ...] | None = None) -> ATOM
 
         # Reassign the transformer attributes (warnings, random_state, etc.)
         BaseTransformer.__init__(
-            atom, **{x: getattr(atom, x) for x in BaseTransformer.attrs},
+            atom,
+            **{x: getattr(atom, x) for x in BaseTransformer.attrs},
         )
 
         if data is not None:
@@ -727,8 +729,8 @@ def load(cls, filename: str | Path, data: tuple[Any, ...] | None = None) -> ATOM
                 if atom._config.index is False:
                     branch._container = DataContainer(
                         data=(dataset := branch._container.data.reset_index(drop=True)),
-                        train_idx=dataset.index[:len(branch._container.train_idx)],
-                        test_idx=dataset.index[-len(branch._container.test_idx):],
+                        train_idx=dataset.index[: len(branch._container.train_idx)],
+                        test_idx=dataset.index[-len(branch._container.test_idx) :],
                         n_cols=branch._container.n_cols,
                     )
 
@@ -741,7 +743,7 @@ def load(cls, filename: str | Path, data: tuple[Any, ...] | None = None) -> ATOM
         return atom
 
     @composed(crash, method_to_log)
-    def reset(self, hard: Bool = False):
+    def reset(self, *, hard: Bool = False):
         """Reset the instance to its initial state.
 
         Deletes all branches and models. The dataset is also reset
@@ -970,7 +972,8 @@ def stats(self, _vb: Int = -2, /):
                 duplicates = None
                 self._log(
                     "Unable to calculate the number of duplicate "
-                    "rows because a column is unhashable.", 3
+                    "rows because a column is unhashable.",
+                    3,
                 )
 
             if not self.X.empty:
@@ -1082,6 +1085,7 @@ def _prepare_kwargs(
     def _add_transformer(
         self,
         transformer: T_Transformer,
+        *,
         columns: ColumnSelector | None = None,
         train_only: Bool = False,
         **fit_params,
@@ -1146,7 +1150,9 @@ def _add_transformer(
                     "Features and target columns passed to transformer "
                     f"{transformer_c.__class__.__name__}. Either select features or "
                     "the target column, not both at the same time. The transformation "
-                    "of the target column will be ignored.", 1, severity="warning"
+                    "of the target column will be ignored.",
+                    1,
+                    severity="warning",
                 )
             transformer_c._cols = inc
 
@@ -1202,8 +1208,8 @@ def _add_transformer(
         if self._config.index is False:
             self.branch._container = DataContainer(
                 data=(data := self.dataset.reset_index(drop=True)),
-                train_idx=data.index[:len(self.branch._data.train_idx)],
-                test_idx=data.index[-len(self.branch._data.test_idx):],
+                train_idx=data.index[: len(self.branch._data.train_idx)],
+                test_idx=data.index[-len(self.branch._data.test_idx) :],
                 n_cols=self.branch._data.n_cols,
             )
             if self.branch._holdout is not None:
@@ -1307,13 +1313,22 @@ def add(
         """
         if isinstance(transformer, SkPipeline):
             # Recursively add all transformers to the pipeline
-            for name, est in transformer.named_steps.items():
+            for est in transformer.named_steps.values():
                 self._log(f"Adding {est.__class__.__name__} to the pipeline...", 1)
-                self._add_transformer(est, columns, train_only, **fit_params)
+                self._add_transformer(
+                    transformer=est,
+                    columns=columns,
+                    train_only=train_only,
+                    **fit_params,
+                )
         else:
-            self._log(
-                f"Adding {transformer.__class__.__name__} to the pipeline...", 1)
-            self._add_transformer(transformer, columns, train_only, **fit_params)
+            self._log(f"Adding {transformer.__class__.__name__} to the pipeline...", 1)
+            self._add_transformer(
+                transformer=transformer,
+                columns=columns,
+                train_only=train_only,
+                **fit_params,
+            )
 
     @composed(crash, method_to_log)
     def apply(
@@ -1640,6 +1655,7 @@ def prune(
     def scale(
         self,
         strategy: ScalerStrats = "standard",
+        *,
         include_binary: Bool = False,
         **kwargs,
     ):
@@ -2036,7 +2052,8 @@ def _run(self, trainer: BaseRunner):
                 self._delete_models(model.name)
                 self._log(
                     f"Consecutive runs of model {model.name}. "
-                    "The former model has been overwritten.", 1
+                    "The former model has been overwritten.",
+                    1,
                 )
 
         self._models.extend(trainer._models)