From 5fca7a1ce2903b1365d63745ddf94962e12305c6 Mon Sep 17 00:00:00 2001 From: Mavs Date: Wed, 29 Nov 2023 17:25:03 +0100 Subject: [PATCH] fix examples 2 --- .github/workflows/config.yml | 10 ++- atom/atom.py | 6 +- atom/basemodel.py | 99 ++++++++++++++------------- atom/plots/predictionplot.py | 4 +- atom/utils/utils.py | 12 +--- docs_sources/dependencies.md | 3 +- docs_sources/user_guide/predicting.md | 4 +- examples/accelerating_cuml.ipynb | 6 +- pyproject.toml | 3 +- tests/test_basemodel.py | 4 +- tests/test_baserunner.py | 2 +- tests/test_branch.py | 14 ---- 12 files changed, 78 insertions(+), 89 deletions(-) diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml index 73c8817cb..166085b26 100644 --- a/.github/workflows/config.yml +++ b/.github/workflows/config.yml @@ -124,4 +124,12 @@ jobs: pip install -U pytest pytest-xdist nbmake scikeras tensorflow pip install -e .[full] - name: Run example notebooks - run: pytest --nbmake -n=auto --nbmake-timeout=600 --ignore=./examples/webapp/ --ignore=./examples/accelerating_cuml.ipynb ./examples/ + run: | + pytest \ + -n=auto \ + --nbmake \ + --nbmake-timeout=600 \ + --ignore=./examples/webapp/ \ + --ignore=./examples/accelerating_cuml.ipynb \ + --ignore=./examples/ray_backend.ipynb \ + ./examples/ diff --git a/atom/atom.py b/atom/atom.py index a269dd510..f269ccef0 100644 --- a/atom/atom.py +++ b/atom/atom.py @@ -548,12 +548,12 @@ def eda( rows_c = [(self.branch._get_rows(v), k) for k, v in rows.items()] if len(rows_c) == 1: - self.report = self.memory.cache(sv.analyze)( + self.report = sv.analyze( source=rows_c[0], target_feat=self.branch._get_target(target, only_columns=True), ) elif len(rows_c) == 2: - self.report = self.memory.cache(sv.compare)( + self.report = sv.compare( source=rows_c[0], compare=rows_c[1], target_feat=self.branch._get_target(target, only_columns=True), @@ -1222,7 +1222,7 @@ def _add_transformer( name = transformer_c.__class__.__name__ while name in self.pipeline: counter += 1 - name = f"{transformer_c.__class__.__name__}{counter}" + name = f"{transformer_c.__class__.__name__.lower()}-{counter}" self.branch.pipeline.steps.append((name, transformer_c)) diff --git a/atom/basemodel.py b/atom/basemodel.py index 7777c3532..8a9fbe29d 100644 --- a/atom/basemodel.py +++ b/atom/basemodel.py @@ -64,7 +64,7 @@ from atom.utils.constants import DF_ATTRS from atom.utils.types import ( HT, Backend, Bool, DataFrame, Engine, FHSelector, Float, FloatZeroToOneExc, - Int, IntLargerEqualZero, MetricConstructor, NJobs, Pandas, + Int, IntLargerEqualZero, MetricConstructor, MetricFunction, NJobs, Pandas, PredictionMethods, PredictionMethodsTS, Predictor, RowSelector, Scalar, Scorer, Sequence, Stages, TargetSelector, Verbose, Warnings, XSelector, YSelector, dataframe_t, float_t, int_t, @@ -2431,7 +2431,7 @@ def _prediction( self, X: RowSelector | XSelector, y: YSelector | None = ..., - metric: MetricConstructor = ..., + metric: str | MetricFunction | Scorer | None = ..., sample_weight: Sequence[Scalar] | None = ..., verbose: Int | None = ..., method: Literal["score"] = ..., @@ -2442,7 +2442,7 @@ def _prediction( self, X: RowSelector | XSelector, y: YSelector | None = ..., - metric: MetricConstructor = ..., + metric: str | MetricFunction | Scorer | None = ..., sample_weight: Sequence[Scalar] | None = ..., verbose: Int | None = ..., method: PredictionMethods = ..., @@ -2452,7 +2452,7 @@ def _prediction( self, X: RowSelector | XSelector, y: YSelector | None = None, - metric: MetricConstructor = None, + metric: str | MetricFunction | Scorer | None = None, sample_weight: Sequence[Scalar] | None = None, verbose: Int | None = None, method: PredictionMethods = "predict", @@ -2465,7 +2465,7 @@ def _prediction( Parameters ---------- - X: hashable, range, slice, sequence or dataframe-like + X: hashable, segment, sequence or dataframe-like [Selection of rows][row-and-column-selection] or feature set with shape=(n_samples, n_features) to make predictions on. @@ -2619,7 +2619,7 @@ def decision_function( Parameters ---------- - X: hashable, range, slice, sequence or dataframe-like + X: hashable, segment, sequence or dataframe-like [Selection of rows][row-and-column-selection] or feature set with shape=(n_samples, n_features) to make predictions on. @@ -2656,7 +2656,7 @@ def predict( Parameters ---------- - X: hashable, range, slice, sequence or dataframe-like + X: hashable, segment, sequence or dataframe-like [Selection of rows][row-and-column-selection] or feature set with shape=(n_samples, n_features) to make predictions on. @@ -2692,7 +2692,7 @@ def predict_log_proba( Parameters ---------- - X: hashable, range, slice, sequence or dataframe-like + X: hashable, segment, sequence or dataframe-like [Selection of rows][row-and-column-selection] or feature set with shape=(n_samples, n_features) to make predictions on. @@ -2728,7 +2728,7 @@ def predict_proba( Parameters ---------- - X: hashable, range, slice, sequence or dataframe-like + X: hashable, segment, sequence or dataframe-like [Selection of rows][row-and-column-selection] or feature set with shape=(n_samples, n_features) to make predictions on. @@ -2754,7 +2754,7 @@ def score( X: RowSelector | XSelector, y: YSelector | None = None, *, - metric: MetricConstructor = None, + metric: str | MetricFunction | Scorer | None = None, sample_weight: Sequence[Scalar] | None = None, verbose: Int | None = None, ) -> Float: @@ -2773,7 +2773,7 @@ def score( Parameters ---------- - X: hashable, range, slice, sequence or dataframe-like + X: hashable, segment, sequence or dataframe-like [Selection of rows][row-and-column-selection] or feature set with shape=(n_samples, n_features) to make predictions on. @@ -2825,9 +2825,9 @@ class ForecastModel(BaseModel): @overload def _prediction( self, - y: YSelector | None = None, - X: RowSelector | XSelector | None = None, - metric: MetricConstructor = None, + y: RowSelector | YSelector | None = None, + X: XSelector | None = None, + metric: str | MetricFunction | Scorer | None = None, verbose: Int | None = None, method: Literal["score"] = ..., **kwargs, @@ -2836,9 +2836,9 @@ def _prediction( @overload def _prediction( self, - y: YSelector | None = None, - X: RowSelector | XSelector | None = None, - metric: MetricConstructor = None, + y: RowSelector | YSelector | None = None, + X: XSelector | None = None, + metric: str | MetricFunction | Scorer | None = None, verbose: Int | None = None, method: PredictionMethodsTS = ..., **kwargs, @@ -2846,9 +2846,9 @@ def _prediction( def _prediction( self, - y: YSelector | None = None, - X: RowSelector | XSelector | None = None, - metric: MetricConstructor = None, + y: RowSelector | YSelector | None = None, + X: XSelector | None = None, + metric: str | MetricFunction | Scorer | None = None, verbose: Int | None = None, method: PredictionMethodsTS = "predict", **kwargs, @@ -2861,11 +2861,11 @@ def _prediction( Parameters ---------- - y: sequence or dataframe-like + y: int, str, dict, sequence, dataframe or None, default=None Ground truth observations. - X: dataframe-like or None, default=None - Exogenous time series corresponding to fh. + X: hashable, segment, sequence, dataframe-like or None, default=None + Exogenous time series corresponding to `fh`. metric: str, func, scorer or None, default=None Metric to calculate. Choose from any of sklearn's scorers, @@ -2890,10 +2890,13 @@ def _prediction( called. """ - Xt, yt = self.transform(X, y, verbose=verbose) + Xt, yt = X, y # self.transform(X, y, verbose=verbose) TODO: Fix pipeline ts if method != "score": - return self.memory.cache(getattr(self.estimator, method))(**kwargs) + if "y" in sign(func := getattr(self.estimator, method)): + return self.memory.cache(func)(y=yt, X=Xt, **kwargs) + else: + return self.memory.cache(func)(X=Xt, **kwargs) else: if metric is None: scorer = self._metric[0] @@ -2925,8 +2928,8 @@ def predict( The forecasting horizon encoding the time stamps to forecast at. - X: dataframe-like or None, default=None - Exogenous time series corresponding to fh. + X: hashable, segment, sequence, dataframe-like or None, default=None + Exogenous time series corresponding to `fh`. verbose: int or None, default=None Verbosity level for the transformers in the pipeline. If None, @@ -2965,8 +2968,8 @@ def predict_interval( The forecasting horizon encoding the time stamps to forecast at. - X: dataframe-like or None, default=None - Exogenous time series corresponding to fh. + X: hashable, segment, sequence, dataframe-like or None, default=None + Exogenous time series corresponding to `fh`. coverage: float or sequence, default=0.9 Nominal coverage(s) of predictive interval(s). @@ -3014,8 +3017,8 @@ def predict_proba( The forecasting horizon encoding the time stamps to forecast at. - X: dataframe-like or None, default=None - Exogenous time series corresponding to fh. + X: hashable, segment, sequence, dataframe-like or None, default=None + Exogenous time series corresponding to `fh`. marginal: bool, default=True Whether returned distribution is marginal by time index. @@ -3062,8 +3065,8 @@ def predict_quantiles( The forecasting horizon encoding the time stamps to forecast at. - X: dataframe-like or None, default=None - Exogenous time series corresponding to fh. + X: hashable, segment, sequence, dataframe-like or None, default=None + Exogenous time series corresponding to `fh`. alpha: float or list of float, default=[0.05, 0.95] A probability or list of, at which quantile forecasts are @@ -3093,7 +3096,7 @@ def predict_quantiles( @composed(crash, method_to_log, beartype) def predict_residuals( self, - y: Sequence[Any] | DataFrame, + y: RowSelector | YSelector, X: XSelector | None = None, *, verbose: Int | None = None, @@ -3108,11 +3111,11 @@ def predict_residuals( Parameters ---------- - y: sequence or dataframe-like - Ground truth observations to compute residuals to. + y: int, str, dict, sequence or dataframe + Ground truth observations. - X: dataframe-like or None, default=None - Exogenous time series corresponding to fh. + X: hashable, segment, sequence, dataframe-like or None, default=None + Exogenous time series corresponding to `y`. verbose: int or None, default=None Verbosity level for the transformers in the pipeline. If None, @@ -3131,7 +3134,7 @@ def predict_residuals( @composed(crash, method_to_log, beartype) def predict_var( self, - fh: FHSelector, + fh: RowSelector | FHSelector, X: XSelector | None = None, *, cov: Bool = False, @@ -3151,11 +3154,11 @@ def predict_var( The forecasting horizon encoding the time stamps to forecast at. - X: dataframe-like or None, default=None - Exogenous time series corresponding to fh. + X: hashable, segment, sequence, dataframe-like or None, default=None + Exogenous time series corresponding to `fh`. cov: bool, default=False - Whether to computes covariance matrix forecast or marginal + Whether to compute covariance matrix forecast or marginal variance forecasts. verbose: int or None, default=None @@ -3181,11 +3184,11 @@ def predict_var( @composed(crash, method_to_log, beartype) def score( self, - y: Sequence[Any] | DataFrame, - X: DataFrame | None = None, + y: RowSelector | YSelector, + X: XSelector | None = None, fh: FHSelector | None = None, *, - metric: MetricConstructor = None, + metric: str | MetricFunction | Scorer | None = None, verbose: Int | None = None, ) -> Float: """Get a metric score on new data. @@ -3203,11 +3206,11 @@ def score( Parameters ---------- - y: sequence or dataframe-like + y: int, str, dict, sequence or dataframe Ground truth observations. - X: dataframe-like or None, default=None - Exogenous time series corresponding to fh. + X: hashable, segment, sequence, dataframe-like or None, default=None + Exogenous time series corresponding to `fh`. fh: int, sequence or [ForecastingHorizon][] or None, default=None The forecasting horizon encoding the time stamps to diff --git a/atom/plots/predictionplot.py b/atom/plots/predictionplot.py index 44b4ee9c4..2b36bf3c0 100644 --- a/atom/plots/predictionplot.py +++ b/atom/plots/predictionplot.py @@ -273,7 +273,7 @@ def plot_confusion_matrix( models: int, str, Model, segment, sequence or None, default=None Models to plot. If None, all models are selected. - rows: hashable, range, slice or sequence, default="test" + rows: hashable, segment or sequence, default="test" [Selection of rows][row-and-column-selection] on which to calculate the confusion matrix. @@ -2678,7 +2678,7 @@ def plot_probabilities( models: int, str, Model, segment, sequence or None, default=None Models to plot. If None, all models are selected. - rows: hashable, range, slice or sequence, default="test" + rows: hashable, segment or sequence, default="test" [Selection of rows][row-and-column-selection] on which to calculate the metric. diff --git a/atom/utils/utils.py b/atom/utils/utils.py index d70d69218..952a974fd 100644 --- a/atom/utils/utils.py +++ b/atom/utils/utils.py @@ -56,7 +56,7 @@ from atom.utils.constants import __version__ from atom.utils.types import ( Bool, DataFrame, Estimator, Float, Index, IndexSelector, Int, - IntLargerEqualZero, MetricConstructor, Model, Pandas, Predictor, Scalar, + IntLargerEqualZero, MetricFunction, Model, Pandas, Predictor, Scalar, Scorer, Segment, Sequence, Series, Transformer, TReturn, TReturns, Verbose, XSelector, YSelector, YTypes, dataframe_t, int_t, pandas_t, segment_t, sequence_t, series_t, @@ -2054,7 +2054,7 @@ def check_attr(attr: str) -> bool: return True -def get_custom_scorer(metric: MetricConstructor) -> Scorer: +def get_custom_scorer(metric: str | MetricFunction | Scorer) -> Scorer: """Get a scorer from a str, func or scorer. Scorers used by ATOM have a name attribute. @@ -2780,14 +2780,6 @@ def wrap_methods(f: Callable) -> Callable: - Check if the instance is fitted before transforming. - Convert output to pyarrow dtypes if specified in config. - Parameters - ---------- - f: callable - Function to decorate. - - check_fitted: bool - Whether to check if the instance is fitted. - """ @wraps(f) diff --git a/docs_sources/dependencies.md b/docs_sources/dependencies.md index a93e7cd29..ab48b3e1b 100644 --- a/docs_sources/dependencies.md +++ b/docs_sources/dependencies.md @@ -31,6 +31,7 @@ packages are necessary for its correct functioning. * **[gplearn](https://gplearn.readthedocs.io/en/stable/index.html)** (>=0.4.2) * **[imbalanced-learn](https://imbalanced-learn.readthedocs.io/en/stable/api.html)** (>=0.11.0) * **[ipython](https://ipython.readthedocs.io/en/stable/)** (>=8.11.0) +* **[ipywidgets](https://pypi.org/project/ipywidgets/)** (>=8.1.1) * **[featuretools](https://www.featuretools.com/)** (>=1.28.0) * **[joblib](https://joblib.readthedocs.io/en/latest/)** (>=1.3.1) * **[matplotlib](https://matplotlib.org/)** (>=3.7.2) @@ -63,7 +64,7 @@ additional libraries. You can install all the optional dependencies using * **[lightgbm](https://lightgbm.readthedocs.io/en/latest/)** (>=4.1.0) * **[pmdarima](http://alkaline-ml.com/pmdarima/)** (>=2.0.3) * **[schemdraw](https://schemdraw.readthedocs.io/en/latest/index.html)** (>=0.16) -* **[sweetviz](https://github.com/fbdesignpro/sweetviz)** (>=2.2.1) +* **[sweetviz](https://github.com/fbdesignpro/sweetviz)** (>=2.3.1) * **[wordcloud](http://amueller.github.io/word_cloud/)** (>=1.9.2) * **[xgboost](https://xgboost.readthedocs.io/en/latest/)** (>=2.0.0) diff --git a/docs_sources/user_guide/predicting.md b/docs_sources/user_guide/predicting.md index 50c1da72a..6f1f5c0bc 100644 --- a/docs_sources/user_guide/predicting.md +++ b/docs_sources/user_guide/predicting.md @@ -41,8 +41,8 @@ For forecast tasks: !!! warning The `score` method return atom's metric score, not the metric returned - by sklearn's score method for estimators. Use the method's `metric` - parameter to calculate a different metric. + by sklearn/sktime's score method for estimators. Use the method's + `metric` parameter to calculate a different metric. !!! note * The output of ATOM's methods are pandas objects, not numpy arrays. diff --git a/examples/accelerating_cuml.ipynb b/examples/accelerating_cuml.ipynb index d70a7e5a2..6ef1b2a1e 100644 --- a/examples/accelerating_cuml.ipynb +++ b/examples/accelerating_cuml.ipynb @@ -729,9 +729,9 @@ ], "metadata": { "kernelspec": { - "display_name": "rapids-22.08:Python", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "conda-env-rapids-22.08-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -743,7 +743,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15" + "version": "3.11.2" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index f15a95b87..4aaced72a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ "gplearn>=0.4.2", "imbalanced-learn>=0.11.0", "ipython>=8.11.0", + "ipywidgets>=8.1.1", "featuretools>=1.28.0", "joblib>=1.3.1", "matplotlib>=3.7.2", @@ -54,7 +55,7 @@ full = [ "lightgbm>=4.1.0", "pmdarima>=2.0.3", "schemdraw>=0.16", - "sweetviz>=2.2.1", + "sweetviz>=2.3.1", "wordcloud>=1.9.2", "xgboost>=2.0.0", ] diff --git a/tests/test_basemodel.py b/tests/test_basemodel.py index 2722fa8d6..b00d8d52b 100644 --- a/tests/test_basemodel.py +++ b/tests/test_basemodel.py @@ -191,7 +191,7 @@ def test_multi_objective_optimization(): def test_hyperparameter_tuning_with_plot(): """Assert that you can plot the hyperparameter tuning as it runs.""" atom = ATOMClassifier(X_bin, y_bin, random_state=1) - atom.run(["LDA", "lSVM", "SVM"], n_trials=10, errors="raise", ht_params={"plot": True}) + atom.run(models=["LDA", "lSVM", "SVM"], n_trials=10, ht_params={"plot": True}) def test_xgb_optimizes_score(): @@ -721,12 +721,10 @@ def test_clear(): atom.plot_shap_beeswarm(display=False) atom.evaluate(rows="holdout") assert atom.sgd._evals - assert atom.sgd._memoizer.memory assert not atom.sgd._shap._shap_values.empty assert "holdout" in atom.sgd.branch.__dict__ atom.clear() assert not atom.sgd._evals - assert not atom.sgd._memoizer.memory assert atom.sgd._shap._shap_values.empty assert "holdout" not in atom.sgd.branch.__dict__ diff --git a/tests/test_baserunner.py b/tests/test_baserunner.py index 060ad4150..d34a9c2a2 100644 --- a/tests/test_baserunner.py +++ b/tests/test_baserunner.py @@ -549,7 +549,7 @@ def test_input_is_train_test_with_parameter_y(): def test_input_is_train_test_for_forecast(): """Assert that input train, test works for forecast tasks.""" - trainer = DirectForecaster("ES", random_state=1) + trainer = DirectForecaster("ES", errors="raise", random_state=1) trainer.run(fc_train, fc_test) assert_series_equal(trainer.y, pd.concat([fc_train, fc_test])) diff --git a/tests/test_branch.py b/tests/test_branch.py index 486ad7637..78b45a40d 100644 --- a/tests/test_branch.py +++ b/tests/test_branch.py @@ -200,20 +200,6 @@ def test_all_property(): assert len(atom.branch._all) == len(X_bin) -def test_allX_property(): - """Assert that the _allX property returns the features for dataset + holdout.""" - atom = ATOMRegressor(X_bin, y_bin, holdout_size=0.1, random_state=1) - assert len(atom.branch.dataset) != len(X_bin) - assert atom.branch._allX.shape == X_bin.shape - - -def test_ally_property(): - """Assert that the _ally property returns the target for dataset + holdout.""" - atom = ATOMRegressor(X_bin, y_bin, holdout_size=0.1, random_state=1) - assert len(atom.branch.dataset) != len(X_bin) - assert atom.branch._ally.shape == (len(X_bin),) - - def test_dataset_setter(): """Assert that the dataset setter changes the whole dataset.""" new_dataset = merge(X_bin, y_bin)