diff --git a/atom/basemodel.py b/atom/basemodel.py index d62ec9156..0c05ee702 100644 --- a/atom/basemodel.py +++ b/atom/basemodel.py @@ -3014,7 +3014,15 @@ def _prediction( called. """ - Xt, yt = self.transform(X, y, verbose=verbose) + if y is not None or X is not None: + if isinstance(out := self.transform(X, y, verbose=verbose), tuple): + Xt, yt = out + elif X is not None: + Xt, yt = out, y + else: + Xt, yt = X, out + else: + Xt, yt = X, y if method != "score": fh = kwargs.get("fh") @@ -3022,9 +3030,9 @@ def _prediction( kwargs["fh"] = self.branch._get_rows(fh).index if "y" in sign(func := getattr(self.estimator, method)): - return self.memory.cache(func)(fh=fh, y=yt, X=Xt, **kwargs) + return self.memory.cache(func)(y=yt, X=Xt, **kwargs) else: - return self.memory.cache(func)(fh=fh, X=Xt, **kwargs) + return self.memory.cache(func)(X=Xt, **kwargs) else: if metric is None: scorer = self._metric[0] diff --git a/atom/baserunner.py b/atom/baserunner.py index 13ff9e2fa..0c83088f7 100644 --- a/atom/baserunner.py +++ b/atom/baserunner.py @@ -40,7 +40,8 @@ Bool, DataFrame, FloatZeroToOneExc, HarmonicsSelector, Int, IntLargerOne, MetricConstructor, Model, ModelSelector, ModelsSelector, Pandas, RowSelector, Scalar, Seasonality, Segment, Sequence, Series, - TargetSelector, YSelector, dataframe_t, int_t, segment_t, sequence_t, + TargetSelector, YSelector, bool_t, dataframe_t, int_t, segment_t, + sequence_t, ) from atom.utils.utils import ( ClassMap, DataContainer, Goal, SeasonalPeriod, Task, bk, check_is_fitted, @@ -888,9 +889,18 @@ def _delete_models(self, models: str | Model | Sequence[str | Model]): self._metric = ClassMap() @crash - def available_models(self) -> pd.DataFrame: + def available_models(self, **kwargs) -> pd.DataFrame: """Give an overview of the available predefined models. 
+ Parameters + ---------- + **kwargs + Filter the returned models by providing any of the columns as + keyword arguments, where the value is the desired filter, + e.g., `accepts_sparse=True`, to get all models that accept + sparse input or `supports_engines="cuml"` to get all models + that support the [cuML][] engine. + Returns ------- pd.DataFrame @@ -902,8 +912,8 @@ def available_models(self) -> pd.DataFrame: - **estimator:** Name of the model's underlying estimator. - **module:** The estimator's module. - **handles_missing:** Whether the model can handle missing - (`NaN`) values without preprocessing. If False, consider using - the [Imputer][] class before training the models. + values without preprocessing. If False, consider using the + [Imputer][] class before training the models. - **needs_scaling:** Whether the model requires feature scaling. If True, [automated feature scaling][] is applied. - **accepts_sparse:** Whether the model accepts [sparse input][sparse-datasets]. @@ -922,7 +932,16 @@ def available_models(self) -> pd.DataFrame: for model in MODELS: m = model(goal=self._goal, branches=self._branches) if self._goal.name in m._estimators: - rows.append(m.get_tags()) + tags = m.get_tags() + + for key, value in kwargs.items(): + k = tags.get(key) + if isinstance(value, bool_t) and value is not bool(k): + break + elif isinstance(value, str) and not re.search(value, k, re.I): + break + else: + rows.append(tags) return pd.DataFrame(rows) diff --git a/atom/models/ts.py b/atom/models/ts.py index 410e5b804..df2df3e0b 100644 --- a/atom/models/ts.py +++ b/atom/models/ts.py @@ -21,7 +21,7 @@ class ARIMA(ForecastModel): - """Autoregressive Integrated Moving Average Model. + """Autoregressive Integrated Moving Average. Seasonal ARIMA models and exogenous input are supported, hence this estimator is capable of fitting SARIMA, ARIMAX, and SARIMAX. 
@@ -178,7 +178,7 @@ def _get_distributions(self) -> dict[str, BaseDistribution]: class AutoARIMA(ForecastModel): - """Automatic Autoregressive Integrated Moving Average Model. + """Automatic Autoregressive Integrated Moving Average. [ARIMA][] implementation that includes automated fitting of (S)ARIMA(X) hyperparameters (p, d, q, P, D, Q). The AutoARIMA @@ -649,7 +649,7 @@ def _get_distributions(self) -> dict[str, BaseDistribution]: class MSTL(ForecastModel): - """Multiple Seasonal-Trend decomposition using LOESS model. + """Multiple Seasonal-Trend decomposition using LOESS. The MSTL decomposes the time series in multiple seasonalities using LOESS. Then forecasts the trend using a custom non-seasonal model @@ -956,7 +956,7 @@ def _get_distributions() -> dict[str, BaseDistribution]: class SARIMAX(ForecastModel): - """Seasonal Autoregressive Integrated Moving Average with eXogenous factors. + """Seasonal Autoregressive Integrated Moving Average. SARIMAX stands for Seasonal Autoregressive Integrated Moving Average with eXogenous factors. It extends [ARIMA][] by incorporating seasonal @@ -1106,7 +1106,7 @@ def _get_distributions(self) -> dict[str, BaseDistribution]: class STL(ForecastModel): - """Seasonal-Trend decomposition using Loess. + """Seasonal-Trend decomposition using LOESS. STL is a technique commonly used for decomposing time series data into components like trend, seasonality, and residuals. @@ -1381,7 +1381,7 @@ def _get_distributions() -> dict[str, BaseDistribution]: class VARMAX(ForecastModel): - """Vector Autoregressive Moving-Average with exogenous variables. + """Vector Autoregressive Moving-Average. VARMAX is an extension of the [VAR][] model that incorporates not only lagged values of the endogenous variables, but also includes diff --git a/atom/pipeline.py b/atom/pipeline.py index 4e3179e5c..d2877f156 100644 --- a/atom/pipeline.py +++ b/atom/pipeline.py @@ -477,6 +477,9 @@ def transform( Transformed target column. Only returned if provided. 
""" + if X is None and y is None: + raise ValueError("X and y cannot be both None.") + for _, _, transformer in self._iter(**kwargs): with adjust_verbosity(transformer, self.verbose): X, y = self._mem_transform(transformer, X, y) @@ -520,6 +523,9 @@ def inverse_transform( Transformed target column. Only returned if provided. """ + if X is None and y is None: + raise ValueError("X and y cannot be both None.") + for _, _, transformer in reversed(list(self._iter())): with adjust_verbosity(transformer, self.verbose): X, y = self._mem_transform(transformer, X, y, method="inverse_transform") diff --git a/atom/plots/predictionplot.py b/atom/plots/predictionplot.py index 4bc8b2f37..719529a26 100644 --- a/atom/plots/predictionplot.py +++ b/atom/plots/predictionplot.py @@ -967,10 +967,11 @@ def plot_feature_importance( def plot_forecast( self, models: ModelsSelector = None, - fh: RowSelector | ForecastingHorizon = "test", + fh: RowSelector | ForecastingHorizon = "dataset", X: XSelector | None = None, target: TargetSelector = 0, *, + plot_insample: Bool = False, plot_interval: Bool = True, title: str | dict[str, Any] | None = None, legend: Legend | dict[str, Any] | None = "upper left", @@ -988,7 +989,7 @@ def plot_forecast( models: int, str, Model, segment, sequence or None, default=None Models to plot. If None, all models are selected. - fh: hashable, segment, sequence, dataframe or [ForecastingHorizon][], default="test" + fh: hashable, segment, sequence, dataframe or [ForecastingHorizon][], default="dataset" The [forecasting horizon][row-and-column-selection] for which to plot the predictions. @@ -999,6 +1000,10 @@ def plot_forecast( target: int or str, default=0 Target column to look at. Only for [multivariate][] tasks. + plot_insample: bool, default=False + Whether to draw in-sample predictions (predictions on the training + set). Models that do not support this feature are silently skipped. 
+ plot_interval: bool, default=True Whether to plot prediction intervals together with the exact predicted values. Models without a `predict_interval` method @@ -1040,7 +1045,7 @@ def plot_forecast( -------- atom.plots:DataPlot.plot_distribution atom.plots:DataPlot.plot_series - atom.plots:PredictionPlot.plot_roc + atom.plots:PredictionPlot.plot_errors Examples -------- @@ -1070,7 +1075,7 @@ def plot_forecast( fh = self.branch._get_rows(fh).index if X is None: - X = self.branch.X.loc[fh] + X = self.branch._all.loc[fh] else: X = self.transform(X) @@ -1083,9 +1088,12 @@ def plot_forecast( if self.task.is_multioutput: y_pred = y_pred[target_c] + if not plot_insample: + y_pred.loc[m.branch.train.index] = np.NaN + fig.add_trace( self._draw_line( - x=self._get_plot_index(y_pred), + x=(x := self._get_plot_index(y_pred)), y=y_pred, mode="lines+markers", parent=m.name, @@ -1098,7 +1106,7 @@ def plot_forecast( if plot_interval: try: y_pred = m.predict_interval(fh=fh, X=X) - except NotImplementedError: + except (AttributeError, NotImplementedError): continue # Fails for some models like ES if self.task.is_multioutput: @@ -1107,10 +1115,13 @@ def plot_forecast( else: y = y_pred # Univariate + if not plot_insample: + y_pred.loc[m.branch.train.index] = np.NaN + fig.add_traces( [ go.Scatter( - x=self._get_plot_index(y_pred), + x=x, y=y.iloc[:, 1], mode="lines", line={"width": 1, "color": BasePlot._fig.get_elem(m.name)}, @@ -1121,7 +1132,7 @@ def plot_forecast( yaxis=yaxis, ), go.Scatter( - x=self._get_plot_index(y_pred), + x=x, y=y.iloc[:, 0], mode="lines", line={"width": 1, "color": BasePlot._fig.get_elem(m.name)}, @@ -1139,12 +1150,11 @@ def plot_forecast( # Draw original time series fig.add_trace( go.Scatter( - x=y_pred.index, - y=self.branch.dataset.loc[y_pred.index, target_c], + x=x, + y=self.branch._all.loc[y_pred.index, target_c], mode="lines+markers", line={"width": 1, "color": "black", "dash": "dash"}, opacity=0.6, - layer="below", showlegend=False, xaxis=xaxis, 
yaxis=yaxis, diff --git a/docs_sources/about.md b/docs_sources/about.md index 019037720..79342b49e 100644 --- a/docs_sources/about.md +++ b/docs_sources/about.md @@ -215,6 +215,13 @@ core project contributors with a set of developer tools free of charge. +
+ +
+ +
-
-
+
+ +