diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 6894372fe..108de5968 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -91,7 +91,7 @@ and accept your changes.
* Update the documentation so all of your changes are reflected there.
* Adhere to [PEP 8](https://peps.python.org/pep-0008/) standards.
* Use a maximum of 99 characters per line. Try to keep docstrings below
- 74 characters.
+ 80 characters.
* Update the project unit tests to test your code changes as thoroughly
as possible.
* Make sure that your code is properly commented with docstrings and
diff --git a/atom/api.py b/atom/api.py
index 9fc0918f2..55537c62e 100644
--- a/atom/api.py
+++ b/atom/api.py
@@ -17,8 +17,9 @@
from atom.atom import ATOM
from atom.utils.types import (
- Backend, Bool, ColumnSelector, Engine, IndexSelector, IntLargerEqualZero,
- NJobs, Predictor, Scalar, Verbose, Warnings, YSelector,
+ Backend, Bool, ColumnSelector, Engine, IndexSelector, Int,
+ IntLargerEqualZero, NJobs, Predictor, Scalar, Sequence, Verbose, Warnings,
+ YSelector,
)
from atom.utils.utils import Goal
@@ -35,13 +36,13 @@ def ATOMModel(
needs_scaling: Bool = False,
native_multilabel: Bool = False,
native_multioutput: Bool = False,
- has_validation: str | None = None,
+ validation: str | None = None,
) -> T_Predictor:
"""Convert an estimator to a model that can be ingested by atom.
- This function adds the relevant attributes to the estimator so
- that they can be used by atom. Note that only estimators that
- follow [sklearn's API][api] are compatible.
+ This function adds the relevant tags to the estimator so that they
+ can be used by `atom`. Note that only estimators that follow
+ [sklearn's API][api] are compatible.
Read more about custom models in the [user guide][custom-models].
@@ -75,7 +76,7 @@ def ATOMModel(
If False and the task is multioutput, a multioutput
meta-estimator is wrapped around the estimator.
- has_validation: str or None, default=None
+ validation: str or None, default=None
Whether the model allows [in-training validation][].
- If None: No support for in-training validation.
@@ -121,7 +122,7 @@ def ATOMModel(
estimator_c.needs_scaling = needs_scaling
estimator_c.native_multioutput = native_multioutput
estimator_c.native_multilabel = native_multilabel
- estimator_c.has_validation = has_validation
+ estimator_c.validation = validation
return estimator_c
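A minimal sketch of the renamed tag in use, assuming atom exposes `ATOMModel` at the package top level; the estimator choice is illustrative:

```python
from sklearn.linear_model import SGDClassifier

from atom import ATOMModel

# `validation` (renamed from `has_validation`) names the estimator
# parameter that holds the number of training iterations; None means
# no in-training validation.
model = ATOMModel(
    SGDClassifier(),
    needs_scaling=True,
    validation="max_iter",
)
```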
@@ -453,6 +454,24 @@ class ATOMForecaster(ATOM):
and model training. The features are still used in the remaining
methods.
+ sp: int, str, sequence or None, default=None
+ [Seasonal period][seasonality] of the time series.
+
+ - If None: No seasonal period.
+        - If int: Seasonal period, e.g., 7 for weekly data and 12 for
+          monthly data.
+ - If str:
+
+ - Seasonal period provided as [PeriodAlias][], e.g., "M" for
+ 12 or "H" for 24.
+ - "index": The frequency of the data index is mapped to a
+ seasonal period.
+ - "infer": Automatically infer the seasonal period from the
+ data (calls [get_seasonal_period][self-get_seasonal_period]
+ under the hood, using default parameters).
+
+ - If sequence: Multiple seasonal periods provided as int or str.
+
test_size: int or float, default=0.2
- If <=1: Fraction of the dataset to include in the test set.
- If >1: Number of rows to include in the test set.
@@ -592,6 +611,7 @@ def __init__(
*arrays,
y: YSelector = -1,
ignore: ColumnSelector | None = None,
+ sp: Int | str | Sequence[Int | str] | None = None,
n_rows: Scalar = 1,
test_size: Scalar = 0.2,
holdout_size: Scalar | None = None,
@@ -611,6 +631,7 @@ def __init__(
y=y,
index=True,
ignore=ignore,
+ sp=sp,
test_size=test_size,
holdout_size=holdout_size,
shuffle=False,
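A sketch of the new `sp` argument, mirroring the docstring above; `load_airline` is just a convenient monthly series:

```python
from atom import ATOMForecaster
from sktime.datasets import load_airline

y = load_airline()  # monthly data

# Equivalent ways to declare the seasonal period:
atom = ATOMForecaster(y, sp=12, random_state=1)       # explicit period
atom = ATOMForecaster(y, sp="M", random_state=1)      # PeriodAlias -> 12
atom = ATOMForecaster(y, sp="infer", random_state=1)  # detect via ACF
```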
diff --git a/atom/atom.py b/atom/atom.py
index 95ed7274c..aecf632b3 100644
--- a/atom/atom.py
+++ b/atom/atom.py
@@ -56,7 +56,7 @@
NJobs, NormalizerStrats, NumericalStrats, Operators, Pandas, PrunerStrats,
RowSelector, Scalar, ScalerStrats, Sequence, Series, TargetSelector,
Transformer, VectorizerStarts, Verbose, Warnings, XSelector, YSelector,
- sequence_t, tsindex_t,
+ sequence_t,
)
from atom.utils.utils import (
ClassMap, DataConfig, DataContainer, Goal, adjust_verbosity, bk,
@@ -95,6 +95,7 @@ def __init__(
y: YSelector = -1,
index: IndexSelector = False,
ignore: ColumnSelector | None = None,
+ sp: Int | str | Sequence[Int | str] | None = None,
shuffle: Bool = True,
stratify: IndexSelector = True,
n_rows: Scalar = 1,
@@ -133,18 +134,19 @@ def __init__(
holdout_size=holdout_size,
)
- self._log("<< ================== ATOM ================== >>", 1)
-
# Initialize the branch system and fill with data
self._branches = BranchManager(memory=self.memory)
self._branches.fill(*self._get_data(arrays, y=y))
self.ignore = ignore # type: ignore[assignment]
+ self.sp = sp # type: ignore[assignment]
+
self.missing = DEFAULT_MISSING
self._models = ClassMap()
self._metric = ClassMap()
+ self._log("<< ================== ATOM ================== >>", 1)
self._log("\nConfiguration ==================== >>", 1)
self._log(f"Algorithm task: {self.task}.", 1)
if self.n_jobs > 1:
@@ -747,8 +749,8 @@ def load(cls, filename: str | Path, data: tuple[Any, ...] | None = None) -> ATOM
if atom._config.index is False:
branch._container = DataContainer(
data=(dataset := branch._container.data.reset_index(drop=True)),
- train_idx=dataset.index[: len(branch._container.train_idx)],
- test_idx=dataset.index[-len(branch._container.test_idx) :],
+ train_idx=dataset.index[:len(branch._container.train_idx)],
+ test_idx=dataset.index[-len(branch._container.test_idx):],
n_cols=branch._container.n_cols,
)
@@ -956,11 +958,13 @@ def stats(self, _vb: Int = -2, /):
"""
self._log("Dataset stats " + "=" * 20 + " >>", _vb)
self._log(f"Shape: {self.shape}", _vb)
+ if self.task.is_forecast and self.sp:
+ self._log(f"Seasonal period: {self.sp}", _vb)
- for set_ in ("train", "test", "holdout"):
- if (data := getattr(self, set_)) is not None:
- self._log(f"{set_.capitalize()} set size: {len(data)}", _vb)
- if isinstance(self.branch.train.index, tsindex_t):
+ for ds in ("train", "test", "holdout"):
+ if (data := getattr(self, ds)) is not None:
+ self._log(f"{ds.capitalize()} set size: {len(data)}", _vb)
+ if self.task.is_forecast:
self._log(f" --> From: {min(data.index)} To: {max(data.index)}", _vb)
self._log("-" * 37, _vb)
@@ -1231,7 +1235,7 @@ def _add_transformer(
self.branch._container = DataContainer(
data=(data := self.dataset.reset_index(drop=True)),
train_idx=data.index[: len(self.branch._data.train_idx)],
- test_idx=data.index[-len(self.branch._data.test_idx) :],
+ test_idx=data.index[-len(self.branch._data.test_idx):],
n_cols=self.branch._data.n_cols,
)
if self.branch._holdout is not None:
diff --git a/atom/basemodel.py b/atom/basemodel.py
index 8e539e91c..71e89db65 100644
--- a/atom/basemodel.py
+++ b/atom/basemodel.py
@@ -253,13 +253,23 @@ def __init__(
self._branch = branches.current
self._train_idx = len(self.branch._data.train_idx) # Can change for sh and ts
- if self.needs_scaling and not check_scaling(self.X, pipeline=self.pipeline):
- self.scaler = Scaler().fit(self.X_train)
+ if hasattr(self, "needs_scaling"):
+ if self.needs_scaling and not check_scaling(self.X, pipeline=self.pipeline):
+ self.scaler = Scaler().fit(self.X_train)
def __repr__(self) -> str:
"""Display class name."""
return f"{self.__class__.__name__}()"
+ def __dir__(self) -> list[str]:
+ """Add additional attrs from __getattr__ to the dir."""
+ attrs = list(super().__dir__())
+ if "_branch" in self.__dict__:
+ attrs += [x for x in dir(self.branch) if not x.startswith("_")]
+ attrs += list(DF_ATTRS)
+ attrs += list(self.columns)
+ return attrs
+
def __getattr__(self, item: str) -> Any:
"""Get attributes from branch or data."""
if "_branch" in self.__dict__:
@@ -449,9 +459,10 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
estimator = MultiOutputClassifier(estimator)
elif self.task.is_regression:
estimator = MultiOutputRegressor(estimator)
- elif hasattr(self, "_estimators") and self._goal.name not in self._estimators:
- # Forecasting task with a regressor
- estimator = make_reduction(estimator)
+ elif self.task.is_forecast:
+ if hasattr(self, "_estimators") and self._goal.name not in self._estimators:
+ # Forecasting task with a regressor
+ estimator = make_reduction(estimator)
return self._inherit(estimator)
@@ -494,13 +505,13 @@ def _fit_estimator(
Fitted instance.
"""
- if self.has_validation and hasattr(estimator, "partial_fit") and validation:
+ if getattr(self, "validation", False) and hasattr(estimator, "partial_fit") and validation:
# Loop over first parameter in estimator
try:
- steps = estimator.get_params()[self.has_validation]
+ steps = estimator.get_params()[self.validation]
except KeyError:
# For meta-estimators like multioutput
- steps = estimator.get_params()[f"estimator__{self.has_validation}"]
+ steps = estimator.get_params()[f"estimator__{self.validation}"]
for step in range(steps):
kwargs = {}
@@ -533,8 +544,8 @@ def _fit_estimator(
if trial.should_prune():
# Hacky solution to add the pruned step to the output
- if self.has_validation in trial.params:
- trial.params[self.has_validation] = f"{step}/{steps}"
+ if self.validation in trial.params:
+ trial.params[self.validation] = f"{step}/{steps}"
trial.set_user_attr("estimator", estimator)
raise TrialPruned
@@ -1308,7 +1319,7 @@ def name(self, value: str):
"""Change the model's name."""
# Drop the acronym if provided by the user
if re.match(f"{self.acronym}_", value, re.I):
- value = value[len(self.acronym) + 1 :]
+ value = value[len(self.acronym) + 1:]
# Add the acronym in front (with right capitalization)
self._name = f"{self.acronym}{f'_{value}' if value else ''}"
@@ -2437,6 +2448,32 @@ def transform(
class ClassRegModel(BaseModel):
"""Classification and regression models."""
+ def get_tags(self) -> dict[str, Any]:
+ """Get the model's tags.
+
+ Return class parameters that provide general information about
+ the estimator's characteristics.
+
+ Returns
+ -------
+ dict
+ Model's tags.
+
+ """
+ return {
+ "acronym": self.acronym,
+ "fullname": self.fullname,
+ "estimator": self._est_class,
+ "module": self._est_class.__module__.split(".")[0] + self._module,
+ "handles_missing": self.handles_missing,
+ "needs_scaling": self.needs_scaling,
+ "accepts_sparse": self.accepts_sparse,
+ "native_multilabel": self.native_multilabel,
+ "native_multioutput": self.native_multioutput,
+ "validation": self.validation,
+ "supports_engines": ", ".join(self.supports_engines),
+ }
+
@overload
def _prediction(
self,
@@ -2845,6 +2882,29 @@ def score(
class ForecastModel(BaseModel):
"""Forecasting models."""
+ def get_tags(self) -> dict[str, Any]:
+ """Get the model's tags.
+
+ Return class parameters that provide general information about
+ the estimator's characteristics.
+
+ Returns
+ -------
+ dict
+ Model's tags.
+
+ """
+ return {
+ "acronym": self.acronym,
+ "fullname": self.fullname,
+            "estimator": self._est_class,
+ "module": self._est_class.__module__.split(".")[0] + self._module,
+ "handles_missing": self.handles_missing,
+            "uses_exogenous": self.uses_exogenous,
+            "in_sample_prediction": self.in_sample_prediction,
+ "native_multivariate": self.native_multivariate,
+ "supports_engines": ", ".join(self.supports_engines),
+ }
+
@overload
def _prediction(
self,
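Since `available_models` now builds its rows from `get_tags()`, the returned columns follow the task at hand; a sketch, assuming the breast-cancer toy dataset:

```python
from sklearn.datasets import load_breast_cancer

from atom import ATOMClassifier

X, y = load_breast_cancer(return_X_y=True, as_frame=True)
atom = ATOMClassifier(X, y, random_state=1)

# One row of tags per predefined model; note the renamed columns
# (fullname instead of model, validation instead of has_validation).
models = atom.available_models()
print(models[["acronym", "fullname", "validation"]].head())
```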
diff --git a/atom/baserunner.py b/atom/baserunner.py
index b776749ac..7019bac81 100644
--- a/atom/baserunner.py
+++ b/atom/baserunner.py
@@ -7,6 +7,7 @@
from __future__ import annotations
+import math
import random
import re
from abc import ABCMeta
@@ -17,12 +18,17 @@
from typing import Any
import dill as pickle
+import numpy as np
import pandas as pd
from beartype import beartype
+from pandas.tseries.frequencies import to_offset
+from pmdarima.arima.utils import ndiffs
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.utils.metaestimators import available_if
from sktime.datatypes import check_is_mtype
+from sktime.param_est.seasonality import SeasonalityACF
+from sktime.transformations.series.difference import Differencer
from atom.basetracker import BaseTracker
from atom.basetransformer import BaseTransformer
@@ -31,14 +37,15 @@
from atom.pipeline import Pipeline
from atom.utils.constants import DF_ATTRS
from atom.utils.types import (
- Bool, DataFrame, FloatZeroToOneExc, Int, MetricConstructor, Model,
- ModelSelector, ModelsSelector, Pandas, RowSelector, Scalar, Segment,
- Sequence, Series, YSelector, dataframe_t, int_t, segment_t, sequence_t,
+ Bool, DataFrame, FloatZeroToOneExc, HarmonicsSelector, Int,
+ MetricConstructor, Model, ModelSelector, ModelsSelector, Pandas,
+ RowSelector, Scalar, Segment, Sequence, Series, YSelector, dataframe_t,
+ int_t, segment_t, sequence_t,
)
from atom.utils.utils import (
- ClassMap, DataContainer, Task, bk, check_is_fitted, composed, crash,
- divide, flt, get_cols, get_segment, get_versions, has_task, lst, merge,
- method_to_log, n_cols,
+ ClassMap, DataContainer, SeasonalPeriod, Task, bk, check_is_fitted,
+ composed, crash, divide, flt, get_cols, get_segment, get_versions,
+ has_task, lst, merge, method_to_log, n_cols,
)
@@ -70,6 +77,17 @@ def __setstate__(self, state: dict[str, Any]):
severity="warning",
)
+ def __dir__(self) -> list[str]:
+ """Add additional attrs from __getattr__ to the dir."""
+ attrs = list(super().__dir__())
+ attrs += [x for x in dir(self.branch) if not x.startswith("_")]
+ attrs += list(DF_ATTRS)
+ attrs += [b.name.lower() for b in self._branches]
+ attrs += list(self.columns)
+ if isinstance(self._models, ClassMap):
+ attrs += [m.name.lower() for m in self._models]
+ return attrs
+
def __getattr__(self, item: str) -> Any:
"""Get branch, attr from branch, model, column or attr from dataset."""
if item in self.__dict__["_branches"]:
@@ -138,6 +156,33 @@ def task(self) -> Task:
"""Dataset's [task][] type."""
return self._goal.infer_task(self.y)
+ @property
+ def sp(self) -> int | list[int] | None:
+ """Seasonal period(s) of the time series.
+
+ Read more about seasonality in the [user guide][seasonality].
+
+ """
+ return self._sp
+
+ @sp.setter
+ def sp(self, sp: Int | str | Sequence[Int | str] | None):
+ """Convert seasonal period to integer value."""
+ if sp is None:
+ self._sp = None
+ elif sp == "index":
+            if not (freq := getattr(self.dataset.index, "freqstr", None)):
+                raise ValueError(
+                    f"Invalid value for the seasonal period, got {sp}. "
+                    "The dataset's index has no defined frequency."
+                )
+            else:
+                self._sp = self._get_sp(freq)
+ elif sp == "infer":
+ self._sp = self.get_seasonal_period()
+ else:
+ self._sp = flt([self._get_sp(x) for x in lst(sp)])
+
@property
def og(self) -> Branch:
"""Branch containing the original dataset.
@@ -269,6 +314,36 @@ def frac(m: Model) -> float:
# Utility methods ============================================== >>
+ @staticmethod
+ def _get_sp(sp: Int | str) -> int:
+ """Get the seasonal period from a value or string.
+
+ Parameters
+ ----------
+ sp: int or str
+ Seasonal period provided as int or [DateOffset][].
+
+ Returns
+ -------
+ int
+ Seasonal period.
+
+ """
+ if isinstance(sp, str):
+ if offset := to_offset(sp): # Convert to pandas' DateOffset
+ name, period = offset.name.split("-")[0], offset.n
+
+ if name not in SeasonalPeriod.__members__:
+ raise ValueError(
+ f"Invalid value for the seasonal period, got {name}. "
+ f"Valid values are: {', '.join(SeasonalPeriod.__members__)}"
+ )
+
+            # For period=1, this reduces to SeasonalPeriod[name]
+ return math.lcm(SeasonalPeriod[name].value, period) // period
+ else:
+ return int(sp)
+
def _set_index(self, df: DataFrame, y: Pandas | None) -> DataFrame:
"""Assign an index to the dataframe.
@@ -482,8 +557,8 @@ def _no_data_sets(
container = DataContainer(
data=(data := complete_set.iloc[: len(data)]),
- train_idx=data.index[: -len(test)],
- test_idx=data.index[-len(test) :],
+ train_idx=data.index[:-len(test)],
+ test_idx=data.index[-len(test):],
n_cols=len(get_cols(y)),
)
@@ -500,7 +575,7 @@ def _no_data_sets(
raise ex
if holdout is not None:
- holdout = complete_set.iloc[len(data) :]
+ holdout = complete_set.iloc[len(data):]
return container, holdout
@@ -589,19 +664,19 @@ def _has_data_sets(
train.index = self._config.index[: len(train)]
test.index = self._config.index[len(train) : len(train) + len(test)]
if holdout is not None:
- holdout.index = self._config.index[-len(holdout) :]
+ holdout.index = self._config.index[-len(holdout):]
complete_set = self._set_index(bk.concat([train, test, holdout]), y_test)
container = DataContainer(
- data=(data := complete_set.iloc[: len(train) + len(test)]),
+ data=(data := complete_set.iloc[:len(train) + len(test)]),
train_idx=data.index[: len(train)],
- test_idx=data.index[-len(test) :],
+ test_idx=data.index[-len(test):],
n_cols=len(get_cols(y_train)),
)
if holdout is not None:
- holdout = complete_set.iloc[len(train) + len(test) :]
+ holdout = complete_set.iloc[len(train) + len(test):]
return container, holdout
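The alias-to-period arithmetic in `_get_sp` can be checked in isolation; a sketch with an assumed stand-in for the `SeasonalPeriod` enum (its values are not shown in this diff):

```python
import math

from pandas.tseries.frequencies import to_offset

# Assumed stand-in for atom's SeasonalPeriod enum: steps per seasonal
# cycle for a period of 1.
SEASONAL_PERIOD = {"D": 365, "W": 52, "M": 12, "Q": 4, "H": 24}

def get_sp(alias: str) -> int:
    offset = to_offset(alias)  # e.g., "2M" -> <2 * MonthEnds>
    name, period = offset.name.split("-")[0], offset.n
    # For period=1 this reduces to SEASONAL_PERIOD[name]; for "2M" it
    # yields lcm(12, 2) // 2 = 6 bi-monthly steps per yearly cycle.
    return math.lcm(SEASONAL_PERIOD[name], period) // period

print(get_sp("M"))   # 12
print(get_sp("2M"))  # 6
```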
@@ -819,20 +894,27 @@ def available_models(self) -> pd.DataFrame:
Returns
-------
pd.DataFrame
- Information about the available [predefined models][]. Columns
- include:
+ Tags of the available [predefined models][]. The columns
+ depend on the task, but can include:
- **acronym:** Model's acronym (used to call the model).
- - **model:** Name of the model's class.
- - **estimator:** The model's underlying estimator.
+ - **fullname:** Name of the model's class.
+ - **estimator:** Class of the model's underlying estimator.
- **module:** The estimator's module.
+ - **handles_missing:** Whether the model can handle `NaN` values
+ without preprocessing.
- **needs_scaling:** Whether the model requires feature scaling.
- **accepts_sparse:** Whether the model accepts sparse matrices.
+ - **uses_exogenous:** Whether the model uses exogenous variables.
+ - **in_sample_prediction:** Whether the model can do predictions
+ on the training set.
- **native_multilabel:** Whether the model has native support
for [multilabel][] tasks.
- **native_multioutput:** Whether the model has native support
for [multioutput tasks][].
- - **has_validation:** Whether the model has [in-training validation][].
+ - **native_multivariate:** Whether the model has native support
+ for [multivariate][] tasks.
+ - **validation:** Whether the model has [in-training validation][].
- **supports_engines:** Engines supported by the model.
"""
@@ -840,20 +922,7 @@ def available_models(self) -> pd.DataFrame:
for model in MODELS:
m = model(goal=self._goal)
if self._goal.name in m._estimators:
- rows.append(
- {
- "acronym": m.acronym,
- "model": m.fullname,
- "estimator": m._est_class.__name__,
- "module": m._est_class.__module__.split(".")[0] + m._module,
- "needs_scaling": m.needs_scaling,
- "accepts_sparse": m.accepts_sparse,
- "native_multilabel": m.native_multilabel,
- "native_multioutput": m.native_multioutput,
- "has_validation": bool(m.has_validation),
- "supports_engines": ", ".join(m.supports_engines),
- }
- )
+ rows.append(m.get_tags())
return pd.DataFrame(rows)
@@ -1051,6 +1120,94 @@ def get_sample_weight(self, rows: RowSelector = "train") -> Series:
weights = compute_sample_weight("balanced", y=y)
return bk.Series(weights, name="sample_weight").round(3)
+ @available_if(has_task("forecast"))
+ @composed(crash, beartype)
+ def get_seasonal_period(
+ self,
+ max_sp: Int | None = None,
+ harmonics: HarmonicsSelector | None = None,
+    ) -> int | list[int]:
+ """Get the seasonal periods of the time series.
+
+ Use the data in the training set to calculate the seasonal
+        period. The data is internally differenced before the
+ seasonality is detected using ACF.
+
+ !!! tip
+ Read more about seasonality in the [user guide][seasonality].
+
+ Parameters
+ ----------
+ max_sp: int or None, default=None
+ Maximum seasonal period to consider. If None, the maximum
+ period is given by `(len(y_train) - 1) // 2`.
+
+ harmonics: str or None, default=None
+            Defines the strategy to deal with harmonics in the
+            detected seasonal periods. Choose from the following options:
+
+ - None: The detected seasonal periods are left unchanged
+ (no harmonic removal).
+ - "drop": Remove all harmonics.
+ - "raw_strength": Keep the highest order harmonics, maintaining
+ the order of significance.
+ - "harmonic_strength": Replace seasonal periods with their highest
+ harmonic.
+
+ E.g., if the detected seasonal periods in strength order are
+ `[2, 3, 4, 7, 8]` (note that 4 and 8 are harmonics of 2), then:
+
+ - If "drop", result=[2, 3, 7]
+ - If "raw_strength", result=[3, 7, 8]
+ - If "harmonic_strength", result=[8, 3, 7]
+
+ Returns
+ -------
+        int or list of int
+            Seasonal period(s), ordered by significance.
+
+ """
+ yt = self.y_train.copy()
+ max_sp = max_sp or (len(yt) - 1) // 2
+
+ for _ in np.arange(ndiffs(yt)):
+ yt = Differencer().fit_transform(yt)
+
+ acf = SeasonalityACF(nlags=max_sp).fit(pd.DataFrame(yt))
+ seasonal_periods = acf.get_fitted_params().get("sp_significant")
+
+ if harmonics and len(seasonal_periods) > 1:
+ # Create a dictionary of the seasonal periods and their harmonics
+ harmonic_dict: dict[int, list[int]] = {}
+ for sp in seasonal_periods:
+ for k in harmonic_dict:
+ if sp % k == 0:
+ harmonic_dict[k].append(sp)
+ break
+ else:
+ harmonic_dict[sp] = []
+
+ # For periods without harmonics, simplify operations
+ # by setting the value of the key to itself
+ harmonic_dict = {k: (v or [k]) for k, v in harmonic_dict.items()}
+
+ if harmonics == "drop":
+ seasonal_periods = list(harmonic_dict.keys())
+ elif harmonics == "raw_strength":
+ seasonal_periods = [
+ sp for sp in seasonal_periods
+ if any(max(v) == sp for v in harmonic_dict.values())
+ ]
+ elif harmonics == "harmonic_strength":
+ seasonal_periods = [max(v) for v in harmonic_dict.values()]
+
+ if not (seasonal_periods := [int(sp) for sp in seasonal_periods if sp <= max_sp]):
+ raise ValueError(
+                "No seasonal periods were detected. Try increasing the max_sp parameter."
+ )
+
+ return flt(seasonal_periods)
+
@composed(crash, method_to_log, beartype)
def merge(self, other: BaseRunner, /, suffix: str = "2"):
"""Merge another instance of the same class into this one.
diff --git a/atom/basetrainer.py b/atom/basetrainer.py
index 63e730cc0..0a9f63110 100644
--- a/atom/basetrainer.py
+++ b/atom/basetrainer.py
@@ -182,7 +182,7 @@ def _prepare_parameters(self):
raise ValueError(
f"Invalid value for the models parameter, got {m}. "
"Note that tags must be separated by an underscore. "
- f"Available model are:\n"
+                    "Available models are:\n" +
"\n".join(
[
f" --> {m.__name__} ({m.acronym})"
@@ -195,8 +195,11 @@ def _prepare_parameters(self):
# Check if libraries for non-sklearn models are available
dependencies = {
"ARIMA": "pmdarima",
- "Catb": "castboost",
+ "AutoARIMA": "pmdarima",
+ "BATS": "tbats",
+ "CatB": "catboost",
"LGB": "lightgbm",
+ "TBATS": "tbats",
"XGB": "xgboost",
}
if cls.acronym in dependencies:
diff --git a/atom/basetransformer.py b/atom/basetransformer.py
index c75501c2f..f23180cc1 100644
--- a/atom/basetransformer.py
+++ b/atom/basetransformer.py
@@ -353,10 +353,11 @@ def _device_id(self) -> int:
# Methods ====================================================== >>
def _inherit(self, obj: T_Estimator) -> T_Estimator:
- """Inherit n_jobs and/or random_state from parent.
+ """Inherit parameters from parent.
- Utility method to set the n_jobs and random_state parameters
- of an estimator (if available) equal to that of this instance.
+ Utility method to set the sp (seasonal period), n_jobs and
+ random_state parameters of an estimator (if available) equal
+        to those of this instance.
Parameters
----------
@@ -370,9 +371,9 @@ def _inherit(self, obj: T_Estimator) -> T_Estimator:
"""
signature = sign(obj.__init__) # type: ignore[misc]
- for p in ("n_jobs", "random_state"):
+ for p in ("sp", "n_jobs", "random_state"):
if p in signature and getattr(obj, p, "") == signature[p]._default:
- setattr(obj, p, getattr(self, p))
+ setattr(obj, p, getattr(self, p, signature[p]._default))
return obj
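A sketch of the extended `_inherit` behavior, using `inspect.signature` in place of atom's internal `sign` helper; both classes are illustrative stand-ins:

```python
from inspect import signature

class Runner:
    """Stand-in for a BaseTransformer instance with the new sp setting."""
    sp = 12
    n_jobs = 4
    random_state = 1

    def _inherit(self, obj):
        params = signature(obj.__init__).parameters
        for p in ("sp", "n_jobs", "random_state"):
            # Only overwrite parameters that are still at their default
            if p in params and getattr(obj, p, "") == params[p].default:
                setattr(obj, p, getattr(self, p, params[p].default))
        return obj

class Forecaster:
    """Stand-in for a sktime estimator exposing an sp parameter."""
    def __init__(self, sp=1, random_state=None):
        self.sp = sp
        self.random_state = random_state

est = Runner()._inherit(Forecaster())
print(est.sp, est.random_state)  # 12 1
```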
diff --git a/atom/data_cleaning.py b/atom/data_cleaning.py
index 7fd488e2e..cf276d987 100644
--- a/atom/data_cleaning.py
+++ b/atom/data_cleaning.py
@@ -274,13 +274,13 @@ class Balancer(TransformerMixin):
Target values mapped to their respective encoded integers.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
target_names_in_: np.ndarray
- Names of the target column seen during fit.
+ Names of the target column seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -527,9 +527,9 @@ def log_changes(y):
]
# Select the new samples and assign the new indices
- X_new = X_new.iloc[-len(X_new) + len(o_samples) :]
+ X_new = X_new.iloc[-len(X_new) + len(o_samples):]
X_new.index = n_idx
- y_new = y_new.iloc[-len(y_new) + len(o_samples) :]
+ y_new = y_new.iloc[-len(y_new) + len(o_samples):]
y_new.index = n_idx
# First, output the samples created
@@ -603,21 +603,22 @@ class Cleaner(TransformerMixin):
`#!python device="gpu"` to use the GPU. Read more in the
[user guide][gpu-acceleration].
- engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+ engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
- corresponding choice as values. Choose from:
+ corresponding choice as values. If None, the default values
+        are used. Choose from:
- "data":
- - "numpy"
+ - "numpy" (default)
- "pyarrow"
- "modin"
- "estimator":
- - "sklearn"
+ - "sklearn" (default)
- "cuml"
verbose: int, default=0
@@ -646,13 +647,13 @@ class Cleaner(TransformerMixin):
available if encode_target=True.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
target_names_in_: np.ndarray
- Names of the target column(s) seen during fit.
+ Names of the target column(s) seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -1031,21 +1032,22 @@ class Discretizer(TransformerMixin):
`#!python device="gpu"` to use the GPU. Read more in the
[user guide][gpu-acceleration].
- engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+ engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
- corresponding choice as values. Choose from:
+ corresponding choice as values. If None, the default values
+        are used. Choose from:
- "data":
- - "numpy"
+ - "numpy" (default)
- "pyarrow"
- "modin"
- "estimator":
- - "sklearn"
+ - "sklearn" (default)
- "cuml"
verbose: int, default=0
@@ -1068,10 +1070,10 @@ class Discretizer(TransformerMixin):
Attributes
----------
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -1398,10 +1400,10 @@ class Encoder(TransformerMixin):
the key to its mapping dictionary. Only for ordinal encoding.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -1731,21 +1733,22 @@ class Imputer(TransformerMixin):
`#!python device="gpu"` to use the GPU. Read more in the
[user guide][gpu-acceleration].
- engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+ engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
- corresponding choice as values. Choose from:
+ corresponding choice as values. If None, the default values
+        are used. Choose from:
- "data":
- - "numpy"
+ - "numpy" (default)
- "pyarrow"
- "modin"
- "estimator":
- - "sklearn"
+ - "sklearn" (default)
- "cuml"
verbose: int, default=0
@@ -1775,10 +1778,10 @@ class Imputer(TransformerMixin):
with sklearn estimators.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -2132,21 +2135,22 @@ class Normalizer(TransformerMixin):
`#!python device="gpu"` to use the GPU. Read more in the
[user guide][gpu-acceleration].
- engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+ engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
- corresponding choice as values. Choose from:
+ corresponding choice as values. If None, the default values
+        are used. Choose from:
- "data":
- - "numpy"
+ - "numpy" (default)
- "pyarrow"
- "modin"
- "estimator":
- - "sklearn"
+ - "sklearn" (default)
- "cuml"
verbose: int, default=0
@@ -2174,10 +2178,10 @@ class Normalizer(TransformerMixin):
`normalizer.yeojohnson` for the default strategy.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -2453,10 +2457,10 @@ class Pruner(TransformerMixin):
isolation forest strategy. Not available for strategy="zscore".
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -2708,21 +2712,22 @@ class Scaler(TransformerMixin):
`#!python device="gpu"` to use the GPU. Read more in the
[user guide][gpu-acceleration].
- engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+ engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
- corresponding choice as values. Choose from:
+ corresponding choice as values. If None, the default values
+        are used. Choose from:
- "data":
- - "numpy"
+ - "numpy" (default)
- "pyarrow"
- "modin"
- "estimator":
- - "sklearn"
+ - "sklearn" (default)
- "cuml"
verbose: int, default=0
@@ -2746,10 +2751,10 @@ class Scaler(TransformerMixin):
`scaler.standard` for the default strategy.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
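A sketch of the engine parameter's new `None` default, assuming pyarrow is installed for the explicit variant:

```python
from atom.data_cleaning import Scaler

# engine=None (the new default) resolves to numpy data + sklearn estimators
scaler = Scaler()

# Or select engines explicitly, per the updated docstrings
scaler = Scaler(engine={"data": "pyarrow", "estimator": "sklearn"})
```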
diff --git a/atom/feature_engineering.py b/atom/feature_engineering.py
index 6d2aba266..65a284c89 100644
--- a/atom/feature_engineering.py
+++ b/atom/feature_engineering.py
@@ -109,10 +109,10 @@ class FeatureExtractor(TransformerMixin):
Attributes
----------
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -360,10 +360,10 @@ class FeatureGenerator(TransformerMixin):
- **fitness:** Fitness score.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -487,7 +487,7 @@ def fit(self, X: DataFrame, y: Pandas | None = None) -> Self:
)
# Select the new features (dfs also returns originals)
- self._dfs = self._dfs[X.shape[1] - 1 :]
+ self._dfs = self._dfs[X.shape[1] - 1:]
# Get a random selection of features
if self.n_features and self.n_features < len(self._dfs):
@@ -638,10 +638,10 @@ class FeatureGrouper(TransformerMixin):
Attributes
----------
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -976,10 +976,10 @@ class FeatureSelector(TransformerMixin):
strategy.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
diff --git a/atom/models/__init__.py b/atom/models/__init__.py
index 7bdbc8ab7..3e5e4a5b5 100644
--- a/atom/models/__init__.py
+++ b/atom/models/__init__.py
@@ -3,73 +3,6 @@
Author: Mavs
Description: Module for models.
-To add new models, note the following:
-
-1. Add the class in the right file depending on the task.
-2. Models are ordered alphabetically.
-3. Models have the following structure:
-
-Class attributes
-----------------
-acronym: str
- Acronym of the model's name.
-
-needs_scaling: bool
- Whether the model needs scaled features.
-
-accepts_sparse: bool
- Whether the model has native support for sparse matrices.
-
-native_multilabel: bool
- Whether the model has native support for multilabel tasks.
-
-native_multioutput: bool
- Whether the model has native support for multioutput tasks.
-
-has_validation: str or None
- Whether the model allows in-training validation. If str,
- name of the estimator's parameter that states the number
- of iterations. If None, no support for in-training
- validation.
-
-supports_engines: list
- Engines that can be used to run this model.
-
-_module: str
- Module from which to load the class. If one of engines,
- ignore the engine name, i.e., use "ensemble" instead of
- "sklearn.ensemble".
-
-_estimators: dict
- Name of the estimators per goal.
-
-Instance attributes
--------------------
-name: str
- Name of the model. Defaults to the same as the acronym
- but can be different if the same model is called multiple
- times. The name is assigned in the basemodel.py module.
-
-Methods
--------
-_get_parameters(self, x) -> dict:
- Return the trial's suggestions with (optionally) custom changes
- to the params. Don't implement if the parent's implementation
- is sufficient.
-
-_trial_to_est(self, params) -> dict:
- Convert trial's hyperparameters to parameters for the
- estimator. Only implement for models whose study params are
- different from those for the estimator.
-
-_fit_estimator(self, estimator, data, est_params_fit, validation, trial):
- This method is called to fit the estimator. Implement only
- to customize the fit.
-
-_get_distributions(self) -> dict:
- Return a list of the hyperparameter distributions for
- optimization.
-
"""
from atom.models.classreg import (
@@ -87,8 +20,8 @@
from atom.models.custom import CustomModel
from atom.models.ensembles import Stacking, Voting
from atom.models.ts import (
- ARIMA, ETS, AutoARIMA, ExponentialSmoothing, NaiveForecaster,
- PolynomialTrend,
+ ARIMA, BATS, ETS, STL, TBATS, AutoARIMA, Croston, ExponentialSmoothing,
+ NaiveForecaster, PolynomialTrend, Theta,
)
from atom.utils.types import Predictor
from atom.utils.utils import ClassMap
@@ -101,11 +34,13 @@
AutoARIMA,
AutomaticRelevanceDetermination,
Bagging,
+ BATS,
BayesianRidge,
BernoulliNB,
CatBoost,
CategoricalNB,
ComplementNB,
+ Croston,
DecisionTree,
Dummy,
ElasticNet,
@@ -137,8 +72,11 @@
RadiusNearestNeighbors,
RandomForest,
Ridge,
+ STL,
StochasticGradientDescent,
SupportVectorMachine,
+ TBATS,
+ Theta,
XGBoost,
key="acronym",
)
diff --git a/atom/models/classreg.py b/atom/models/classreg.py
index 12e091f8b..994ce35f6 100644
--- a/atom/models/classreg.py
+++ b/atom/models/classreg.py
@@ -63,11 +63,12 @@ class AdaBoost(ClassRegModel):
"""
acronym = "AdaB"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "ensemble"
@@ -133,11 +134,12 @@ class AutomaticRelevanceDetermination(ClassRegModel):
"""
acronym = "ARD"
+ handles_missing = False
needs_scaling = True
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "linear_model"
@@ -201,11 +203,12 @@ class Bagging(ClassRegModel):
"""
acronym = "Bag"
+ handles_missing = True
needs_scaling = False
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "ensemble"
@@ -267,11 +270,12 @@ class BayesianRidge(ClassRegModel):
"""
acronym = "BR"
+ handles_missing = False
needs_scaling = True
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "linear_model"
@@ -331,11 +335,12 @@ class BernoulliNB(ClassRegModel):
"""
acronym = "BNB"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "cuml")
_module = "naive_bayes"
@@ -410,11 +415,12 @@ class CatBoost(ClassRegModel):
"""
acronym = "CatB"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = "n_estimators"
+ validation = "n_estimators"
supports_engines = ("catboost",)
_module = "catboost"
@@ -532,8 +538,8 @@ def _fit_estimator(
if trial and len(self._metric) == 1 and cb._pruned:
# Add the pruned step to the output
step = len(self.evals[f"{m}_train"])
- steps = estimator.get_params()[self.has_validation]
- trial.params[self.has_validation] = f"{step}/{steps}"
+ steps = estimator.get_params()[self.validation]
+ trial.params[self.validation] = f"{step}/{steps}"
trial.set_user_attr("estimator", estimator)
raise TrialPruned(cb._message)
@@ -597,11 +603,12 @@ class CategoricalNB(ClassRegModel):
"""
acronym = "CatNB"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "cuml")
_module = "naive_bayes"
@@ -657,11 +664,12 @@ class ComplementNB(ClassRegModel):
"""
acronym = "CNB"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "cuml")
_module = "naive_bayes"
@@ -717,11 +725,12 @@ class DecisionTree(ClassRegModel):
"""
acronym = "Tree"
+ handles_missing = True
needs_scaling = False
accepts_sparse = True
native_multilabel = True
native_multioutput = True
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "tree"
@@ -792,11 +801,12 @@ class Dummy(ClassRegModel):
"""
acronym = "Dummy"
+ handles_missing = False
needs_scaling = False
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "dummy"
@@ -859,11 +869,12 @@ class ElasticNet(ClassRegModel):
"""
acronym = "EN"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "sklearnex", "cuml")
_module = "linear_model"
@@ -924,11 +935,12 @@ class ExtraTree(ClassRegModel):
"""
acronym = "ETree"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = True
native_multioutput = True
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "tree"
@@ -998,11 +1010,12 @@ class ExtraTrees(ClassRegModel):
"""
acronym = "ET"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = True
native_multioutput = True
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "ensemble"
@@ -1027,9 +1040,8 @@ def _get_parameters(self, trial: Trial) -> dict:
"""
params = super()._get_parameters(trial)
- if not self._get_param("bootstrap", params):
- if "max_samples" in params:
- params["max_samples"] = None
+ if not self._get_param("bootstrap", params) and "max_samples" in params:
+ params["max_samples"] = None
return params
@@ -1094,11 +1106,12 @@ class GaussianNB(ClassRegModel):
"""
acronym = "GNB"
+ handles_missing = False
needs_scaling = False
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "cuml")
_module = "naive_bayes"
@@ -1153,11 +1166,12 @@ class GaussianProcess(ClassRegModel):
"""
acronym = "GP"
+ handles_missing = False
needs_scaling = False
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "gaussian_process"
@@ -1209,11 +1223,12 @@ class GradientBoostingMachine(ClassRegModel):
"""
acronym = "GBM"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "ensemble"
@@ -1288,11 +1303,12 @@ class HuberRegression(ClassRegModel):
"""
acronym = "Huber"
+ handles_missing = False
needs_scaling = True
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "linear_model"
@@ -1354,11 +1370,12 @@ class HistGradientBoosting(ClassRegModel):
"""
acronym = "hGBM"
+ handles_missing = True
needs_scaling = False
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "ensemble"
@@ -1430,11 +1447,12 @@ class KNearestNeighbors(ClassRegModel):
"""
acronym = "KNN"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = True
native_multioutput = True
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "sklearnex", "cuml")
_module = "neighbors"
@@ -1502,11 +1520,12 @@ class Lasso(ClassRegModel):
"""
acronym = "Lasso"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "sklearnex", "cuml")
_module = "linear_model"
@@ -1565,11 +1584,12 @@ class LeastAngleRegression(ClassRegModel):
"""
acronym = "Lars"
+ handles_missing = False
needs_scaling = True
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "linear_model"
@@ -1620,11 +1640,12 @@ class LightGBM(ClassRegModel):
"""
acronym = "LGB"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = "n_estimators"
+ validation = "n_estimators"
supports_engines = ("lightgbm",)
_module = "lightgbm.sklearn"
@@ -1720,8 +1741,8 @@ def _fit_estimator(
# Add the pruned step to the output
step = str(ex).split(" ")[-1][:-1]
- steps = estimator.get_params()[self.has_validation]
- trial.params[self.has_validation] = f"{step}/{steps}"
+ steps = estimator.get_params()[self.validation]
+ trial.params[self.validation] = f"{step}/{steps}"
trial.set_user_attr("estimator", estimator)
raise ex
@@ -1793,11 +1814,12 @@ class LinearDiscriminantAnalysis(ClassRegModel):
"""
acronym = "LDA"
+ handles_missing = False
needs_scaling = False
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "discriminant_analysis"
@@ -1819,9 +1841,8 @@ def _get_parameters(self, trial: Trial) -> dict:
"""
params = super()._get_parameters(trial)
- if self._get_param("solver", params) == "svd":
- if "shrinkage" in params:
- params["shrinkage"] = None
+ if self._get_param("solver", params) == "svd" and "shrinkage" in params:
+ params["shrinkage"] = None
return params
@@ -1877,11 +1898,12 @@ class LinearSVM(ClassRegModel):
"""
acronym = "lSVM"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "cuml")
_module = "svm"
@@ -1916,11 +1938,9 @@ def _get_parameters(self, trial: Trial) -> dict:
params["dual"] = True
elif self._get_param("loss", params) == "squared_hinge":
# l1 regularization can't be combined with squared_hinge when dual=True
- if self._get_param("penalty", params) == "l1":
- if "dual" in params:
- params["dual"] = False
- elif self._get_param("loss", params) == "epsilon_insensitive":
- if "dual" in params:
+ if self._get_param("penalty", params) == "l1" and "dual" in params:
+ params["dual"] = False
+ elif self._get_param("loss", params) == "epsilon_insensitive" and "dual" in params:
params["dual"] = True
return params
@@ -2006,11 +2026,12 @@ class LogisticRegression(ClassRegModel):
"""
acronym = "LR"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "sklearnex", "cuml")
_module = "linear_model"
@@ -2039,9 +2060,8 @@ def _get_parameters(self, trial: Trial) -> dict:
cond_2 = penalty == "l1" and solver not in ("liblinear", "saga")
cond_3 = penalty == "elasticnet" and solver != "saga"
- if cond_1 or cond_2 or cond_3:
- if "penalty" in params:
- params["penalty"] = "l2" # Change to default value
+        if (cond_1 or cond_2 or cond_3) and "penalty" in params:
+ params["penalty"] = "l2" # Change to default value
return params
@@ -2113,11 +2133,12 @@ class MultiLayerPerceptron(ClassRegModel):
"""
acronym = "MLP"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = True
native_multioutput = False
- has_validation = "max_iter"
+ validation = "max_iter"
supports_engines = ("sklearn",)
_module = "neural_network"
@@ -2219,11 +2240,12 @@ class MultinomialNB(ClassRegModel):
"""
acronym = "MNB"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "cuml")
_module = "naive_bayes"
@@ -2281,11 +2303,12 @@ class OrdinaryLeastSquares(ClassRegModel):
"""
acronym = "OLS"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "sklearnex", "cuml")
_module = "linear_model"
@@ -2326,11 +2349,12 @@ class OrthogonalMatchingPursuit(ClassRegModel):
"""
acronym = "OMP"
+ handles_missing = False
needs_scaling = True
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "linear_model"
@@ -2373,11 +2397,12 @@ class PassiveAggressive(ClassRegModel):
"""
acronym = "PA"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = "max_iter"
+ validation = "max_iter"
supports_engines = ("sklearn",)
_module = "linear_model"
@@ -2449,11 +2474,12 @@ class Perceptron(ClassRegModel):
"""
acronym = "Perc"
+ handles_missing = False
needs_scaling = True
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = "max_iter"
+ validation = "max_iter"
supports_engines = ("sklearn",)
_module = "linear_model"
@@ -2514,11 +2540,12 @@ class QuadraticDiscriminantAnalysis(ClassRegModel):
"""
acronym = "QDA"
+ handles_missing = False
needs_scaling = False
accepts_sparse = False
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "discriminant_analysis"
@@ -2584,11 +2611,12 @@ class RadiusNearestNeighbors(ClassRegModel):
"""
acronym = "RNN"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = True
native_multioutput = True
- has_validation = None
+ validation = None
supports_engines = ("sklearn",)
_module = "neighbors"
@@ -2660,11 +2688,12 @@ class RandomForest(ClassRegModel):
"""
acronym = "RF"
+ handles_missing = False
needs_scaling = False
accepts_sparse = True
native_multilabel = True
native_multioutput = True
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "sklearnex", "cuml")
_module = "ensemble"
@@ -2689,9 +2718,8 @@ def _get_parameters(self, trial: Trial) -> dict:
"""
params = super()._get_parameters(trial)
- if not self._get_param("bootstrap", params):
- if "max_samples" in params:
- params["max_samples"] = None
+ if not self._get_param("bootstrap", params) and "max_samples" in params:
+ params["max_samples"] = None
return params
@@ -2775,11 +2803,12 @@ class Ridge(ClassRegModel):
"""
acronym = "Ridge"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = True
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "sklearnex", "cuml")
_module = "linear_model"
@@ -2848,11 +2877,12 @@ class StochasticGradientDescent(ClassRegModel):
"""
acronym = "SGD"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = "max_iter"
+ validation = "max_iter"
supports_engines = ("sklearn",)
_module = "linear_model"
@@ -2933,11 +2963,12 @@ class SupportVectorMachine(ClassRegModel):
"""
acronym = "SVM"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = None
+ validation = None
supports_engines = ("sklearn", "sklearnex", "cuml")
_module = "svm"
@@ -2959,9 +2990,8 @@ def _get_parameters(self, trial: Trial) -> dict:
"""
params = super()._get_parameters(trial)
- if self._get_param("kernel", params) == "poly":
- if "gamma" in params:
- params["gamma"] = "scale" # Crashes in combination with "auto"
+ if self._get_param("kernel", params) == "poly" and "gamma" in params:
+ params["gamma"] = "scale" # Crashes in combination with "auto"
return params
@@ -3053,11 +3083,12 @@ class XGBoost(ClassRegModel):
"""
acronym = "XGB"
+ handles_missing = False
needs_scaling = True
accepts_sparse = True
native_multilabel = False
native_multioutput = False
- has_validation = "n_estimators"
+ validation = "n_estimators"
supports_engines = ("xgboost",)
_module = "xgboost"
@@ -3174,8 +3205,8 @@ def _fit_estimator(
# Add the pruned step to the output
step = str(ex).split(" ")[-1][:-1]
- steps = estimator.get_params()[self.has_validation]
- trial.params[self.has_validation] = f"{step}/{steps}"
+ steps = estimator.get_params()[self.validation]
+ trial.params[self.validation] = f"{step}/{steps}"
trial.set_user_attr("estimator", estimator)
raise ex
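The renamed `validation` tag still drives the step loop in `_fit_estimator`; a minimal sketch of that mechanism with synthetic data:

```python
import numpy as np
from sklearn.linear_model import SGDClassifier

rng = np.random.default_rng(1)
X, y = rng.normal(size=(100, 3)), rng.integers(0, 2, size=100)

est = SGDClassifier(max_iter=10)
validation = "max_iter"  # the tag formerly called has_validation

# Read the step count from the estimator, then fit one epoch at a
# time, as BaseModel._fit_estimator does for in-training validation.
steps = est.get_params()[validation]
for step in range(steps):
    est.partial_fit(X, y, classes=[0, 1])
```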
diff --git a/atom/models/custom.py b/atom/models/custom.py
index 85bd79b72..4697f1d7f 100644
--- a/atom/models/custom.py
+++ b/atom/models/custom.py
@@ -41,10 +41,11 @@ def __init__(self, **kwargs):
f"the model's acronym."
)
+ self.handles_missing = getattr(est, "handles_missing", False)
self.needs_scaling = getattr(est, "needs_scaling", False)
self.native_multilabel = getattr(est, "native_multilabel", False)
self.native_multioutput = getattr(est, "native_multioutput", False)
- self.has_validation = getattr(est, "has_validation", None)
+ self.validation = getattr(est, "validation", None)
super().__init__(name=name, **kwargs)
diff --git a/atom/models/ensembles.py b/atom/models/ensembles.py
index 5c8e023dc..29224d57b 100644
--- a/atom/models/ensembles.py
+++ b/atom/models/ensembles.py
@@ -28,8 +28,9 @@ class Stacking(ClassRegModel):
"""
acronym = "Stack"
+ handles_missing = False
needs_scaling = False
- has_validation = None
+ validation = None
native_multilabel = False
native_multioutput = False
supports_engines = ()
@@ -83,8 +84,9 @@ class Voting(ClassRegModel):
"""
acronym = "Vote"
+ handles_missing = False
needs_scaling = False
- has_validation = None
+ validation = None
native_multilabel = False
native_multioutput = False
supports_engines = ()
diff --git a/atom/models/ts.py b/atom/models/ts.py
index 2e732e7ab..b10fe99cc 100644
--- a/atom/models/ts.py
+++ b/atom/models/ts.py
@@ -11,10 +11,12 @@
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution as Cat
+from optuna.distributions import FloatDistribution as Float
from optuna.distributions import IntDistribution as Int
from optuna.trial import Trial
from atom.basemodel import ForecastModel
+from atom.utils.types import Predictor
class ARIMA(ForecastModel):
@@ -23,7 +25,7 @@ class ARIMA(ForecastModel):
Seasonal ARIMA models and exogenous input is supported, hence this
estimator is capable of fitting SARIMA, ARIMAX, and SARIMAX.
- An ARIMA model, is a generalization of an autoregressive moving
+ An ARIMA model is a generalization of an autoregressive moving
average (ARMA) model, and is fitted to time-series data in an effort
to forecast future points. ARIMA models can be especially
efficacious in cases where data shows evidence of non-stationarity.
@@ -72,18 +74,17 @@ class ARIMA(ForecastModel):
"""
acronym = "ARIMA"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
+ handles_missing = True
+ uses_exogenous = True
+ in_sample_prediction = True
+ native_multivariate = False
supports_engines = ("sktime",)
_module = "sktime.forecasting.arima"
_estimators: ClassVar[dict[str, str]] = {"forecast": "ARIMA"}
_order = ("p", "d", "q")
- _sorder = ("P", "D", "Q", "S")
+ _seasonal_order = ("P", "D", "Q", "S")
def _get_parameters(self, trial: Trial) -> dict[str, BaseDistribution]:
"""Get the trial's hyperparameters.
@@ -103,7 +104,7 @@ def _get_parameters(self, trial: Trial) -> dict[str, BaseDistribution]:
# If no seasonal periodicity, set seasonal components to zero
if self._get_param("S", params) == 0:
- for p in self._sorder:
+ for p in self._seasonal_order:
if p in params:
params[p] = 0
@@ -128,8 +129,8 @@ def _trial_to_est(self, params: dict[str, Any]) -> dict[str, Any]:
# Convert params to hyperparameters 'order' and 'seasonal_order'
if all(p in params for p in self._order):
params["order"] = tuple(params.pop(p) for p in self._order)
- if all(p in params for p in self._sorder):
- params["seasonal_order"] = tuple(params.pop(p) for p in self._sorder)
+ if all(p in params for p in self._seasonal_order):
+ params["seasonal_order"] = tuple(params.pop(p) for p in self._seasonal_order)
return params
@@ -162,7 +163,7 @@ def _get_distributions(self) -> dict[str, BaseDistribution]:
for p in self._order:
dist.pop(p)
if "seasonal_order" in self._est_params:
- for p in self._sorder:
+ for p in self._seasonal_order:
dist.pop(p)
return dist
@@ -214,11 +215,10 @@ class AutoARIMA(ForecastModel):
"""
acronym = "AutoARIMA"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
+ handles_missing = True
+ uses_exogenous = True
+ in_sample_prediction = True
+ native_multivariate = False
supports_engines = ("sktime",)
_module = "sktime.forecasting.arima"
@@ -243,12 +243,161 @@ def _get_distributions() -> dict[str, BaseDistribution]:
}
-class ExponentialSmoothing(ForecastModel):
- """Exponential Smoothing forecaster.
+class BATS(ForecastModel):
+ """BATS forecaster with multiple seasonality.
+
+    BATS is an acronym for:
+
+ - Box-Cox transformation
+ - ARMA errors
+ - Trend
+ - Seasonal components
- Holt-Winters exponential smoothing forecaster. The default settings
- use simple exponential smoothing, without trend and seasonality
- components.
+ BATS was designed to forecast time series with multiple seasonal
+ periods. For example, daily data may have a weekly pattern as well
+ as an annual pattern. Or hourly data can have three seasonal periods:
+ a daily pattern, a weekly pattern, and an annual pattern.
+
+ In BATS, a [Box-Cox transformation][boxcox] is applied to the
+ original time series, and then this is modeled as a linear
+ combination of an exponentially smoothed trend, a seasonal
+ component and an ARMA component. BATS conducts some hyperparameter
+ tuning (e.g., which of these components to keep and which to discard)
+ using AIC.
+
+ Corresponding estimators are:
+
+ - [BATS][batsclass] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:ARIMA
+ atom.models:AutoARIMA
+ atom.models:TBATS
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="BATS", verbose=2)
+ ```
+
+ """
+
+ acronym = "BATS"
+ handles_missing = False
+ uses_exogenous = False
+ in_sample_prediction = True
+ native_multivariate = False
+ supports_engines = ("sktime",)
+
+ _module = "sktime.forecasting.bats"
+ _estimators: ClassVar[dict[str, str]] = {"forecast": "BATS"}
+
+ def _get_est(self, params: dict[str, Any]) -> Predictor:
+ """Get the model's estimator with unpacked parameters.
+
+ Parameters
+ ----------
+ params: dict
+ Hyperparameters for the estimator.
+
+ Returns
+ -------
+ Predictor
+ Estimator instance.
+
+ """
+ return self._est_class(
+ show_warnings=params.pop("show_warnings", self.warnings in ("always", "default")),
+ n_jobs=params.pop("n_jobs", self.n_jobs),
+ **params,
+ )
+
+ @staticmethod
+ def _get_distributions() -> dict[str, BaseDistribution]:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ dict
+ Hyperparameter distributions.
+
+ """
+ return {
+ "use_box_cox": Cat([True, False, None]),
+ "use_trend": Cat([True, False, None]),
+ "use_damped_trend": Cat([True, False, None]),
+ "use_arma_errors": Cat([True, False]),
+ }
+
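The tri-state flags above let a trial either pin a component or defer it: `None` leaves the decision to BATS' internal AIC-based selection. A hedged usage sketch with sktime's estimator directly (assumes the `sktime` and `tbats` packages are installed; fitting BATS can be slow):

```python
from sktime.datasets import load_airline
from sktime.forecasting.bats import BATS

y = load_airline()

# use_box_cox=None defers the Box-Cox decision to BATS' AIC search,
# while use_trend and use_arma_errors are pinned explicitly.
forecaster = BATS(use_box_cox=None, use_trend=True, use_arma_errors=False, sp=12)
forecaster.fit(y)
print(forecaster.predict(fh=[1, 2, 3]))
```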
+
+class Croston(ForecastModel):
+ """Croston's method for forecasting.
+
+ Croston's method is a modification of (vanilla) exponential
+ smoothing to handle intermittent time series. A time series is
+ considered intermittent if many of its values are zero and the
+ gaps between non-zero entries are not periodic.
+
+ Croston's method predicts a constant value for all future times,
+ so it essentially provides another notion of the average value of
+ a time series.
+
+ Corresponding estimators are:
+
+ - [Croston][crostonclass] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:ExponentialSmoothing
+ atom.models:ETS
+ atom.models:NaiveForecaster
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="Croston", verbose=2)
+ ```
+
+ """
+
+ acronym = "Croston"
+ handles_missing = False
+ uses_exogenous = True
+ in_sample_prediction = True
+ native_multivariate = False
+ supports_engines = ("sktime",)
+
+ _module = "sktime.forecasting.croston"
+ _estimators: ClassVar[dict[str, str]] = {"forecast": "Croston"}
+
+ @staticmethod
+ def _get_distributions() -> dict[str, BaseDistribution]:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ dict
+ Hyperparameter distributions.
+
+ """
+ return {"smoothing": Float(0, 1, step=0.1)}
+
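To make the intermittent-demand setting concrete, a small sketch using sktime's Croston estimator directly (the series values are made up for illustration):

```python
import pandas as pd
from sktime.forecasting.croston import Croston

# Intermittent demand: mostly zeros, with aperiodic non-zero entries.
y = pd.Series(
    [0, 0, 4, 0, 0, 0, 3, 0, 5, 0, 0, 2],
    index=pd.period_range("2020-01", periods=12, freq="M"),
)

forecaster = Croston(smoothing=0.2)  # the hyperparameter tuned above
forecaster.fit(y)
print(forecaster.predict(fh=[1, 2, 3]))  # one constant value for every horizon
```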
+
+class ExponentialSmoothing(ForecastModel):
+ """Holt-Winters Exponential Smoothing forecaster.
Corresponding estimators are:
@@ -275,16 +424,36 @@ class ExponentialSmoothing(ForecastModel):
"""
acronym = "ES"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
+ handles_missing = False
+ uses_exogenous = False
+ in_sample_prediction = True
+ native_multivariate = False
supports_engines = ("sktime",)
_module = "sktime.forecasting.exp_smoothing"
_estimators: ClassVar[dict[str, str]] = {"forecast": "ExponentialSmoothing"}
+ def _get_parameters(self, trial: Trial) -> dict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ dict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ if not self._get_param("trend", params) and "damped_trend" in params:
+ params["damped_trend"] = False
+
+ return params
+
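The override above guards against an invalid combination: a damped trend without a trend component is rejected downstream by the statsmodels backend at fit time. A sketch of the failure mode the guard prevents (assumes sktime with statsmodels installed):

```python
from sktime.datasets import load_airline
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

y = load_airline()
try:
    # damped_trend=True is only meaningful when a trend component exists.
    ExponentialSmoothing(trend=None, damped_trend=True).fit(y)
except Exception as exc:
    print(exc)  # expected to raise, e.g., "Can only dampen the trend component"
```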
@staticmethod
def _get_distributions() -> dict[str, BaseDistribution]:
"""Get the predefined hyperparameter distributions.
@@ -295,8 +464,6 @@ def _get_distributions() -> dict[str, BaseDistribution]:
Hyperparameter distributions.
"""
- methods = ["L-BFGS-B", "TNC", "SLSQP", "Powell", "trust-constr", "bh", "ls"]
-
return {
"trend": Cat(["add", "mul", None]),
"damped_trend": Cat([True, False]),
@@ -304,7 +471,8 @@ def _get_distributions() -> dict[str, BaseDistribution]:
"sp": Cat([4, 6, 7, 12, None]),
"use_boxcox": Cat([True, False]),
"initialization_method": Cat(["estimated", "heuristic"]),
- "method": Cat(methods),
+ "method": Cat(["L-BFGS-B", "TNC", "SLSQP", "Powell", "trust-constr", "bh", "ls"]),
+ "use_brute": Cat([True, False]),
}
@@ -342,16 +510,36 @@ class ETS(ForecastModel):
"""
acronym = "ETS"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
+ handles_missing = True
+ uses_exogenous = False
+ in_sample_prediction = True
+ native_multivariate = False
supports_engines = ("sktime",)
_module = "sktime.forecasting.ets"
_estimators: ClassVar[dict[str, str]] = {"forecast": "AutoETS"}
+ def _get_parameters(self, trial: Trial) -> dict:
+ """Get the trial's hyperparameters.
+
+ Parameters
+ ----------
+ trial: [Trial][]
+ Current trial.
+
+ Returns
+ -------
+ dict
+ Trial's hyperparameters.
+
+ """
+ params = super()._get_parameters(trial)
+
+ if not self._get_param("trend", params) and "damped_trend" in params:
+ params["damped_trend"] = False
+
+ return params
+
@staticmethod
def _get_distributions() -> dict[str, BaseDistribution]:
"""Get the predefined hyperparameter distributions.
@@ -372,6 +560,10 @@ def _get_distributions() -> dict[str, BaseDistribution]:
"maxiter": Int(500, 2000, step=100),
"auto": Cat([True, False]),
"information_criterion": Cat(["aic", "bic", "aicc"]),
+ "allow_multiplicative_trend": Cat([True, False]),
+ "restrict": Cat([True, False]),
+ "additive_only": Cat([True, False]),
+ "ignore_inf_ic": Cat([True, False]),
}
@@ -409,11 +601,10 @@ class NaiveForecaster(ForecastModel):
"""
acronym = "NF"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
+ handles_missing = True
+ uses_exogenous = False
+ in_sample_prediction = True
+ native_multivariate = False
supports_engines = ("sktime",)
_module = "sktime.forecasting.naive"
@@ -464,11 +655,10 @@ class PolynomialTrend(ForecastModel):
"""
acronym = "PT"
- needs_scaling = False
- accepts_sparse = False
- native_multilabel = False
- native_multioutput = True
- has_validation = None
+ handles_missing = False
+ uses_exogenous = False
+ in_sample_prediction = True
+ native_multivariate = False
supports_engines = ("sktime",)
_module = "sktime.forecasting.trend"
@@ -488,3 +678,221 @@ def _get_distributions() -> dict[str, BaseDistribution]:
"degree": Int(1, 5),
"with_intercept": Cat([True, False]),
}
+
+
+class STL(ForecastModel):
+ """Seasonal-Trend decomposition using Loess.
+
+ STL is a technique commonly used to decompose time series data
+ into trend, seasonal, and residual components.
+
+ Corresponding estimators are:
+
+ - [STLForecaster][] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:Croston
+ atom.models:ETS
+ atom.models:Theta
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="STL", verbose=2)
+ ```
+
+ """
+
+ acronym = "STL"
+ handles_missing = False
+ uses_exogenous = False
+ in_sample_prediction = True
+ native_multivariate = False
+ supports_engines = ("sktime",)
+
+ _module = "sktime.forecasting.trend"
+ _estimators: ClassVar[dict[str, str]] = {"forecast": "STLForecaster"}
+
+ @staticmethod
+ def _get_distributions() -> dict[str, BaseDistribution]:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ dict
+ Hyperparameter distributions.
+
+ """
+ return {
+ "seasonal": Int(3, 11, step=2),
+ "seasonal_deg": Cat([0, 1]),
+ "trend_deg": Cat([0, 1]),
+ "low_pass_deg": Cat([0, 1]),
+ "robust": Cat([True, False]),
+ }
+
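A note on the `seasonal` search space: `Int(3, 11, step=2)` yields odd values only, because the loess seasonal smoother length must be an odd integer >= 3 in the underlying statsmodels implementation. A hedged sketch with statsmodels' `STL` directly (synthetic data for illustration):

```python
import numpy as np
from statsmodels.tsa.seasonal import STL

rng = np.random.default_rng(1)
y = np.sin(np.arange(48) * 2 * np.pi / 12) + rng.normal(0, 0.1, 48)

# seasonal must be odd, hence step=2 in the search space above.
result = STL(y, period=12, seasonal=7).fit()
print(result.seasonal[:3])
```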
+
+class TBATS(ForecastModel):
+ """TBATS forecaster with multiple seasonality.
+
+ TBATS is an acronym for:
+
+ - Trigonometric seasonality
+ - Box-Cox transformation
+ - ARMA errors
+ - Trend
+ - Seasonal components
+
+ TBATS was designed to forecast time series with multiple seasonal
+ periods. For example, daily data may have a weekly pattern as well
+ as an annual pattern, and hourly data can have three seasonal
+ periods: a daily pattern, a weekly pattern, and an annual pattern.
+
+ In TBATS, a [Box-Cox transformation][boxcox] is applied to the
+ original time series, and then this is modeled as a linear
+ combination of an exponentially smoothed trend, a seasonal
+ component, and an ARMA component. The seasonal components are
+ modeled by trigonometric functions via Fourier series. TBATS
+ conducts some hyperparameter tuning (e.g., which of these
+ components to keep and which to discard) using AIC.
+
+ Corresponding estimators are:
+
+ - [TBATS][tbatsclass] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:BATS
+ atom.models:ARIMA
+ atom.models:AutoARIMA
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="TBATS", verbose=2)
+ ```
+
+ """
+
+ acronym = "TBATS"
+ handles_missing = False
+ uses_exogenous = False
+ in_sample_prediction = True
+ native_multivariate = False
+ supports_engines = ("sktime",)
+
+ _module = "sktime.forecasting.tbats"
+ _estimators: ClassVar[dict[str, str]] = {"forecast": "TBATS"}
+
+ def _get_est(self, params: dict[str, Any]) -> Predictor:
+ """Get the model's estimator with unpacked parameters.
+
+ Parameters
+ ----------
+ params: dict
+ Hyperparameters for the estimator.
+
+ Returns
+ -------
+ Predictor
+ Estimator instance.
+
+ """
+ return self._est_class(
+ show_warnings=params.pop("show_warnings", self.warnings in ("always", "default")),
+ n_jobs=params.pop("n_jobs", self.n_jobs),
+ **params,
+ )
+
+ @staticmethod
+ def _get_distributions() -> dict[str, BaseDistribution]:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ dict
+ Hyperparameter distributions.
+
+ """
+ return {
+ "use_box_cox": Cat([True, False, None]),
+ "use_trend": Cat([True, False, None]),
+ "use_damped_trend": Cat([True, False, None]),
+ "use_arma_errors": Cat([True, False]),
+ }
+
+
+class Theta(ForecastModel):
+ """Theta method for forecasting.
+
+ The theta method is equivalent to simple [ExponentialSmoothing][]
+ with drift. The series is tested for seasonality, and, if deemed
+ seasonal, the series is seasonally adjusted using a classical
+ multiplicative decomposition before applying the theta method. The
+ resulting forecasts are then reseasonalised.
+
+ In cases where SES results in a constant forecast, the theta
+ forecaster will revert to predicting the SES constant plus a linear
+ trend derived from the training data.
+
+ Prediction intervals are computed using the underlying state space
+ model.
+
+ Corresponding estimators are:
+
+ - [ThetaForecaster][] for forecasting tasks.
+
+ See Also
+ --------
+ atom.models:Croston
+ atom.models:ExponentialSmoothing
+ atom.models:PolynomialTrend
+
+ Examples
+ --------
+ ```pycon
+ from atom import ATOMForecaster
+ from sktime.datasets import load_airline
+
+ y = load_airline()
+
+ atom = ATOMForecaster(y, random_state=1)
+ atom.run(models="Theta", verbose=2)
+ ```
+
+ """
+
+ acronym = "Theta"
+ handles_missing = False
+ uses_exogenous = False
+ in_sample_prediction = True
+ native_multivariate = False
+ supports_engines = ("sktime",)
+
+ _module = "sktime.forecasting.theta"
+ _estimators: ClassVar[dict[str, str]] = {"forecast": "ThetaForecaster"}
+
+ @staticmethod
+ def _get_distributions() -> dict[str, BaseDistribution]:
+ """Get the predefined hyperparameter distributions.
+
+ Returns
+ -------
+ dict
+ Hyperparameter distributions.
+
+ """
+ return {"deseasonalize": Cat([False, True])}
diff --git a/atom/nlp.py b/atom/nlp.py
index 28ab2e61f..418b6279c 100644
--- a/atom/nlp.py
+++ b/atom/nlp.py
@@ -374,10 +374,10 @@ class TextNormalizer(TransformerMixin):
Attributes
----------
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -606,10 +606,10 @@ class Tokenizer(TransformerMixin):
Created quadgrams and their frequencies.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
--------
@@ -808,21 +808,22 @@ class Vectorizer(TransformerMixin):
`#!python device="gpu"` to use the GPU. Read more in the
[user guide][gpu-acceleration].
- engine: dict, default={"data": "numpy", "estimator": "sklearn"}
+ engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
- corresponding choice as values. Choose from:
+ corresponding choice as values. If None, the default values
+ are used. Choose from:
- "data":
- - "numpy"
+ - "numpy" (default)
- "pyarrow"
- "modin"
- "estimator":
- - "sklearn"
+ - "sklearn" (default)
- "cuml"
verbose: int, default=0
@@ -847,10 +848,10 @@ class Vectorizer(TransformerMixin):
corpus, e.g., `vectorizer.tfidf` for the tfidf strategy.
feature_names_in_: np.ndarray
- Names of features seen during fit.
+ Names of features seen during `fit`.
n_features_in_: int
- Number of features seen during fit.
+ Number of features seen during `fit`.
See Also
diff --git a/atom/plots/dataplot.py b/atom/plots/dataplot.py
index e221e0eaf..a05983456 100644
--- a/atom/plots/dataplot.py
+++ b/atom/plots/dataplot.py
@@ -652,7 +652,7 @@ def get_text(column: Series) -> Series:
fig.add_trace(
go.Bar(
- x=(data := series[-self._get_show(show, len(series)) :]),
+ x=(data := series[-self._get_show(show, len(series)):]),
y=data.index,
orientation="h",
marker={
diff --git a/atom/utils/types.py b/atom/utils/types.py
index 4b0a95d16..a606fb7c1 100644
--- a/atom/utils/types.py
+++ b/atom/utils/types.py
@@ -159,14 +159,6 @@ def predict(self, *args, **kwargs) -> Pandas: ...
Scalar: TypeAlias = Int | Float
Segment: TypeAlias = range | slice
Index: TypeAlias = pd.Index | md.Index
-TSIndex: TypeAlias = (
- pd.PeriodIndex
- | md.PeriodIndex
- | pd.DatetimeIndex
- | md.DatetimeIndex
- | pd.TimedeltaIndex
- | md.TimedeltaIndex
-)
Series: TypeAlias = pd.Series | md.Series
DataFrame: TypeAlias = pd.DataFrame | md.DataFrame
Pandas: TypeAlias = Series | DataFrame
@@ -262,6 +254,7 @@ def predict(self, *args, **kwargs) -> Pandas: ...
| dict[str, IntLargerEqualZero]
| Sequence[IntLargerEqualZero]
)
+HarmonicsSelector: TypeAlias = Literal["drop", "raw_strength", "harmonic_strength"]
# Allowed values for method selection
PredictionMethods: TypeAlias = Literal[
@@ -310,7 +303,6 @@ def predict(self, *args, **kwargs) -> Pandas: ...
float_t = (float, np.floating)
segment_t = (slice, range)
index_t = (pd.Index, md.Index)
-tsindex_t = TSIndex.__args__
series_t = (pd.Series, md.Series)
sequence_t = (range, list, tuple, np.ndarray, *index_t, *series_t)
dataframe_t = (pd.DataFrame, md.DataFrame)
diff --git a/atom/utils/utils.py b/atom/utils/utils.py
index bbdbbc4d4..c2713de39 100644
--- a/atom/utils/utils.py
+++ b/atom/utils/utils.py
@@ -16,7 +16,7 @@
from contextlib import contextmanager
from copy import copy
from dataclasses import dataclass
-from enum import Enum
+from enum import Enum, IntEnum
from functools import cached_property, wraps
from importlib import import_module
from importlib.util import find_spec
@@ -178,6 +178,29 @@ def is_multioutput(self) -> bool:
return self.value in (2, 3, 5, 7)
+class SeasonalPeriod(IntEnum):
+ """Seasonal periodicity.
+
+ Maps pandas' period aliases to a default seasonal period.
+ See: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#period-aliases
+
+ """
+
+ B = 5 # business day: business days in a week
+ D = 7 # calendar day: days in a week
+ W = 52 # week: weeks in a year
+ M = 12 # month: months in a year
+ Q = 4 # quarter: quarters in a year
+ A = 1 # year
+ Y = 1 # year (alias of A)
+ H = 24 # hour: hours in a day
+ T = 60 # minute: minutes in an hour
+ S = 60 # second: seconds in a minute
+ L = 1_000 # millisecond: milliseconds in a second
+ U = 1_000_000 # microsecond: microseconds in a second
+ N = 1_000_000_000 # nanosecond: nanoseconds in a second
+
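A minimal sketch of the alias-to-period lookup the enum enables (abbreviated copy for illustration; the real mapping lives in `atom.utils.utils`):

```python
from enum import IntEnum

class SeasonalPeriod(IntEnum):  # abbreviated copy for illustration
    D = 7   # daily data -> weekly seasonality
    W = 52  # weekly data -> yearly seasonality
    M = 12  # monthly data -> yearly seasonality
    Q = 4   # quarterly data -> yearly seasonality

freq = "M"  # e.g., taken from a PeriodIndex's freqstr
print(SeasonalPeriod[freq].value)  # 12
```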
+
@dataclass
class DataContainer:
"""Stores a branch's data."""
@@ -222,6 +245,7 @@ class DataConfig:
index: IndexSelector = True
ignore: tuple[str, ...] = ()
+ sp: int | list[int] | None = None
shuffle: Bool = False
stratify: IndexSelector = True
n_rows: Scalar = 1
diff --git a/docs_sources/api/models/adab.md b/docs_sources/api/models/adab.md
index aac1b7ca9..ceeefc253 100644
--- a/docs_sources/api/models/adab.md
+++ b/docs_sources/api/models/adab.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/ard.md b/docs_sources/api/models/ard.md
index 5c1ae30c5..b2b7846a8 100644
--- a/docs_sources/api/models/ard.md
+++ b/docs_sources/api/models/ard.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/arima.md b/docs_sources/api/models/arima.md
index bd7d0a355..323bb3cce 100644
--- a/docs_sources/api/models/arima.md
+++ b/docs_sources/api/models/arima.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/autoarima.md b/docs_sources/api/models/autoarima.md
index 6a2cc5d3a..31ae376db 100644
--- a/docs_sources/api/models/autoarima.md
+++ b/docs_sources/api/models/autoarima.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/bag.md b/docs_sources/api/models/bag.md
index 02820e833..03713c540 100644
--- a/docs_sources/api/models/bag.md
+++ b/docs_sources/api/models/bag.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/bats.md b/docs_sources/api/models/bats.md
new file mode 100644
index 000000000..15ce1fed2
--- /dev/null
+++ b/docs_sources/api/models/bats.md
@@ -0,0 +1,80 @@
+# BATS
+------
+
+:: atom.models:BATS
+ :: tags
+ :: description
+ :: see also
+
+
+
+## Example
+
+:: examples
+
+
+
+## Hyperparameters
+
+:: hyperparameters
+
+
+
+## Attributes
+
+### Data attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - pipeline
+ - atom.branch:Branch.mapping
+ - dataset
+ - train
+ - test
+ - X
+ - y
+ - X_train
+ - y_train
+ - X_test
+ - atom.branch:Branch.y_test
+ - X_holdout
+ - y_holdout
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
+ - atom.branch:Branch.target
+
+
+
+### Utility attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - name
+ - run
+ - study
+ - trials
+ - best_trial
+ - best_params
+ - estimator
+ - bootstrap
+ - results
+ - feature_importance
+
+
+
+## Methods
+
+The [plots][available-plots] can be called directly from the model.
+The remaining utility methods can be found hereunder.
+
+:: methods:
+ toc_only: False
+ exclude:
+ - plot_.*
diff --git a/docs_sources/api/models/bnb.md b/docs_sources/api/models/bnb.md
index ddad46cef..0082f8ea0 100644
--- a/docs_sources/api/models/bnb.md
+++ b/docs_sources/api/models/bnb.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/br.md b/docs_sources/api/models/br.md
index e14cf12e0..780b024f8 100644
--- a/docs_sources/api/models/br.md
+++ b/docs_sources/api/models/br.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/catb.md b/docs_sources/api/models/catb.md
index 1ffc2d37a..c230c61e2 100644
--- a/docs_sources/api/models/catb.md
+++ b/docs_sources/api/models/catb.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/catnb.md b/docs_sources/api/models/catnb.md
index 421703f2a..f00783e7b 100644
--- a/docs_sources/api/models/catnb.md
+++ b/docs_sources/api/models/catnb.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/cnb.md b/docs_sources/api/models/cnb.md
index 20a45e626..33cb1ca65 100644
--- a/docs_sources/api/models/cnb.md
+++ b/docs_sources/api/models/cnb.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/croston.md b/docs_sources/api/models/croston.md
new file mode 100644
index 000000000..01afd1f9b
--- /dev/null
+++ b/docs_sources/api/models/croston.md
@@ -0,0 +1,80 @@
+# Croston
+---------
+
+:: atom.models:Croston
+ :: tags
+ :: description
+ :: see also
+
+
+
+## Example
+
+:: examples
+
+
+
+## Hyperparameters
+
+:: hyperparameters
+
+
+
+## Attributes
+
+### Data attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - pipeline
+ - atom.branch:Branch.mapping
+ - dataset
+ - train
+ - test
+ - X
+ - y
+ - X_train
+ - y_train
+ - X_test
+ - atom.branch:Branch.y_test
+ - X_holdout
+ - y_holdout
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
+ - atom.branch:Branch.target
+
+
+
+### Utility attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - name
+ - run
+ - study
+ - trials
+ - best_trial
+ - best_params
+ - estimator
+ - bootstrap
+ - results
+ - feature_importance
+
+
+
+## Methods
+
+The [plots][available-plots] can be called directly from the model.
+The remaining utility methods can be found hereunder.
+
+:: methods:
+ toc_only: False
+ exclude:
+ - plot_.*
diff --git a/docs_sources/api/models/dummy.md b/docs_sources/api/models/dummy.md
index d0204e44e..d190ad628 100644
--- a/docs_sources/api/models/dummy.md
+++ b/docs_sources/api/models/dummy.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/en.md b/docs_sources/api/models/en.md
index aecaa94e5..efe731da3 100644
--- a/docs_sources/api/models/en.md
+++ b/docs_sources/api/models/en.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/es.md b/docs_sources/api/models/es.md
index 3839dce5c..73fa78a15 100644
--- a/docs_sources/api/models/es.md
+++ b/docs_sources/api/models/es.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/et.md b/docs_sources/api/models/et.md
index 8ecb0dd85..4528143f5 100644
--- a/docs_sources/api/models/et.md
+++ b/docs_sources/api/models/et.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/etree.md b/docs_sources/api/models/etree.md
index 0b99c0ea3..66b432072 100644
--- a/docs_sources/api/models/etree.md
+++ b/docs_sources/api/models/etree.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/ets.md b/docs_sources/api/models/ets.md
index b918f5f0c..9da10b282 100644
--- a/docs_sources/api/models/ets.md
+++ b/docs_sources/api/models/ets.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/gbm.md b/docs_sources/api/models/gbm.md
index 8d7de985b..0e82b1d69 100644
--- a/docs_sources/api/models/gbm.md
+++ b/docs_sources/api/models/gbm.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/gnb.md b/docs_sources/api/models/gnb.md
index 9602f5667..3c30521a9 100644
--- a/docs_sources/api/models/gnb.md
+++ b/docs_sources/api/models/gnb.md
@@ -35,11 +35,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/gp.md b/docs_sources/api/models/gp.md
index 8d25d5fe1..3deec170e 100644
--- a/docs_sources/api/models/gp.md
+++ b/docs_sources/api/models/gp.md
@@ -35,11 +35,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/hgbm.md b/docs_sources/api/models/hgbm.md
index 6e62c56b6..629b487cd 100644
--- a/docs_sources/api/models/hgbm.md
+++ b/docs_sources/api/models/hgbm.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/huber.md b/docs_sources/api/models/huber.md
index eef82d673..08591ce80 100644
--- a/docs_sources/api/models/huber.md
+++ b/docs_sources/api/models/huber.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/knn.md b/docs_sources/api/models/knn.md
index ec9a370b9..9f6e2cd41 100644
--- a/docs_sources/api/models/knn.md
+++ b/docs_sources/api/models/knn.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/lars.md b/docs_sources/api/models/lars.md
index f47b0f5f8..fa75faaa0 100644
--- a/docs_sources/api/models/lars.md
+++ b/docs_sources/api/models/lars.md
@@ -35,11 +35,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/lasso.md b/docs_sources/api/models/lasso.md
index 951d50960..61c2811f5 100644
--- a/docs_sources/api/models/lasso.md
+++ b/docs_sources/api/models/lasso.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/lda.md b/docs_sources/api/models/lda.md
index adebdf66d..72e7b093f 100644
--- a/docs_sources/api/models/lda.md
+++ b/docs_sources/api/models/lda.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/lgb.md b/docs_sources/api/models/lgb.md
index 1d3f93fb5..601cebcdf 100644
--- a/docs_sources/api/models/lgb.md
+++ b/docs_sources/api/models/lgb.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/lr.md b/docs_sources/api/models/lr.md
index aae69d158..8dfd23c47 100644
--- a/docs_sources/api/models/lr.md
+++ b/docs_sources/api/models/lr.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/lsvm.md b/docs_sources/api/models/lsvm.md
index 27552d3d6..5f3861385 100644
--- a/docs_sources/api/models/lsvm.md
+++ b/docs_sources/api/models/lsvm.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/mlp.md b/docs_sources/api/models/mlp.md
index 433205d05..4dc28b52e 100644
--- a/docs_sources/api/models/mlp.md
+++ b/docs_sources/api/models/mlp.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/mnb.md b/docs_sources/api/models/mnb.md
index 6d9d56c46..fdc01fa21 100644
--- a/docs_sources/api/models/mnb.md
+++ b/docs_sources/api/models/mnb.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/nf.md b/docs_sources/api/models/nf.md
index c4bfb45d9..12c8ed8a6 100644
--- a/docs_sources/api/models/nf.md
+++ b/docs_sources/api/models/nf.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/ols.md b/docs_sources/api/models/ols.md
index 03e7f8a34..b5089aa8e 100644
--- a/docs_sources/api/models/ols.md
+++ b/docs_sources/api/models/ols.md
@@ -35,11 +35,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/omp.md b/docs_sources/api/models/omp.md
index 00c396dfb..bc040d7f0 100644
--- a/docs_sources/api/models/omp.md
+++ b/docs_sources/api/models/omp.md
@@ -35,11 +35,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/pa.md b/docs_sources/api/models/pa.md
index a5391c3b1..3115976ff 100644
--- a/docs_sources/api/models/pa.md
+++ b/docs_sources/api/models/pa.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/perc.md b/docs_sources/api/models/perc.md
index ea705bc68..673a89eed 100644
--- a/docs_sources/api/models/perc.md
+++ b/docs_sources/api/models/perc.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/pt.md b/docs_sources/api/models/pt.md
index 4cdbe2d53..4f96e83a4 100644
--- a/docs_sources/api/models/pt.md
+++ b/docs_sources/api/models/pt.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/qda.md b/docs_sources/api/models/qda.md
index d6a0c98ea..01e37edc8 100644
--- a/docs_sources/api/models/qda.md
+++ b/docs_sources/api/models/qda.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/rf.md b/docs_sources/api/models/rf.md
index ecd381f80..3372776b6 100644
--- a/docs_sources/api/models/rf.md
+++ b/docs_sources/api/models/rf.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/ridge.md b/docs_sources/api/models/ridge.md
index f856522c1..d605b7b40 100644
--- a/docs_sources/api/models/ridge.md
+++ b/docs_sources/api/models/ridge.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/rnn.md b/docs_sources/api/models/rnn.md
index d0a41a0eb..5c13d45cb 100644
--- a/docs_sources/api/models/rnn.md
+++ b/docs_sources/api/models/rnn.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/sgd.md b/docs_sources/api/models/sgd.md
index 317c28060..28a514681 100644
--- a/docs_sources/api/models/sgd.md
+++ b/docs_sources/api/models/sgd.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/stl.md b/docs_sources/api/models/stl.md
new file mode 100644
index 000000000..82477bbcd
--- /dev/null
+++ b/docs_sources/api/models/stl.md
@@ -0,0 +1,80 @@
+# STL
+-----
+
+:: atom.models:STL
+ :: tags
+ :: description
+ :: see also
+
+
+
+## Example
+
+:: examples
+
+
+
+## Hyperparameters
+
+:: hyperparameters
+
+
+
+## Attributes
+
+### Data attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - pipeline
+ - atom.branch:Branch.mapping
+ - dataset
+ - train
+ - test
+ - X
+ - y
+ - X_train
+ - y_train
+ - X_test
+ - atom.branch:Branch.y_test
+ - X_holdout
+ - y_holdout
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
+ - atom.branch:Branch.target
+
+
+
+### Utility attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - name
+ - run
+ - study
+ - trials
+ - best_trial
+ - best_params
+ - estimator
+ - bootstrap
+ - results
+ - feature_importance
+
+
+
+## Methods
+
+The [plots][available-plots] can be called directly from the model.
+The remaining utility methods can be found hereunder.
+
+:: methods:
+ toc_only: False
+ exclude:
+ - plot_.*
diff --git a/docs_sources/api/models/svm.md b/docs_sources/api/models/svm.md
index 6db11ab25..e34159225 100644
--- a/docs_sources/api/models/svm.md
+++ b/docs_sources/api/models/svm.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/tbats.md b/docs_sources/api/models/tbats.md
new file mode 100644
index 000000000..b5759ca48
--- /dev/null
+++ b/docs_sources/api/models/tbats.md
@@ -0,0 +1,80 @@
+# TBATS
+-------
+
+:: atom.models:TBATS
+ :: tags
+ :: description
+ :: see also
+
+
+
+## Example
+
+:: examples
+
+
+
+## Hyperparameters
+
+:: hyperparameters
+
+
+
+## Attributes
+
+### Data attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - pipeline
+ - atom.branch:Branch.mapping
+ - dataset
+ - train
+ - test
+ - X
+ - y
+ - X_train
+ - y_train
+ - X_test
+ - atom.branch:Branch.y_test
+ - X_holdout
+ - y_holdout
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
+ - atom.branch:Branch.target
+
+
+
+### Utility attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - name
+ - run
+ - study
+ - trials
+ - best_trial
+ - best_params
+ - estimator
+ - bootstrap
+ - results
+ - feature_importance
+
+
+
+## Methods
+
+The [plots][available-plots] can be called directly from the model.
+The remaining utility methods can be found hereunder.
+
+:: methods:
+ toc_only: False
+ exclude:
+ - plot_.*
diff --git a/docs_sources/api/models/theta.md b/docs_sources/api/models/theta.md
new file mode 100644
index 000000000..906667ec2
--- /dev/null
+++ b/docs_sources/api/models/theta.md
@@ -0,0 +1,80 @@
+# Theta
+-------
+
+:: atom.models:Theta
+ :: tags
+ :: description
+ :: see also
+
+
+
+## Example
+
+:: examples
+
+
+
+## Hyperparameters
+
+:: hyperparameters
+
+
+
+## Attributes
+
+### Data attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - pipeline
+ - atom.branch:Branch.mapping
+ - dataset
+ - train
+ - test
+ - X
+ - y
+ - X_train
+ - y_train
+ - X_test
+ - atom.branch:Branch.y_test
+ - X_holdout
+ - y_holdout
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
+ - atom.branch:Branch.target
+
+
+
+### Utility attributes
+
+:: table:
+ - attributes:
+ from_docstring: False
+ include:
+ - name
+ - run
+ - study
+ - trials
+ - best_trial
+ - best_params
+ - estimator
+ - bootstrap
+ - results
+ - feature_importance
+
+
+
+## Methods
+
+The [plots][available-plots] can be called directly from the model.
+The remaining utility methods can be found hereunder.
+
+:: methods:
+ toc_only: False
+ exclude:
+ - plot_.*
diff --git a/docs_sources/api/models/tree.md b/docs_sources/api/models/tree.md
index 730ed02f4..a59618bd8 100644
--- a/docs_sources/api/models/tree.md
+++ b/docs_sources/api/models/tree.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/api/models/xgb.md b/docs_sources/api/models/xgb.md
index 0021f5c82..532b077b3 100644
--- a/docs_sources/api/models/xgb.md
+++ b/docs_sources/api/models/xgb.md
@@ -41,11 +41,11 @@
- atom.branch:Branch.y_test
- X_holdout
- y_holdout
- - atom.branch:Branch.shape
- - atom.branch:Branch.columns
- - atom.branch:Branch.n_columns
- - atom.branch:Branch.features
- - atom.branch:Branch.n_features
+ - shape
+ - columns
+ - n_columns
+ - features
+ - n_features
- atom.branch:Branch.target
diff --git a/docs_sources/contributing.md b/docs_sources/contributing.md
index 6894372fe..108de5968 100644
--- a/docs_sources/contributing.md
+++ b/docs_sources/contributing.md
@@ -91,7 +91,7 @@ and accept your changes.
* Update the documentation so all of your changes are reflected there.
* Adhere to [PEP 8](https://peps.python.org/pep-0008/) standards.
* Use a maximum of 99 characters per line. Try to keep docstrings below
- 74 characters.
+ 80 characters.
* Update the project unit tests to test your code changes as thoroughly
as possible.
* Make sure that your code is properly commented with docstrings and
diff --git a/docs_sources/dependencies.md b/docs_sources/dependencies.md
index fde131cb7..2a4b46020 100644
--- a/docs_sources/dependencies.md
+++ b/docs_sources/dependencies.md
@@ -66,6 +66,7 @@ additional libraries. You can install all the optional dependencies using
* **[pmdarima](http://alkaline-ml.com/pmdarima/)** (>=2.0.3)
* **[schemdraw](https://schemdraw.readthedocs.io/en/latest/index.html)** (>=0.16)
* **[sweetviz](https://github.com/fbdesignpro/sweetviz)** (>=2.3.1)
+* **[tbats](https://github.com/intive-DataScience/tbats)** (>=1.1.3)
* **[wordcloud](http://amueller.github.io/word_cloud/)** (>=1.9.2)
* **[xgboost](https://xgboost.readthedocs.io/en/latest/)** (>=2.0.0)
@@ -75,7 +76,7 @@ additional libraries. You can install all the optional dependencies using
The development dependencies are not installed with the package, and are
not required for any of its functionalities. These libraries are only
necessary to [contribute][contributing] to the project. Install them
-running `pdm install --dev` (remember to install [pdm](https://pdm-project.org/latest/) with
+running `pdm install --dev` (remember to install [pdm](https://pdm-project.org/latest/) first with
`pip install -U pdm`).
**Linting**
diff --git a/docs_sources/examples/deep_learning.ipynb b/docs_sources/examples/deep_learning.ipynb
index 03735284c..ed7302b47 100644
--- a/docs_sources/examples/deep_learning.ipynb
+++ b/docs_sources/examples/deep_learning.ipynb
@@ -113,7 +113,7 @@
" estimator=ConvNN(verbose=0),\n",
" acronym=\"CNN\",\n",
" needs_scaling=True, # Applies automated feature scaling before fitting\n",
- " has_validation=\"epochs\", # Applies in-training validation on parameter epochs\n",
+ " validation=\"epochs\", # Applies in-training validation on parameter epochs\n",
")"
]
},
diff --git a/docs_sources/examples/in_training_validation.ipynb b/docs_sources/examples/in_training_validation.ipynb
index 31c5c31cf..394f11407 100644
--- a/docs_sources/examples/in_training_validation.ipynb
+++ b/docs_sources/examples/in_training_validation.ipynb
@@ -106,7 +106,7 @@
"
{name} | {func.get_summary()} |