Skip to content

Commit

Permalink
new models
Browse files Browse the repository at this point in the history
  • Loading branch information
tvdboom committed Dec 17, 2023
1 parent eabeea8 commit bafd417
Show file tree
Hide file tree
Showing 15 changed files with 426 additions and 86 deletions.
7 changes: 3 additions & 4 deletions atom/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@

from atom.atom import ATOM
from atom.utils.types import (
Backend, Bool, ColumnSelector, Engine, IndexSelector, Int,
IntLargerEqualZero, NJobs, Predictor, Scalar, Sequence, Verbose, Warnings,
YSelector,
Backend, Bool, ColumnSelector, Engine, IndexSelector, IntLargerEqualZero,
NJobs, Predictor, Scalar, Seasonality, Verbose, Warnings, YSelector,
)
from atom.utils.utils import Goal

Expand Down Expand Up @@ -611,7 +610,7 @@ def __init__(
*arrays,
y: YSelector = -1,
ignore: ColumnSelector | None = None,
sp: Int | str | Sequence[Int | str] | None = None,
sp: Seasonality = None,
n_rows: Scalar = 1,
test_size: Scalar = 0.2,
holdout_size: Scalar | None = None,
Expand Down
1 change: 1 addition & 0 deletions atom/basemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2901,6 +2901,7 @@ def get_tags(self) -> dict[str, Any]:
"module": self._est_class.__module__.split(".")[0] + self._module,
"handles_missing": self.handles_missing,
"in_sample_prediction": self.in_sample_prediction,
"multiple_seasonality": self.multiple_seasonality,
"native_multivariate": self.native_multivariate,
"supports_engines": ", ".join(self.supports_engines),
}
Expand Down
24 changes: 15 additions & 9 deletions atom/baserunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
from atom.pipeline import Pipeline
from atom.utils.constants import DF_ATTRS
from atom.utils.types import (
Bool, DataFrame, FloatZeroToOneExc, HarmonicsSelector, Int,
Bool, DataFrame, FloatZeroToOneExc, HarmonicsSelector, Int, IntLargerOne,
MetricConstructor, Model, ModelSelector, ModelsSelector, Pandas,
RowSelector, Scalar, Segment, Sequence, Series, YSelector, dataframe_t,
int_t, segment_t, sequence_t,
RowSelector, Scalar, Seasonality, Segment, Sequence, Series,
TargetSelector, YSelector, dataframe_t, int_t, segment_t, sequence_t,
)
from atom.utils.utils import (
ClassMap, DataContainer, SeasonalPeriod, Task, bk, check_is_fitted,
Expand Down Expand Up @@ -166,7 +166,7 @@ def sp(self) -> int | list[int] | None:
return self._sp

@sp.setter
def sp(self, sp: Int | str | Sequence[Int | str] | None):
def sp(self, sp: Seasonality):
"""Convert seasonal period to integer value."""
if sp is None:
self._sp = None
Expand All @@ -177,7 +177,7 @@ def sp(self, sp: Int | str | Sequence[Int | str] | None):
f"The dataset's index has no attribute freqstr."
)
else:
self._sp = self.dataset.index.freqstr
self._sp = self._get_sp(self.dataset.index.freqstr)
elif sp == "infer":
self._sp = self.get_seasonal_period()
else:
Expand Down Expand Up @@ -908,6 +908,8 @@ def available_models(self) -> pd.DataFrame:
- **uses_exogenous:** Whether the model uses exogenous variables.
- **in_sample_prediction:** Whether the model can do predictions
on the training set.
- **multiple_seasonality:** Whether the model can handle more than
one [seasonal period][seasonality].
- **native_multilabel:** Whether the model has native support
for [multilabel][] tasks.
- **native_multioutput:** Whether the model has native support
Expand Down Expand Up @@ -1124,9 +1126,10 @@ def get_sample_weight(self, rows: RowSelector = "train") -> Series:
@composed(crash, beartype)
def get_seasonal_period(
self,
max_sp: Int | None = None,
max_sp: IntLargerOne | None = None,
harmonics: HarmonicsSelector | None = None,
) -> int:
target: TargetSelector = 0,
) -> int | list[int]:
"""Get the seasonal periods of the time series.
Use the data in the training set to calculate the seasonal
Expand Down Expand Up @@ -1161,13 +1164,16 @@ def get_seasonal_period(
- If "raw_strength", result=[3, 7, 8]
- If "harmonic_strength", result=[8, 3, 7]
target: int or str, default=0
Target column to look at. Only for [multivariate][] tasks.
Returns
-------
list of int
int or list of int
Seasonal periods, ordered by significance.
"""
yt = self.y_train.copy()
yt = self.dataset[self.branch._get_target(target, only_columns=True)]

Check notice on line 1176 in atom/baserunner.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _get_target of a class
max_sp = max_sp or (len(yt) - 1) // 2

for _ in np.arange(ndiffs(yt)):
Expand Down
2 changes: 0 additions & 2 deletions atom/basetrainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,6 @@ def _prepare_parameters(self):

# Check if libraries for non-sklearn models are available
dependencies = {
"ARIMA": "pmdarima",
"AutoARIMA": "pmdarima",
"BATS": "tbats",
"CatB": "catboost",
"LGB": "lightgbm",
Expand Down
55 changes: 22 additions & 33 deletions atom/models/classreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,17 +472,17 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
if getattr(self, "_metric", None) and not self._gpu:
eval_metric = CatBMetric(self._metric[0], task=self.task)

return self._est_class(
eval_metric=params.pop("eval_metric", eval_metric),
train_dir=params.pop("train_dir", ""),
allow_writing_files=params.pop("allow_writing_files", False),
thread_count=params.pop("n_jobs", self.n_jobs),
task_type=params.pop("task_type", "GPU" if self._gpu else "CPU"),
devices=str(self._device_id),
verbose=params.pop("verbose", False),
random_state=params.pop("random_state", self.random_state),
**params,
)
default = {
"eval_metric": eval_metric,
"train_dir": "",
"allow_writing_files": False,
"thread_count": self.n_jobs,
"task_type": "GPU" if self._gpu else "CPU",
"devices": str(self._device_id),
"verbose": False,
}

return super()._get_est(default | params)

def _fit_estimator(
self,
Expand Down Expand Up @@ -1672,14 +1672,13 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
# PYTHONWarnings doesn't work since they go from C/C++ code to stdout
warns = {"always": 2, "default": 1, "once": 0, "error": 0, "ignore": -1}

return self._est_class(
verbose=params.pop("verbose", warns.get(self.warnings, -1)),
n_jobs=params.pop("n_jobs", self.n_jobs),
device=params.pop("device", "gpu" if self._gpu else "cpu"),
gpu_device_id=params.pop("gpu_device_id", self._device_id or -1),
random_state=params.pop("random_state", self.random_state),
**params,
)
default = {
"verbose": warns.get(self.warnings, -1),
"device": "gpu" if self._gpu else "cpu",
"gpu_device_id": self._device_id or -1,
}

return super()._get_est(default | params)

def _fit_estimator(
self,
Expand Down Expand Up @@ -1960,7 +1959,7 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
"""
if self.engine.get("estimator") == "cuml" and self._goal is Goal.classification:
return self._est_class(probability=params.pop("probability", True), **params)
return super()._get_est({"probability": True} | params)
else:
return super()._get_est(params)

Expand Down Expand Up @@ -3010,11 +3009,7 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
"""
if self.engine.get("estimator") == "cuml" and self._goal is Goal.classification:
return self._est_class(
probability=params.pop("probability", True),
random_state=params.pop("random_state", self.random_state),
**params,
)
return super()._get_est({"probability": True} | params)
else:
return super()._get_est(params)

Expand Down Expand Up @@ -3142,14 +3137,8 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
if getattr(self, "_metric", None):
eval_metric = XGBMetric(self._metric[0], task=self.task)

return self._est_class(
eval_metric=params.pop("eval_metric", eval_metric),
n_jobs=params.pop("n_jobs", self.n_jobs),
device=params.pop("device", self.device),
verbosity=params.pop("verbosity", 0),
random_state=params.pop("random_state", self.random_state),
**params,
)
default = {"eval_metric": eval_metric, "device": self.device, "verbosity": 0}
return super()._get_est(default | params)

def _fit_estimator(
self,
Expand Down
6 changes: 4 additions & 2 deletions atom/models/ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
"""
return self._est_class(
estimators=[
(m.name, m.export_pipeline() if m.scaler else m.estimator) for m in self._models
(m.name, m.export_pipeline() if m.scaler else m.estimator)
for m in self._models
],
n_jobs=params.pop("n_jobs", self.n_jobs),
**params,
Expand Down Expand Up @@ -128,7 +129,8 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
"""
return self._est_class(
estimators=[
(m.name, m.export_pipeline() if m.scaler else m.estimator) for m in self._models
(m.name, m.export_pipeline() if m.scaler else m.estimator)
for m in self._models
],
n_jobs=params.pop("n_jobs", self.n_jobs),
**params,
Expand Down
Loading

0 comments on commit bafd417

Please sign in to comment.