Commit: plot_bootstrap
tvdboom committed Jan 18, 2024
1 parent 7b810c2 · commit c6eb43f

Showing 36 changed files with 18,226 additions and 3,211 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/config.yml
@@ -99,10 +99,10 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install -U pip setuptools
-          pip install -U pytest pytest-mock pytest-cov
+          pip install -U pytest pytest-mock pytest-cov pytest-xdist
           pip install -e .[full]
       - name: Run tests w/ coverage
-        run: pytest --cov=atom --cov-report=xml tests/
+        run: pytest -n=auto --cov=atom --cov-report=xml tests/
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
         if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' }}
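The new pytest-xdist dependency is what makes the `-n=auto` flag work: it spawns one worker per available CPU core and distributes the test files across them. A minimal sketch for reproducing the parallel run locally (assuming pytest-xdist and pytest-cov are installed, as in the workflow above):

    # run_tests.py -- invoke pytest programmatically with xdist enabled
    import pytest

    # "-n auto" lets pytest-xdist pick one worker per CPU core
    raise SystemExit(pytest.main(["-n", "auto", "--cov=atom", "--cov-report=xml", "tests/"]))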
23 changes: 8 additions & 15 deletions atom/atom.py
@@ -1050,7 +1050,7 @@ def stats(self, _vb: Int = -2, /):
         if hasattr(self.X, "sparse"):  # All columns are sparse
             self._log(f"Density: {100. * self.X.sparse.density:.2f}%", _vb)
         else:  # Not all columns are sparse
-            n_sparse = sum([pd.api.types.is_sparse(self.X[c]) for c in self.X])
+            n_sparse = sum(isinstance(self[c].dtype, pd.SparseDtype) for c in self.features)
             n_dense = self.n_features - n_sparse
             p_sparse = round(100 * n_sparse / self.n_features, 1)
             p_dense = round(100 * n_dense / self.n_features, 1)
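The replaced call matters beyond style: `pd.api.types.is_sparse` is deprecated since pandas 2.0, and checking the dtype directly is the supported spelling. A minimal standalone illustration (assuming only pandas):

    import pandas as pd

    s = pd.Series(pd.arrays.SparseArray([0, 0, 1]))
    # pd.api.types.is_sparse(s) emits a deprecation warning on pandas >= 2.0;
    # an isinstance check on the dtype is equivalent and future-proof.
    print(isinstance(s.dtype, pd.SparseDtype))  # True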
@@ -1193,14 +1193,13 @@ def _add_transformer(
             transformed and the transformer is added to atom's
             pipeline.

-        If the transformer has the n_jobs and/or random_state
-        parameters and they are left to their default value,
-        they adopt atom's values.
-
         Parameters
         ----------
         transformer: Transformer
             Estimator to add. Should implement a `transform` method.
+            If a class is provided (instead of an instance), and it
+            has the `n_jobs` and/or `random_state` parameters, it
+            adopts atom's values.

         columns: int, str, segment, sequence or None, default=None
             Columns in the dataset to transform. If None, transform
@@ -1220,7 +1219,7 @@
         """
         if callable(transformer):
-            transformer_c = transformer()
+            transformer_c = self._inherit(transformer())
         else:
             transformer_c = transformer
@@ -1231,9 +1230,6 @@
                 "new branch to continue the pipeline."
             )

-        # Add BaseTransformer params to the estimator if left to default
-        transformer_c = self._inherit(transformer_c)
-
         if not hasattr(transformer_c, "_train_only"):
             transformer_c._train_only = train_only
@@ -1385,16 +1381,13 @@ def add(
         This means that the transformer should not add, remove or
         shuffle rows unless it returns a dataframe.

-        !!! note
-            If the transformer has a `n_jobs` and/or `random_state`
-            parameter that is left to its default value, it adopts
-            atom's value.
-
         Parameters
         ----------
         transformer: Transformer
             Estimator to add to the pipeline. Should implement a
-            `transform` method.
+            `transform` method. If a class is provided (instead of an
+            instance), and it has the `n_jobs` and/or `random_state`
+            parameters, it adopts atom's values.

         columns: int, str, segment, sequence or None, default=None
             [Selection of columns][row-and-column-selection] to
38 changes: 22 additions & 16 deletions atom/basemodel.py
@@ -448,32 +448,38 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
         """
         # Separate the params for the estimator from those in sub-estimators
         base_params, sub_params = {}, {}
-        if params:
-            for name, value in params.items():
-                if "__" not in name:
-                    base_params[name] = value
-                else:
-                    sub_params[name] = value
+        for name, value in params.items():
+            if "__" not in name:
+                base_params[name] = value
+            else:
+                sub_params[name] = value

         estimator = self._est_class(**base_params).set_params(**sub_params)

+        fixed = tuple(params)
         if hasattr(self, "task"):
-            if self.task is Task.multilabel_classification:
+            if self.task.is_forecast and self._goal.name not in self._estimators:
+                fixed = tuple(f"estimator__{f}" for f in fixed)
+
+                # Forecasting task with a regressor
+                if self.task.is_multioutput:
+                    estimator = make_reduction(estimator, strategy="multioutput")
+                else:
+                    estimator = make_reduction(estimator, strategy="recursive")
+
+            elif self.task is Task.multilabel_classification:
                 if not self.native_multilabel:
+                    fixed = tuple(f"base_estimator__{f}" for f in fixed)
                     estimator = ClassifierChain(estimator)

             elif self.task.is_multioutput and not self.native_multioutput:
+                fixed = tuple(f"estimator__{f}" for f in fixed)
                 if self.task.is_classification:
                     estimator = MultiOutputClassifier(estimator)
                 elif self.task.is_regression:
                     estimator = MultiOutputRegressor(estimator)
-            elif self.task.is_forecast and self._goal.name not in self._estimators:
-                # Forecasting task with a regressor
-                if self.native_multioutput:
-                    estimator = make_reduction(estimator, strategy="multioutput")
-                else:
-                    estimator = make_reduction(estimator, strategy="recursive")

-        return self._inherit(estimator)
+        return self._inherit(estimator, fixed=fixed)

     def _fit_estimator(
         self,
@@ -1550,7 +1556,7 @@ def results(self) -> pd.Series:
             data[f"{met}_ht"] = self.trials.loc[self.best_trial.number, met]
             data["time_ht"] = self.trials.iloc[-1, -2]
         for met in self._metric:
-            for ds in ("train", "test"):
+            for ds in ["train", "test"] + ([] if self.holdout is None else ["holdout"]):
                 data[f"{met.name}_{ds}"] = self._get_score(met, ds)
         data["time_fit"] = self._time_fit
         if self._bootstrap is not None:
@@ -1975,7 +1981,7 @@ def cross_validate(self, **kwargs) -> pd.DataFrame:
         **kwargs
             Additional keyword arguments for one of these functions.

-            - For forecast tasks: [validate][sktimevalidate].
+            - For forecast tasks: [evaluate][sktimeevaluate].
            - Else: [cross_validate][sklearncrossvalidate].

         Returns
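The reordered branches in `_get_est` detect a forecast task with a regressor first, and the `fixed` tuple gets the `estimator__` prefix so tuned hyperparameters survive the wrapping. The wrapping itself is sktime's standard reduction approach; a sketch of what it produces (assuming sktime and scikit-learn are installed):

    from sklearn.ensemble import GradientBoostingRegressor
    from sktime.datasets import load_airline
    from sktime.forecasting.compose import make_reduction

    # Wrap a tabular regressor so it can forecast recursively
    forecaster = make_reduction(GradientBoostingRegressor(), strategy="recursive", window_length=12)
    forecaster.fit(load_airline())
    print(forecaster.predict(fh=[1, 2, 3]))

    # The regressor's params now live under the "estimator__" prefix,
    # which is why _get_est rewrites the fixed names the same way.
    print("estimator__n_estimators" in forecaster.get_params())  # True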
29 changes: 13 additions & 16 deletions atom/basetransformer.py
@@ -9,6 +9,7 @@

 import os
 import random
+import re
 import tempfile
 import warnings
 from collections.abc import Hashable
@@ -31,16 +32,14 @@
 from joblib.memory import Memory
 from pandas._typing import Axes
 from ray.util.joblib import register_ray
-from sklearn.base import BaseEstimator
 from sklearn.utils.validation import check_memory

-from atom.utils.constants import SHARED_PARAMS
 from atom.utils.types import (
     Backend, Bool, DataFrame, Engine, Estimator, Int, IntLargerEqualZero,
     Pandas, Sequence, Severity, Verbose, Warnings, XSelector, YSelector,
     bool_t, dataframe_t, int_t, sequence_t,
 )
-from atom.utils.utils import crash, flt, lst, n_cols, sign, to_df, to_pandas
+from atom.utils.utils import crash, flt, lst, n_cols, to_df, to_pandas


 T_Estimator = TypeVar("T_Estimator", bound=Estimator)
@@ -354,36 +353,34 @@ def _device_id(self) -> int:

     # Methods ====================================================== >>

-    def _inherit(self, obj: T_Estimator) -> T_Estimator:
+    def _inherit(self, obj: T_Estimator, fixed: tuple[str, ...] = ()) -> T_Estimator:
         """Inherit parameters from parent.

         Utility method to set the sp (seasonal period), n_jobs and
         random_state parameters of an estimator (if available) equal
         to those of this instance. If `obj` is a meta-estimator, it
-        also sets the parameters to the base estimator. Note that
-        the parameters are only changed when the value is equal to
-        the constructor's default value.
+        also adjusts the parameters of the base estimator.

         Parameters
         ----------
         obj: Estimator
             Instance for which to change the parameters.

+        fixed: tuple of str, default=()
+            Fixed parameters that should not be overridden.
+
         Returns
         -------
         Estimator
             Same object with changed parameters.

         """
-        signature = sign(obj.__class__)
-        for p, value in obj.get_params().items():
-            if p in SHARED_PARAMS and (p not in signature or value == signature[p]._default):
-                # Some estimators like XGB use kwargs, so param
-                # isn't in signature. In that case, always override
-                obj.set_params(**{p: getattr(self, p)})
-            elif isinstance(value, BaseEstimator):
-                obj.set_params(**{p: self._inherit(value)})
-            elif p == "sp" and hasattr(self, "_config") and self._config.sp:
+        for p in obj.get_params():
+            if p in fixed:
+                continue
+            elif match := re.search(r"(n_jobs|random_state)$", p):
+                obj.set_params(**{p: getattr(self, match.group())})
+            elif re.search(r"sp$|__sp$", p) and hasattr(self, "_config") and self._config.sp:
                 if self.multiple_seasonality:
                     obj.set_params(**{p: self._config.sp})
                 else:
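The new implementation trades the signature-default check for plain name matching: any parameter whose name ends in n_jobs or random_state is synced with the parent, including nested ones like `estimator__n_jobs`, because `re.search` anchors only at the end of the string. A standalone re-creation of the idea (a hypothetical helper, assuming only scikit-learn):

    import re
    from sklearn.ensemble import RandomForestClassifier

    def inherit_params(obj, parent, fixed=()):
        """Copy n_jobs/random_state from `parent` onto `obj`, skipping `fixed`."""
        for p in obj.get_params():
            if p in fixed:
                continue
            if match := re.search(r"(n_jobs|random_state)$", p):
                obj.set_params(**{p: parent[match.group()]})
        return obj

    parent = {"n_jobs": 2, "random_state": 1}
    est = inherit_params(RandomForestClassifier(random_state=42), parent, fixed=("random_state",))
    print(est.n_jobs, est.random_state)  # 2 42 -> the fixed param was preserved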
8 changes: 4 additions & 4 deletions atom/data_cleaning.py
@@ -406,14 +406,15 @@ def fit(self, X: DataFrame, y: Pandas = -1) -> Self:
                 f"Invalid value for the strategy parameter, got {self.strategy}. "
                 f"Choose from: {', '.join(strategies)}."
             )
-            estimator = strategies[self.strategy.lower()](**self.kwargs)
+            est_class = strategies[self.strategy.lower()]
+            estimator = self._inherit(est_class(**self.kwargs), fixed=tuple(self.kwargs))
         elif not hasattr(self.strategy, "fit_resample"):
             raise TypeError(
                 "Invalid type for the strategy parameter. A "
                 "custom estimator must have a fit_resample method."
             )
         elif callable(self.strategy):
-            estimator = self.strategy(**self.kwargs)
+            estimator = self._inherit(self.strategy(**self.kwargs), fixed=tuple(self.kwargs))
         else:
             estimator = self.strategy
@@ -425,8 +426,7 @@ def fit(self, X: DataFrame, y: Pandas = -1) -> Self:
             for key, value in self.mapping_.items():
                 self._counts[key] = np.sum(y == value)

-        # Add n_jobs or random_state if its one of the estimator's parameters
-        self._estimator = self._inherit(estimator).fit(X, y)
+        self._estimator = estimator.fit(X, y)

         # Add the estimator as attribute to the instance
         setattr(self, f"{estimator.__class__.__name__.lower()}_", self._estimator)
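With inheritance moved to construction time, user-supplied kwargs are passed as `fixed`, so the parent's settings can never clobber them. A hedged sketch of the resulting behavior (assuming atom's Balancer API and imblearn's SMOTE backing the "smote" strategy):

    from atom.data_cleaning import Balancer

    # k_neighbors goes straight to SMOTE and is marked fixed;
    # random_state is not in kwargs, so the sampler inherits it
    # from the Balancer instance itself.
    balancer = Balancer(strategy="smote", random_state=1, k_neighbors=3)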
2 changes: 1 addition & 1 deletion atom/models/classreg.py
@@ -1598,7 +1598,7 @@ class LeastAngleRegression(BaseModel):
 class LightGBM(BaseModel):
     """Light Gradient Boosting Machine.

-    LightGBM is a gradient boosting model that uses tree based learning
+    LightGBM is a gradient boosting model that uses tree-based learning
     algorithms. It is designed to be distributed and efficient with the
     following advantages:
28 changes: 14 additions & 14 deletions atom/models/ts.py
@@ -80,7 +80,7 @@ class ARIMA(BaseModel):
     handles_missing = True
     uses_exogenous = True
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -244,7 +244,7 @@ class AutoARIMA(BaseModel):
     handles_missing = True
     uses_exogenous = True
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -336,7 +336,7 @@ class BATS(BaseModel):
     handles_missing = False
     uses_exogenous = False
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -417,7 +417,7 @@ class Croston(BaseModel):
     handles_missing = False
     uses_exogenous = True
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -543,7 +543,7 @@ class ExponentialSmoothing(BaseModel):
     handles_missing = False
     uses_exogenous = False
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -629,7 +629,7 @@ class ETS(BaseModel):
     handles_missing = True
     uses_exogenous = False
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -723,7 +723,7 @@ class MSTL(BaseModel):
     handles_missing = False
     uses_exogenous = False
     multiple_seasonality = True
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -825,7 +825,7 @@ class NaiveForecaster(BaseModel):
     handles_missing = True
     uses_exogenous = False
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -880,7 +880,7 @@ class PolynomialTrend(BaseModel):
     handles_missing = False
     uses_exogenous = False
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -940,7 +940,7 @@ class Prophet(BaseModel):
     handles_missing = False
     uses_exogenous = False
     multiple_seasonality = True
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -1046,7 +1046,7 @@ class SARIMAX(BaseModel):
     handles_missing = False
     uses_exogenous = True
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -1183,7 +1183,7 @@ class STL(BaseModel):
     handles_missing = False
     uses_exogenous = False
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -1260,7 +1260,7 @@ class TBATS(BaseModel):
     handles_missing = False
     uses_exogenous = False
     multiple_seasonality = True
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
@@ -1345,7 +1345,7 @@ class Theta(BaseModel):
     handles_missing = False
     uses_exogenous = False
     multiple_seasonality = False
-    native_multioutput = False
+    native_multioutput = True
     supports_engines = ("sktime",)

     _estimators: ClassVar[dict[str, str]] = {
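Flipping native_multioutput to True across these statistical forecasters appears to lean on sktime's vectorization: its univariate forecasters broadcast over the columns of a multivariate y, so no reduction or MultiOutput wrapper is needed. A quick illustration (assuming sktime and a multivariate DataFrame):

    import numpy as np
    import pandas as pd
    from sktime.forecasting.naive import NaiveForecaster

    y = pd.DataFrame(
        np.random.default_rng(1).random((36, 2)),
        columns=["series_a", "series_b"],
        index=pd.period_range("2020-01", periods=36, freq="M"),
    )
    forecaster = NaiveForecaster(strategy="last")
    forecaster.fit(y)
    print(forecaster.predict(fh=[1, 2, 3]))  # one forecast column per series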
… (29 more changed files not shown)