Commit

add plot_ccf
tvdboom committed Jan 30, 2024
1 parent 9cbe039 commit f96b802
Showing 20 changed files with 635 additions and 43 deletions.
20 changes: 19 additions & 1 deletion README.md
@@ -90,6 +90,24 @@ Example steps taken by ATOM's pipeline:

<br><br>

❗ Why you should use ATOM
-------------------------

* [Multiple data cleaning and feature engineering classes](https://tvdboom.github.io/ATOM/latest/user_guide/data_cleaning/)
* [55+ classification, regression and forecast models](https://tvdboom.github.io/ATOM/latest/user_guide/models/) to choose from
* [Possibility to train multiple models with one line of code](https://tvdboom.github.io/ATOM/latest/getting_started/#usage)
* [Fast implementation of hyperparameter tuning](https://tvdboom.github.io/ATOM/latest/user_guide/training/#hyperparameter-tuning)
* [Easy way to compare the results from different models](https://tvdboom.github.io/ATOM/latest/user_guide/training/)
* [50+ plots to analyze the data and model performance](https://tvdboom.github.io/ATOM/latest/user_guide/plots/#available-plots)
* [Avoid refactoring to test new pipelines](https://tvdboom.github.io/ATOM/latest/user_guide/data_management/#branches)
* [Native support for GPU training](https://tvdboom.github.io/ATOM/latest/user_guide/accelerating/#gpu-acceleration)
* [25+ example notebooks to get you started](https://tvdboom.github.io/ATOM/latest/examples/accelerating_cuml/)
* [Full integration with multilabel and multioutput datasets](https://tvdboom.github.io/ATOM/latest/user_guide/data_management/#multioutput-tasks)
* [Native support for sparse datasets](https://tvdboom.github.io/ATOM/latest/user_guide/data_management/#sparse-datasets)
* [Built-in transformers for NLP pipelines](https://tvdboom.github.io/ATOM/latest/user_guide/nlp/)
* [Avoid endless imports and documentation lookups](https://tvdboom.github.io/ATOM/latest/getting_started/#usage) (see the usage sketch below)
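
To make these bullets concrete, here is a minimal usage sketch modeled on ATOM's getting-started guide (the dataset and argument values are illustrative assumptions, not part of this commit):

```python
from atom import ATOMClassifier
from sklearn.datasets import load_breast_cancer

# Load a demo dataset; ATOM handles the train/test split internally
X, y = load_breast_cancer(return_X_y=True, as_frame=True)

atom = ATOMClassifier(X, y, test_size=0.2, verbose=2)
atom.impute(strat_num="median")              # one of the data cleaning steps
atom.run(models=["LR", "RF"], metric="auc")  # train two models in one call
atom.plot_roc()                              # one of the 50+ available plots
```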

<br><br>

🛠️ Installation
---------------
@@ -183,10 +201,10 @@ atom.plot_lift()
--- | ---
**[About](https://tvdboom.github.io/ATOM/latest/release_history/)** | Learn more about the package.
🚀 **[Getting started](https://tvdboom.github.io/ATOM/latest/getting_started/)** | New to ATOM? Here's how to get you started!
-📢 **[Release history](https://tvdboom.github.io/ATOM/latest/release_history/)** | What are the new features of the latest release?
👨‍💻 **[User guide](https://tvdboom.github.io/ATOM/latest/user_guide/introduction/)** | How to use ATOM and its features.
🎛️ **[API Reference](https://tvdboom.github.io/ATOM/latest/API/ATOM/atomclassifier/)** | The detailed reference for ATOM's API.
📋 **[Examples](https://tvdboom.github.io/ATOM/latest/examples/binary_classification/)** | Example notebooks show you what can be done and how.
+📢 **[Changelog](https://tvdboom.github.io/ATOM/latest/changelog/)** | What are the new features in the latest release?
**[FAQ](https://tvdboom.github.io/ATOM/latest/faq/)** | Get answers to frequently asked questions.
🔧 **[Contributing](https://tvdboom.github.io/ATOM/latest/contributing/)** | Do you want to contribute to the project? Read this before creating a PR.
🌳 **[Dependencies](https://tvdboom.github.io/ATOM/latest/dependencies/)** | Which other packages does ATOM depend on?
10 changes: 5 additions & 5 deletions atom/atom.py
@@ -1261,12 +1261,12 @@ def _add_transformer(

        # Memoize the fitted transformer_c for repeated instantiations of atom
        fit = self._memory.cache(fit_one)
-        kwargs = dict(
-            estimator=transformer_c,
-            X=self.X_train,
-            y=self.y_train,
+        kwargs = {
+            "estimator": transformer_c,
+            "X": self.X_train,
+            "y": self.y_train,
            **fit_params,
-        )
+        }

        # Check if the fitted estimator is retrieved from cache to inform
        # the user, else user might notice the lack of printed messages
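The `dict(...)` → `{...}` change in this file and the next two (baserunner.py, basetrainer.py) appears stylistic (the literal form is what linters such as flake8-comprehensions' C408 rule suggest); both spellings accept an unpacked mapping and produce the same dict. A minimal sketch of the equivalence, with illustrative values:

```python
fit_params = {"sample_weight": None}

a = dict(estimator="clf", X="X_train", y="y_train", **fit_params)
b = {"estimator": "clf", "X": "X_train", "y": "y_train", **fit_params}

assert a == b  # identical mappings; the literal just avoids the dict() call
```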
12 changes: 6 additions & 6 deletions atom/baserunner.py
@@ -1431,13 +1431,13 @@ def stacking(
"train multiple Stacking models within the same instance."
)

kw_model = dict(
goal=self._goal,
config=self._config,
branches=self._branches,
metric=self._metric,
kw_model = {
"goal": self._goal,
"config": self._config,
"branches": self._branches,
"metric": self._metric,
**{attr: getattr(self, attr) for attr in BaseTransformer.attrs},
)
}

# The parameter name is different in sklearn and sktime
regressor = "regressor" if self.task.is_forecast else "final_estimator"
12 changes: 6 additions & 6 deletions atom/basetrainer.py
@@ -154,13 +154,13 @@ def _prepare_parameters(self):

        # Define models ============================================ >>

-        kwargs = dict(
-            goal=self._goal,
-            config=self._config,
-            branches=self._branches,
-            metric=self._metric,
+        kwargs = {
+            "goal": self._goal,
+            "config": self._config,
+            "branches": self._branches,
+            "metric": self._metric,
            **{attr: getattr(self, attr) for attr in BaseTransformer.attrs},
-        )
+        }

        inc = []
        exc = []
2 changes: 1 addition & 1 deletion atom/basetransformer.py
@@ -390,7 +390,7 @@ def _inherit(self, obj: T_Estimator, fixed: tuple[str, ...] = ()) -> T_Estimator
        for p in obj.get_params():
            if p in fixed:
                continue
-            elif match := re.search("(n_jobs|random_state)$|__\1$", p):
+            elif match := re.search("^(n_jobs|random_state)$|__\1$", p):
                obj.set_params(**{p: getattr(self, match.group())})
            elif re.search(r"^sp$|__sp$", p) and hasattr(self, "_config") and self._config.sp:
                if self.multiple_seasonality:
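The fix here is the added `^` anchor: without it, the first alternative matches any parameter that merely ends in `n_jobs` or `random_state`. A small illustration (the second alternative is spelled out below instead of using the `\1` backreference, purely for readability):

```python
import re

old = r"(n_jobs|random_state)$"                            # unanchored
new = r"^(n_jobs|random_state)$|__(n_jobs|random_state)$"  # anchored + nested form

print(bool(re.search(old, "my_n_jobs")))          # True: unwanted suffix match
print(bool(re.search(new, "my_n_jobs")))          # False
print(bool(re.search(new, "n_jobs")))             # True: exact name
print(bool(re.search(new, "estimator__n_jobs")))  # True: nested parameter
```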
3 changes: 2 additions & 1 deletion atom/branch/branch.py
@@ -571,7 +571,8 @@ def _get_columns(
            select from.

        only_numerical: bool, default=False
-            Whether to select only numerical columns.
+            Whether to select only numerical columns when
+            `columns=None`.

        Returns
        -------
2 changes: 1 addition & 1 deletion atom/data_cleaning.py
@@ -773,7 +773,7 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None) -> Self:
self._log("Fitting Cleaner...", 1)

if X is not None and self.drop_dtypes is not None:
self._drop_cols = list(X.select_dtypes(include=list(self.drop_dtypes)).columns)
self._drop_cols = list(X.select_dtypes(include=lst(self.drop_dtypes)).columns)

Check notice on line 776 in atom/data_cleaning.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

An instance attribute is defined outside `__init__`

Instance attribute _drop_cols defined outside __init__

if y is not None:
if isinstance(y, series_t):
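The one-character change from `list` to `lst` matters because `drop_dtypes` can be a single string: `list()` iterates it into characters, while ATOM's `lst` utility (behavior assumed in the sketch below) wraps scalars in a list instead:

```python
# Built-in list() explodes a lone dtype string into characters
print(list("number"))  # ['n', 'u', 'm', 'b', 'e', 'r']

def lst(x):
    """Assumed behavior of atom.utils' lst: return x as a list."""
    return list(x) if isinstance(x, (list, tuple)) else [x]

print(lst("number"))              # ['number']
print(lst(["number", "object"]))  # ['number', 'object']
```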
40 changes: 32 additions & 8 deletions atom/models/ensembles.py
@@ -59,11 +59,23 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
            Estimator instance.

        """
+        # We use _est_class with get_params instead of just a dict
+        # to also fix the parameters of the models in the ensemble
+        estimator = self._est_class(
+            **{
+                "estimators" if not self.task.is_forecast else "forecasters": [
+                    (m.name, m.export_pipeline()[-2:] if m.scaler else m.estimator)
+                    for m in self._models
+                ]
+            }
+        )
+
+        # Drop the model names from params since those
+        # are not direct parameters of the ensemble
        default = {
-            "estimators" if not self.task.is_forecast else "forecasters": [
-                (m.name, m.export_pipeline()[-2:] if m.scaler else m.estimator)
-                for m in self._models
-            ]
+            k: v
+            for k, v in estimator.get_params().items()
+            if k not in (m.name for m in self._models)
        }

        return super()._get_est(default | params)
@@ -115,11 +127,23 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
            Estimator instance.

        """
+        # We use _est_class with get_params instead of just a dict
+        # to also fix the parameters of the models in the ensemble
+        estimator = self._est_class(
+            **{
+                "estimators" if not self.task.is_forecast else "forecasters": [
+                    (m.name, m.export_pipeline()[-2:] if m.scaler else m.estimator)
+                    for m in self._models
+                ]
+            }
+        )
+
+        # Drop the model names from params since those
+        # are not direct parameters of the ensemble
        default = {
-            "estimators" if not self.task.is_forecast else "forecasters": [
-                (m.name, m.export_pipeline()[-2:] if m.scaler else m.estimator)
-                for m in self._models
-            ]
+            k: v
+            for k, v in estimator.get_params().items()
+            if k not in (m.name for m in self._models)
        }

        return super()._get_est(default | params)
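
For context on why the new code round-trips through `get_params()`: in a sklearn-style ensemble, `get_params()` lists the named sub-estimators alongside the ensemble's own constructor parameters, so the model names have to be filtered back out before merging with `params`. A small sklearn illustration:

```python
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

vote = VotingClassifier(
    estimators=[("LR", LogisticRegression()), ("DT", DecisionTreeClassifier())]
)

params = vote.get_params()
print("LR" in params)      # True: a named sub-estimator, not a constructor argument
print("voting" in params)  # True: a genuine VotingClassifier parameter
```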
52 changes: 51 additions & 1 deletion atom/models/ts.py
@@ -344,7 +344,7 @@ def _get_est(self, params: dict[str, Any]) -> Predictor:
            Estimator instance.

        """
-        return super()._get_est({"auto": True} | params)
+        return super()._get_est({"sp": self._config.sp.sp or 1, "auto": True} | params)

    @staticmethod
    def _get_distributions() -> dict[str, BaseDistribution]:
@@ -900,6 +900,22 @@ class NaiveForecaster(BaseModel):
"forecast": "sktime.forecasting.naive.NaiveForecaster"
}

def _get_est(self, params: dict[str, Any]) -> Predictor:
"""Get the model's estimator with unpacked parameters.
Parameters
----------
params: dict
Hyperparameters for the estimator.
Returns
-------
Predictor
Estimator instance.
"""
return super()._get_est({"sp": self._config.sp.sp or 1} | params)

    @staticmethod
    def _get_distributions() -> dict[str, BaseDistribution]:
        """Get the predefined hyperparameter distributions.
@@ -1258,6 +1274,24 @@ class STL(BaseModel):
"forecast": "sktime.forecasting.trend.STLForecaster"
}

def _get_est(self, params: dict[str, Any]) -> Predictor:
"""Get the model's estimator with unpacked parameters.
Parameters
----------
params: dict
Hyperparameters for the estimator.
Returns
-------
Predictor
Estimator instance.
"""
# Parameter sp must be provided to STL and >=2
# None is only accepted if y has freq in index but sktime passes array
return super()._get_est({"sp": self._config.sp.sp or 2} | params)

    @staticmethod
    def _get_distributions() -> dict[str, BaseDistribution]:
        """Get the predefined hyperparameter distributions.
@@ -1420,6 +1454,22 @@ class Theta(BaseModel):
"forecast": "sktime.forecasting.theta.ThetaForecaster"
}

def _get_est(self, params: dict[str, Any]) -> Predictor:
"""Get the model's estimator with unpacked parameters.
Parameters
----------
params: dict
Hyperparameters for the estimator.
Returns
-------
Predictor
Estimator instance.
"""
return super()._get_est({"sp": self._config.sp.sp or 1} | params)

    @staticmethod
    def _get_distributions() -> dict[str, BaseDistribution]:
        """Get the predefined hyperparameter distributions.
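For context on the `sp` fallbacks added above: sktime forecasters take the seasonal period as `sp`, and `STLForecaster` rejects `sp < 2` (a seasonal decomposition needs at least two observations per cycle), which is presumably why STL falls back to 2 where the other models fall back to 1. A minimal sktime sketch:

```python
import pandas as pd
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.trend import STLForecaster

# Three years of monthly data
y = pd.Series(range(36), index=pd.period_range("2020-01", periods=36, freq="M"))

# sp=1 means "no seasonality" and is a valid fallback for most forecasters
NaiveForecaster(strategy="last", sp=1).fit(y)

# STL decomposes into trend/seasonal/residual, so it needs sp >= 2;
# sp=2 is the smallest value that still defines a seasonal cycle
STLForecaster(sp=2).fit(y)
```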
(The diff for the remaining changed files is not shown.)
