Commit 77561a3

fix tests all plots
tvdboom committed Oct 13, 2023
1 parent 2023ab2 commit 77561a3
Showing 20 changed files with 768 additions and 897 deletions.
2 changes: 1 addition & 1 deletion atom/atom.py
@@ -503,7 +503,7 @@ def automl(self, **kwargs):

                 self._models.append(model)
                 self._log(
-                    f" --> Adding model {model._fullname} "
+                    f" --> Adding model {model.fullname} "
                     f"({model.name}) to the pipeline...", 2
                 )
                 break  # Avoid non-linear pipelines
48 changes: 24 additions & 24 deletions atom/basemodel.py
@@ -284,7 +284,7 @@ def __getitem__(self, item: Int | str | list) -> Pandas:
         return self.dataset[item]  # Get a subset of the dataset

     @property
-    def _fullname(self) -> str:
+    def fullname(self) -> str:
         """Return the model's class name."""
         return self.__class__.__name__
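
The recurring change in this commit is the promotion of the private `_fullname` property to a public `fullname`; every call site below is updated accordingly. A minimal sketch of the pattern (the classes here are hypothetical stand-ins, not ATOM's real ones):

```python
# Hypothetical stand-ins illustrating the renamed property.
class BaseModel:
    @property
    def fullname(self) -> str:
        """Return the model's class name."""
        return self.__class__.__name__


class RandomForest(BaseModel):
    pass


print(RandomForest().fullname)  # -> "RandomForest"
```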

@@ -1010,17 +1010,17 @@ def fit_model(
             estimator = results[0][0]
             score = list(np.mean(scores := [r[1] for r in results], axis=0))

-                if len(results) > 1:
-                    # Report cv scores for termination judgment
-                    report_cross_validation_scores(trial, scores)
+            if len(results) > 1:
+                # Report cv scores for termination judgment
+                report_cross_validation_scores(trial, scores)

             trial.set_user_attr("estimator", estimator)

             return score

         # Running hyperparameter tuning ============================ >>

-        self._log(f"Running hyperparameter tuning for {self._fullname}...", 1)
+        self._log(f"Running hyperparameter tuning for {self.fullname}...", 1)

         # Check the validity of the provided parameters
         self._check_est_params()
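
For context, `report_cross_validation_scores` comes from Optuna's terminator module and requires more than one fold score per trial, which fits the `len(results) > 1` guard above. A self-contained sketch of that API (assuming optuna>=3.2 and scikit-learn; this is not ATOM's actual tuning loop):

```python
import optuna
from optuna.terminator import TerminatorCallback, report_cross_validation_scores
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=200, random_state=0)


def objective(trial: optuna.Trial) -> float:
    clf = RandomForestClassifier(
        max_depth=trial.suggest_int("max_depth", 2, 16), random_state=0
    )
    scores = cross_val_score(clf, X, y, cv=5)
    # The terminator estimates score noise from the per-fold scores,
    # so it needs at least two of them per trial.
    report_cross_validation_scores(trial, list(scores))
    return scores.mean()


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20, callbacks=[TerminatorCallback()])
```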
@@ -1041,7 +1041,7 @@ def fit_model(
                 raise ValueError(
                     "Invalid value for the distributions parameter. "
                     f"Parameter {n} is not a predefined hyperparameter "
-                    f"of the {self._fullname} model. See the model's "
+                    f"of the {self.fullname} model. See the model's "
                     "documentation for an overview of the available "
                     "hyperparameters and their distributions."
                 )
@@ -1091,7 +1091,7 @@ def fit_model(
         # Initialize live study plot
         if self._ht.get("plot", False) and n_jobs == 1:
             plot_callback = PlotCallback(
-                name=self._fullname,
+                name=self.fullname,
                 metric=self._metric.keys(),
                 aesthetics=self.aesthetics,
             )
@@ -1159,7 +1159,7 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
         self.clear()  # Reset model's state

         if self._study is None:
-            self._log(f"Results for {self._fullname}:", 1)
+            self._log(f"Results for {self.fullname}:", 1)
             self._log(f"Fit {'-' * 45}", 1)

         # Assign estimator if not done already
@@ -1193,7 +1193,7 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
             mlflow.set_tags(
                 {
                     "name": self.name,
-                    "model": self._fullname,
+                    "model": self.fullname,
                     "branch": self.branch.name,
                     **self._ht["tags"],
                 }
@@ -1213,17 +1213,17 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
                 for step in range(len(value)):
                     mlflow.log_metric(f"evals_{key}", value[step], step=step)

-            # The Rest of the metrics are tracked when calling _get_score
-            mlflow.log_metric("time_fit", self.time_fit)
+            # The rest of the metrics are tracked when calling _get_score
+            mlflow.log_metric("time_fit", self._time_fit)

             mlflow.sklearn.log_model(
                 sk_model=self.estimator,
                 artifact_path=self._est_class.__name__,
                 signature=infer_signature(
                     model_input=pd.DataFrame(self.X),
-                    model_output=self.estimator.predict(self.test.iloc[0]),
+                    model_output=self.estimator.predict(self.X_test.iloc[[0]]),
                 ),
-                input_example=pd.DataFrame(self.X.iloc[[0], :]),
+                input_example=pd.DataFrame(self.X.iloc[[0]]),
             )

             if self.log_data:
@@ -1239,9 +1239,9 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
                     artifact_path=f"{self._est_class.__name__}_pipeline",
                     signature=infer_signature(
                         model_input=pd.DataFrame(self.X),
-                        model_output=self.estimator.predict(self.test.iloc[0]),
+                        model_output=self.estimator.predict(self.X_test.iloc[[0]]),
                     ),
-                    input_example=pd.DataFrame(self.X.iloc[[0], :]),
+                    input_example=pd.DataFrame(self.X.iloc[[0]]),
                 )

     @composed(crash, method_to_log)
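
The `infer_signature` fixes above share one root cause: `DataFrame.iloc[0]` returns a 1-D `Series`, while `iloc[[0]]` keeps a 2-D one-row `DataFrame`, which is what `estimator.predict` expects. Switching `self.test` to `self.X_test` additionally drops the target column, so the model only sees features. A quick pandas illustration:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

row = df.iloc[0]    # Series, shape (2,): loses the 2-D table structure
one = df.iloc[[0]]  # DataFrame, shape (1, 2): safe input for predict
print(row.shape, one.shape)  # (2,) (1, 2)
```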
@@ -1303,7 +1303,7 @@ def bootstrapping(self, n_bootstrap: Int, reset: Bool = False):
             self._log(f"Evaluation --> {' '.join(out)}", 1)

         self._time_bootstrap += (dt.now() - t_init).total_seconds()
-        self._log(f"Time elapsed: {time_to_str(self.time_bootstrap)}", 1)
+        self._log(f"Time elapsed: {time_to_str(self._time_bootstrap)}", 1)

     # Utility properties =========================================== >>

@@ -1524,7 +1524,7 @@ def results(self) -> pd.Series:
                 data[f"{met.name}_{ds}"] = self._get_score(met, ds)
         data["time_fit"] = self._time_fit
         if self._bootstrap is not None:
-            for met in self._metic.keys():
+            for met in self._metric.keys():
                 data[f"{met}_bootstrap"] = self.bootstrap[met].mean()
             data["time_bootstrap"] = self._time_bootstrap
         data["time"] = data.get("time_ht", 0) + self._time_fit + self._time_bootstrap
@@ -2244,7 +2244,7 @@ def register(

         model = mlflow.register_model(
             model_uri=f"runs:/{self.run.info.run_id}/{self._est_class.__name__}",
-            name=name or self._fullname,
+            name=name or self.fullname,
             tags=self._ht["tags"] or None,
         )

@@ -2274,7 +2274,7 @@ def save_estimator(self, filename: str = "auto"):
         with open(filename, "wb") as f:
             pickle.dump(self.estimator, f)

-        self._log(f"{self._fullname} estimator successfully saved.", 1)
+        self._log(f"{self.fullname} estimator successfully saved.", 1)

     @composed(crash, method_to_log)
     def serve(self, method: str = "predict", host: str = "127.0.0.1", port: Int = 8000):
@@ -2346,7 +2346,7 @@ async def __call__(self, request: Request) -> str:
         server = ServeModel.bind(model=self.export_pipeline(verbose=0), method=method)
         serve.run(server, host=host, port=port)

-        self._log(f"Serving model {self._fullname} on {host}:{port}...", 1)
+        self._log(f"Serving model {self.fullname} on {host}:{port}...", 1)

     @composed(crash, method_to_log)
     def transform(
@@ -2488,7 +2488,7 @@ def assign_prediction_columns() -> list[str]:
             X, y = X

         if method != "score":
-            pred = self.memory.cache(getattr(self.estimator, method))(X)
+            pred = np.array(self.memory.cache(getattr(self.estimator, method))(X))

             if np.array(pred).ndim < 3:
                 data = to_pandas(
@@ -2497,19 +2497,19 @@ def assign_prediction_columns() -> list[str]:
                     name=self.target,
                     columns=assign_prediction_columns(),
                 )
-            elif self.task.startswith("multilabel"):
+            elif self.task is Task.multilabel_classification:
                 # Convert to (n_samples, n_targets)
                 data = bk.DataFrame(
                     data=np.array([d[:, 1] for d in pred]).T,
                     index=X.index,
                     columns=assign_prediction_columns(),
                 )
             else:
                 # Convert to (n_samples * n_classes, n_targets)
                 data = bk.DataFrame(
                     data=pred.reshape(-1, pred.shape[2]),
                     index=bk.MultiIndex.from_tuples(
-                        [(col, idx) for col in np.unique(self.y) for idx in X]
+                        [(col, idx) for col in np.unique(self.y) for idx in X.index]
                     ),
                     columns=assign_prediction_columns(),
                 )
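
The index fix in the last hunk matters because iterating a DataFrame yields its column labels, so `for idx in X` built (class, column) tuples; `X.index` yields the row labels the MultiIndex needs. A small pandas sketch:

```python
import numpy as np
import pandas as pd

X = pd.DataFrame({"f1": [0.1, 0.2], "f2": [1.0, 2.0]}, index=[10, 11])
classes = np.unique([0, 1, 0])

print(list(X))        # ['f1', 'f2'] -> column labels (the old, buggy tuples)
print(list(X.index))  # [10, 11]     -> row labels (the fix)

idx = pd.MultiIndex.from_tuples([(c, i) for c in classes for i in X.index])
print(idx.tolist())   # [(0, 10), (0, 11), (1, 10), (1, 11)]
```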
6 changes: 3 additions & 3 deletions atom/baserunner.py
@@ -177,7 +177,7 @@ def winners(self) -> list[Model] | None:
         """
         if self._models:  # Returns None if not fitted
             return sorted(
-                self._models, key=lambda x: (x._best_score(), x.time_fit), reverse=True
+                self._models, key=lambda x: (x._best_score(), x._time_fit), reverse=True
             )

     @property
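
For reference, Python compares tuple sort keys element-wise, so `_best_score()` decides the order and `_time_fit` only breaks ties; `reverse=True` flips both components. A tiny sketch:

```python
# (name, score, fit time): equal scores fall back to the time component.
models = [("A", 0.9, 10.0), ("B", 0.9, 2.0), ("C", 0.8, 1.0)]
ranked = sorted(models, key=lambda m: (m[1], m[2]), reverse=True)
print([m[0] for m in ranked])  # ['A', 'B', 'C']
```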
@@ -402,7 +402,7 @@ def available_models(self) -> pd.DataFrame:
             rows.append(
                 {
                     "acronym": m.acronym,
-                    "model": m._fullname,
+                    "model": m.fullname,
                     "estimator": m._est_class.__name__,
                     "module": m._est_class.__module__.split(".")[0] + m._module,
                     "needs_scaling": m.needs_scaling,
@@ -790,7 +790,7 @@ def stacking(
         if self._goal not in model._estimators:
             raise ValueError(
                 "Invalid value for the final_estimator parameter. Model "
-                f"{model._fullname} can not perform {self.task} tasks."
+                f"{model.fullname} can not perform {self.task} tasks."
             )

         kwargs["final_estimator"] = model._get_est()
10 changes: 5 additions & 5 deletions atom/basetrainer.py
@@ -417,15 +417,15 @@ def execute_model(m: Model) -> Model | None:

         maxlen = 0
         names, scores = [], []
-        for model in self._models:
+        for m in self._models:
             # Add the model name for repeated model classes
-            if len(list(filter(lambda x: x.acronym == model.acronym, self._models))) > 1:
-                names.append(f"{model._fullname} ({model.name})")
+            if len(list(filter(lambda x: x.acronym == m.acronym, self._models))) > 1:
+                names.append(f"{m.fullname} ({m.name})")
             else:
-                names.append(model._fullname)
+                names.append(m.fullname)

             try:
-                scores.append(model._best_score())
+                scores.append(m._best_score())
             except (ValueError, AttributeError):  # Fails when errors="keep"
                 scores.append(-np.inf)
23 changes: 10 additions & 13 deletions atom/feature_engineering.py
@@ -34,7 +34,6 @@
 from atom.basetransformer import BaseTransformer
 from atom.data_cleaning import Scaler, TransformerMixin
 from atom.models import MODELS
-from atom.plots import FeatureSelectionPlot
 from atom.utils.types import (
     Backend, Bool, DataFrame, Engine, Features, FeatureSelectionSolvers,
     FeatureSelectionStrats, FloatLargerEqualZero, FloatLargerZero,
@@ -776,17 +775,13 @@ def transform(self, X: Features, y: Target | None = None) -> DataFrame:
         return X


-class FeatureSelector(
-    BaseEstimator,
-    TransformerMixin,
-    FeatureSelectionPlot,
-):
+class FeatureSelector(BaseEstimator, TransformerMixin, BaseTransformer):
     """Reduce the number of features in the data.

     Apply feature selection or dimensionality reduction, either to
     improve the estimators' accuracy or to boost their performance on
     very high-dimensional datasets. Additionally, remove multicollinear
-    and low variance features.
+    and low-variance features.

     This class can be accessed from atom through the [feature_selection]
     [atomclassifier-feature_selection] method. Read more in the
@@ -810,10 +805,14 @@ class FeatureSelector(
     with sparse matrices.

     !!! tip
-        Use the [plot_feature_importance][] method to examine how much
-        a specific feature contributes to the final predictions. If the
-        model doesn't have a `feature_importances_` attribute, use
-        [plot_permutation_importance][] instead.
+        * Use the [plot_pca][] and [plot_components][] methods to
+          examine the results after using strategy="pca".
+        * Use the [plot_rfecv][] method to examine the results after
+          using strategy="rfecv".
+        * Use the [plot_feature_importance][] method to examine how
+          much a specific feature contributes to the final predictions.
+          If the model doesn't have a `feature_importances_` attribute,
+          use [plot_permutation_importance][] instead.

     Parameters
     ----------
@@ -1027,8 +1026,6 @@ class FeatureSelector(
     # Note that the column names changed
     print(atom.dataset)
-    atom.plot_pca()
     ```

 === "stand-alone"
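
The "stand-alone" tab is truncated in this view. As a rough illustration only (not the repository's own snippet; it assumes the documented `strategy` and `n_features` parameters), the class can also be used outside atom:

```python
from atom.feature_engineering import FeatureSelector
from sklearn.datasets import load_breast_cancer

X, y = load_breast_cancer(return_X_y=True, as_frame=True)

fs = FeatureSelector(strategy="pca", n_features=5, verbose=2)
X_pca = fs.fit_transform(X)
print(X_pca.shape)  # (569, 5)
```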
6 changes: 3 additions & 3 deletions atom/models/custom.py
@@ -29,15 +29,15 @@ def __init__(self, **kwargs):
         from atom.models import MODELS

         # If no name is provided, use the name of the class
-        name = self._fullname
+        name = self.fullname
         if len(n := list(filter(str.isupper, name))) >= 2 and n not in MODELS:
             name = "".join(n)

         self.acronym = getattr(est, "acronym", name)
         if not name.startswith(self.acronym):
             raise ValueError(
                 f"The name ({name}) and acronym ({self.acronym}) of model "
-                f"{self._fullname} do not match. The name should start with "
+                f"{self.fullname} do not match. The name should start with "
                 f"the model's acronym."
             )

@@ -49,7 +49,7 @@ def __init__(self, **kwargs):
         super().__init__(name=name, **kwargs)

     @property
-    def _fullname(self) -> str:
+    def fullname(self) -> str:
         """Return the estimator's class name."""
         return self._est_class.__name__

2 changes: 1 addition & 1 deletion atom/models/ensembles.py
@@ -110,7 +110,7 @@ def __init__(self, models: ClassMap, **kwargs):
                 raise ValueError(
                     "Invalid value for the voting parameter. If "
                     "'soft', all models in the ensemble should have "
-                    f"a predict_proba method, got {m._fullname}."
+                    f"a predict_proba method, got {m.fullname}."
                 )

     def _get_est(self, **params) -> Predictor:
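
The check mirrors scikit-learn's own soft-voting requirement: every base estimator must implement `predict_proba`. A sketch of the same constraint with plain scikit-learn (not ATOM's ensemble wrapper):

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

X, y = make_classification(random_state=0)

vote = VotingClassifier(
    estimators=[("lr", LogisticRegression()), ("svc", SVC())],
    voting="soft",
)
# SVC(probability=False) exposes no usable predict_proba, so soft
# voting fails with an AttributeError once probabilities are needed.
try:
    vote.fit(X, y).predict(X)
except AttributeError as e:
    print(e)
```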
10 changes: 1 addition & 9 deletions atom/plots/__init__.py
@@ -10,20 +10,12 @@
 from abc import ABC

 from atom.plots.dataplot import DataPlot
-from atom.plots.featureselectionplot import FeatureSelectionPlot
 from atom.plots.hyperparametertuningplot import HyperparameterTuningPlot
 from atom.plots.predictionplot import PredictionPlot
 from atom.plots.shapplot import ShapPlot


-class ATOMPlot(
-    FeatureSelectionPlot,
-    DataPlot,
-    HyperparameterTuningPlot,
-    PredictionPlot,
-    ShapPlot,
-    ABC,
-):
+class ATOMPlot(DataPlot, HyperparameterTuningPlot, PredictionPlot, ShapPlot, ABC):
     """Plot classes inherited by main ATOM classes."""
     pass
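
Flattening the base-class list onto one line is cosmetic; the method resolution order still follows the left-to-right base order, minus the removed FeatureSelectionPlot. A quick check:

```python
from abc import ABC

class DataPlot: ...
class HyperparameterTuningPlot: ...
class PredictionPlot: ...
class ShapPlot: ...

class ATOMPlot(DataPlot, HyperparameterTuningPlot, PredictionPlot, ShapPlot, ABC):
    pass

print([c.__name__ for c in ATOMPlot.__mro__])
# ['ATOMPlot', 'DataPlot', 'HyperparameterTuningPlot', 'PredictionPlot',
#  'ShapPlot', 'ABC', 'object']
```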
