Skip to content

Commit

Permalink
fix tests all plots
Browse files Browse the repository at this point in the history
  • Loading branch information
tvdboom committed Oct 13, 2023
1 parent 2023ab2 commit 77561a3
Show file tree
Hide file tree
Showing 20 changed files with 768 additions and 897 deletions.
2 changes: 1 addition & 1 deletion atom/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ def automl(self, **kwargs):

self._models.append(model)
self._log(
f" --> Adding model {model._fullname} "
f" --> Adding model {model.fullname} "
f"({model.name}) to the pipeline...", 2
)
break # Avoid non-linear pipelines
Expand Down
48 changes: 24 additions & 24 deletions atom/basemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def __getitem__(self, item: Int | str | list) -> Pandas:
return self.dataset[item] # Get a subset of the dataset

@property
def _fullname(self) -> str:
def fullname(self) -> str:
"""Return the model's class name."""
return self.__class__.__name__

Expand Down Expand Up @@ -1010,17 +1010,17 @@ def fit_model(
estimator = results[0][0]
score = list(np.mean(scores := [r[1] for r in results], axis=0))

if len(results) > 1:
# Report cv scores for termination judgment
report_cross_validation_scores(trial, scores)
if len(results) > 1:
# Report cv scores for termination judgment
report_cross_validation_scores(trial, scores)

trial.set_user_attr("estimator", estimator)

return score

Check warning on line 1019 in atom/basemodel.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Unbound local variables

Local variable 'score' might be referenced before assignment

# Running hyperparameter tuning ============================ >>

self._log(f"Running hyperparameter tuning for {self._fullname}...", 1)
self._log(f"Running hyperparameter tuning for {self.fullname}...", 1)

# Check the validity of the provided parameters
self._check_est_params()
Expand All @@ -1041,7 +1041,7 @@ def fit_model(
raise ValueError(
"Invalid value for the distributions parameter. "
f"Parameter {n} is not a predefined hyperparameter "
f"of the {self._fullname} model. See the model's "
f"of the {self.fullname} model. See the model's "
"documentation for an overview of the available "
"hyperparameters and their distributions."
)
Expand Down Expand Up @@ -1091,7 +1091,7 @@ def fit_model(
# Initialize live study plot
if self._ht.get("plot", False) and n_jobs == 1:
plot_callback = PlotCallback(
name=self._fullname,
name=self.fullname,
metric=self._metric.keys(),
aesthetics=self.aesthetics,
)
Expand Down Expand Up @@ -1159,7 +1159,7 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
self.clear() # Reset model's state

if self._study is None:
self._log(f"Results for {self._fullname}:", 1)
self._log(f"Results for {self.fullname}:", 1)
self._log(f"Fit {'-' * 45}", 1)

# Assign estimator if not done already
Expand Down Expand Up @@ -1193,7 +1193,7 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
mlflow.set_tags(
{
"name": self.name,
"model": self._fullname,
"model": self.fullname,
"branch": self.branch.name,
**self._ht["tags"],
}
Expand All @@ -1213,17 +1213,17 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
for step in range(len(value)):
mlflow.log_metric(f"evals_{key}", value[step], step=step)

# The Rest of the metrics are tracked when calling _get_score
mlflow.log_metric("time_fit", self.time_fit)
# The rest of the metrics are tracked when calling _get_score
mlflow.log_metric("time_fit", self._time_fit)

mlflow.sklearn.log_model(
sk_model=self.estimator,
artifact_path=self._est_class.__name__,
signature=infer_signature(
model_input=pd.DataFrame(self.X),
model_output=self.estimator.predict(self.test.iloc[0]),
model_output=self.estimator.predict(self.X_test.iloc[[0]]),
),
input_example=pd.DataFrame(self.X.iloc[[0], :]),
input_example=pd.DataFrame(self.X.iloc[[0]]),
)

if self.log_data:
Expand All @@ -1239,9 +1239,9 @@ def fit(self, X: DataFrame | None = None, y: Pandas | None = None):
artifact_path=f"{self._est_class.__name__}_pipeline",
signature=infer_signature(
model_input=pd.DataFrame(self.X),
model_output=self.estimator.predict(self.test.iloc[0]),
model_output=self.estimator.predict(self.X_test.iloc[[0]]),
),
input_example=pd.DataFrame(self.X.iloc[[0], :]),
input_example=pd.DataFrame(self.X.iloc[[0]]),
)

@composed(crash, method_to_log)
Expand Down Expand Up @@ -1303,7 +1303,7 @@ def bootstrapping(self, n_bootstrap: Int, reset: Bool = False):
self._log(f"Evaluation --> {' '.join(out)}", 1)

self._time_bootstrap += (dt.now() - t_init).total_seconds()
self._log(f"Time elapsed: {time_to_str(self.time_bootstrap)}", 1)
self._log(f"Time elapsed: {time_to_str(self._time_bootstrap)}", 1)

# Utility properties =========================================== >>

Expand Down Expand Up @@ -1524,7 +1524,7 @@ def results(self) -> pd.Series:
data[f"{met.name}_{ds}"] = self._get_score(met, ds)
data["time_fit"] = self._time_fit
if self._bootstrap is not None:
for met in self._metic.keys():
for met in self._metric.keys():
data[f"{met}_bootstrap"] = self.bootstrap[met].mean()
data["time_bootstrap"] = self._time_bootstrap
data["time"] = data.get("time_ht", 0) + self._time_fit + self._time_bootstrap
Expand Down Expand Up @@ -2244,7 +2244,7 @@ def register(

model = mlflow.register_model(
model_uri=f"runs:/{self.run.info.run_id}/{self._est_class.__name__}",
name=name or self._fullname,
name=name or self.fullname,
tags=self._ht["tags"] or None,
)

Expand Down Expand Up @@ -2274,7 +2274,7 @@ def save_estimator(self, filename: str = "auto"):
with open(filename, "wb") as f:
pickle.dump(self.estimator, f)

self._log(f"{self._fullname} estimator successfully saved.", 1)
self._log(f"{self.fullname} estimator successfully saved.", 1)

@composed(crash, method_to_log)
def serve(self, method: str = "predict", host: str = "127.0.0.1", port: Int = 8000):
Expand Down Expand Up @@ -2346,7 +2346,7 @@ async def __call__(self, request: Request) -> str:
server = ServeModel.bind(model=self.export_pipeline(verbose=0), method=method)
serve.run(server, host=host, port=port)

self._log(f"Serving model {self._fullname} on {host}:{port}...", 1)
self._log(f"Serving model {self.fullname} on {host}:{port}...", 1)

@composed(crash, method_to_log)
def transform(
Expand Down Expand Up @@ -2488,7 +2488,7 @@ def assign_prediction_columns() -> list[str]:
X, y = X

Check notice on line 2488 in atom/basemodel.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

PEP 8 naming convention violation

Variable in function should be lowercase

if method != "score":
pred = self.memory.cache(getattr(self.estimator, method))(X)
pred = np.array(self.memory.cache(getattr(self.estimator, method))(X))

if np.array(pred).ndim < 3:
data = to_pandas(
Expand All @@ -2497,19 +2497,19 @@ def assign_prediction_columns() -> list[str]:
name=self.target,
columns=assign_prediction_columns(),
)
elif self.task.startswith("multilabel"):
elif self.task is Task.multilabel_classification:
# Convert to (n_samples, n_targets)
data = bk.DataFrame(
data=np.array([d[:, 1] for d in pred]).T,
index=X.index,
columns=assign_prediction_columns(),
)
else:
# Convert to (n_samples * n_classes, n_targets)
# Convert to (n_samples * n_classes, n_targets)
data = bk.DataFrame(
data=pred.reshape(-1, pred.shape[2]),
index=bk.MultiIndex.from_tuples(
[(col, idx) for col in np.unique(self.y) for idx in X]
[(col, idx) for col in np.unique(self.y) for idx in X.index]
),
columns=assign_prediction_columns(),
)
Expand Down
6 changes: 3 additions & 3 deletions atom/baserunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def winners(self) -> list[Model] | None:
"""
if self._models: # Returns None if not fitted
return sorted(
self._models, key=lambda x: (x._best_score(), x.time_fit), reverse=True
self._models, key=lambda x: (x._best_score(), x._time_fit), reverse=True

Check notice on line 180 in atom/baserunner.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _time_fit of a class

Check notice on line 180 in atom/baserunner.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _best_score of a class
)

@property
Expand Down Expand Up @@ -402,7 +402,7 @@ def available_models(self) -> pd.DataFrame:
rows.append(
{
"acronym": m.acronym,
"model": m._fullname,
"model": m.fullname,
"estimator": m._est_class.__name__,

Check notice on line 406 in atom/baserunner.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _est_class of a class
"module": m._est_class.__module__.split(".")[0] + m._module,

Check notice on line 407 in atom/baserunner.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _est_class of a class

Check notice on line 407 in atom/baserunner.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _module of a class
"needs_scaling": m.needs_scaling,
Expand Down Expand Up @@ -790,7 +790,7 @@ def stacking(
if self._goal not in model._estimators:

Check notice on line 790 in atom/baserunner.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _estimators of a class
raise ValueError(
"Invalid value for the final_estimator parameter. Model "
f"{model._fullname} can not perform {self.task} tasks."
f"{model.fullname} can not perform {self.task} tasks."
)

kwargs["final_estimator"] = model._get_est()

Check notice on line 796 in atom/baserunner.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _get_est of a class
Expand Down
10 changes: 5 additions & 5 deletions atom/basetrainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,15 +417,15 @@ def execute_model(m: Model) -> Model | None:

maxlen = 0
names, scores = [], []
for model in self._models:
for m in self._models:
# Add the model name for repeated model classes
if len(list(filter(lambda x: x.acronym == model.acronym, self._models))) > 1:
names.append(f"{model._fullname} ({model.name})")
if len(list(filter(lambda x: x.acronym == m.acronym, self._models))) > 1:
names.append(f"{m.fullname} ({m.name})")
else:
names.append(model._fullname)
names.append(m.fullname)

try:
scores.append(model._best_score())
scores.append(m._best_score())

Check notice on line 428 in atom/basetrainer.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Accessing a protected member of a class or a module

Access to a protected member _best_score of a class
except (ValueError, AttributeError): # Fails when errors="keep"
scores.append(-np.inf)

Expand Down
23 changes: 10 additions & 13 deletions atom/feature_engineering.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
from atom.basetransformer import BaseTransformer
from atom.data_cleaning import Scaler, TransformerMixin
from atom.models import MODELS
from atom.plots import FeatureSelectionPlot
from atom.utils.types import (
Backend, Bool, DataFrame, Engine, Features, FeatureSelectionSolvers,
FeatureSelectionStrats, FloatLargerEqualZero, FloatLargerZero,
Expand Down Expand Up @@ -776,17 +775,13 @@ def transform(self, X: Features, y: Target | None = None) -> DataFrame:
return X


class FeatureSelector(
BaseEstimator,
TransformerMixin,
FeatureSelectionPlot,
):
class FeatureSelector(BaseEstimator, TransformerMixin, BaseTransformer):
"""Reduce the number of features in the data.
Apply feature selection or dimensionality reduction, either to
improve the estimators' accuracy or to boost their performance on
very high-dimensional datasets. Additionally, remove multicollinear
and low variance features.
and low-variance features.
This class can be accessed from atom through the [feature_selection]
[atomclassifier-feature_selection] method. Read more in the
Expand All @@ -810,10 +805,14 @@ class FeatureSelector(
with sparse matrices.
!!! tip
Use the [plot_feature_importance][] method to examine how much
a specific feature contributes to the final predictions. If the
model doesn't have a `feature_importances_` attribute, use
[plot_permutation_importance][] instead.
* Use the [plot_pca][] and [plot_components][] methods to
examine the results after using strategy="pca".
* Use the [plot_rfecv][] method to examine the results after
using strategy="rfecv".
* Use the [plot_feature_importance][] method to examine how
much a specific feature contributes to the final predictions.
If the model doesn't have a `feature_importances_` attribute,
use [plot_permutation_importance][] instead.
Parameters
----------
Expand Down Expand Up @@ -1027,8 +1026,6 @@ class FeatureSelector(
# Note that the column names changed
print(atom.dataset)
atom.plot_pca()
```
=== "stand-alone"
Expand Down
6 changes: 3 additions & 3 deletions atom/models/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ def __init__(self, **kwargs):
from atom.models import MODELS

# If no name is provided, use the name of the class
name = self._fullname
name = self.fullname
if len(n := list(filter(str.isupper, name))) >= 2 and n not in MODELS:
name = "".join(n)

self.acronym = getattr(est, "acronym", name)
if not name.startswith(self.acronym):
raise ValueError(
f"The name ({name}) and acronym ({self.acronym}) of model "
f"{self._fullname} do not match. The name should start with "
f"{self.fullname} do not match. The name should start with "
f"the model's acronym."
)

Expand All @@ -49,7 +49,7 @@ def __init__(self, **kwargs):
super().__init__(name=name, **kwargs)

@property
def _fullname(self) -> str:
def fullname(self) -> str:
"""Return the estimator's class name."""
return self._est_class.__name__

Expand Down
2 changes: 1 addition & 1 deletion atom/models/ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def __init__(self, models: ClassMap, **kwargs):
raise ValueError(
"Invalid value for the voting parameter. If "
"'soft', all models in the ensemble should have "
f"a predict_proba method, got {m._fullname}."
f"a predict_proba method, got {m.fullname}."
)

def _get_est(self, **params) -> Predictor:
Expand Down
10 changes: 1 addition & 9 deletions atom/plots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,12 @@
from abc import ABC

from atom.plots.dataplot import DataPlot
from atom.plots.featureselectionplot import FeatureSelectionPlot
from atom.plots.hyperparametertuningplot import HyperparameterTuningPlot
from atom.plots.predictionplot import PredictionPlot
from atom.plots.shapplot import ShapPlot


class ATOMPlot(
FeatureSelectionPlot,
DataPlot,
HyperparameterTuningPlot,
PredictionPlot,
ShapPlot,
ABC,
):
class ATOMPlot(DataPlot, HyperparameterTuningPlot, PredictionPlot, ShapPlot, ABC):

Check notice on line 18 in atom/plots/__init__.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Class must implement all abstract methods

Class ATOMPlot must implement all abstract methods
"""Plot classes inherited by main ATOM classes."""
pass

Expand Down
Loading

0 comments on commit 77561a3

Please sign in to comment.