2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:

strategy:
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
python-version: ["3.11", "3.12", "3.13", "3.14"]
fail-fast: true

steps:
2 changes: 1 addition & 1 deletion README.md
@@ -7,7 +7,7 @@ panelsplit is a Python package designed to facilitate time series cross-validati

## Installation

panelsplit is tested for compatibility with python versions >= 3.10. You can install panelsplit using pip:
panelsplit is tested for compatibility with python versions >= 3.11. You can install panelsplit using pip:

```bash
pip install panelsplit
189 changes: 152 additions & 37 deletions panelsplit/metrics.py
@@ -1,37 +1,42 @@
from .utils.validation import _safe_indexing
"""
Metrics that are equivalent to their sklearn counterparts, except that they work with SequentialCVPipeline.
"""

# Standard library
import warnings
from inspect import signature
from collections.abc import Iterable
from functools import partial
from sklearn.metrics._scorer import _MultimetricScorer
from sklearn.utils._param_validation import (
validate_params,
)
from sklearn.metrics._scorer import _PassthroughScorer, _get_response_method_name
from copy import deepcopy
from sklearn.utils.validation import _check_response_method
import warnings
from sklearn.base import is_regressor
from panelsplit.utils._response import _get_response_values
from sklearn.utils.metadata_routing import (
_MetadataRequester,
_raise_for_params,
_routing_enabled,
MetadataRequest,
)
from .utils.typing import EstimatorLike, ArrayLike
from numpy.typing import NDArray
from typing import Callable, Optional, List, Union, Any, Dict

# Third-party / typing
from typing_extensions import Self
from numpy.typing import NDArray

# all the error scores:
# Local package utilities
from .utils.validation import _safe_indexing
from .utils.typing import EstimatorLike, ArrayLike
from panelsplit.utils._response import _get_response_values

# sklearn public metrics (single consolidated import)
from sklearn.metrics import (
accuracy_score,
adjusted_mutual_info_score,
adjusted_rand_score,
average_precision_score,
balanced_accuracy_score,
brier_score_loss,
class_likelihood_ratios,
completeness_score,
d2_absolute_error_score,
d2_brier_score,
d2_log_loss_score,
explained_variance_score,
f1_score,
fowlkes_mallows_score,
jaccard_score,
homogeneity_score,
log_loss,
matthews_corrcoef,
max_error,
@@ -42,24 +47,37 @@
mean_squared_error,
mean_squared_log_error,
median_absolute_error,
mutual_info_score,
normalized_mutual_info_score,
precision_score,
rand_score,
r2_score,
recall_score,
roc_auc_score,
root_mean_squared_error,
root_mean_squared_log_error,
top_k_accuracy_score,
)
from sklearn.metrics.cluster import (
adjusted_mutual_info_score,
adjusted_rand_score,
completeness_score,
fowlkes_mallows_score,
homogeneity_score,
mutual_info_score,
normalized_mutual_info_score,
rand_score,
v_measure_score,
)

# sklearn internals / utilities (note: private APIs)
from sklearn.metrics._scorer import (
_MultimetricScorer,
_PassthroughScorer,
_get_response_method_name,
)
from sklearn.utils._param_validation import validate_params
from sklearn.utils.validation import _check_response_method
from sklearn.base import is_regressor

# metadata routing utilities (used by some sklearn internals)
from sklearn.utils.metadata_routing import (
_MetadataRequester,
_raise_for_params,
_routing_enabled,
MetadataRequest,
)


def _get_idx_from_last_cv(estimator: EstimatorLike) -> Union[None, List[NDArray]]:
"""
@@ -88,14 +106,63 @@ def make_SequentialCV_scorer(
greater_is_better: bool = True,
**kwargs: Any,
) -> Callable[..., float]:
"""
Make a SequentialCVPipeline-compatible scorer from a performance metric.

A scorer is a wrapper around an arbitrary metric or loss function; the wrapper is called
with the signature `scorer(estimator, X, y_true, **kwargs)`.

The parameter `response_method` allows specifying which method of the estimator
should be used to feed the scoring/loss function.

Parameters
----------
score_func : callable
Score function (or loss function) with signature
``score_func(y, y_pred, **kwargs)``.

response_method : {"predict_proba", "decision_function", "predict"} or \
list/tuple of such str, default="predict"

Specifies the response method used to get predictions from an estimator
(i.e. :term:`predict_proba`, :term:`decision_function` or
:term:`predict`). Possible choices are:

- if `str`, it corresponds to the name of the method to use;
- if a list or tuple of `str`, it provides the method names in order of
preference. The method used corresponds to the first method in
the list that is implemented by `estimator`.

greater_is_better : bool, default=True
Whether `score_func` is a score function (default), meaning high is
good, or a loss function, meaning low is good. In the latter case, the
scorer object will sign-flip the outcome of the `score_func`.

**kwargs : additional arguments
Additional parameters to be passed to `score_func`.

Returns
-------
Callable
Callable object that returns a scalar score; greater is better.

Examples
--------
>>> from panelsplit.metrics import make_SequentialCV_scorer
>>> from sklearn.metrics import brier_score_loss
>>> brier_loss_scorer = make_SequentialCV_scorer(brier_score_loss, response_method='predict_proba', greater_is_better=False)

>>> from panelsplit.pipeline import SequentialCVPipeline
>>> from sklearn.ensemble import RandomForestClassifier
>>> from sklearn.datasets import load_iris
>>> X, y = load_iris(return_X_y=True)
>>> p = SequentialCVPipeline(steps=[('rf', RandomForestClassifier())], cv_steps=[None])
>>> _ = p.fit(X, y)
>>> score = brier_loss_scorer(p, X, y)
"""
sign = 1 if greater_is_better else -1

if response_method is None:
warnings.warn(
"response_method=None is deprecated in version 1.6 and will be removed "
"in version 1.8. Leave it to its default value to avoid this warning.",
FutureWarning,
)
response_method = "predict"
elif response_method == "default":
response_method = "predict"
@@ -158,7 +225,6 @@ def __init__(
self._sign = sign
self._kwargs = kwargs
self._response_method = response_method
# TODO (1.8): remove in 1.8 (scoring="max_error" has been deprecated in 1.6)
self._deprecation_msg = None

def _get_pos_label(self) -> Optional[Any]:
@@ -170,7 +236,6 @@ def _get_pos_label(self) -> Optional[Any]:
return None

def _accept_sample_weight(self) -> bool:
# TODO(slep006): remove when metadata routing is the only way
return "sample_weight" in signature(self._score_func).parameters

def __repr__(self) -> str:
@@ -217,7 +282,6 @@ def __call__(
float
Score function applied to prediction of estimator on X.
"""
# TODO (1.8): remove in 1.8 (scoring="max_error" has been deprecated in 1.6)
if self._deprecation_msg is not None:
warnings.warn(
self._deprecation_msg, category=DeprecationWarning, stacklevel=2
@@ -314,6 +378,7 @@ def _score(
X,
pos_label=pos_label,
)

# make lookup dict for fast matching
pred_dict = dict(zip(idx, y_pred))

Expand All @@ -340,6 +405,36 @@ def _score(
prefer_skip_nested_validation=True,
)
def get_scorer(scoring: Union[str, Callable]) -> Any:
"""
Get a scorer from a string.

`sklearn.metrics.get_scorer_names` can be used to retrieve the names
of all available scorers.

Parameters
----------
scoring : str, callable or None
Scoring method as string. If callable it is returned as is.
If None, returns None.

Returns
-------
callable
The scorer.

Notes
-----
When passed a string, this function always returns a copy of the scorer
object. Calling `get_scorer` twice for the same scorer results in two
separate scorer objects.

Examples
--------
>>> from panelsplit.metrics import get_scorer
>>> accuracy = get_scorer("accuracy")
>>> score = accuracy(classifier, X, y)  # assumes a fitted `classifier` and data `X`, `y`
"""

if isinstance(scoring, str):
try:
scorer = deepcopy(_SCORERS[scoring])
@@ -489,7 +584,11 @@ def _check_multimetric_scoring(estimator: EstimatorLike, scoring: Iterable) -> A
neg_mean_gamma_deviance_scorer = make_SequentialCV_scorer(
mean_gamma_deviance, greater_is_better=False
)
# D^2 scorers: deviance-based analogues of R^2 (absolute error, Brier score, log loss)
d2_absolute_error_scorer = make_SequentialCV_scorer(d2_absolute_error_score)
d2_brier_scorer = make_SequentialCV_scorer(d2_brier_score, response_method="predict_proba")
d2_log_loss_scorer = make_SequentialCV_scorer(d2_log_loss_score, response_method="predict_proba")


# Standard Classification Scores
accuracy_scorer = make_SequentialCV_scorer(accuracy_score)
@@ -583,6 +682,8 @@ def negative_likelihood_ratio(y_true: NDArray, y_pred: NDArray) -> float:
neg_mean_poisson_deviance=neg_mean_poisson_deviance_scorer,
neg_mean_gamma_deviance=neg_mean_gamma_deviance_scorer,
d2_absolute_error_score=d2_absolute_error_scorer,
d2_brier_score=d2_brier_scorer,
d2_log_loss_score=d2_log_loss_scorer,
accuracy=accuracy_scorer,
top_k_accuracy=top_k_accuracy_scorer,
roc_auc=roc_auc_scorer,
Expand All @@ -607,3 +708,17 @@ def negative_likelihood_ratio(y_true: NDArray, y_pred: NDArray) -> float:
normalized_mutual_info_score=normalized_mutual_info_scorer,
fowlkes_mallows_score=fowlkes_mallows_scorer,
)


for name, metric in [
("precision", precision_score),
("recall", recall_score),
("f1", f1_score),
("jaccard", jaccard_score),
]:
_SCORERS[name] = make_SequentialCV_scorer(metric, average="binary")
for average in ["macro", "micro", "samples", "weighted"]:
qualified_name = "{0}_{1}".format(name, average)
_SCORERS[qualified_name] = make_SequentialCV_scorer(
metric, pos_label=None, average=average
)
42 changes: 34 additions & 8 deletions panelsplit/model_selection/model_selection.py
@@ -970,8 +970,8 @@ class GridSearch(BaseSearch):

If `scoring` represents a single score, one can use:

- a single string (see :ref:`scoring_string_names`);
- a callable (see :ref:`scoring_callable`) that returns a single value;
- a single string (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-string-names);
- a callable (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-callable) that returns a single value;
- `None`, the `estimator`'s default evaluation criterion is used.

If `scoring` represents multiple scores, one can use:
@@ -981,7 +981,7 @@ class GridSearch(BaseSearch):
names and the values are the metric scores;
- a dictionary with metric names as keys and callables as values.

See :ref:`multimetric_grid_search` for an example.
See https://scikit-learn.org/stable/modules/grid_search.html#multimetric-grid-search for an example.

n_jobs : int, default=None
Number of jobs to run in parallel.
@@ -1057,6 +1057,19 @@ class GridSearch(BaseSearch):
A dict with keys as column headers and values as columns, that can be
imported into a pandas ``DataFrame``.

For an example of visualization and interpretation of GridSearch results,
see https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_stats.html#sphx-glr-auto-examples-model-selection-plot-grid-search-stats-py.

NOTE

The key ``'params'`` is used to store a list of parameter
settings dicts for all the parameter candidates.

For multi-metric evaluation, the scores for all the scorers are
available in the ``cv_results_`` dict at the keys ending with that
scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown
above. ('split0_test_precision', 'mean_train_precision' etc.)

best_estimator_ : estimator
Estimator that was chosen by the search, i.e. estimator
which gave highest score (or smallest loss if specified)
@@ -1157,11 +1170,11 @@ class GridSearch(BaseSearch):
GridSearch(estimator=SVC(),
param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')})
>>> sorted(clf.cv_results_.keys())
['mean_fit_time', 'mean_score_time', 'mean_test_score',...
['mean_test_score',...
'param_C', 'param_kernel', 'params',...
'rank_test_score', 'split0_test_score',...
'split2_test_score', ...
'std_fit_time', 'std_score_time', 'std_test_score']
'std_test_score']
"""

_parameter_constraints: dict = {
@@ -1251,8 +1264,8 @@ class RandomizedSearch(BaseSearch):

If `scoring` represents a single score, one can use:

- a single string (see :ref:`scoring_string_names`);
- a callable (see :ref:`scoring_callable`) that returns a single value;
- a single string (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-string-names);
- a callable (see https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-callable) that returns a single value;
- `None`, the `estimator`'s default evaluation criterion is used.

If `scoring` represents multiple scores, one can use:
@@ -1262,7 +1275,7 @@ class RandomizedSearch(BaseSearch):
names and the values are the metric scores;
- a dictionary with metric names as keys and callables as values.

See :ref:`multimetric_grid_search` for an example.
See https://scikit-learn.org/stable/modules/grid_search.html#multimetric-grid-search for an example.

If None, the estimator's score method is used.

@@ -1347,6 +1360,19 @@ class RandomizedSearch(BaseSearch):
A dict with keys as column headers and values as columns, that can be
imported into a pandas ``DataFrame``.

For an example of analysing ``cv_results_``,
see https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_stats.html#sphx-glr-auto-examples-model-selection-plot-grid-search-stats-py.

NOTE

The key ``'params'`` is used to store a list of parameter
settings dicts for all the parameter candidates.

For multi-metric evaluation, the scores for all the scorers are
available in the ``cv_results_`` dict at the keys ending with that
scorer's name (``'_<scorer_name>'``) instead of ``'_score'`` shown
above. ('split0_test_precision', 'mean_train_precision' etc.)

best_estimator_ : estimator
Estimator that was chosen by the search, i.e. estimator
which gave highest score (or smallest loss if specified)
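The multi-metric behaviour documented for both search classes is easiest to see in code. The sketch below is hedged: the `GridSearch` import path and the `refit` argument are assumptions not shown in this diff, while the dict-of-callables `scoring` form and the `'_<scorer_name>'` key suffix come straight from the docstrings above.

```python
# Hedged sketch of multi-metric search. Per the docstring, per-scorer keys in
# cv_results_ end with '_<scorer_name>' (e.g. 'split0_test_precision').
from sklearn.datasets import load_iris
from sklearn.svm import SVC

from panelsplit.metrics import get_scorer
from panelsplit.model_selection import GridSearch  # assumed import path

X, y = load_iris(return_X_y=True)
search = GridSearch(
    estimator=SVC(),
    param_grid={"C": [1, 10], "kernel": ("linear", "rbf")},
    scoring={"precision": get_scorer("precision_macro"),
             "recall": get_scorer("recall_macro")},
    refit="precision",  # assumption: mirrors sklearn, where multi-metric search needs an explicit refit metric
)
search.fit(X, y)
print(sorted(k for k in search.cv_results_ if k.endswith("_precision")))
```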