diff --git a/src/enfobench/__version__.py b/src/enfobench/__version__.py
index 6526deb..a73339b 100644
--- a/src/enfobench/__version__.py
+++ b/src/enfobench/__version__.py
@@ -1 +1 @@
-__version__ = "0.0.7"
+__version__ = "0.0.8"
diff --git a/src/enfobench/evaluation/__init__.py b/src/enfobench/evaluation/__init__.py
index 84c037b..5299b2c 100644
--- a/src/enfobench/evaluation/__init__.py
+++ b/src/enfobench/evaluation/__init__.py
@@ -1,3 +1,4 @@
+from ._cross_validate import cross_validate
 from ._evaluate import (
     evaluate_metric_on_forecast,
     evaluate_metric_on_forecasts,
diff --git a/src/enfobench/evaluation/_cross_validate.py b/src/enfobench/evaluation/_cross_validate.py
index e83def5..89028ae 100644
--- a/src/enfobench/evaluation/_cross_validate.py
+++ b/src/enfobench/evaluation/_cross_validate.py
@@ -3,7 +3,8 @@
 import pandas as pd
 from tqdm import tqdm
 
-from enfobench import Model, ForecastClient
+from enfobench.evaluation.client import ForecastClient
+from enfobench.evaluation.protocols import Model
 from enfobench.utils import steps_in_horizon
 
 
@@ -75,7 +76,7 @@ def cross_validate(
     forecasts = []
     for cutoff in tqdm(cutoff_dates):
         # make sure that there is no data leakage
-        history = y.loc[y.ds <= cutoff, ["ds", 'y']]
+        history = y.loc[y.ds <= cutoff, ["ds", "y"]]
 
         forecast = model.predict(
             horizon_length,
@@ -83,7 +84,7 @@ def cross_validate(
             level=level,
         )
         forecast = forecast.fillna(0)
-        forecast['cutoff'] = cutoff
+        forecast["cutoff"] = cutoff
         forecasts.append(forecast)
 
     crossval_df = pd.concat(forecasts)
diff --git a/src/enfobench/evaluation/_evaluate.py b/src/enfobench/evaluation/_evaluate.py
index 1fa8cd5..8bf0c4e 100644
--- a/src/enfobench/evaluation/_evaluate.py
+++ b/src/enfobench/evaluation/_evaluate.py
@@ -18,13 +18,14 @@ def evaluate_metric_on_forecast(forecast: pd.DataFrame, metric: Callable) -> flo
 
         metric_value: Metric value.
     """
-    _nonempty_df = forecast.dropna(subset=['y'])
+    _nonempty_df = forecast.dropna(subset=["y"])
    metric_value = metric(_nonempty_df.y, _nonempty_df.yhat)
     return metric_value
 
 
-def evaluate_metrics_on_forecast(forecast: pd.DataFrame, metrics: dict[str, Callable]) -> dict[
-    str, float]:
+def evaluate_metrics_on_forecast(
+    forecast: pd.DataFrame, metrics: dict[str, Callable]
+) -> dict[str, float]:
     """Evaluate multiple metrics on a single forecast.
 
     Parameters:
@@ -63,14 +64,15 @@ def evaluate_metric_on_forecasts(forecasts: pd.DataFrame, metric: Callable) -> p
     """
     metrics = {
         cutoff: evaluate_metric_on_forecast(group_df, metric)
-        for cutoff, group_df in forecasts.groupby('cutoff')
+        for cutoff, group_df in forecasts.groupby("cutoff")
     }
-    metrics_df = pd.DataFrame.from_dict(metrics, orient='index', columns=['value'])
+    metrics_df = pd.DataFrame.from_dict(metrics, orient="index", columns=["value"])
     return metrics_df
 
 
-def evaluate_metrics_on_forecasts(forecasts: pd.DataFrame,
-                                  metrics: dict[str, Callable]) -> pd.DataFrame:
+def evaluate_metrics_on_forecasts(
+    forecasts: pd.DataFrame, metrics: dict[str, Callable]
+) -> pd.DataFrame:
     """Evaluate multiple metrics on a set of forecasts made at different cutoff points.
 
     Parameters:
@@ -86,7 +88,7 @@ def evaluate_metrics_on_forecasts(forecasts: pd.DataFrame,
         Metric values for each cutoff with their weight.
     """
     metric_dfs = [
-        evaluate_metric_on_forecasts(forecasts, metric_func).rename(columns={'value': metric_name})
+        evaluate_metric_on_forecasts(forecasts, metric_func).rename(columns={"value": metric_name})
         for metric_name, metric_func in metrics.items()
     ]
     metrics_df = pd.concat(metric_dfs, axis=1)
diff --git a/src/enfobench/evaluation/client.py b/src/enfobench/evaluation/client.py
index 523b35e..9dc426e 100644
--- a/src/enfobench/evaluation/client.py
+++ b/src/enfobench/evaluation/client.py
@@ -61,5 +61,5 @@ def predict(
         response.raise_for_status()
 
         df = pd.DataFrame.from_records(response.json()["forecast"])
-        df['ds'] = pd.to_datetime(df['ds'])
+        df["ds"] = pd.to_datetime(df["ds"])
         return df
diff --git a/src/enfobench/evaluation/metrics.py b/src/enfobench/evaluation/metrics.py
index 792e479..a24315d 100644
--- a/src/enfobench/evaluation/metrics.py
+++ b/src/enfobench/evaluation/metrics.py
@@ -1,7 +1,4 @@
-from typing import Callable
-
 import numpy as np
-import pandas as pd
 from numpy import ndarray
 
 
@@ -13,7 +10,7 @@ def check_not_empty(*arrays: ndarray) -> None:
     *arrays: list or tuple of input arrays.
         Objects that will be checked for emptiness.
     """
-    if any([X.size == 0 for X in arrays]):
+    if any(X.size == 0 for X in arrays):
         raise ValueError("Found empty array in inputs.")
 
 
@@ -27,15 +24,13 @@ def check_consistent_length(*arrays: ndarray) -> None:
     *arrays : list or tuple of input arrays.
         Objects that will be checked for consistent length.
     """
-    if any([X.ndim != 1 for X in arrays]):
+    if any(X.ndim != 1 for X in arrays):
         raise ValueError("Found multi dimensional array in inputs.")
 
     lengths = [len(X) for X in arrays]
     uniques = np.unique(lengths)
     if len(uniques) > 1:
-        raise ValueError(
-            f"Found input variables with inconsistent numbers of samples: {lengths}"
-        )
+        raise ValueError(f"Found input variables with inconsistent numbers of samples: {lengths}")
 
 
 def check_has_no_nan(*arrays: ndarray) -> None:
@@ -48,9 +43,7 @@ def check_has_no_nan(*arrays: ndarray) -> None:
     """
     for X in arrays:
         if np.isnan(X).any():
-            raise ValueError(
-                f"Found NaNs in input variables: {X}"
-            )
+            raise ValueError(f"Found NaNs in input variables: {X}")
 
 
 def check_arrays(*arrays: ndarray) -> None:
@@ -135,4 +128,4 @@ def mean_absolute_percentage_error(y_true: ndarray, y_pred: ndarray) -> float:
     check_arrays(y_true, y_pred)
     if np.any(y_true == 0):
         raise ValueError("Found zero in true values. MAPE is undefined.")
-    return float(100. * np.mean(np.abs((y_true - y_pred) / y_true)))
+    return float(100.0 * np.mean(np.abs((y_true - y_pred) / y_true)))
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 493ca05..42d0074 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -3,10 +3,10 @@
 
 from enfobench.evaluation.metrics import (
     mean_absolute_error,
+    mean_absolute_percentage_error,
     mean_bias_error,
-    root_mean_squared_error,
     mean_squared_error,
-    mean_absolute_percentage_error
+    root_mean_squared_error,
 )
 
 all_metrics = [
@@ -43,12 +43,12 @@ def test_metric_raises_with_empty_array(metric):
 
 
 def test_mean_absolute_error():
-    assert mean_absolute_error(np.array([1, 2, 3]), np.array([1, 2, 3])) == 0.
-    assert mean_absolute_error(np.array([1, 2, 3]), np.array([2, 3, 4])) == 1.
-    assert mean_absolute_error(np.array([1, 2, 3]), np.array([0, 1, 2])) == 1.
+    assert mean_absolute_error(np.array([1, 2, 3]), np.array([1, 2, 3])) == 0.0
+    assert mean_absolute_error(np.array([1, 2, 3]), np.array([2, 3, 4])) == 1.0
+    assert mean_absolute_error(np.array([1, 2, 3]), np.array([0, 1, 2])) == 1.0
 
 
 def test_mean_bias_error():
-    assert mean_bias_error(np.array([1, 2, 3]), np.array([1, 2, 3])) == 0.
-    assert mean_bias_error(np.array([1, 2, 3]), np.array([2, 3, 4])) == 1.
-    assert mean_bias_error(np.array([1, 2, 3]), np.array([0, 1, 2])) == -1.
+    assert mean_bias_error(np.array([1, 2, 3]), np.array([1, 2, 3])) == 0.0
+    assert mean_bias_error(np.array([1, 2, 3]), np.array([2, 3, 4])) == 1.0
+    assert mean_bias_error(np.array([1, 2, 3]), np.array([0, 1, 2])) == -1.0
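Usage note (not part of the patch): the hunks in src/enfobench/evaluation/_evaluate.py show that evaluate_metrics_on_forecasts takes a forecasts frame with y, yhat and cutoff columns plus a dict of metric callables, and tests/test_metrics.py shows the metric helpers living in enfobench.evaluation.metrics. A minimal sketch of how these pieces compose; the toy DataFrame and its values are purely illustrative assumptions.

import pandas as pd

from enfobench.evaluation._evaluate import evaluate_metrics_on_forecasts
from enfobench.evaluation.metrics import mean_absolute_error, mean_bias_error

# Illustrative cross-validation output: two cutoffs, each with observed values (y)
# and model predictions (yhat). Column names follow the hunks above; values are made up.
forecasts = pd.DataFrame(
    {
        "cutoff": pd.to_datetime(["2020-01-01"] * 3 + ["2020-01-02"] * 3),
        "y": [1.0, 2.0, 3.0, 2.0, 3.0, 4.0],
        "yhat": [1.0, 2.5, 3.0, 2.0, 2.0, 4.5],
    }
)

# Returns one row per cutoff and one column per metric name.
metrics_df = evaluate_metrics_on_forecasts(
    forecasts,
    {"MAE": mean_absolute_error, "MBE": mean_bias_error},
)
print(metrics_df)

The __init__.py hunk also re-exports cross_validate, so downstream code can use "from enfobench.evaluation import cross_validate" instead of reaching into the private module.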