From ca6f3fd2e8d785c381117c29c53c47b67268a76b Mon Sep 17 00:00:00 2001 From: Attila Balint Date: Fri, 24 Nov 2023 13:24:45 +0100 Subject: [PATCH] fixed dataset.utils bug --- src/enfobench/__version__.py | 2 +- src/enfobench/dataset/utils.py | 14 +++++----- src/enfobench/evaluation/utils.py | 8 ++++-- tests/test_dataset/__init__.py | 0 .../test_dataset.py | 0 tests/test_dataset/test_utils.py | 26 +++++++++++++++++++ 6 files changed, 41 insertions(+), 9 deletions(-) create mode 100644 tests/test_dataset/__init__.py rename tests/{test_evaluations => test_dataset}/test_dataset.py (100%) create mode 100644 tests/test_dataset/test_utils.py diff --git a/src/enfobench/__version__.py b/src/enfobench/__version__.py index e19434e..334b899 100644 --- a/src/enfobench/__version__.py +++ b/src/enfobench/__version__.py @@ -1 +1 @@ -__version__ = "0.3.3" +__version__ = "0.3.4" diff --git a/src/enfobench/dataset/utils.py b/src/enfobench/dataset/utils.py index d236962..1d7903e 100644 --- a/src/enfobench/dataset/utils.py +++ b/src/enfobench/dataset/utils.py @@ -8,7 +8,7 @@ def create_perfect_forecasts_from_covariates( *, horizon: pd.Timedelta, step: pd.Timedelta, - **kwargs, + start: pd.Timestamp | None = None, ) -> pd.DataFrame: """Create forecasts from covariates. @@ -19,19 +19,20 @@ def create_perfect_forecasts_from_covariates( past_covariates: The external covariates. horizon: The forecast horizon. step: The step size between forecasts. + start: The start date of the forecast. If None, the first date of the covariates is used. Returns: The external forecast dataframe. """ - start = kwargs.get("start", past_covariates.index[0]) + start = start or past_covariates.index[0] last_date = past_covariates.index[-1] forecasts = [] while start + horizon <= last_date: forecast = past_covariates.loc[(past_covariates.index > start) & (past_covariates.index <= start + horizon)] - forecast.insert(0, "cutoff_date", start) forecast.rename_axis("timestamp", inplace=True) forecast.reset_index(inplace=True) + forecast["cutoff_date"] = start.isoformat() # pd.concat fails if cutoff_date is a Timestamp if len(forecast) == 0: warnings.warn( @@ -39,9 +40,10 @@ def create_perfect_forecasts_from_covariates( UserWarning, stacklevel=2, ) - - forecasts.append(forecast) + else: + forecasts.append(forecast) start += step - forecast_df = pd.concat(forecasts, ignore_index=True) + forecast_df = pd.concat(forecasts, ignore_index=False) + forecast_df["cutoff_date"] = pd.to_datetime(forecast_df["cutoff_date"]) # convert back to Timestamp return forecast_df diff --git a/src/enfobench/evaluation/utils.py b/src/enfobench/evaluation/utils.py index bd58b08..ea6adc1 100644 --- a/src/enfobench/evaluation/utils.py +++ b/src/enfobench/evaluation/utils.py @@ -29,10 +29,14 @@ def periods_in_duration(target: pd.DatetimeIndex, duration: timedelta | pd.Timed first_delta = target[1] - target[0] last_delta = target[-1] - target[-2] - assert first_delta == last_delta, "Season length is not constant" + if first_delta != last_delta: + msg = f"Season length is not constant: '{first_delta}' != '{last_delta}'" + raise ValueError(msg) periods = duration / first_delta - assert periods.is_integer(), "Season length is not a multiple of the frequency" + if not periods.is_integer(): + msg = f"Season length '{duration}' is not a multiple of the frequency '{first_delta}'" + raise ValueError(msg) return int(periods) diff --git a/tests/test_dataset/__init__.py b/tests/test_dataset/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_evaluations/test_dataset.py b/tests/test_dataset/test_dataset.py similarity index 100% rename from tests/test_evaluations/test_dataset.py rename to tests/test_dataset/test_dataset.py diff --git a/tests/test_dataset/test_utils.py b/tests/test_dataset/test_utils.py new file mode 100644 index 0000000..4fafb3a --- /dev/null +++ b/tests/test_dataset/test_utils.py @@ -0,0 +1,26 @@ +import numpy as np +import pandas as pd + +from enfobench.dataset.utils import create_perfect_forecasts_from_covariates + + +def test_create_perfect_forecasts_from_covariates(): + index = pd.date_range(start="2020-01-01", end="2020-10-02 13:54:00", freq="1H") + past_covariates = pd.DataFrame( + index=index, + data=np.random.rand(len(index), 2), + columns=["covariate_1", "covariate_2"], + ) + + future_covariates = create_perfect_forecasts_from_covariates( + past_covariates, + start=pd.Timestamp("2020-01-01"), + step=pd.Timedelta("1D"), + horizon=pd.Timedelta("7D"), + ) + + assert isinstance(future_covariates, pd.DataFrame) + assert "covariate_1" in future_covariates.columns + assert "covariate_2" in future_covariates.columns + assert "timestamp" in future_covariates.columns + assert "cutoff_date" in future_covariates.columns