Skip to content

Commit

Permalink
implemented multivariate forecast benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
attila-balint-kul committed Jun 28, 2023
1 parent 76a66e4 commit f874419
Show file tree
Hide file tree
Showing 12 changed files with 167 additions and 33 deletions.
20 changes: 12 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,21 @@ pip install enfobench

## Usage

Import your dataset and make sure that the timestamp column is named 'ds' and the target values are named 'y'.
Load your own data and create a dataset.

```python
import pandas as pd

from enfobench.evaluation import Dataset

# Load your dataset and make sure that the timestamp column is named 'ds' and the target values are named 'y'
data = (
pd.read_csv("../path/to/your/data.csv")
.rename(columns={"timestamp": "ds", "value": "y"})
data = pd.read_csv("../path/to/your/data.csv", parse_dates=['timestamp'], index_col='timestamp')
covariates = data.drop(columns=['target_column'])

dataset = Dataset(
target=data['target_column'],
covariates=covariates,
)
y = data.set_index("ds")["y"]
```

You can perform a cross validation on any model locally that adheres to the `enfobench.Model` protocol.
Expand All @@ -52,11 +56,11 @@ model = MyModel()
# Run cross validation on your model
cv_results = cross_validate(
model,
dataset,
start_date=pd.Timestamp("2018-01-01"),
end_date=pd.Timestamp("2018-01-31"),
horizon=pd.Timedelta("24 hours"),
step=pd.Timedelta("1 day"),
y=y,
)
```

Expand All @@ -71,11 +75,11 @@ client = ForecastClient(host='localhost', port=3000)
# Run cross validation on your model
cv_results = cross_validate(
client,
dataset,
start_date=pd.Timestamp("2018-01-01"),
end_date=pd.Timestamp("2018-01-31"),
horizon=pd.Timedelta("24 hours"),
step=pd.Timedelta("1 day"),
y=y,
)
```

Expand All @@ -89,7 +93,7 @@ from enfobench.evaluation.metrics import (
)

# Merge the cross validation results with the original data
forecasts = cv_results.merge(data, on="ds", how="left")
forecasts = cv_results.merge(dataset.target, on="ds", how="left")

metrics = evaluate_metrics_on_forecasts(
forecasts,
Expand Down
2 changes: 1 addition & 1 deletion src/enfobench/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.1"
__version__ = "0.2.0"
2 changes: 1 addition & 1 deletion src/enfobench/evaluation/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from enfobench.evaluation.client import ForecastClient
from enfobench.evaluation.protocols import Dataset, Model
from enfobench.utils import steps_in_horizon
from enfobench.evaluation.utils import steps_in_horizon


def evaluate_metric_on_forecast(forecast: pd.DataFrame, metric: Callable) -> float:
Expand Down
102 changes: 102 additions & 0 deletions src/enfobench/evaluation/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import warnings

import pandas as pd


def steps_in_horizon(horizon: pd.Timedelta, freq: str) -> int:
    """Return the number of steps in a given horizon.

    Parameters
    ----------
    horizon:
        The horizon to be split into steps.
    freq:
        The frequency of the horizon, with or without a leading multiple
        (e.g. ``"15min"`` or ``"D"``).

    Returns
    -------
    The number of steps in the horizon.

    Raises
    ------
    ValueError
        If ``freq`` is empty, or if the horizon is not a whole multiple of
        the frequency.
    """
    if not freq:
        # Without this guard ``freq[0]`` below fails with an unrelated IndexError.
        raise ValueError("Frequency must be a non-empty string")

    # A bare alias such as "D" is given an explicit multiple of one before
    # it is parsed as a Timedelta.
    freq = freq if freq[0].isdigit() else "1" + freq
    periods = horizon / pd.Timedelta(freq)
    if not periods.is_integer():
        raise ValueError("Horizon is not a multiple of the frequency")
    return int(periods)


def create_forecast_index(history: pd.DataFrame, horizon: int) -> pd.DatetimeIndex:
"""Create time index for a forecast horizon.
Parameters
----------
history:
The history of the time series.
horizon:
The forecast horizon.
Returns
-------
The time index for the forecast horizon.
"""
last_date = history["ds"].iloc[-1]
inferred_freq = history["ds"].dt.freq
freq = "1" + inferred_freq if not inferred_freq[0].isdigit() else inferred_freq
return pd.date_range(
start=last_date + pd.Timedelta(freq),
periods=horizon,
freq=freq,
)


def create_perfect_forecasts_from_covariates(
    covariates: pd.DataFrame,
    horizon: pd.Timedelta,
    step: pd.Timedelta,
    **kwargs,
) -> pd.DataFrame:
    """Create forecasts from covariates.

    Sometimes external forecasts are not available for the entire horizon. This function creates
    external forecast dataframe from external covariates as a perfect forecast.

    Parameters
    ----------
    covariates:
        The external covariates, indexed by timestamp.
    horizon:
        The forecast horizon.
    step:
        The step size between forecasts. Must be positive.
    **kwargs:
        Optional ``start``: the first cutoff date. Defaults to the first
        timestamp of ``covariates``.

    Returns
    -------
    The external forecast dataframe with a ``ds`` column (timestamps), a
    ``cutoff_date`` column (the cutoff the row was generated for) and the
    covariate columns.

    Raises
    ------
    ValueError
        If ``step`` is not positive, or if no forecast window of length
        ``horizon`` fits between the start and the last covariate timestamp.
    """
    if step <= pd.Timedelta(0):
        # A non-positive step would never advance the cutoff and loop forever.
        raise ValueError("Step must be a positive timedelta")

    start = kwargs.get("start")
    if start is None:
        start = covariates.index[0]

    last_date = covariates.index[-1]

    forecasts = []
    while start + horizon <= last_date:
        in_window = (covariates.index > start) & (covariates.index <= start + horizon)
        # Work on an explicit copy so the caller's frame is never touched.
        forecast = covariates.loc[in_window].copy()
        forecast.insert(0, "cutoff_date", start)
        forecast = forecast.rename_axis("ds").reset_index()

        if len(forecast) == 0:
            warnings.warn(
                f"Covariates not found for {start} - {start + horizon}, cannot make forecast at step {start}",
                UserWarning,
                stacklevel=2,
            )

        forecasts.append(forecast)
        start = start + step

    if not forecasts:
        # pd.concat would raise the less helpful "No objects to concatenate".
        raise ValueError(
            f"No forecast window of length {horizon} fits between {start} and {last_date}"
        )

    return pd.concat(forecasts, ignore_index=True)
21 changes: 0 additions & 21 deletions src/enfobench/utils.py

This file was deleted.

4 changes: 2 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from enfobench.evaluation import ForecasterType, ModelInfo
from enfobench.utils import create_forecast_index
from enfobench.evaluation.utils import create_forecast_index


class TestModel:
Expand Down Expand Up @@ -43,7 +43,7 @@ def model():


@pytest.fixture(scope="session")
def target() -> pd.Series:
    """Return a series of random values on a 30-minute index from 2020-01-01 to 2020-02-01."""
    index = pd.date_range("2020-01-01", "2020-02-01", freq="30T")
    y = pd.Series(np.random.random(len(index)), index=index)
    return y
Expand Down
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
49 changes: 49 additions & 0 deletions tests/test_evaluations/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import pandas as pd
import pytest

from enfobench.evaluation import utils


@pytest.mark.parametrize(
    "horizon, freq, expected",
    [
        ("1 day", "15T", 96),
        ("1 day", "1H", 24),
        ("7 days", "1H", 7 * 24),
        ("1D", "1D", 1),
        ("1H", "1H", 1),
    ],
)
def test_steps_in_horizon(horizon, freq, expected):
    """Check the step count for horizons that are whole multiples of the frequency."""
    steps = utils.steps_in_horizon(pd.Timedelta(horizon), freq)

    assert steps == expected


def test_steps_in_horizon_raises_with_non_multiple_horizon():
    """A horizon that is not a whole multiple of the frequency must be rejected."""
    uneven_horizon = pd.Timedelta("36 minutes")

    with pytest.raises(ValueError):
        utils.steps_in_horizon(uneven_horizon, "15T")


def test_create_forecast_index(target):
    """The forecast index keeps the history frequency and lies strictly after the history."""
    horizon = 96
    history = target.to_frame("y").rename_axis("ds").reset_index()
    last_date = history["ds"].iloc[-1]

    index = utils.create_forecast_index(history, horizon)

    assert isinstance(index, pd.DatetimeIndex)
    assert index.freq == target.index.freq
    assert len(index) == horizon
    assert (index > last_date).all()


def test_create_perfect_forecasts_from_covariates(covariates):
    """The perfect forecast frame exposes 'ds', 'cutoff_date' and every covariate column."""
    week = pd.Timedelta("7 days")
    day = pd.Timedelta("1D")

    forecasts = utils.create_perfect_forecasts_from_covariates(
        covariates,
        horizon=week,
        step=day,
    )

    assert isinstance(forecasts, pd.DataFrame)
    for required in ("ds", "cutoff_date"):
        assert required in forecasts.columns
    assert set(covariates.columns).issubset(forecasts.columns)

0 comments on commit f874419

Please sign in to comment.