diff --git a/README.md b/README.md
index baeccbd..aaae72b 100644
--- a/README.md
+++ b/README.md
@@ -27,17 +27,21 @@ pip install enfobench
 
 ## Usage
 
-Import your dataset and make sure that the timestamp column in named 'ds' and the target values named 'y'.
+Load your own data and create a dataset.
 
 ```python
 import pandas as pd
 
+from enfobench.evaluation import Dataset
+
-# Load your dataset and make sure that the timestamp column in named 'ds' and the target values named 'y'
+# Load your data with the 'timestamp' column parsed as a datetime index
-data = (
-    pd.read_csv("../path/to/your/data.csv")
-    .rename(columns={"timestamp": "ds", "value": "y"})
+data = pd.read_csv("../path/to/your/data.csv", parse_dates=['timestamp'], index_col='timestamp')
+covariates = data.drop(columns=['target_column'])
+
+dataset = Dataset(
+    target=data['target_column'],
+    covariates=covariates,
 )
-y = data.set_index("ds")["y"]
 ```
 
 You can perform a cross validation on any model locally that adheres to the `enfobench.Model` protocol.
@@ -52,11 +56,11 @@ model = MyModel()
 # Run cross validation on your model
 cv_results = cross_validate(
     model,
+    dataset,
     start_date=pd.Timestamp("2018-01-01"),
     end_date=pd.Timestamp("2018-01-31"),
     horizon=pd.Timedelta("24 hours"),
     step=pd.Timedelta("1 day"),
-    y=y,
 )
 ```
 
@@ -71,11 +75,11 @@ client = ForecastClient(host='localhost', port=3000)
 # Run cross validation on your model
 cv_results = cross_validate(
     client,
+    dataset,
     start_date=pd.Timestamp("2018-01-01"),
     end_date=pd.Timestamp("2018-01-31"),
     horizon=pd.Timedelta("24 hours"),
     step=pd.Timedelta("1 day"),
-    y=y,
 )
 ```
 
@@ -89,7 +93,7 @@ from enfobench.evaluation.metrics import (
 )
 
 # Merge the cross validation results with the original data
-forecasts = cv_results.merge(data, on="ds", how="left")
+forecasts = cv_results.merge(dataset.target, on="ds", how="left")
 
 metrics = evaluate_metrics_on_forecasts(
     forecasts,
diff --git a/src/enfobench/__version__.py b/src/enfobench/__version__.py
index 485f44a..d3ec452 100644
--- a/src/enfobench/__version__.py
+++ b/src/enfobench/__version__.py
@@ -1 +1 @@
-__version__ = "0.1.1"
+__version__ = "0.2.0"
diff --git a/src/enfobench/evaluation/evaluate.py b/src/enfobench/evaluation/evaluate.py
index 5025dd1..dae0171 100644
--- a/src/enfobench/evaluation/evaluate.py
+++ b/src/enfobench/evaluation/evaluate.py
@@ -6,7 +6,7 @@
 
 from enfobench.evaluation.client import ForecastClient
 from enfobench.evaluation.protocols import Dataset, Model
-from enfobench.utils import steps_in_horizon
+from enfobench.evaluation.utils import steps_in_horizon
 
 
 def evaluate_metric_on_forecast(forecast: pd.DataFrame, metric: Callable) -> float:
diff --git a/src/enfobench/evaluation/utils.py b/src/enfobench/evaluation/utils.py
new file mode 100644
index 0000000..6a8f9a4
--- /dev/null
+++ b/src/enfobench/evaluation/utils.py
@@ -0,0 +1,102 @@
+import warnings
+
+import pandas as pd
+
+
+def steps_in_horizon(horizon: pd.Timedelta, freq: str) -> int:
+    """Count how many ``freq``-sized steps fit exactly into ``horizon``.
+
+    Parameters
+    ----------
+    horizon:
+        The total time span to divide into steps.
+    freq:
+        The step size as a frequency string; a bare unit is read as one unit.
+
+    Returns
+    -------
+        The whole number of steps; a ValueError is raised when the division is not exact.
+    """
+    step = pd.Timedelta(freq if freq[0].isdigit() else "1" + freq)
+    n_steps = horizon / step
+    if n_steps.is_integer():
+        return int(n_steps)
+    raise ValueError("Horizon is not a multiple of the frequency")
+
+
+def create_forecast_index(history: pd.DataFrame, horizon: int) -> pd.DatetimeIndex:
+    """Create time index for a forecast horizon.
+
+    Parameters
+    ----------
+    history:
+        The history of the time series; its "ds" column must be regularly spaced datetimes.
+    horizon:
+        The forecast horizon, as a number of steps.
+
+    Returns
+    -------
+        The time index for the forecast horizon, starting one step after the last "ds" value.
+    """
+    last_date = history["ds"].iloc[-1]
+    inferred_freq = history["ds"].dt.freq
+    if inferred_freq is None:  # irregular or too-short series: fail with a clear message
+        raise ValueError("Cannot infer a regular frequency from history['ds']")
+    # pd.Timedelta needs an explicit multiple, so a bare unit such as "H" becomes "1H".
+    freq = inferred_freq if inferred_freq[0].isdigit() else "1" + inferred_freq
+    # Start one step after the last observation so history and forecast do not overlap.
+    return pd.date_range(start=last_date + pd.Timedelta(freq), periods=horizon, freq=freq)
+
+
+def create_perfect_forecasts_from_covariates(
+    covariates: pd.DataFrame,
+    horizon: pd.Timedelta,
+    step: pd.Timedelta,
+    **kwargs,
+) -> pd.DataFrame:
+    """Create forecasts from covariates.
+
+    Sometimes external forecasts are not available for the entire horizon. This function creates
+    external forecast dataframe from external covariates as a perfect forecast: each cutoff date
+    gets the covariate rows in the window ``(cutoff, cutoff + horizon]``.
+
+    Parameters
+    ----------
+    covariates:
+        The external covariates, indexed by timestamp.
+    horizon:
+        The forecast horizon.
+    step:
+        The step size between forecasts; must be positive (ValueError otherwise).
+    **kwargs:
+        ``start`` optionally sets the first cutoff date (default: first index value).
+
+    Returns
+    -------
+        Frame with "ds", "cutoff_date" and the covariate columns; empty if no window has data.
+    """
+    if step <= pd.Timedelta(0):  # a non-positive step would never reach the end of the data
+        raise ValueError("step must be a positive Timedelta")
+    start = kwargs.get("start")
+    if start is None:
+        start = covariates.index[0]
+    last_date = covariates.index[-1]
+
+    forecasts = []
+    while start + horizon <= last_date:
+        in_window = (covariates.index > start) & (covariates.index <= start + horizon)
+        # reset_index returns a new frame, so nothing below mutates a slice of ``covariates``.
+        forecast = covariates.loc[in_window].rename_axis("ds").reset_index()
+        if forecast.empty:
+            warnings.warn(
+                f"Covariates not found for {start} - {start + horizon}, cannot make forecast at step {start}",
+                UserWarning,
+                stacklevel=2,
+            )
+        else:
+            forecast.insert(1, "cutoff_date", start)
+            forecasts.append(forecast)
+        start += step
+    if not forecasts:  # pd.concat raises ValueError on an empty list
+        return pd.DataFrame(columns=["ds", "cutoff_date", *covariates.columns])
+    return pd.concat(forecasts, ignore_index=True)
diff --git a/src/enfobench/utils.py b/src/enfobench/utils.py
deleted file mode 100644
index 702513c..0000000
--- a/src/enfobench/utils.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import pandas as pd
-from pandas import Timedelta
-
-
-def steps_in_horizon(horizon: Timedelta, freq: str) -> int:
-    """Return the number of steps in a given horizon."""
-    freq = "1" + freq if not freq[0].isdigit() else freq
-    periods = horizon / pd.Timedelta(freq)
-    assert periods.is_integer(), "Horizon is not a multiple of the frequency"
-    return int(periods)
-
-
-def create_forecast_index(history: pd.DataFrame, horizon: int) -> pd.DatetimeIndex:
-    last_date = history["ds"].iloc[-1]
-    inferred_freq = history["ds"].dt.freq
-    freq = "1" + inferred_freq if not inferred_freq[0].isdigit() else inferred_freq
-    return pd.date_range(
-        start=last_date + pd.Timedelta(freq),
-        periods=horizon,
-        freq=freq,
-    )
diff --git a/tests/conftest.py b/tests/conftest.py
index fc70d3f..39b0553 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,7 +3,7 @@
 import pytest
 
 from enfobench.evaluation import ForecasterType, ModelInfo
-from enfobench.utils import create_forecast_index
+from enfobench.evaluation.utils import create_forecast_index
 
 
 class TestModel:
@@ -43,7 +43,7 @@ def model():
 
 
 @pytest.fixture(scope="session")
-def target() -> pd.Series:
+def target() -> pd.Series:
     index = pd.date_range("2020-01-01", "2020-02-01", freq="30T")
     y = pd.Series(np.random.random(len(index)), index=index)
     return y
diff --git a/tests/test_evaluations/__init__.py b/tests/test_evaluations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_dataset.py b/tests/test_evaluations/test_dataset.py
similarity index 100%
rename from tests/test_dataset.py
rename to tests/test_evaluations/test_dataset.py
diff --git a/tests/test_evaluate.py b/tests/test_evaluations/test_evaluate.py
similarity index 100%
rename from tests/test_evaluate.py
rename to tests/test_evaluations/test_evaluate.py
diff --git a/tests/test_metrics.py b/tests/test_evaluations/test_metrics.py
similarity index 100%
rename from tests/test_metrics.py
rename to tests/test_evaluations/test_metrics.py
diff --git a/tests/test_server.py b/tests/test_evaluations/test_server.py
similarity index 100%
rename from tests/test_server.py
rename to tests/test_evaluations/test_server.py
diff --git a/tests/test_evaluations/test_utils.py b/tests/test_evaluations/test_utils.py
new file mode 100644
index 0000000..c3a790b
--- /dev/null
+++ b/tests/test_evaluations/test_utils.py
@@ -0,0 +1,49 @@
+import pandas as pd
+import pytest
+
+from enfobench.evaluation import utils
+
+
+@pytest.mark.parametrize(
+    "horizon, freq, expected",
+    [
+        ("1 day", "15T", 96),
+        ("1 day", "1H", 24),
+        ("7 days", "1H", 7 * 24),
+        ("1D", "1D", 1),
+        ("1H", "1H", 1),  # NOTE(review): no bare-unit freq (e.g. "H") here, so that branch is untested
+    ],
+)
+def test_steps_in_horizon(horizon, freq, expected):
+    assert utils.steps_in_horizon(pd.Timedelta(horizon), freq) == expected
+
+
+def test_steps_in_horizon_raises_with_non_multiple_horizon():
+    with pytest.raises(ValueError):
+        utils.steps_in_horizon(pd.Timedelta("36 minutes"), "15T")  # 36 / 15 = 2.4, not a whole count
+
+
+def test_create_forecast_index(target):
+    history = target.to_frame("y").rename_axis("ds").reset_index()  # helper reads the "ds" column
+    horizon = 96
+    last_date = history["ds"].iloc[-1]
+
+    index = utils.create_forecast_index(history, horizon)
+
+    assert isinstance(index, pd.DatetimeIndex)
+    assert index.freq == target.index.freq
+    assert len(index) == horizon
+    assert all(idx > last_date for idx in index)  # forecast must lie strictly after the history
+
+
+def test_create_perfect_forecasts_from_covariates(covariates):
+    forecasts = utils.create_perfect_forecasts_from_covariates(
+        covariates,  # NOTE(review): this fixture is not defined in this diff; presumably in conftest.py
+        horizon=pd.Timedelta("7 days"),
+        step=pd.Timedelta("1D"),
+    )
+
+    assert isinstance(forecasts, pd.DataFrame)
+    assert "ds" in forecasts.columns
+    assert "cutoff_date" in forecasts.columns
+    assert all(col in forecasts.columns for col in covariates.columns)