Commit ad63b33

ported examples from separate repository and added them to documentation

attila-balint-kul committed Nov 30, 2023
1 parent 74cb9af commit ad63b33
Showing 44 changed files with 3,419 additions and 116 deletions.
63 changes: 56 additions & 7 deletions Makefile
@@ -1,13 +1,11 @@
-.PHONY: install clean lint style format test build publish publish-test

#################################################################################
# GLOBALS #
#################################################################################

PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
-PROJECT_NAME = energy-forecat-benchmark-toolkit
+PROJECT_NAME = energy-forecast-benchmark-toolkit
PACKAGE_NAME = enfobench
-PYTHON_INTERPRETER = python3
+PYTHON_INTERPRETER ?= python3
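# `?=` assigns only if the variable is not already set in the environment,
# so the interpreter can be overridden per invocation,
# e.g.: make install PYTHON_INTERPRETER=python3.11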

#################################################################################
# COMMANDS #
@@ -21,52 +19,103 @@ venv/bin/python:
pip install --upgrade pip; \
)

.PHONY: install
## Install project dependencies
install: venv/bin/python
(\
source $(PROJECT_DIR)/venv/bin/activate; \
pip install -e .; \
)

.PHONY: clean
## Delete all compiled Python files
clean:
find . -type f -name "*.py[co]" -delete
find . -type d -name "__pycache__" -delete

.PHONY: lint
## Lint using ruff, mypy, black, and isort
lint:
hatch run lint:all


.PHONY: style
## Check style using ruff, black, and isort
style:
hatch run lint:style

.PHONY: format
## Format using black
format:
hatch run lint:fmt

.PHONY: tests
## Run pytest with coverage
-test:
+tests:
hatch run cov

.PHONY: docs
## Create documentation
docs:
hatch run docs:serve

.PHONY: docs-build
## Build documentation
docs-build:
hatch run docs:build

#################################################################################
-# PROJECT RULES #
+# PACKAGING RULES #
#################################################################################

.PHONY: build
## Build source distribution and wheel
build: style
hatch build

.PHONY: publish
## Upload source distribution and wheel to PyPI
publish: build
hatch publish --repo main

.PHONY: publish-test
## Upload source distribution and wheel to TestPyPI
publish-test: build
hatch publish --repo test


#################################################################################
# MODEL RULES #
#################################################################################

DOCKER_HUB_REPOSITORY := $(DOCKER_HUB_REPOSITORY)
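# The line above captures DOCKER_HUB_REPOSITORY from the calling environment;
# export it before running any of the image targets below.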
ENFOBENCH_VERSION := $(shell hatch version)
MODEL_NAME := sf-naive
IMAGE_TAG := $(ENFOBENCH_VERSION)-$(MODEL_NAME)
DEFAULT_PORT := 3000

.PHONY: image
## Create docker image
image:
docker build -t $(DOCKER_HUB_REPOSITORY):$(IMAGE_TAG) ./models/$(MODEL_NAME)

.PHONY: push-image
## Push docker image to Docker Hub
push-image: image
docker push $(DOCKER_HUB_REPOSITORY):$(IMAGE_TAG)

.PHONY: run-image
## Run docker image locally
run-image: image
docker run -it --rm -p $(DEFAULT_PORT):3000 $(DOCKER_HUB_REPOSITORY):$(IMAGE_TAG)


MODELS = $(shell ls -d ./models/* | xargs -n 1 basename)

.PHONY: images
## Create docker images for all models
images:
	$(foreach var,$(MODELS), $(MAKE) image MODEL_NAME=$(var);)

.PHONY: push-images
## Push all docker images to Docker Hub
push-images:
	$(foreach var,$(MODELS), $(MAKE) push-image MODEL_NAME=$(var);)
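# Example usage of the model image targets (repository name is a placeholder):
#   export DOCKER_HUB_REPOSITORY=your-user/enfobench-models
#   make image MODEL_NAME=sf-naive
#   make run-image
#   make push-images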


#################################################################################
# Self Documenting Commands #
#################################################################################
127 changes: 26 additions & 101 deletions README.md
@@ -1,5 +1,4 @@
-Energy Forecast Benchmark Toolkit
-==============================
+# Energy Forecast Benchmark Toolkit

[![PyPI version](https://badge.fury.io/py/enfobench.svg)](https://badge.fury.io/py/enfobench)
[![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
@@ -10,13 +9,23 @@ Energy Forecast Benchmark Toolkit
Energy Forecast Benchmark Toolkit is a Python project that aims to provide common tools to
benchmark forecast models.

---

**Documentation**: https://attila-balint-kul.github.io/energy-forecast-benchmark-toolkit/

**Source code**: https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit

---

## Table of Contents

- [Installation](#installation)
- [Usage](#usage)
- [Contributing](#contributing)
- [License](#license)

---

## Installation

Use the package manager pip to install enfobench.
@@ -27,54 +36,27 @@ pip install enfobench

## Usage

-Load your own data and create a dataset.
+Download the HuggingFace dataset ['attila-balint-kul/electricity-demand'](https://huggingface.co/datasets/attila-balint-kul/electricity-demand),
+and save the files from its data folder to your computer.
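One way to fetch the files programmatically (this assumes the `huggingface_hub` package is installed; downloading them manually from the dataset page works just as well):

```python
from huggingface_hub import snapshot_download

# Download all dataset files into a local folder.
snapshot_download(
    repo_id="attila-balint-kul/electricity-demand",
    repo_type="dataset",
    local_dir="./data",
)
```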

```python
import pandas as pd

from enfobench.dataset import Dataset

# Load your datasets
data = pd.read_csv("../path/to/your/data.csv", parse_dates=['timestamp'], index_col='timestamp')

# Create a target DataFrame that has a pd.DatetimeIndex and a column named 'y'
target = data.loc[:, ['target_column']].rename(columns={'target_column': 'y'})

# Add covariates that can be used as past covariates. This also has to have a pd.DatetimeIndex
past_covariates = data.loc[:, ['covariate_1', 'covariate_2']]

# As it can be challenging to access historical forecasts for future covariates,
# the package also has a helper function to create perfect historical forecasts from the past covariates.
from enfobench.dataset.utils import create_perfect_forecasts_from_covariates

# The example below creates simulated perfect historical forecasts with a horizon of 24 hours and a step of 1 day.
future_covariates = create_perfect_forecasts_from_covariates(
past_covariates,
horizon=pd.Timedelta("24 hours"),
step=pd.Timedelta("1 day"),
)

dataset = Dataset(
    target=target,
past_covariates=past_covariates,
future_covariates=future_covariates,
)
```

The package integrates with the HuggingFace Dataset ['attila-balint-kul/electricity-demand'](https://huggingface.co/datasets/attila-balint-kul/electricity-demand).
To use this, just download all the files from the data folder to your computer.

```python
from enfobench.dataset import Dataset, DemandDataset
from enfobench.evaluation import cross_validate, evaluate_metrics
from enfobench.evaluation.metrics import mean_bias_error, mean_absolute_error, root_mean_squared_error

# Load the dataset from the folder that you downloaded the files to.
ds = DemandDataset("/path/to/the/dataset/folder/that/contains/all/subsets")

# List all meter ids
ds.metadata_subset.list_unique_ids()

# Get one of the meter ids
unique_id = ds.metadata_subset.list_unique_ids()[0]

# Get dataset for a specific meter id
target, past_covariates, metadata = ds.get_data_by_unique_id("unique_id_of_the_meter")
target, past_covariates, metadata = ds.get_data_by_unique_id(unique_id)

# Create a dataset
dataset = Dataset(
@@ -83,18 +65,9 @@ dataset = Dataset(
future_covariates=None,
metadata=metadata
)
```


You can perform a cross validation on any model locally that adheres to the `enfobench.Model` protocol.

```python
import MyModel
import pandas as pd
from enfobench.evaluation import cross_validate

# Import your model and instantiate it
-model = MyModel()
+model = MyForecastModel()

# Run cross validation on your model
cv_results = cross_validate(
@@ -105,72 +78,24 @@ cv_results = cross_validate(
horizon=pd.Timedelta("24 hours"),
step=pd.Timedelta("1 day"),
)
```
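For illustration, a minimal model satisfying the protocol might look like the sketch below. The `forecast` signature and the `yhat` output column are assumptions modeled on the naive example model, not the authoritative protocol definition, so consult the documentation before relying on them.

```python
from typing import Optional

import pandas as pd


class MyForecastModel:
    """Sketch of a naive persistence model: repeats the last observed value.

    The method name, parameters, and 'yhat' output column are assumptions;
    check the enfobench documentation for the exact Model protocol.
    """

    def forecast(
        self,
        horizon: int,
        history: pd.DataFrame,
        past_covariates: Optional[pd.DataFrame] = None,
        future_covariates: Optional[pd.DataFrame] = None,
        **kwargs,
    ) -> pd.DataFrame:
        # Continue the time index from the end of the history.
        freq = pd.infer_freq(history.index) or "1h"  # fall back to hourly
        index = pd.date_range(history.index[-1], periods=horizon + 1, freq=freq)[1:]
        # Repeat the most recent observation across the whole horizon.
        return pd.DataFrame({"yhat": history["y"].iloc[-1]}, index=index)
```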

You can use the same cross-validation interface with your model served behind an API.
To make this simple, both a client and a server are provided.

```python
import pandas as pd
from enfobench.evaluation import cross_validate, ForecastClient

# Create a client that connects to your model server
client = ForecastClient(host='localhost', port=3000)

# Run cross validation on your model
cv_results = cross_validate(
client,
dataset,
start_date=pd.Timestamp("2018-01-01"),
end_date=pd.Timestamp("2018-01-31"),
horizon=pd.Timedelta("24 hours"),
step=pd.Timedelta("1 day"),
)
```

The package also collects common metrics used in forecasting.

```python
from enfobench.evaluation import evaluate_metrics

from enfobench.evaluation.metrics import (
mean_bias_error,
mean_absolute_error,
mean_squared_error,
root_mean_squared_error,
)

# Simply pass in the cross-validation results and the metrics you want to evaluate.
metrics = evaluate_metrics(
cv_results,
metrics={
"mean_bias_error": mean_bias_error,
"mean_absolute_error": mean_absolute_error,
"mean_squared_error": mean_squared_error,
"root_mean_squared_error": root_mean_squared_error,
"MBE": mean_bias_error,
"MAE": mean_absolute_error,
"RMSE": root_mean_squared_error,
},
)
```

To serve your model behind an API, you can use the built-in server factory.

```python
import uvicorn
from enfobench.evaluation.server import server_factory

model = MyForecastModel()

# Create a server that serves your model
server = server_factory(model)
uvicorn.run(server, port=3000)
```
To get started with some examples, check out the `models` folder and the [examples](https://attila-balint-kul.github.io/energy-forecast-benchmark-toolkit/examples) section of the documentation.

## Benchmarking

The package also provides a benchmarking framework that can be used to benchmark your model against
other models. There are some example models in [this repository](https://github.com/attila-balint-kul/energy-forecast-benchmark-examples).

-The results of the benchmarking are openly accessible [here](https://wandb.ai/attila-balint-kul/load-forecasting-competition/reports/Enfobench-Dashboard--Vmlldzo2MDM0ODE2#models).
+Once confident in your model, you can submit it for evaluation.
+The results of the benchmarks are openly accessible [here](https://wandb.ai/attila-balint-kul/load-forecasting-competition/reports/Enfobench-Dashboard--Vmlldzo2MDM0ODE2#models).


## Contributing
1 change: 1 addition & 0 deletions data/.gitignore
@@ -0,0 +1 @@
*.parquet
Empty file added data/.gitkeep
80 changes: 80 additions & 0 deletions docs/docs/examples.md
@@ -0,0 +1,80 @@
---
hide:
- navigation
---
# Examples

This repository contains example models and notebooks to get started with the benchmark toolkit.
The example models are in the `models/` folder, and the example notebooks are in the `notebooks/` folder.

## Folder Structure

The repository follows this structure:

```
├── README.md                    <- The top-level README for getting started.
├── data
│   ├── demand.parquet           <- Demand data subset.
│   ├── metadata.parquet         <- Metadata subset.
│   └── weather.parquet          <- Weather data subset.
├── models                       <- Example models, each in its own subfolder.
│   └── sf-naive-seasonal        <- Naive seasonal model based on the statsforecast package.
│       ├── src                  <- Source code for the model.
│       │   └── main.py          <- Entrypoint for the forecast server.
│       ├── Dockerfile           <- Example Dockerfile for the model.
│       └── requirements.txt     <- Model's requirements.
├── notebooks                    <- Jupyter notebooks, best read in order.
│   ├── 01. Univariate.ipynb     <- Simple univariate forecast model benchmarking example.
│   ├── 02. Multivariate.ipynb   <- Multivariate forecast model benchmarking example.
│   └── 03. ForecastClient.ipynb <- Benchmarking using the ForecastClient example.
└── requirements.txt             <- Overall requirements to run all the example notebooks.
```

## Requirements

To contribute models to the benchmark, you need to have Docker installed.
Follow the installation procedure for your platform on the [docker website](https://www.docker.com/products/docker-desktop/).

## Getting Started

Clone this repository:
```bash
git clone https://github.com/attila-balint-kul/energy-forecast-benchmark-toolkit
cd energy-forecast-benchmark-toolkit
```

Install the requirements (recommended inside a virtual environment):
```bash
pip install notebook enfobench
```
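Before running the install command above, you can create and activate a virtual environment with the standard `venv` module:

```bash
python3 -m venv venv
source venv/bin/activate
```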

To run the notebooks, you also need the HuggingFace dataset [attila-balint-kul/electricity-demand](https://huggingface.co/datasets/attila-balint-kul/electricity-demand).
Download all three files from the dataset's `data/` folder into the `data/` folder of this repository.

Run the example notebooks in the `notebooks` folder.

## Creating a Model

To create a model, use the `models/sf-naive/` folder as a template.
If you follow the folder structure, provide a `requirements.txt` file,
and keep all your source code inside the `src/` folder, there is generally
no need to change the `Dockerfile`.
Once your model is ready, you can build the docker image:

```bash
docker build -t tag-that-identifies-the-model ./path/to/the/folder/containing/the/Dockerfile
```
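For the bundled naive seasonal example, that might look like this (the tag name is only an example):

```bash
docker build -t enfobench-sf-naive-seasonal ./models/sf-naive-seasonal
```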

To run the Docker image:
```bash
docker run -p 3000:3000 tag-that-identifies-the-model
```

Then you can test your model by using the `03. ForecastClient.ipynb` notebook.
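As a quick check from Python before opening the notebook, you can point the `ForecastClient` from the toolkit README at the running container (host and port below match the `docker run` command above):

```python
from enfobench.evaluation import ForecastClient

# Connect to the locally running model container.
client = ForecastClient(host="localhost", port=3000)

# The client can now stand in for a local model, e.g. when passed to
# cross_validate(...) as shown in the toolkit README.
```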

Once the model is tested, push it to any public Docker registry
(e.g., DockerHub). Contact us with the repository and model tag,
and we will add it to the [dashboard](https://wandb.ai/attila-balint-kul/load-forecasting-competition/reports/Enfobench-Dashboard--Vmlldzo2MDM0ODE2#models).
