Skip to content

Commit

Permalink
major refactor to integrate huggingface dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
attila-balint-kul committed Nov 16, 2023
1 parent 3b65bc4 commit 1906306
Show file tree
Hide file tree
Showing 22 changed files with 1,087 additions and 732 deletions.
46 changes: 28 additions & 18 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: install clean format lint tests build publish publish-test
.PHONY: install clean lint style format test build publish publish-test

#################################################################################
# GLOBALS #
Expand All @@ -7,45 +7,55 @@
PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
PROJECT_NAME = energy-forecast-benchmark-toolkit
PACKAGE_NAME = enfobench
PYTHON_INTERPRETER = python3

#################################################################################
# COMMANDS #
#################################################################################

## Create python virtual environment
venv/bin/python:
( \
$(PYTHON_INTERPRETER) -m venv $(PROJECT_DIR)/venv; \
source $(PROJECT_DIR)/venv/bin/activate; \
pip install --upgrade pip; \
)

## Install project dependencies
install:
pip install -U pip
pip install -e ."[test,dev]"
mypy --install-types
install: venv/bin/python
(\
source $(PROJECT_DIR)/venv/bin/activate; \
pip install -e .; \
)

## Delete all compiled Python files
clean:
find . -type f -name "*.py[co]" -delete
find . -type d -name "__pycache__" -delete

## Lint using ruff, mypy, black, and isort
lint:
hatch run lint:all


## Check style using ruff, black, and isort
style:
hatch run lint:style

## Format using black
format:
ruff src tests --fix
black src tests
isort src tests

## Lint using ruff, mypy, black, and isort
lint: format
mypy src
ruff src tests
black src tests --check
isort src tests --check-only
hatch run lint:fmt

## Run pytest with coverage
tests:
pytest src tests
test:
hatch run cov

#################################################################################
# PROJECT RULES #
#################################################################################

## Build source distribution and wheel
build: lint tests
build: style
hatch build

## Upload source distribution and wheel to PyPI
Expand Down
83 changes: 71 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,65 @@ Load your own data and create a dataset.
```python
import pandas as pd

from enfobench.evaluation import Dataset
from enfobench.dataset import Dataset

# Load your dataset and make sure that the timestamp column is named 'ds' and the target values named 'y'
# Load your datasets
data = pd.read_csv("../path/to/your/data.csv", parse_dates=['timestamp'], index_col='timestamp')
covariates = data.drop(columns=['target_column'])

# Create a target DataFrame that has a pd.DatetimeIndex and a column named 'y'
target = data.loc[:, ['target_column']].rename(columns={'target_column': 'y'})

# Add covariates that can be used as past covariates. This also has to have a pd.DatetimeIndex
past_covariates = data.loc[:, ['covariate_1', 'covariate_2']]

# As sometimes it can be challenging to access historical forecasts to use future covariates,
# the package also has a helper function to create perfect historical forecasts from the past covariates.
from enfobench.dataset.utils import create_perfect_forecasts_from_covariates

# The example below creates simulated perfect historical forecasts with a horizon of 24 hours and a step of 1 day.
future_covariates = create_perfect_forecasts_from_covariates(
past_covariates,
horizon=pd.Timedelta("24 hours"),
step=pd.Timedelta("1 day"),
)

dataset = Dataset(
target=data['target_column'],
covariates=covariates,
past_covariates=past_covariates,
future_covariates=future_covariates,
)
```

The package integrates with the HuggingFace Dataset ['attila-balint-kul/electricity-demand'](https://huggingface.co/datasets/attila-balint-kul/electricity-demand).
To use this, just download all the files from the data folder to your computer.

```python
from enfobench.dataset import Dataset, DemandDataset

# Load the dataset from the folder that you downloaded the files to.
ds = DemandDataset("/path/to/the/dataset/folder/that/contains/all/subsets")

# List all meter ids
ds.metadata_subset.list_unique_ids()

# Get dataset for a specific meter id
target, past_covariates, metadata = ds.get_data_by_unique_id("unique_id_of_the_meter")

# Create a dataset
dataset = Dataset(
target=target,
past_covariates=past_covariates,
future_covariates=None,
metadata=metadata
)
```


You can perform a cross validation on any model locally that adheres to the `enfobench.Model` protocol.

```python
import MyModel
import pandas as pd
from enfobench.evaluation import cross_validate

# Import your model and instantiate it
Expand All @@ -64,9 +107,11 @@ cv_results = cross_validate(
)
```

You can use the same cross-validation interface with your model served behind an API.
You can use the same cross-validation interface with your model served behind an API.
To make this simple, both a client and a server are provided.

```python
import pandas as pd
from enfobench.evaluation import cross_validate, ForecastClient

# Import your model and instantiate it
Expand All @@ -83,20 +128,21 @@ cv_results = cross_validate(
)
```

The package also collects common metrics for you that you can quickly evaluate on your results.
The package also collects common metrics used in forecasting.

```python
from enfobench.evaluation import evaluate_metrics_on_forecasts

from enfobench.evaluation.metrics import (
mean_bias_error, mean_absolute_error, mean_squared_error, root_mean_squared_error,
mean_bias_error,
mean_absolute_error,
mean_squared_error,
root_mean_squared_error,
)

# Merge the cross validation results with the original data
forecasts = cv_results.merge(dataset.target, on="ds", how="left")

# Simply pass in the cross validation results and the metrics you want to evaluate.
metrics = evaluate_metrics_on_forecasts(
forecasts,
cv_results,
metrics={
"mean_bias_error": mean_bias_error,
"mean_absolute_error": mean_absolute_error,
Expand All @@ -106,6 +152,19 @@ metrics = evaluate_metrics_on_forecasts(
)
```

In order to serve your model behind an API, you can use the built-in server factory.

```python
import uvicorn
from enfobench.evaluation.server import server_factory

model = MyModel()

# Create a server that serves your model
server = server_factory(model)
uvicorn.run(server, port=3000)
```

## Contributing

Contributions and feedback are welcome! For major changes, please open an issue first to discuss
Expand All @@ -121,4 +180,4 @@ Submit a pull request describing your changes.

## License

BSD 3-Clause License
BSD 2-Clause License
Loading

0 comments on commit 1906306

Please sign in to comment.