Skip to content

Commit

Permalink
major refactor to integrate huggingface dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
attila-balint-kul committed Nov 16, 2023
1 parent 3b65bc4 commit 1906306
Show file tree
Hide file tree
Showing 22 changed files with 1,087 additions and 732 deletions.
46 changes: 28 additions & 18 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: install clean format lint tests build publish publish-test
.PHONY: install clean lint style format test build publish publish-test

#################################################################################
# GLOBALS #
Expand All @@ -7,45 +7,55 @@
PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
PROJECT_NAME = energy-forecast-benchmark-toolkit
PACKAGE_NAME = enfobench
PYTHON_INTERPRETER = python3

#################################################################################
# COMMANDS #
#################################################################################

## Create python virtual environment
venv/bin/python:
( \
$(PYTHON_INTERPRETER) -m venv $(PROJECT_DIR)/venv; \
source $(PROJECT_DIR)/venv/bin/activate; \
pip install --upgrade pip; \
)

## Install project dependencies
install:
pip install -U pip
pip install -e ."[test,dev]"
mypy --install-types
install: venv/bin/python
(\
source $(PROJECT_DIR)/venv/bin/activate; \
pip install -e .; \
)

## Delete all compiled Python files
clean:
find . -type f -name "*.py[co]" -delete
find . -type d -name "__pycache__" -delete

## Lint using ruff, mypy, black, and isort
lint:
hatch run lint:all


## Check style using ruff, black, and isort
style:
hatch run lint:style

## Format using black
format:
ruff src tests --fix
black src tests
isort src tests

## Lint using ruff, mypy, black, and isort
lint: format
mypy src
ruff src tests
black src tests --check
isort src tests --check-only
hatch run lint:fmt

## Run pytest with coverage
tests:
pytest src tests
test:
hatch run cov

#################################################################################
# PROJECT RULES #
#################################################################################

## Build source distribution and wheel
build: lint tests
build: style
hatch build

## Upload source distribution and wheel to PyPI
Expand Down
83 changes: 71 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,65 @@ Load your own data and create a dataset.
```python
import pandas as pd

from enfobench.evaluation import Dataset
from enfobench.dataset import Dataset

# Load your dataset and make sure that the timestamp column is named 'ds' and the target values named 'y'
# Load your datasets
data = pd.read_csv("../path/to/your/data.csv", parse_dates=['timestamp'], index_col='timestamp')
covariates = data.drop(columns=['target_column'])

# Create a target DataFrame that has a pd.DatetimeIndex and a column named 'y'
target = data.loc[:, ['target_column']].rename(columns={'target_column': 'y'})

# Add covariates that can be used as past covariates. This also has to have a pd.DatetimeIndex
past_covariates = data.loc[:, ['covariate_1', 'covariate_2']]

# As sometimes it can be challenging to access historical forecasts to use future covariates,
# the package also has a helper function to create perfect historical forecasts from the past covariates.
from enfobench.dataset.utils import create_perfect_forecasts_from_covariates

# The example below creates simulated perfect historical forecasts with a horizon of 24 hours and a step of 1 day.
future_covariates = create_perfect_forecasts_from_covariates(
past_covariates,
horizon=pd.Timedelta("24 hours"),
step=pd.Timedelta("1 day"),
)

dataset = Dataset(
target=data['target_column'],
covariates=covariates,
past_covariates=past_covariates,
future_covariates=future_covariates,
)
```

The package integrates with the HuggingFace Dataset ['attila-balint-kul/electricity-demand'](https://huggingface.co/datasets/attila-balint-kul/electricity-demand).
To use this, just download all the files from the data folder to your computer.

```python
from enfobench.dataset import Dataset, DemandDataset

# Load the dataset from the folder that you downloaded the files to.
ds = DemandDataset("/path/to/the/dataset/folder/that/contains/all/subsets")

# List all meter ids
ds.metadata_subset.list_unique_ids()

# Get dataset for a specific meter id
target, past_covariates, metadata = ds.get_data_by_unique_id("unique_id_of_the_meter")

# Create a dataset
dataset = Dataset(
target=target,
past_covariates=past_covariates,
future_covariates=None,
metadata=metadata
)
```


You can perform a cross validation on any model locally that adheres to the `enfobench.Model` protocol.

```python
import MyModel
import pandas as pd
from enfobench.evaluation import cross_validate

# Import your model and instantiate it
Expand All @@ -64,9 +107,11 @@ cv_results = cross_validate(
)
```

You can use the same cross-validation interface with your model served behind an API.
You can use the same cross-validation interface with your model served behind an API.
To make this simple, both a client and a server are provided.

```python
import pandas as pd
from enfobench.evaluation import cross_validate, ForecastClient

# Import your model and instantiate it
Expand All @@ -83,20 +128,21 @@ cv_results = cross_validate(
)
```

The package also collects common metrics for you that you can quickly evaluate on your results.
The package also collects common metrics used in forecasting.

```python
from enfobench.evaluation import evaluate_metrics_on_forecasts

from enfobench.evaluation.metrics import (
mean_bias_error, mean_absolute_error, mean_squared_error, root_mean_squared_error,
mean_bias_error,
mean_absolute_error,
mean_squared_error,
root_mean_squared_error,
)

# Merge the cross validation results with the original data
forecasts = cv_results.merge(dataset.target, on="ds", how="left")

# Simply pass in the cross validation results and the metrics you want to evaluate.
metrics = evaluate_metrics_on_forecasts(
forecasts,
cv_results,
metrics={
"mean_bias_error": mean_bias_error,
"mean_absolute_error": mean_absolute_error,
Expand All @@ -106,6 +152,19 @@ metrics = evaluate_metrics_on_forecasts(
)
```

In order to serve your model behind an API, you can use the built-in server factory.

```python
import uvicorn
from enfobench.evaluation.server import server_factory

model = MyModel()

# Create a server that serves your model
server = server_factory(model)
uvicorn.run(server, port=3000)
```

## Contributing

Contributions and feedback are welcome! For major changes, please open an issue first to discuss
Expand All @@ -121,4 +180,4 @@ Submit a pull request describing your changes.

## License

BSD 3-Clause License
BSD 2-Clause License
Loading

0 comments on commit 1906306

Please sign in to comment.