diff --git a/README.md b/README.md
index f51ed3b..a0d0ea1 100644
--- a/README.md
+++ b/README.md
@@ -46,18 +46,19 @@ and download the files from the data folder to your computer.
 
 ```python
 import pandas as pd
-from enfobench.dataset import Dataset, DemandDataset
+from enfobench import Dataset
+from enfobench.datasets import ElectricityDemandDataset
 from enfobench.evaluation import cross_validate, evaluate_metrics
 from enfobench.evaluation.metrics import mean_bias_error, mean_absolute_error, root_mean_squared_error
 
 # Load the dataset from the folder that you downloaded the files to.
-ds = DemandDataset("/path/to/the/dataset/folder/that/contains/all/subsets")
+ds = ElectricityDemandDataset("/path/to/the/dataset/folder/that/contains/all/subsets")
 
 # List all meter ids
-ds.metadata_subset.list_unique_ids()
+ds.list_unique_ids()
 
 # Get one of the meter ids
-unique_id = ds.metadata_subset.list_unique_ids()[0]
+unique_id = ds.list_unique_ids()[0]
 
 # Get dataset for a specific meter id
 target, past_covariates, metadata = ds.get_data_by_unique_id(unique_id)
diff --git a/docs/docs/examples.md b/docs/docs/examples.md
index bea4d84..f0c84ad 100644
--- a/docs/docs/examples.md
+++ b/docs/docs/examples.md
@@ -12,11 +12,11 @@ The examples models are found in the `models/` folder, and the example notebooks
 The repository follows this structure:
 
 ```
-├── README.md                    <- The top-level README for getting started.
 ├── data
-│   ├── demand.parquet           <- Demand data subset.
-│   ├── metadata.parquet         <- Metadata subset.
-│   └── weather.parquet          <- Weather data subset.
+│   └── electricity-demand       <- Electricity demand dataset.
+│       ├── demand.parquet       <- Demand data subset.
+│       ├── metadata.parquet     <- Metadata subset.
+│       └── weather.parquet      <- Weather data subset.
 │
 ├── models                       <- Example models each in its own subfolder.
 │   ├── sf-naive-seasonal        <- Naive seasonal model based on statsforecast package.
@@ -28,9 +28,9 @@ The repository follows this structure:
 ├── notebooks                    <- Jupyter notebooks, should be read in order.
 │   ├── 01. Univariate.ipynb     <- Simple univariate forecast model benchmarking example.
 │   ├── 02. Multivariate.ipynb   <- Multivariate forecast model benchmarking example.
-│   └── 02. ForecastClient.ipynb <- Benchmarking using the ForecastClient example.
+│   └── 03. ForecastClient.ipynb <- Benchmarking using the ForecastClient example.
 │
-└── requirements.txt             <- Overall requirements to run all the example notebooks.
+└── README.md                    <- The top-level README for getting started.
 ```
 
 ## Requirements
@@ -52,7 +52,7 @@
 pip install notebook enfobench
 ```
 
 To run the notebooks, you also need the HuggingFace dataset [attila-balint-kul/electricity-demand](https://huggingface.co/datasets/attila-balint-kul/electricity-demand).
-Download all three files from the `data/` folder into the `data/` folder of this repository.
+Download all three files from the `data/` folder into the `data/electricity-demand/` folder of this repository.
 
 Run the example notebooks in the `notebooks` folder.
diff --git a/docs/docs/index.md b/docs/docs/index.md
index 528a09b..cd88e1e 100644
--- a/docs/docs/index.md
+++ b/docs/docs/index.md
@@ -33,7 +33,7 @@ Load your own data and create a dataset.
 
 ```python
 import pandas as pd
-from enfobench.dataset import Dataset
+from enfobench import Dataset
 
 # Load your datasets
 data = pd.read_csv("../path/to/your/data.csv", parse_dates=['timestamp'], index_col='timestamp')
@@ -46,7 +46,7 @@ past_covariates = data.loc[:, ['covariate_1', 'covariate_2']]
 
 # As sometimes it can be challenging to access historical forecasts to use future covariates,
 # the package also has a helper function to create perfect historical forecasts from the past covariates.
-from enfobench.dataset.utils import create_perfect_forecasts_from_covariates
+from enfobench.datasets.utils import create_perfect_forecasts_from_covariates
 
 # The example below creates simulated perfect historical forecasts with a horizon of 24 hours and a step of 1 day.
 future_covariates = create_perfect_forecasts_from_covariates(
@@ -56,7 +56,7 @@ future_covariates = create_perfect_forecasts_from_covariates(
 )
 
 dataset = Dataset(
-    target=data['target_column'],
+    target=target,
     past_covariates=past_covariates,
     future_covariates=future_covariates,
 )
@@ -66,13 +66,14 @@ The package integrates with the HuggingFace Dataset ['attila-balint-kul/electric
 To use this, just download all the files from the data folder to your computer.
 
 ```python
-from enfobench.dataset import Dataset, DemandDataset
+from enfobench import Dataset
+from enfobench.datasets import ElectricityDemandDataset
 
 # Load the dataset from the folder that you downloaded the files to.
-ds = DemandDataset("/path/to/the/dataset/folder/that/contains/all/subsets")
+ds = ElectricityDemandDataset("/path/to/the/dataset/folder/that/contains/all/subsets")
 
 # List all meter ids
-ds.metadata_subset.list_unique_ids()
+ds.list_unique_ids()
 
 # Get dataset for a specific meter id
 target, past_covariates, metadata = ds.get_data_by_unique_id("unique_id_of_the_meter")
@@ -101,8 +102,8 @@ model = MyModel()
 cv_results = cross_validate(
     model,
     dataset,
-    start_date=pd.Timestamp("2018-01-01"),
-    end_date=pd.Timestamp("2018-01-31"),
+    start_date=pd.Timestamp("2018-01-01T00:00:00"),
+    end_date=pd.Timestamp("2018-01-31T00:00:00"),
     horizon=pd.Timedelta("24 hours"),
     step=pd.Timedelta("1 day"),
 )
@@ -153,7 +154,7 @@ metrics = evaluate_metrics(
 )
 ```
 
-In order to serve your model behind an API, you can use the built in server factory.
+In order to serve your model behind an API, you can use the built-in server factory.
 
 ```python
 import uvicorn
diff --git a/pyproject.toml b/pyproject.toml
index 5d83373..0704c12 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,7 +93,7 @@ python = ["3.10", "3.11"]
 [tool.hatch.envs.lint]
 detached = true
 dependencies = [
-  "black>=23.1.0",
+  "black[jupyter]>=23.1.0",
   "mypy>=1.6.0",
   "ruff>=0.1.5",
 ]
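
For anyone updating downstream code, the net API change in this diff is: `Dataset` now imports from the package root, `DemandDataset` becomes `ElectricityDemandDataset` under `enfobench.datasets`, the helper module moves from `enfobench.dataset.utils` to `enfobench.datasets.utils`, and `list_unique_ids()` is called on the dataset object directly rather than on `metadata_subset`. Below is a minimal migration sketch using only the calls that appear in this diff; the folder path is the docs' own placeholder.

```python
# New-style imports introduced by this diff
# (previously: from enfobench.dataset import Dataset, DemandDataset).
from enfobench import Dataset
from enfobench.datasets import ElectricityDemandDataset

# Point the dataset at the folder holding the downloaded subsets
# (demand.parquet, metadata.parquet, weather.parquet).
ds = ElectricityDemandDataset("/path/to/the/dataset/folder/that/contains/all/subsets")

# list_unique_ids() now lives on the dataset object itself
# (previously: ds.metadata_subset.list_unique_ids()).
unique_id = ds.list_unique_ids()[0]

# Unchanged by this diff: fetch the target series, past covariates,
# and metadata for one meter, ready to wrap in a Dataset.
target, past_covariates, metadata = ds.get_data_by_unique_id(unique_id)
```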