diff --git a/docs/architecture_methodology_components.rst b/docs/architecture_methodology_components.rst index a097900c1..d8576ee37 100644 --- a/docs/architecture_methodology_components.rst +++ b/docs/architecture_methodology_components.rst @@ -33,7 +33,6 @@ OpenSTEF contains: * **Machine learning**: is called by pipelines to perform training, forecasting, or evaluation based on the configuration from the prediction job (e.g. train an XGB quantile model). * **Model storage**: is called by pipelines to store or fetch trained machine learning model with MLFlow (e.g. store model locally in disk/database/s3_bucket on AWS). * **Post processing**: is called by pipelines to post process forecasting (e.g. combine forecast dataframe with extra configuration information from prediction job or split load forecast into solar, wind, and energy usage forecast). -* **Post processing**: is called by pipelines to post process forecasting (e.g. combine forecast dataframe with extra configuration information from prediction job or split load forecast into solar, wind, and energy usage forecast). Tasks are provided in a separate Python package called `openstef-dbc <https://pypi.org/project/openstef-dbc/>`_. If you need to use tasks, the openstef-dbc package is required in order to interface to databases for reading/writing. diff --git a/openstef/feature_engineering/apply_features.py b/openstef/feature_engineering/apply_features.py index 7e901fffc..3e9de085d 100644 --- a/openstef/feature_engineering/apply_features.py +++ b/openstef/feature_engineering/apply_features.py @@ -40,13 +40,14 @@ def apply_features( pj: PredictionJobDataClass = None, feature_names: list[str] = None, horizon: float = 24.0, + years: list[int] | None = None, ) -> pd.DataFrame: """Applies the feature functions defined in ``feature_functions.py`` and returns the complete dataframe. Features requiring more recent label-data are omitted. .. note:: - For the time deriven features only the onces in the features list will be added. 
But for the weather features all will be added at present. + For the time derived features only the ones in the features list will be added. But for the weather features all will be added at present. These unrequested additional features have to be filtered out later. Args: @@ -56,8 +57,9 @@ def apply_features( columns=[label, predictor_1,..., predictor_n] ) pj (PredictionJobDataClass): Prediction job. - feature_names (list[str]): list of reuqested features + feature_names (list[str]): list of requested features horizon (float): Forecast horizon limit in hours. + years (list[int] | None): years for which to create holiday features. Returns: pd.DataFrame(index = datetime, columns = [label, predictor_1,..., predictor_n, feature_1, ..., feature_m]) @@ -100,7 +102,7 @@ def apply_features( # Get holiday feature functions feature_functions.update( - generate_holiday_feature_functions(country_code=country_code) + generate_holiday_feature_functions(country_code=country_code, years=years) ) # Add the features to the dataframe using previously defined feature functions diff --git a/openstef/feature_engineering/holiday_features.py b/openstef/feature_engineering/holiday_features.py index 53260c57d..c4315bf08 100644 --- a/openstef/feature_engineering/holiday_features.py +++ b/openstef/feature_engineering/holiday_features.py @@ -15,7 +15,7 @@ def generate_holiday_feature_functions( country_code: str = "NL", - years: list = None, + years: list[int] | None = None, path_to_school_holidays_csv: str = HOLIDAY_CSV_PATH, ) -> dict: """Generates functions for creating holiday feature. @@ -48,9 +48,11 @@ def generate_holiday_feature_functions( The 'Brugdagen' are updated untill dec 2020. (Generated using agenda) Args: - country: Country for which to create holiday features. - years: years for which to create holiday features. + country_code: Country for which to create holiday features. + years: years for which to create holiday features. 
If None, + the last 4 years, the current and next year are used. path_to_school_holidays_csv: Filepath to csv with school holidays. + NOTE: the Dutch holidays csv file only contains data up to January 2026. Returns: diff --git a/openstef/model/fallback.py b/openstef/model/fallback.py index 924e091f1..68ab91701 100644 --- a/openstef/model/fallback.py +++ b/openstef/model/fallback.py @@ -65,7 +65,7 @@ def generate_fallback( .merge( highest_daily_loadprofile, left_on="time", right_on="time", how="outer" ) - .set_index("index") + .set_index(forecast.index) ) # Rename so column is called forecast diff --git a/setup.py b/setup.py index 1a37b2cd3..cbe4217f4 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ def read_long_description_from_readme(): setup( name="openstef", - version="3.4.48", + version="3.4.50", packages=find_packages(include=["openstef", "openstef.*"]), description="Open short term energy forecaster", long_description=read_long_description_from_readme(), diff --git a/test/unit/feature_engineering/test_apply_features.py b/test/unit/feature_engineering/test_apply_features.py index a35118544..5a8c2c56c 100644 --- a/test/unit/feature_engineering/test_apply_features.py +++ b/test/unit/feature_engineering/test_apply_features.py @@ -198,7 +198,7 @@ def test_apply_holiday_features(self): } input_data_with_features = apply_features.apply_features( - pj=pj, data=input_data, horizon=24 + pj=pj, data=input_data, horizon=24, years=[2020, 2021, 2022, 2023] ) expected = TestData.load("../data/input_data_with_holiday_features.csv")