diff --git a/.github/workflows/run-forecast-unit-tests.yml b/.github/workflows/run-forecast-unit-tests.yml index 4a12deb51..5f2c4a3a6 100644 --- a/.github/workflows/run-forecast-unit-tests.yml +++ b/.github/workflows/run-forecast-unit-tests.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v4 @@ -35,6 +35,8 @@ jobs: fetch-depth: 0 ref: ${{ github.event.pull_request.head.sha }} + - uses: ./.github/workflows/create-more-space + name: "Create more disk space" - uses: actions/setup-python@v5 with: diff --git a/.github/workflows/run-operators-unit-tests.yml b/.github/workflows/run-operators-unit-tests.yml index 239ee56c5..f8c34b68f 100644 --- a/.github/workflows/run-operators-unit-tests.yml +++ b/.github/workflows/run-operators-unit-tests.yml @@ -27,7 +27,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v4 diff --git a/ads/opctl/operator/lowcode/common/utils.py b/ads/opctl/operator/lowcode/common/utils.py index 15405c59f..38160063a 100644 --- a/ads/opctl/operator/lowcode/common/utils.py +++ b/ads/opctl/operator/lowcode/common/utils.py @@ -327,7 +327,7 @@ def get_frequency_of_datetime(dt_col: pd.Series, ignore_duplicates=True): str Pandas Datetime Frequency """ s = pd.Series(dt_col).drop_duplicates() if ignore_duplicates else dt_col - return pd.infer_freq(s) + return pd.infer_freq(s) or pd.infer_freq(s[-5:]) def human_time_friendly(seconds): diff --git a/ads/opctl/operator/lowcode/forecast/const.py b/ads/opctl/operator/lowcode/forecast/const.py index 077cb49ef..7e982b084 100644 --- a/ads/opctl/operator/lowcode/forecast/const.py +++ b/ads/opctl/operator/lowcode/forecast/const.py @@ -14,6 +14,7 @@ class SupportedModels(ExtendedEnum): Arima = "arima" NeuralProphet = "neuralprophet" LGBForecast = "lgbforecast" + XGBForecast = "xgbforecast" AutoMLX = 
"automlx" Theta = "theta" ETSForecaster = "ets" diff --git a/ads/opctl/operator/lowcode/forecast/model/base_model.py b/ads/opctl/operator/lowcode/forecast/model/base_model.py index 88d29a539..67c2947f5 100644 --- a/ads/opctl/operator/lowcode/forecast/model/base_model.py +++ b/ads/opctl/operator/lowcode/forecast/model/base_model.py @@ -611,7 +611,7 @@ def get_path(filename): results.set_global_explanations(global_expl_rounded) else: logger.warning( - f"Attempted to generate global explanations for the {self.spec.global_explanation_filename} file, but an issue occured in formatting the explanations." + f"Attempted to generate global explanations for the {self.spec.global_explanation_filename} file, but an issue occurred in formatting the explanations." ) if not self.formatted_local_explanation.empty: @@ -634,7 +634,7 @@ def get_path(filename): results.set_local_explanations(local_expl_rounded) else: logger.warning( - f"Attempted to generate local explanations for the {self.spec.local_explanation_filename} file, but an issue occured in formatting the explanations." + f"Attempted to generate local explanations for the {self.spec.local_explanation_filename} file, but an issue occurred in formatting the explanations." 
) except AttributeError as e: logger.warning( diff --git a/ads/opctl/operator/lowcode/forecast/model/factory.py b/ads/opctl/operator/lowcode/forecast/model/factory.py index fc834e501..dc36476c8 100644 --- a/ads/opctl/operator/lowcode/forecast/model/factory.py +++ b/ads/opctl/operator/lowcode/forecast/model/factory.py @@ -20,9 +20,10 @@ from .autots import AutoTSOperatorModel from .base_model import ForecastOperatorBaseModel from .forecast_datasets import ForecastDatasets -from .ml_forecast import MLForecastOperatorModel +from .lgbforecast import LGBForecastOperatorModel from .neuralprophet import NeuralProphetOperatorModel from .prophet import ProphetOperatorModel +from .xgbforecast import XGBForecastOperatorModel from .theta import ThetaOperatorModel from .ets import ETSOperatorModel @@ -45,7 +46,8 @@ class ForecastOperatorModelFactory: SupportedModels.Prophet: ProphetOperatorModel, SupportedModels.Arima: ArimaOperatorModel, SupportedModels.NeuralProphet: NeuralProphetOperatorModel, - SupportedModels.LGBForecast: MLForecastOperatorModel, + SupportedModels.LGBForecast: LGBForecastOperatorModel, + SupportedModels.XGBForecast: XGBForecastOperatorModel, SupportedModels.AutoMLX: AutoMLXOperatorModel, SupportedModels.AutoTS: AutoTSOperatorModel, SupportedModels.Theta: ThetaOperatorModel, diff --git a/ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py b/ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py index 511f0f187..b463ebf7d 100644 --- a/ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +++ b/ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py @@ -26,17 +26,19 @@ class HistoricalData(AbstractData): def __init__(self, spec, historical_data=None, subset=None): super().__init__(spec=spec, name="historical_data", data=historical_data, subset=subset) self.subset = subset - self.freq = None + self.freq = self._infer_frequency() - def _ingest_data(self, spec): + def _infer_frequency(self): try: - self.freq = 
get_frequency_of_datetime(self.data.index.get_level_values(0)) + return get_frequency_of_datetime(self.data.index.get_level_values(0)) except TypeError as e: logger.warning( f"Error determining frequency: {e.args}. Setting Frequency to None" ) logger.debug(f"Full traceback: {e}") - self.freq = None + return None + + def _ingest_data(self, spec): self._verify_dt_col(spec) super()._ingest_data(spec) diff --git a/ads/opctl/operator/lowcode/forecast/model/lgbforecast.py b/ads/opctl/operator/lowcode/forecast/model/lgbforecast.py new file mode 100644 index 000000000..c2688a1b2 --- /dev/null +++ b/ads/opctl/operator/lowcode/forecast/model/lgbforecast.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python + +# Copyright (c) 2024 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import logging +import traceback + +import pandas as pd + +from ads.common.decorator import runtime_dependency +from ads.opctl import logger +from .forecast_datasets import ForecastDatasets, ForecastOutput +from .ml_forecast import MLForecastBaseModel +from ..const import ForecastOutputColumns, SupportedModels +from ..operator_config import ForecastOperatorConfig + + +class LGBForecastOperatorModel(MLForecastBaseModel): + """Class representing MLForecast operator model.""" + + def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets): + super().__init__(config=config, datasets=datasets) + + def get_model_kwargs(self): + """ + Returns the model parameters. 
+ """ + model_kwargs = self.spec.model_kwargs + + upper_quantile = round(0.5 + self.spec.confidence_interval_width / 2, 2) + lower_quantile = round(0.5 - self.spec.confidence_interval_width / 2, 2) + + model_kwargs["lower_quantile"] = lower_quantile + model_kwargs["upper_quantile"] = upper_quantile + return model_kwargs + + + def preprocess(self, df, series_id): + pass + + @runtime_dependency( + module="mlforecast", + err_msg="MLForecast is not installed, please install it with 'pip install mlforecast'", + ) + @runtime_dependency( + module="lightgbm", + err_msg="lightgbm is not installed, please install it with 'pip install lightgbm'", + ) + def _train_model(self, data_train, data_test, model_kwargs): + import lightgbm as lgb + from mlforecast import MLForecast + try: + + lgb_params = { + "verbosity": model_kwargs.get("verbosity", -1), + "num_leaves": model_kwargs.get("num_leaves", 512), + } + + data_freq = self.datasets.get_datetime_frequency() + + additional_data_params = self.set_model_config(data_freq, model_kwargs) + + fcst = MLForecast( + models={ + "forecast": lgb.LGBMRegressor(**lgb_params), + "upper": lgb.LGBMRegressor( + **lgb_params, + objective="quantile", + alpha=model_kwargs["upper_quantile"], + ), + "lower": lgb.LGBMRegressor( + **lgb_params, + objective="quantile", + alpha=model_kwargs["lower_quantile"], + ), + }, + freq=data_freq, + date_features=['year', 'month', 'day', 'dayofweek', 'dayofyear'], + **additional_data_params, + ) + + num_models = model_kwargs.get("recursive_models", False) + + self.model_columns = [ + ForecastOutputColumns.SERIES + ] + data_train.select_dtypes(exclude=["object"]).columns.to_list() + fcst.fit( + data_train[self.model_columns], + static_features=model_kwargs.get("static_features", []), + id_col=ForecastOutputColumns.SERIES, + time_col=self.date_col, + target_col=self.spec.target_column, + fitted=True, + max_horizon=None if num_models is False else self.spec.horizon, + ) + + self.outputs = fcst.predict( + 
h=self.spec.horizon, + X_df=pd.concat( + [ + data_test[self.model_columns], + fcst.get_missing_future( + h=self.spec.horizon, X_df=data_test[self.model_columns] + ), + ], + axis=0, + ignore_index=True, + ).fillna(0), + ) + self.fitted_values = fcst.forecast_fitted_values() + for s_id in self.datasets.list_series_ids(): + self.forecast_output.init_series_output( + series_id=s_id, + data_at_series=self.datasets.get_data_at_series(s_id), + ) + + self.forecast_output.populate_series_output( + series_id=s_id, + fit_val=self.fitted_values[ + self.fitted_values[ForecastOutputColumns.SERIES] == s_id + ].forecast.values, + forecast_val=self.outputs[ + self.outputs[ForecastOutputColumns.SERIES] == s_id + ].forecast.values, + upper_bound=self.outputs[ + self.outputs[ForecastOutputColumns.SERIES] == s_id + ].upper.values, + lower_bound=self.outputs[ + self.outputs[ForecastOutputColumns.SERIES] == s_id + ].lower.values, + ) + + one_step_model = fcst.models_['forecast'][0] if isinstance(fcst.models_['forecast'], list) else \ + fcst.models_['forecast'] + self.model_parameters[s_id] = { + "framework": SupportedModels.LGBForecast, + **lgb_params, + **one_step_model.get_params(), + } + + logger.debug("===========Done===========") + + except Exception as e: + self.errors_dict[self.spec.model] = { + "model_name": self.spec.model, + "error": str(e), + "error_trace": traceback.format_exc(), + } + logger.warning(f"Encountered Error: {e}. 
Skipping.") + logger.warning(traceback.format_exc()) + raise e + + + def _generate_report(self): + """ + Generates the report for the model + """ + import report_creator as rc + + logging.getLogger("report_creator").setLevel(logging.WARNING) + + # Section 2: LGBForecast Model Parameters + sec2_text = rc.Block( + rc.Heading("LGBForecast Model Parameters", level=2), + rc.Text("These are the parameters used for the LGBForecast model."), + ) + + k, v = next(iter(self.model_parameters.items())) + sec_2 = rc.Html( + pd.DataFrame(list(v.items())).to_html(index=False, header=False), + ) + + all_sections = [sec2_text, sec_2] + model_description = rc.Text( + "LGBForecast uses mlforecast framework to perform time series forecasting using machine learning models" + "with the option to scale to massive amounts of data using remote clusters." + "Fastest implementations of feature engineering for time series forecasting in Python." + "Support for exogenous variables and static covariates." + ) + + return model_description, all_sections diff --git a/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py b/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py index 29a832a7f..b88ecbcfb 100644 --- a/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +++ b/ads/opctl/operator/lowcode/forecast/model/ml_forecast.py @@ -2,24 +2,19 @@ # Copyright (c) 2024 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -import logging -import traceback + +from abc import ABC, abstractmethod import pandas as pd from ads.common.decorator import runtime_dependency -from ads.opctl import logger -from ads.opctl.operator.lowcode.forecast.utils import _select_plot_list - -from ..const import ForecastOutputColumns, SupportedModels -from ..operator_config import ForecastOperatorConfig from .base_model import ForecastOperatorBaseModel from .forecast_datasets import ForecastDatasets, ForecastOutput +from ..const import ForecastOutputColumns +from ..operator_config import ForecastOperatorConfig -class MLForecastOperatorModel(ForecastOperatorBaseModel): - """Class representing MLForecast operator model.""" - +class MLForecastBaseModel(ForecastOperatorBaseModel, ABC): def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets): super().__init__(config=config, datasets=datasets) self.global_explanation = {} @@ -27,208 +22,67 @@ def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets): self.formatted_global_explanation = None self.formatted_local_explanation = None self.date_col = config.spec.datetime_column.name - - def set_kwargs(self): - """ - Returns the model parameters. 
- """ - model_kwargs = self.spec.model_kwargs - - uppper_quantile = round(0.5 + self.spec.confidence_interval_width / 2, 2) - lower_quantile = round(0.5 - self.spec.confidence_interval_width / 2, 2) - - model_kwargs["lower_quantile"] = lower_quantile - model_kwargs["uppper_quantile"] = uppper_quantile - return model_kwargs - - - def preprocess(self, df, series_id): - pass + self.data_train = self.datasets.get_all_data_long(include_horizon=False) + self.data_test = self.datasets.get_all_data_long_forecast_horizon() @runtime_dependency( module="mlforecast", err_msg="MLForecast is not installed, please install it with 'pip install mlforecast'", ) - @runtime_dependency( - module="lightgbm", - err_msg="lightgbm is not installed, please install it with 'pip install lightgbm'", - ) - def _train_model(self, data_train, data_test, model_kwargs): - import lightgbm as lgb - from mlforecast import MLForecast + def set_model_config(self, freq, model_kwargs): from mlforecast.lag_transforms import ExpandingMean, RollingMean from mlforecast.target_transforms import Differences - - def set_model_config(freq): - seasonal_map = { - "H": 24, - "D": 7, - "W": 52, - "M": 12, - "Q": 4, - } - sp = seasonal_map.get(freq.upper(), 7) - series_lengths = data_train.groupby(ForecastOutputColumns.SERIES).size() - min_len = series_lengths.min() - max_allowed = min_len - sp - - default_lags = [lag for lag in [1, sp, 2 * sp] if lag <= max_allowed] - lags = model_kwargs.get("lags", default_lags) - - default_roll = 2 * sp - roll = model_kwargs.get("RollingMean", default_roll) - - default_diff = sp if sp <= max_allowed else None - diff = model_kwargs.get("Differences", default_diff) - - return { - "target_transforms": [Differences([diff])], - "lags": lags, - "lag_transforms": { - 1: [ExpandingMean()], - sp: [RollingMean(window_size=roll, min_samples=1)] - } - } - - try: - - lgb_params = { - "verbosity": model_kwargs.get("verbosity", -1), - "num_leaves": model_kwargs.get("num_leaves", 512), + 
seasonal_map = { + "H": 24, + "D": 7, + "W": 52, + "M": 12, + "Q": 4, + } + sp = seasonal_map.get(freq.upper(), 7) + series_lengths = self.data_train.groupby(ForecastOutputColumns.SERIES).size() + min_len = series_lengths.min() + max_allowed = min_len - sp + + default_lags = [lag for lag in [1, sp, 2 * sp] if lag <= max_allowed] + lags = model_kwargs.get("lags", default_lags) + + default_roll = 2 * sp + roll = model_kwargs.get("RollingMean", default_roll) + + default_diff = sp if sp <= max_allowed else None + diff = model_kwargs.get("Differences", default_diff) + + return { + "target_transforms": [Differences([diff])], + "lags": lags, + "lag_transforms": { + 1: [ExpandingMean()], + sp: [RollingMean(window_size=roll, min_samples=1)] } + } - data_freq = pd.infer_freq(data_train[self.date_col].drop_duplicates()) \ - or pd.infer_freq(data_train[self.date_col].drop_duplicates()[-5:]) - - additional_data_params = set_model_config(data_freq) - - fcst = MLForecast( - models={ - "forecast": lgb.LGBMRegressor(**lgb_params), - "upper": lgb.LGBMRegressor( - **lgb_params, - objective="quantile", - alpha=model_kwargs["uppper_quantile"], - ), - "lower": lgb.LGBMRegressor( - **lgb_params, - objective="quantile", - alpha=model_kwargs["lower_quantile"], - ), - }, - freq=data_freq, - date_features=['year', 'month', 'day', 'dayofweek', 'dayofyear'], - **additional_data_params, - ) - - num_models = model_kwargs.get("recursive_models", False) - - self.model_columns = [ - ForecastOutputColumns.SERIES - ] + data_train.select_dtypes(exclude=["object"]).columns.to_list() - fcst.fit( - data_train[self.model_columns], - static_features=model_kwargs.get("static_features", []), - id_col=ForecastOutputColumns.SERIES, - time_col=self.date_col, - target_col=self.spec.target_column, - fitted=True, - max_horizon=None if num_models is False else self.spec.horizon, - ) - - self.outputs = fcst.predict( - h=self.spec.horizon, - X_df=pd.concat( - [ - data_test[self.model_columns], - 
fcst.get_missing_future( - h=self.spec.horizon, X_df=data_test[self.model_columns] - ), - ], - axis=0, - ignore_index=True, - ).fillna(0), - ) - self.fitted_values = fcst.forecast_fitted_values() - for s_id in self.datasets.list_series_ids(): - self.forecast_output.init_series_output( - series_id=s_id, - data_at_series=self.datasets.get_data_at_series(s_id), - ) - - self.forecast_output.populate_series_output( - series_id=s_id, - fit_val=self.fitted_values[ - self.fitted_values[ForecastOutputColumns.SERIES] == s_id - ].forecast.values, - forecast_val=self.outputs[ - self.outputs[ForecastOutputColumns.SERIES] == s_id - ].forecast.values, - upper_bound=self.outputs[ - self.outputs[ForecastOutputColumns.SERIES] == s_id - ].upper.values, - lower_bound=self.outputs[ - self.outputs[ForecastOutputColumns.SERIES] == s_id - ].lower.values, - ) - - self.model_parameters[s_id] = { - "framework": SupportedModels.LGBForecast, - **lgb_params, - **fcst.models_['forecast'].get_params(), - } - - logger.debug("===========Done===========") + @abstractmethod + def _train_model(self, data_train, data_test, model_kwargs) -> pd.DataFrame: + """ + Build the model. + The method that needs to be implemented on the particular model level. + """ - except Exception as e: - self.errors_dict[self.spec.model] = { - "model_name": self.spec.model, - "error": str(e), - "error_trace": traceback.format_exc(), - } - logger.warning(f"Encountered Error: {e}. Skipping.") - logger.warning(traceback.format_exc()) - raise e + @abstractmethod + def get_model_kwargs(self) -> pd.DataFrame: + """ + Build the model. + The method that needs to be implemented on the particular model level. 
+ """ def _build_model(self) -> pd.DataFrame: - data_train = self.datasets.get_all_data_long(include_horizon=False) - data_test = self.datasets.get_all_data_long_forecast_horizon() self.models = {} - model_kwargs = self.set_kwargs() self.forecast_output = ForecastOutput( confidence_interval_width=self.spec.confidence_interval_width, horizon=self.spec.horizon, target_column=self.original_target_column, dt_column=self.date_col, ) - self._train_model(data_train, data_test, model_kwargs) + self._train_model(self.data_train, self.data_test, self.get_model_kwargs()) return self.forecast_output.get_forecast_long() - - def _generate_report(self): - """ - Generates the report for the model - """ - import report_creator as rc - - logging.getLogger("report_creator").setLevel(logging.WARNING) - - # Section 2: LGBForecast Model Parameters - sec2_text = rc.Block( - rc.Heading("LGBForecast Model Parameters", level=2), - rc.Text("These are the parameters used for the LGBForecast model."), - ) - - k, v = next(iter(self.model_parameters.items())) - sec_2 = rc.Html( - pd.DataFrame(list(v.items())).to_html(index=False, header=False), - ) - - all_sections = [sec2_text, sec_2] - model_description = rc.Text( - "LGBForecast uses mlforecast framework to perform time series forecasting using machine learning models" - "with the option to scale to massive amounts of data using remote clusters." - "Fastest implementations of feature engineering for time series forecasting in Python." - "Support for exogenous variables and static covariates." - ) - - return model_description, all_sections diff --git a/ads/opctl/operator/lowcode/forecast/model/xgbforecast.py b/ads/opctl/operator/lowcode/forecast/model/xgbforecast.py new file mode 100644 index 000000000..2bbbda0eb --- /dev/null +++ b/ads/opctl/operator/lowcode/forecast/model/xgbforecast.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python + +# Copyright (c) 2024 Oracle and/or its affiliates. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import logging +import traceback + +import pandas as pd + +from ads.common.decorator import runtime_dependency +from ads.opctl import logger +from ..const import ForecastOutputColumns, SupportedModels +from ..operator_config import ForecastOperatorConfig +from .forecast_datasets import ForecastDatasets, ForecastOutput +from .ml_forecast import MLForecastBaseModel + + +class XGBForecastOperatorModel(MLForecastBaseModel): + """Class representing XGBForecast operator model.""" + + def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets): + super().__init__(config=config, datasets=datasets) + + def get_model_kwargs(self): + """ + Returns the model parameters. + """ + return self.spec.model_kwargs + + @runtime_dependency( + module="mlforecast", + err_msg="MLForecast is not installed, please install it with 'pip install mlforecast'", + ) + @runtime_dependency( + module="xgboost", + err_msg="xgboost is not installed, please install it with 'pip install xgboost'", + ) + def _train_model(self, data_train, data_test, model_kwargs): + try: + xgb_params = { + "verbosity": model_kwargs.get("verbosity", 0), + "num_leaves": model_kwargs.get("num_leaves", 512), + } + from xgboost import XGBRegressor + from mlforecast import MLForecast + from mlforecast.utils import PredictionIntervals + level = int(self.spec.confidence_interval_width * 100) + data_freq = self.datasets.get_datetime_frequency() + additional_data_params = self.set_model_config(data_freq, model_kwargs) + model = XGBRegressor(**xgb_params) + + prediction_intervals = PredictionIntervals( + n_windows=5, + method="conformal_distribution", + ) + + fcst = MLForecast( + models={"xgb": model}, + freq=data_freq, + date_features=['year', 'month', 'day', 'dayofweek', 'dayofyear'], + **additional_data_params, + ) + + num_models = model_kwargs.get("recursive_models", False) + + model_columns = [ + 
ForecastOutputColumns.SERIES + ] + data_train.select_dtypes(exclude=["object"]).columns.to_list() + + fcst.fit( + data_train[model_columns], + prediction_intervals=prediction_intervals, + id_col=ForecastOutputColumns.SERIES, + time_col=self.date_col, + target_col=self.spec.target_column, + fitted=True, + static_features=model_kwargs.get("static_features", []), + max_horizon=None if num_models is False else self.spec.horizon, + ) + + forecast = fcst.predict( + h=self.spec.horizon, + X_df=pd.concat( + [ + data_test[model_columns], + fcst.get_missing_future( + h=self.spec.horizon, X_df=data_test[model_columns] + ), + ], + axis=0, + ignore_index=True, + ).fillna(0), + level=[level] + ) + forecast_col = "xgb" + lower_ci_col = f"{forecast_col}-lo-{level}" + upper_ci_col = f"{forecast_col}-hi-{level}" + self.fitted_values = fcst.forecast_fitted_values() + for s_id in self.datasets.list_series_ids(): + self.forecast_output.init_series_output( + series_id=s_id, + data_at_series=self.datasets.get_data_at_series(s_id), + ) + + self.forecast_output.populate_series_output( + series_id=s_id, + fit_val=self.fitted_values[ + self.fitted_values[ForecastOutputColumns.SERIES] == s_id + ][forecast_col].values, + forecast_val=forecast[ + forecast[ForecastOutputColumns.SERIES] == s_id + ][forecast_col].values, + upper_bound=forecast[ + forecast[ForecastOutputColumns.SERIES] == s_id + ][upper_ci_col].values, + lower_bound=forecast[ + forecast[ForecastOutputColumns.SERIES] == s_id + ][lower_ci_col].values, + ) + + self.model_parameters[s_id] = { + "framework": SupportedModels.XGBForecast, + **xgb_params, + **fcst.models['xgb'].get_params(), + } + + logger.debug("===========Done===========") + + except Exception as e: + self.errors_dict[self.spec.model] = { + "model_name": self.spec.model, + "error": str(e), + "error_trace": traceback.format_exc(), + } + logger.warning(f"Encountered Error: {e}. 
Skipping.") + logger.warning(traceback.format_exc()) + raise e + + def _generate_report(self): + """ + Generates the report for the model + """ + import report_creator as rc + + logging.getLogger("report_creator").setLevel(logging.WARNING) + + # Section 2: xgbForecast Model Parameters + sec2_text = rc.Block( + rc.Heading("XGBForecast Model Parameters", level=2), + rc.Text("These are the parameters used for the XGBForecast model."), + ) + + k, v = next(iter(self.model_parameters.items())) + sec_2 = rc.Html( + pd.DataFrame(list(v.items())).to_html(index=False, header=False), + ) + + all_sections = [sec2_text, sec_2] + model_description = rc.Text( + """ + XGBForecast performs time series forecasting using XGBoost’s XGBRegressor, + It provides fast, optimized implementations of feature engineering for time series forecasting + and supports exogenous variables and static covariates. + """ + ) + + return model_description, all_sections diff --git a/ads/opctl/operator/lowcode/forecast/schema.yaml b/ads/opctl/operator/lowcode/forecast/schema.yaml index 7e90caca7..d90a0aa83 100644 --- a/ads/opctl/operator/lowcode/forecast/schema.yaml +++ b/ads/opctl/operator/lowcode/forecast/schema.yaml @@ -456,6 +456,7 @@ spec: - arima - neuralprophet - lgbforecast + - xgbforecast - automlx - autots - auto-select diff --git a/docs/source/user_guide/operators/forecast_operator/yaml_schema.rst b/docs/source/user_guide/operators/forecast_operator/yaml_schema.rst index 49fbaacae..da24eac67 100644 --- a/docs/source/user_guide/operators/forecast_operator/yaml_schema.rst +++ b/docs/source/user_guide/operators/forecast_operator/yaml_schema.rst @@ -137,7 +137,7 @@ Below is an example of a ``forecast.yaml`` file with every parameter specified: - string - No - prophet - - Model to use. Options: prophet, arima, neuralprophet, theta, ets, automlx, autots, auto-select. + - Model to use. Options: prophet, arima, neuralprophet, theta, ets, lgbforecast, xgbforecast, automlx, autots, auto-select. 
* - model_kwargs - dict diff --git a/pyproject.toml b/pyproject.toml index bf8e3e674..8f4804d36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -184,6 +184,7 @@ forecast = [ "pmdarima", "prophet==1.1.7", "cmdstanpy==1.2.5", + "xgboost", "shap", "sktime", "statsmodels", diff --git a/tests/operators/forecast/test_datasets.py b/tests/operators/forecast/test_datasets.py index 59d93d89d..a1994574e 100644 --- a/tests/operators/forecast/test_datasets.py +++ b/tests/operators/forecast/test_datasets.py @@ -33,6 +33,7 @@ "neuralprophet", "autots", "lgbforecast", + "xgbforecast", "theta", "ets", "auto-select", @@ -170,8 +171,8 @@ def test_load_datasets(model, data_details): yaml_i["spec"]["model_kwargs"] = {"time_budget": 2} if model == "auto-select": yaml_i["spec"]["model_kwargs"] = { - "model_list": ["prophet", "arima"] - } # 'lgbforecast' + "model_list": ["prophet", "xgbforecast", "ets"] + } if dataset_name == f"{DATASET_PREFIX}dataset4.csv": pytest.skip("Skipping dataset4 with auto-select") # todo:// ODSC-58584 @@ -180,6 +181,7 @@ def test_load_datasets(model, data_details): if yaml_i["spec"]["generate_explanations"] and model not in [ "automlx", "lgbforecast", + "xgbforecast", "auto-select", ]: verify_explanations( diff --git a/tests/operators/forecast/test_errors.py b/tests/operators/forecast/test_errors.py index 268057ae7..dad179009 100644 --- a/tests/operators/forecast/test_errors.py +++ b/tests/operators/forecast/test_errors.py @@ -144,6 +144,7 @@ "neuralprophet", "autots", "lgbforecast", + "xgbforecast", "theta", "ets", ] @@ -417,7 +418,7 @@ def test_0_series(operator_setup, model): "local_explanation.csv", "global_explanation.csv", ] - if model in ["autots", "lgbforecast"]: + if model in ["autots", "lgbforecast", "xgbforecast"]: # explanations are not supported for autots or lgbforecast output_files.remove("local_explanation.csv") output_files.remove("global_explanation.csv") @@ -711,7 +712,7 @@ def test_arima_automlx_errors(operator_setup, model): in 
error_content["13"]["model_fitting"]["error"] ), f"Error message mismatch: {error_content}" - if model not in ["autots", "automlx", "lgbforecast"]: + if model not in ["autots", "automlx", "lgbforecast", "xgbforecast"]: if yaml_i["spec"].get("explanations_accuracy_mode") != "AUTOMLX": global_fn = f"{tmpdirname}/results/global_explanation.csv" assert os.path.exists( @@ -818,7 +819,7 @@ def test_date_format(operator_setup, model): @pytest.mark.parametrize("model", MODELS) def test_what_if_analysis(operator_setup, model): os.environ["TEST_MODE"] = "True" - if model in ["auto-select", "lgbforecast", "theta", "ets"]: + if model in ["auto-select", "lgbforecast", "xgbforecast", "theta", "ets"]: pytest.skip("Skipping what-if scenario for auto-select") tmpdirname = operator_setup historical_data_path, additional_data_path = setup_small_rossman()