Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/run-forecast-unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11"]

steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.pull_request.head.sha }}

- uses: ./.github/workflows/create-more-space
name: "Create more disk space"

- uses: actions/setup-python@v5
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/run-operators-unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.10", "3.11"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion ads/opctl/operator/lowcode/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def get_frequency_of_datetime(dt_col: pd.Series, ignore_duplicates=True):
str Pandas Datetime Frequency
"""
s = pd.Series(dt_col).drop_duplicates() if ignore_duplicates else dt_col
return pd.infer_freq(s)
return pd.infer_freq(s) or pd.infer_freq(s[-5:])


def human_time_friendly(seconds):
Expand Down
1 change: 1 addition & 0 deletions ads/opctl/operator/lowcode/forecast/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class SupportedModels(ExtendedEnum):
Arima = "arima"
NeuralProphet = "neuralprophet"
LGBForecast = "lgbforecast"
XGBForecast = "xgbforecast"
AutoMLX = "automlx"
Theta = "theta"
ETSForecaster = "ets"
Expand Down
4 changes: 2 additions & 2 deletions ads/opctl/operator/lowcode/forecast/model/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ def get_path(filename):
results.set_global_explanations(global_expl_rounded)
else:
logger.warning(
f"Attempted to generate global explanations for the {self.spec.global_explanation_filename} file, but an issue occured in formatting the explanations."
f"Attempted to generate global explanations for the {self.spec.global_explanation_filename} file, but an issue occurred in formatting the explanations."
)

if not self.formatted_local_explanation.empty:
Expand All @@ -634,7 +634,7 @@ def get_path(filename):
results.set_local_explanations(local_expl_rounded)
else:
logger.warning(
f"Attempted to generate local explanations for the {self.spec.local_explanation_filename} file, but an issue occured in formatting the explanations."
f"Attempted to generate local explanations for the {self.spec.local_explanation_filename} file, but an issue occurred in formatting the explanations."
)
except AttributeError as e:
logger.warning(
Expand Down
6 changes: 4 additions & 2 deletions ads/opctl/operator/lowcode/forecast/model/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
from .autots import AutoTSOperatorModel
from .base_model import ForecastOperatorBaseModel
from .forecast_datasets import ForecastDatasets
from .ml_forecast import MLForecastOperatorModel
from .lgbforecast import LGBForecastOperatorModel
from .neuralprophet import NeuralProphetOperatorModel
from .prophet import ProphetOperatorModel
from .xgbforecast import XGBForecastOperatorModel
from .theta import ThetaOperatorModel
from .ets import ETSOperatorModel

Expand All @@ -45,7 +46,8 @@ class ForecastOperatorModelFactory:
SupportedModels.Prophet: ProphetOperatorModel,
SupportedModels.Arima: ArimaOperatorModel,
SupportedModels.NeuralProphet: NeuralProphetOperatorModel,
SupportedModels.LGBForecast: MLForecastOperatorModel,
SupportedModels.LGBForecast: LGBForecastOperatorModel,
SupportedModels.XGBForecast: XGBForecastOperatorModel,
SupportedModels.AutoMLX: AutoMLXOperatorModel,
SupportedModels.AutoTS: AutoTSOperatorModel,
SupportedModels.Theta: ThetaOperatorModel,
Expand Down
10 changes: 6 additions & 4 deletions ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,19 @@ class HistoricalData(AbstractData):
def __init__(self, spec, historical_data=None, subset=None):
super().__init__(spec=spec, name="historical_data", data=historical_data, subset=subset)
self.subset = subset
self.freq = None
self.freq = self._infer_frequency()

def _ingest_data(self, spec):
def _infer_frequency(self):
try:
self.freq = get_frequency_of_datetime(self.data.index.get_level_values(0))
return get_frequency_of_datetime(self.data.index.get_level_values(0))
except TypeError as e:
logger.warning(
f"Error determining frequency: {e.args}. Setting Frequency to None"
)
logger.debug(f"Full traceback: {e}")
self.freq = None
return None

def _ingest_data(self, spec):
self._verify_dt_col(spec)
super()._ingest_data(spec)

Expand Down
181 changes: 181 additions & 0 deletions ads/opctl/operator/lowcode/forecast/model/lgbforecast.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
#!/usr/bin/env python

# Copyright (c) 2024 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import logging
import traceback

import pandas as pd

from ads.common.decorator import runtime_dependency
from ads.opctl import logger
from .forecast_datasets import ForecastDatasets, ForecastOutput
from .ml_forecast import MLForecastBaseModel
from ..const import ForecastOutputColumns, SupportedModels
from ..operator_config import ForecastOperatorConfig


class LGBForecastOperatorModel(MLForecastBaseModel):
    """Forecast operator model backed by LightGBM regressors run through MLForecast.

    Each training run fits three ``lightgbm.LGBMRegressor`` models inside a
    single ``MLForecast`` object: a point forecast ("forecast") and two
    quantile regressors ("upper" / "lower") that provide the prediction
    interval bounds.
    """

    def __init__(self, config: ForecastOperatorConfig, datasets: ForecastDatasets):
        super().__init__(config=config, datasets=datasets)

    def get_model_kwargs(self):
        """
        Returns the model parameters.

        The quantiles used for the interval bounds are derived from
        ``spec.confidence_interval_width`` (centered on the median, rounded
        to two decimals) and stored under ``"lower_quantile"`` and
        ``"upper_quantile"``.

        NOTE: the returned dict is ``self.spec.model_kwargs`` itself, updated
        in place (no copy is made).
        """
        model_kwargs = self.spec.model_kwargs

        half_width = self.spec.confidence_interval_width / 2
        model_kwargs["lower_quantile"] = round(0.5 - half_width, 2)
        model_kwargs["upper_quantile"] = round(0.5 + half_width, 2)
        return model_kwargs

    def preprocess(self, df, series_id):
        """No per-series preprocessing is performed by this model."""
        pass

    @runtime_dependency(
        module="mlforecast",
        err_msg="MLForecast is not installed, please install it with 'pip install mlforecast'",
    )
    @runtime_dependency(
        module="lightgbm",
        err_msg="lightgbm is not installed, please install it with 'pip install lightgbm'",
    )
    def _train_model(self, data_train, data_test, model_kwargs):
        """Fit the LightGBM models and populate the forecast output.

        Parameters
        ----------
        data_train : pd.DataFrame
            Training data. The series-id column plus every non-``object``
            column is passed to ``MLForecast.fit``.
        data_test : pd.DataFrame
            Future values of the model columns, used as ``X_df`` at
            prediction time. Rows missing for the horizon are added via
            ``get_missing_future`` and remaining NaNs are filled with 0.
        model_kwargs : dict
            Model parameters; must contain ``"upper_quantile"`` and
            ``"lower_quantile"``. Optional keys read here: ``"verbosity"``,
            ``"num_leaves"``, ``"recursive_models"``, ``"static_features"``.

        Raises
        ------
        Exception
            Any error raised during training is recorded in
            ``self.errors_dict``, logged, and re-raised.
        """
        import lightgbm as lgb
        from mlforecast import MLForecast

        try:
            lgb_params = {
                "verbosity": model_kwargs.get("verbosity", -1),
                "num_leaves": model_kwargs.get("num_leaves", 512),
            }

            data_freq = self.datasets.get_datetime_frequency()

            additional_data_params = self.set_model_config(data_freq, model_kwargs)

            fcst = MLForecast(
                models={
                    "forecast": lgb.LGBMRegressor(**lgb_params),
                    "upper": lgb.LGBMRegressor(
                        **lgb_params,
                        objective="quantile",
                        alpha=model_kwargs["upper_quantile"],
                    ),
                    "lower": lgb.LGBMRegressor(
                        **lgb_params,
                        objective="quantile",
                        alpha=model_kwargs["lower_quantile"],
                    ),
                },
                freq=data_freq,
                date_features=["year", "month", "day", "dayofweek", "dayofyear"],
                **additional_data_params,
            )

            # Any value other than the literal False makes fit() receive
            # max_horizon=spec.horizon; False leaves max_horizon unset (None).
            recursive_models = model_kwargs.get("recursive_models", False)
            max_horizon = None if recursive_models is False else self.spec.horizon

            series_col = ForecastOutputColumns.SERIES
            self.model_columns = [series_col] + data_train.select_dtypes(
                exclude=["object"]
            ).columns.to_list()
            fcst.fit(
                data_train[self.model_columns],
                static_features=model_kwargs.get("static_features", []),
                id_col=series_col,
                time_col=self.date_col,
                target_col=self.spec.target_column,
                fitted=True,
                max_horizon=max_horizon,
            )

            future_features = data_test[self.model_columns]
            self.outputs = fcst.predict(
                h=self.spec.horizon,
                X_df=pd.concat(
                    [
                        future_features,
                        fcst.get_missing_future(
                            h=self.spec.horizon, X_df=future_features
                        ),
                    ],
                    axis=0,
                    ignore_index=True,
                ).fillna(0),
            )
            self.fitted_values = fcst.forecast_fitted_values()

            # The fitted model (and therefore its parameters) is shared by
            # all series, so resolve it once instead of once per series.
            # models_["forecast"] may be a list of models; the first is used.
            fitted_forecast = fcst.models_["forecast"]
            one_step_model = (
                fitted_forecast[0]
                if isinstance(fitted_forecast, list)
                else fitted_forecast
            )
            shared_parameters = {
                "framework": SupportedModels.LGBForecast,
                **lgb_params,
                **one_step_model.get_params(),
            }

            for s_id in self.datasets.list_series_ids():
                self.forecast_output.init_series_output(
                    series_id=s_id,
                    data_at_series=self.datasets.get_data_at_series(s_id),
                )

                # Filter each frame once per series rather than once per column.
                series_fitted = self.fitted_values[
                    self.fitted_values[series_col] == s_id
                ]
                series_outputs = self.outputs[self.outputs[series_col] == s_id]

                self.forecast_output.populate_series_output(
                    series_id=s_id,
                    fit_val=series_fitted.forecast.values,
                    forecast_val=series_outputs.forecast.values,
                    upper_bound=series_outputs.upper.values,
                    lower_bound=series_outputs.lower.values,
                )

                # Separate dict per series so entries are not aliased.
                self.model_parameters[s_id] = dict(shared_parameters)

            logger.debug("===========Done===========")

        except Exception as e:
            self.errors_dict[self.spec.model] = {
                "model_name": self.spec.model,
                "error": str(e),
                "error_trace": traceback.format_exc(),
            }
            logger.warning(f"Encountered Error: {e}. Skipping.")
            logger.warning(traceback.format_exc())
            # Bare raise re-raises the active exception with its traceback intact.
            raise

    def _generate_report(self):
        """
        Generates the report for the model

        Returns
        -------
        tuple
            ``(model_description, all_sections)`` where ``model_description``
            is an ``rc.Text`` and ``all_sections`` is a list holding the
            parameter heading block and the parameter table.
        """
        import report_creator as rc

        logging.getLogger("report_creator").setLevel(logging.WARNING)

        # Section 2: LGBForecast Model Parameters
        sec2_text = rc.Block(
            rc.Heading("LGBForecast Model Parameters", level=2),
            rc.Text("These are the parameters used for the LGBForecast model."),
        )

        # Parameters of the first recorded series are shown; an empty table is
        # rendered when nothing was recorded instead of leaking StopIteration.
        parameters = next(iter(self.model_parameters.values()), {})
        sec_2 = rc.Html(
            pd.DataFrame(list(parameters.items())).to_html(index=False, header=False),
        )

        all_sections = [sec2_text, sec_2]
        # Adjacent literals are concatenated verbatim, so each fragment must
        # carry its own trailing space to keep sentences separated.
        model_description = rc.Text(
            "LGBForecast uses mlforecast framework to perform time series forecasting using machine learning models "
            "with the option to scale to massive amounts of data using remote clusters. "
            "Fastest implementations of feature engineering for time series forecasting in Python. "
            "Support for exogenous variables and static covariates."
        )

        return model_description, all_sections
Loading
Loading