diff --git a/.github/workflows/build-docs.yaml b/.github/workflows/build-docs.yaml index db72fd8e4..21e14e57b 100644 --- a/.github/workflows/build-docs.yaml +++ b/.github/workflows/build-docs.yaml @@ -30,7 +30,7 @@ jobs: python-version: "3.10" - name: Install dependencies - run: pip install -e '.[all]' lazydocs pyyaml + run: pip install -e '.[all, docs]' # setup quarto for rendering example/tutorial nbs - uses: quarto-dev/quarto-actions/setup@v2 @@ -41,7 +41,7 @@ jobs: run: make all_docs - name: Deploy (Push to main or Pull Request) - if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'pull_request' || (github.event_name == 'workflow_dispatch' && github.event.inputs.environment == 'staging') + if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) || (github.event_name == 'workflow_dispatch' && github.event.inputs.environment == 'staging') uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 with: github_token: ${{ secrets.GITHUB_TOKEN }} @@ -61,7 +61,7 @@ jobs: user_email: 41898282+github-actions[bot]@users.noreply.github.com - name: Trigger mintlify workflow (Push to main or Pull Request) - if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || github.event_name == 'pull_request' || (github.event_name == 'workflow_dispatch' && github.event.inputs.environment == 'staging') + if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository) || (github.event_name == 'workflow_dispatch' && github.event.inputs.environment == 'staging') uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 with: github-token: ${{ secrets.DOCS_WORKFLOW_TOKEN }} diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 158f73ece..06b348a57 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -30,8 +30,10 @@ jobs: [ubuntu-latest, manylinux_x86_64], [ubuntu-latest, manylinux_aarch64], [windows-latest, win_amd64], - [macos-13, macosx_x86_64], + [macos-15, macosx_x86_64], [macos-14, macosx_arm64], + [macos-15, macosx_arm64], + [macos-latest, macosx_arm64], ] steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/Makefile b/Makefile index 688b2367c..4cb39fca1 100644 --- a/Makefile +++ b/Makefile @@ -31,8 +31,7 @@ load_docs_scripts: fi api_docs: - cd python && lazydocs .statsforecast --no-watermark --output-path ../docs - python docs/to_mdx.py + python docs/to_mdx.py docs examples_docs: mkdir -p nbs/_extensions @@ -40,6 +39,7 @@ examples_docs: quarto render nbs/docs --output-dir ../docs/mintlify/ quarto render nbs/src --output-dir ../docs/mintlify/ quarto render nbs/blog --output-dir ../docs/mintlify/ + find docs/mintlify -name "*.mdx" ! 
-name "*.html.mdx" -exec sh -c 'dir=$$(dirname "$$1"); base=$$(basename "$$1" .mdx | tr "[:upper:]" "[:lower:]"); mv "$$1" "$$dir/$$base.html.mdx"' _ {} \; format_docs: # replace _docs with docs @@ -47,8 +47,6 @@ format_docs: bash ./docs-scripts/docs-final-formatting.bash find docs/mintlify -name "*.mdx" -exec sed -i.bak '/^:::/d' {} + && find docs/mintlify -name "*.bak" -delete -# replace <= with \<= - find docs/mintlify -name "*.mdx" -exec sed -i.bak 's/<=/\\<=/g' {} + && find docs/mintlify -name "*.bak" -delete preview_docs: cd docs/mintlify && mintlify dev diff --git a/README.md b/README.md index cf1b88835..ce8dc499a 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ or conda install -c conda-forge statsforecast ``` -Vist our [Installation Guide](https://nixtlaverse.nixtla.io/statsforecast/docs/getting-started/0_Installation) for further instructions. +Vist our [Installation Guide](https://nixtlaverse.nixtla.io/statsforecast/docs/getting-started/installation.html) for further instructions. ## Quick Start @@ -55,9 +55,9 @@ sf.fit(df) sf.predict(h=12, level=[95]) ``` -**Get Started [quick guide](https://nixtlaverse.nixtla.io/statsforecast/docs/getting-started/1_Getting_Started_short)** +**Get Started [quick guide](https://nixtlaverse.nixtla.io/statsforecast/docs/getting-started/getting_started_short.html)** -**Follow this [end-to-end walkthrough](https://nixtlaverse.nixtla.io/statsforecast/docs/getting-started/2_Getting_Started_complete) for best practices.** +**Follow this [end-to-end walkthrough](https://nixtlaverse.nixtla.io/statsforecast/docs/getting-started/getting_started_complete.html) for best practices.** ## Why? @@ -88,19 +88,19 @@ Missing something? Please open an issue or write us in [![Slack](https://img.shi ## Examples and Guides -πŸ“š [End to End Walkthrough](https://nixtlaverse.nixtla.io/statsforecast/docs/getting-started/2_Getting_Started_complete): Model training, evaluation and selection for multiple time series +πŸ“š [End to End Walkthrough](https://nixtlaverse.nixtla.io/statsforecast/docs/getting-started/getting_started_complete.html): Model training, evaluation and selection for multiple time series -πŸ”Ž [Anomaly Detection](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/AnomalyDetection): detect anomalies for time series using in-sample prediction intervals. +πŸ”Ž [Anomaly Detection](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/anomalydetection.html): detect anomalies for time series using in-sample prediction intervals. -πŸ‘©β€πŸ”¬ [Cross Validation](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/CrossValidation): robust model’s performance evaluation. +πŸ‘©β€πŸ”¬ [Cross Validation](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/crossvalidation.html): robust model’s performance evaluation. -❄️ [Multiple Seasonalities](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/MultipleSeasonalities): how to forecast data with multiple seasonalities using an MSTL. +❄️ [Multiple Seasonalities](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/multipleseasonalities.html): how to forecast data with multiple seasonalities using an MSTL. -πŸ”Œ [Predict Demand Peaks](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/ElectricityPeakForecasting): electricity load forecasting for detecting daily peaks and reducing electric bills. 
+πŸ”Œ [Predict Demand Peaks](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/electricitypeakforecasting.html): electricity load forecasting for detecting daily peaks and reducing electric bills. -πŸ“ˆ [Intermittent Demand](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/IntermittentData): forecast series with very few non-zero observations. +πŸ“ˆ [Intermittent Demand](https://nixtlaverse.nixtla.io/statsforecast/docs/tutorials/intermittentdata.html): forecast series with very few non-zero observations. -🌑️ [Exogenous Regressors](https://nixtlaverse.nixtla.io/statsforecast/docs/how-to-guides/Exogenous): like weather or prices +🌑️ [Exogenous Regressors](https://nixtlaverse.nixtla.io/statsforecast/docs/how-to-guides/exogenous.html): like weather or prices ## Models @@ -110,12 +110,12 @@ Automatic forecasting tools search for the best parameters and select the best p |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values |Exogenous features| |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:|:----------------:| -|[AutoARIMA](https://nixtlaverse.nixtla.io/statsforecast/models#class-autoarima)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[AutoETS](https://nixtlaverse.nixtla.io/statsforecast/models#class-autoets)|βœ…|βœ…|βœ…|βœ…|| -|[AutoCES](https://nixtlaverse.nixtla.io/statsforecast/models#class-autoces)|βœ…|βœ…|βœ…|βœ…|| -|[AutoTheta](https://nixtlaverse.nixtla.io/statsforecast/models#class-autotheta)|βœ…|βœ…|βœ…|βœ…|| -|[AutoMFLES](https://nixtlaverse.nixtla.io/statsforecast/models#class-automfles)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[AutoTBATS](https://nixtlaverse.nixtla.io/statsforecast/models#class-autotbats)|βœ…|βœ…|βœ…|βœ…|| +|[AutoARIMA](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autoarima)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[AutoETS](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autoets)|βœ…|βœ…|βœ…|βœ…|| +|[AutoCES](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autoces)|βœ…|βœ…|βœ…|βœ…|| +|[AutoTheta](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autotheta)|βœ…|βœ…|βœ…|βœ…|| +|[AutoMFLES](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#automfles)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[AutoTBATS](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autotbats)|βœ…|βœ…|βœ…|βœ…|| ### ARIMA Family @@ -123,8 +123,8 @@ These models exploit the existing autocorrelations in the time series. 
|Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values |Exogenous features| |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:|:----------------:| -|[ARIMA](https://nixtlaverse.nixtla.io/statsforecast/models#class-arima)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[AutoRegressive](https://nixtlaverse.nixtla.io/statsforecast/models#class-autoregressive)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[ARIMA](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#arima)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[AutoRegressive](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autoregressive)|βœ…|βœ…|βœ…|βœ…|βœ…| ### Theta Family @@ -132,10 +132,10 @@ Fit two theta lines to a deseasonalized time series, using different techniques |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values |Exogenous features| |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:|:----------------:| -|[Theta](https://nixtlaverse.nixtla.io/statsforecast/models#class-theta)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[OptimizedTheta](https://nixtlaverse.nixtla.io/statsforecast/models#class-optimizedtheta)|βœ…|βœ…|βœ…|βœ…|| -|[DynamicTheta](https://nixtlaverse.nixtla.io/statsforecast/models#class-dynamictheta)|βœ…|βœ…|βœ…|βœ…|| -|[DynamicOptimizedTheta](https://nixtlaverse.nixtla.io/statsforecast/models#class-dynamicoptimizedtheta)|βœ…|βœ…|βœ…|βœ…|| +|[Theta](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#theta)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[OptimizedTheta](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#optimizedtheta)|βœ…|βœ…|βœ…|βœ…|| +|[DynamicTheta](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#dynamictheta)|βœ…|βœ…|βœ…|βœ…|| +|[DynamicOptimizedTheta](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#dynamicoptimizedtheta)|βœ…|βœ…|βœ…|βœ…|| ### Multiple Seasonalities @@ -143,9 +143,9 @@ Suited for signals with more than one clear seasonality. Useful for low-frequenc |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values |Exogenous features| |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:|:----------------:| -|[MSTL](https://nixtlaverse.nixtla.io/statsforecast/models#class-mstl)|βœ…|βœ…|βœ…|βœ…|If trend forecaster supports| -|[MFLES](https://nixtlaverse.nixtla.io/statsforecast/models#class-mfles)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[TBATS](https://nixtlaverse.nixtla.io/statsforecast/models#class-tbats)|βœ…|βœ…|βœ…|βœ…|| +|[MSTL](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#mstl)|βœ…|βœ…|βœ…|βœ…|If trend forecaster supports| +|[MFLES](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#mfles)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[TBATS](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#tbats)|βœ…|βœ…|βœ…|βœ…|| ### GARCH and ARCH Models @@ -153,8 +153,8 @@ Suited for modeling time series that exhibit non-constant volatility over time. 
|Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values |Exogenous features| |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:|:----------------:| -|[GARCH](https://nixtlaverse.nixtla.io/statsforecast/models#class-garch)|βœ…|βœ…|βœ…|βœ…|| -|[ARCH](https://nixtlaverse.nixtla.io/statsforecast/models#class-arch)|βœ…|βœ…|βœ…|βœ…|| +|[GARCH](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#garch)|βœ…|βœ…|βœ…|βœ…|| +|[ARCH](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#arch)|βœ…|βœ…|βœ…|βœ…|| ### Baseline Models @@ -162,12 +162,12 @@ Classical models for establishing baseline. |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values |Exogenous features| |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:|:----------------:| -|[HistoricAverage](https://nixtlaverse.nixtla.io/statsforecast/models#class-historicaverage)|βœ…|βœ…|βœ…|βœ…|| -|[Naive](https://nixtlaverse.nixtla.io/statsforecast/models#class-naive)|βœ…|βœ…|βœ…|βœ…|| -|[RandomWalkWithDrift](https://nixtlaverse.nixtla.io/statsforecast/models#class-randomwalkwithdrift)|βœ…|βœ…|βœ…|βœ…|| -|[SeasonalNaive](https://nixtlaverse.nixtla.io/statsforecast/models#class-seasonalnaive)|βœ…|βœ…|βœ…|βœ…|| -|[WindowAverage](https://nixtlaverse.nixtla.io/statsforecast/models#class-windowaverage)|βœ…||||| -|[SeasonalWindowAverage](https://nixtlaverse.nixtla.io/statsforecast/models#class-seasonalwindowaverage)|βœ…||||| +|[HistoricAverage](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#historicaverage)|βœ…|βœ…|βœ…|βœ…|| +|[Naive](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#naive)|βœ…|βœ…|βœ…|βœ…|| +|[RandomWalkWithDrift](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#randomwalkwithdrift)|βœ…|βœ…|βœ…|βœ…|| +|[SeasonalNaive](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#seasonalnaive)|βœ…|βœ…|βœ…|βœ…|| +|[WindowAverage](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#windowaverage)|βœ…||||| +|[SeasonalWindowAverage](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#seasonalwindowaverage)|βœ…||||| ### Exponential Smoothing @@ -175,12 +175,12 @@ Uses a weighted average of all past observations where the weights decrease expo |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values |Exogenous features| |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:|:----------------:| -|[SimpleExponentialSmoothing](https://nixtlaverse.nixtla.io/statsforecast/models#class-simpleexponentialsmoothing)|βœ…||βœ…||| -|[SimpleExponentialSmoothingOptimized](https://nixtlaverse.nixtla.io/statsforecast/models#class-simpleexponentialsmoothingoptimized)|βœ…||βœ…||| -|[SeasonalExponentialSmoothing](https://nixtlaverse.nixtla.io/statsforecast/models#class-seasonalexponentialsmoothing)|βœ…||βœ…||| -|[SeasonalExponentialSmoothingOptimized](https://nixtlaverse.nixtla.io/statsforecast/models#class-seasonalexponentialsmoothingoptimized)|βœ…||βœ…||| -|[Holt](https://nixtlaverse.nixtla.io/statsforecast/models#class-holt)|βœ…|βœ…|βœ…|βœ…|| -|[HoltWinters](https://nixtlaverse.nixtla.io/statsforecast/models#class-holtwinters)|βœ…|βœ…|βœ…|βœ…|| 
+|[SimpleExponentialSmoothing](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#simpleexponentialsmoothing)|βœ…||βœ…||| +|[SimpleExponentialSmoothingOptimized](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#simpleexponentialsmoothingoptimized)|βœ…||βœ…||| +|[SeasonalExponentialSmoothing](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#seasonalexponentialsmoothing)|βœ…||βœ…||| +|[SeasonalExponentialSmoothingOptimized](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#seasonalexponentialsmoothingoptimized)|βœ…||βœ…||| +|[Holt](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#holt)|βœ…|βœ…|βœ…|βœ…|| +|[HoltWinters](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#holtwinters)|βœ…|βœ…|βœ…|βœ…|| ### Sparse or Inttermitent @@ -188,12 +188,12 @@ Suited for series with very few non-zero observations |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values |Exogenous features| |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:|:----------------:| -|[ADIDA](https://nixtlaverse.nixtla.io/statsforecast/models#class-adida)|βœ…||βœ…|βœ…|| -|[CrostonClassic](https://nixtlaverse.nixtla.io/statsforecast/models#class-crostonclassic)|βœ…||βœ…|βœ…|| -|[CrostonOptimized](https://nixtlaverse.nixtla.io/statsforecast/models#class-crostonoptimized)|βœ…||βœ…|βœ…|| -|[CrostonSBA](https://nixtlaverse.nixtla.io/statsforecast/models#class-crostonsba)|βœ…||βœ…|βœ…|| -|[IMAPA](https://nixtlaverse.nixtla.io/statsforecast/models#class-imapa)|βœ…||βœ…|βœ…|| -|[TSB](https://nixtlaverse.nixtla.io/statsforecast/models#class-tsb)|βœ…||βœ…|βœ…|| +|[ADIDA](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#adida)|βœ…||βœ…|βœ…|| +|[CrostonClassic](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#crostonclassic)|βœ…||βœ…|βœ…|| +|[CrostonOptimized](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#crostonoptimized)|βœ…||βœ…|βœ…|| +|[CrostonSBA](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#crostonsba)|βœ…||βœ…|βœ…|| +|[IMAPA](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#imapa)|βœ…||βœ…|βœ…|| +|[TSB](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#tsb)|βœ…||βœ…|βœ…|| ## πŸ”¨ How to contribute diff --git a/docs/mintlify/mint.json b/docs/mintlify/mint.json index 112fb9110..c07a63547 100644 --- a/docs/mintlify/mint.json +++ b/docs/mintlify/mint.json @@ -27,112 +27,111 @@ { "group": "", "pages": [ - "index" + "index.html" ] }, { "group": "Getting Started", "pages": [ - "docs/getting-started/0_Installation", - "docs/getting-started/1_Getting_Started_short", - "docs/getting-started/2_Getting_Started_complete", - "docs/getting-started/3_Getting_Started_complete_polars" + "docs/getting-started/installation.html", + "docs/getting-started/getting_started_short.html", + "docs/getting-started/getting_started_complete.html", + "docs/getting-started/getting_started_complete_polars.html" ] }, { "group": "Tutorials", "pages": [ - "docs/tutorials/AnomalyDetection", - "docs/tutorials/ConformalPrediction", - "docs/tutorials/CrossValidation", - "docs/tutorials/ElectricityLoadForecasting", - "docs/tutorials/ElectricityPeakForecasting", - "docs/tutorials/GARCH_tutorial", - "docs/tutorials/IntermittentData", - "docs/tutorials/MLFlow", - "docs/tutorials/MultipleSeasonalities", - "docs/tutorials/StatisticalNeuralMethods", - "docs/tutorials/UncertaintyIntervals" + 
"docs/tutorials/anomalydetection.html", + "docs/tutorials/conformalprediction.html", + "docs/tutorials/crossvalidation.html", + "docs/tutorials/electricityloadforecasting.html", + "docs/tutorials/electricitypeakforecasting.html", + "docs/tutorials/garch_tutorial.html", + "docs/tutorials/intermittentdata.html", + "docs/tutorials/mlflow.html", + "docs/tutorials/multipleseasonalities.html", + "docs/tutorials/statisticalneuralmethods.html", + "docs/tutorials/uncertaintyintervals.html" ] }, { "group": "How to Guides", "pages": [ - "docs/how-to-guides/00_Automatic_Forecasting", - "docs/how-to-guides/Exogenous", - "docs/how-to-guides/generating_features", - "docs/how-to-guides/sklearn_models", - "docs/how-to-guides/migrating_R", - "docs/how-to-guides/numba_cache" + "docs/how-to-guides/automatic_forecasting.html", + "docs/how-to-guides/exogenous.html", + "docs/how-to-guides/generating_features.html", + "docs/how-to-guides/sklearn_models.html", + "docs/how-to-guides/numba_cache.html" ] }, { "group": "Distributed", "pages": [ - "docs/distributed/dask", - "docs/distributed/ray", - "docs/distributed/spark" + "docs/distributed/dask.html", + "docs/distributed/ray.html", + "docs/distributed/spark.html" ] }, { "group": "Experiments", "pages": [ - "docs/experiments/AmazonStatsForecast", - "docs/experiments/AutoArima_vs_Prophet", - "docs/experiments/ETS_ray_m5", - "docs/experiments/Prophet_spark_m5" + "docs/experiments/amazonstatsforecast.html", + "docs/experiments/autoarima_vs_prophet.html", + "docs/experiments/ets_ray_m5.html", + "docs/experiments/prophet_spark_m5.html" ] }, { "group": "Model References", "pages": [ - "docs/models/ADIDA", - "docs/models/ARCH", - "docs/models/ARIMA", - "docs/models/AutoARIMA", - "docs/models/AutoCES", - "docs/models/AutoETS", - "docs/models/AutoRegressive", - "docs/models/AutoTheta", - "docs/models/CrostonClassic", - "docs/models/CrostonOptimized", - "docs/models/CrostonSBA", - "docs/models/DynamicOptimizedTheta", - "docs/models/DynamicStandardTheta", - "docs/models/GARCH", - "docs/models/Holt", - "docs/models/HoltWinters", - "docs/models/IMAPA", - "docs/models/MFLES", - "docs/models/MultipleSeasonalTrend", - "docs/models/OptimizedTheta", - "docs/models/SeasonalExponentialSmoothing", - "docs/models/SeasonalExponentialSmoothingOptimized", - "docs/models/SimpleExponentialOptimized", - "docs/models/SimpleExponentialSmoothing", - "docs/models/StandardTheta", - "docs/models/TSB" + "docs/models/adida.html", + "docs/models/arch.html", + "docs/models/arima.html", + "docs/models/autoarima.html", + "docs/models/autoces.html", + "docs/models/autoets.html", + "docs/models/autoregressive.html", + "docs/models/autotheta.html", + "docs/models/crostonclassic.html", + "docs/models/crostonoptimized.html", + "docs/models/crostonsba.html", + "docs/models/dynamicoptimizedtheta.html", + "docs/models/dynamicstandardtheta.html", + "docs/models/garch.html", + "docs/models/holt.html", + "docs/models/holtwinters.html", + "docs/models/imapa.html", + "docs/models/mfles.html", + "docs/models/multipleseasonaltrend.html", + "docs/models/optimizedtheta.html", + "docs/models/seasonalexponentialsmoothing.html", + "docs/models/seasonalexponentialsmoothingoptimized.html", + "docs/models/simpleexponentialoptimized.html", + "docs/models/simpleexponentialsmoothing.html", + "docs/models/standardtheta.html", + "docs/models/tsb.html" ] }, { "group": "API Reference", "pages": [ - "core", - "distributed.fugue", - "models", - "src/core/models_intro", - "feature_engineering" + "src/core/core.html", + 
"src/core/distributed.fugue.html", + "src/core/models.html", + "src/core/models_intro.html", + "src/feature_engineering.html" ] }, { "group": "Contributing", "pages": [ - "docs/contribute/contribute", - "docs/contribute/docs", - "docs/contribute/issue-labels", - "docs/contribute/issues", - "docs/contribute/step-by-step", - "docs/contribute/techstack" + "docs/contribute/contribute.html", + "docs/contribute/docs.html", + "docs/contribute/issue-labels.html", + "docs/contribute/issues.html", + "docs/contribute/step-by-step.html", + "docs/contribute/techstack.html" ] } ] diff --git a/docs/src/core/core.html.md b/docs/src/core/core.html.md new file mode 100644 index 000000000..f54f27de8 --- /dev/null +++ b/docs/src/core/core.html.md @@ -0,0 +1,182 @@ +--- +description: >- + Methods for Fit, Predict, Forecast (fast), Cross Validation and plotting +output-file: core.html +title: Core Methods +--- + +The core methods of `StatsForecast` provide a comprehensive interface for fitting, predicting, forecasting, and evaluating statistical forecasting models on large sets of time series. + +## Overview + +The main methods include: + +- `StatsForecast.fit` - Fit statistical models +- `StatsForecast.predict` - Predict using fitted models +- `StatsForecast.forecast` - Memory-efficient predictions without storing models +- `StatsForecast.cross_validation` - Temporal cross-validation +- `StatsForecast.plot` - Visualization of forecasts and historical data + +## StatsForecast Class + +::: statsforecast.core.StatsForecast + options: + show_source: true + heading_level: 3 + members: + - __init__ + - fit + - predict + - fit_predict + - forecast + - cross_validation + - plot + - save + - load + +## Usage Examples + +### Basic Forecasting + +```python +from statsforecast import StatsForecast +from statsforecast.models import AutoARIMA, Naive +from statsforecast.utils import generate_series + +# Generate example data +panel_df = generate_series(n_series=9, equal_ends=False, engine='pandas') + +# Instantiate StatsForecast class +fcst = StatsForecast( + models=[AutoARIMA(), Naive()], + freq='D', + n_jobs=1, + verbose=True +) + +# Efficiently predict +fcsts_df = fcst.forecast(df=panel_df, h=4, fitted=True) +``` + +### Cross-Validation + +```python +from statsforecast import StatsForecast +from statsforecast.models import Naive +from statsforecast.utils import AirPassengersDF as panel_df + +# Instantiate StatsForecast class +fcst = StatsForecast( + models=[Naive()], + freq='D', + n_jobs=1, + verbose=True +) + +# Perform cross-validation +cv_df = fcst.cross_validation(df=panel_df, h=14, n_windows=2) +``` + +### Prediction Intervals + +```python +import pandas as pd +import numpy as np +from statsforecast import StatsForecast +from statsforecast.models import SeasonalNaive, AutoARIMA +from statsforecast.utils import AirPassengers as ap + +# Prepare data +ap_df = pd.DataFrame({'ds': np.arange(ap.size), 'y': ap}) +ap_df['unique_id'] = 0 + +# Forecast with prediction intervals +sf = StatsForecast( + models=[ + SeasonalNaive(season_length=12), + AutoARIMA(season_length=12) + ], + freq=1, + n_jobs=1 +) +ap_ci = sf.forecast(df=ap_df, h=12, level=(80, 95)) + +# Plot with confidence intervals +sf.plot(ap_df, ap_ci, level=[80], engine="matplotlib") +``` + +### Conformal Prediction Intervals + +```python +from statsforecast import StatsForecast +from statsforecast.models import AutoARIMA +from statsforecast.utils import ConformalIntervals + +sf = StatsForecast( + models=[ + AutoARIMA(season_length=12), + AutoARIMA( + 
season_length=12,
+            prediction_intervals=ConformalIntervals(n_windows=2, h=12),
+            alias='ConformalAutoARIMA'
+        ),
+    ],
+    freq=1,
+    n_jobs=1
+)
+ap_ci = sf.forecast(df=ap_df, h=12, level=(80, 95))
+```
+
+## Advanced Features
+
+### Integer Datestamps
+
+The `StatsForecast` class can work with integer datestamps instead of datetime objects:
+
+```python
+from statsforecast import StatsForecast
+from statsforecast.models import HistoricAverage
+from statsforecast.utils import AirPassengers as ap
+import pandas as pd
+import numpy as np
+
+# Create dataframe with integer datestamps
+int_ds_df = pd.DataFrame({'ds': np.arange(1, len(ap) + 1), 'y': ap})
+int_ds_df.insert(0, 'unique_id', 'AirPassengers')
+
+# Use freq=1 for integer datestamps
+fcst = StatsForecast(models=[HistoricAverage()], freq=1)
+forecast = fcst.forecast(df=int_ds_df, h=7)
+```
+
+### External Regressors
+
+Every column after `y` is considered an external regressor and will be passed to models that support them:
+
+```python
+from statsforecast import StatsForecast
+from statsforecast.models import AutoARIMA
+from statsforecast.utils import generate_series
+import pandas as pd
+
+# Create data with external regressors
+series_xreg = generate_series(10_000, equal_ends=True)
+series_xreg['intercept'] = 1
+series_xreg['dayofweek'] = series_xreg['ds'].dt.dayofweek
+series_xreg = pd.get_dummies(series_xreg, columns=['dayofweek'], drop_first=True)
+
+# Split train/validation
+dates = sorted(series_xreg['ds'].unique())
+valid_start = dates[-14]
+train_mask = series_xreg['ds'] < valid_start
+series_train = series_xreg[train_mask]
+series_valid = series_xreg[~train_mask]
+X_valid = series_valid.drop(columns=['y'])
+
+# Forecast with external regressors (AutoARIMA shown for illustration;
+# any model that accepts exogenous features works here)
+fcst = StatsForecast(models=[AutoARIMA()], freq='D')
+xreg_res = fcst.forecast(df=series_train, h=14, X_df=X_valid)
+```
+
+## Distributed Computing
+
+The `StatsForecast` class offers parallelization utilities with Dask, Spark and Ray backends for distributed computing. See the [distributed computing examples](https://github.com/Nixtla/statsforecast/tree/main/experiments/ray) for more information.
diff --git a/docs/src/core/distributed.fugue.html.md b/docs/src/core/distributed.fugue.html.md
new file mode 100644
index 000000000..0d23097e0
--- /dev/null
+++ b/docs/src/core/distributed.fugue.html.md
@@ -0,0 +1,172 @@
+---
+output-file: distributed.fugue.html
+title: Fugue Backend
+---
+
+The `FugueBackend` class enables distributed computation for StatsForecast using [Fugue](https://github.com/fugue-project/fugue), which provides a unified interface for Spark, Dask, and Ray backends without requiring code rewrites.
+
+## Overview
+
+With FugueBackend, you can:
+
+- Distribute forecasting and cross-validation across clusters
+- Switch between Spark, Dask, and Ray without changing your code
+- Scale to large datasets with parallel processing
+- Maintain the same API as the standard StatsForecast interface
+
+## API Reference
+
+::: statsforecast.distributed.fugue.FugueBackend
+    options:
+      show_source: true
+      heading_level: 3
+      members:
+        - __init__
+        - forecast
+        - cross_validation
+
+## Quick Start
+
+### Basic Usage with Spark
+
+```python
+from statsforecast.core import StatsForecast
+from statsforecast.models import AutoETS
+from statsforecast.utils import generate_series
+from pyspark.sql import SparkSession
+
+# Generate example data
+n_series = 4
+horizon = 7
+series = generate_series(n_series)
+
+# Create Spark session
+spark = SparkSession.builder.getOrCreate()
+
+# Convert unique_id to string and create Spark DataFrame
+series['unique_id'] = series['unique_id'].astype(str)
+sdf = spark.createDataFrame(series)
+
+# Use StatsForecast with Spark DataFrame (automatically uses FugueBackend)
+sf = StatsForecast(
+    models=[AutoETS(season_length=7)],
+    freq='D',
+)
+
+# Returns a Spark DataFrame
+results = sf.cross_validation(
+    df=sdf,
+    h=horizon,
+    step_size=24,
+    n_windows=2,
+    level=[90]
+)
+results.show()
+```
+
+### Standard Usage with pandas
+
+```python
+from statsforecast import StatsForecast
+from statsforecast.models import AutoETS
+from statsforecast.utils import generate_series
+
+# Generate data
+series = generate_series(n_series=4)
+
+# Standard usage (pandas/polars)
+sf = StatsForecast(
+    models=[AutoETS(season_length=7)],
+    freq='D',
+)
+
+# Cross-validate with a pandas DataFrame
+sf.cross_validation(
+    df=series,
+    h=7,
+    step_size=24,
+    n_windows=2,
+    level=[90]
+).head()
+```
+
+## Dask Distributed Example
+
+Here's a complete example using Dask for distributed predictions:
+
+```python
+import dask.dataframe as dd
+from dask.distributed import Client
+from fugue_dask import DaskExecutionEngine
+from statsforecast import StatsForecast
+from statsforecast.models import Naive
+from statsforecast.utils import generate_series
+
+# Generate synthetic panel data
+df = generate_series(10)
+df['unique_id'] = df['unique_id'].astype(str)
+df = dd.from_pandas(df, npartitions=10)
+
+# Instantiate Dask client and execution engine
+dask_client = Client()
+engine = DaskExecutionEngine(dask_client=dask_client)
+
+# Create StatsForecast instance
+sf = StatsForecast(models=[Naive()], freq='D')
+```
+
+### Distributed Forecast
+
+The FugueBackend automatically handles distributed forecasting when you pass a Dask/Spark/Ray DataFrame:
+
+```python
+# Distributed predictions
+forecast_df = sf.forecast(df=df, h=12).compute()
+
+# With fitted values
+sf = StatsForecast(models=[Naive()], freq='D')
+forecast_df = sf.forecast(df=df, h=12, fitted=True).compute()
+fitted_df = sf.forecast_fitted_values().compute()
+```
+
+### Distributed Cross-Validation
+
+Perform distributed temporal cross-validation across your cluster:
+
+```python
+# Distributed cross-validation
+cv_results = sf.cross_validation(
+    df=df,
+    h=12,
+    n_windows=2
+).compute()
+```
+
+## How It Works
+
+1. **Automatic Detection**: When you pass a Spark, Dask, or Ray DataFrame to StatsForecast methods, the FugueBackend is automatically used.
+
+2. **Data Partitioning**: Data is partitioned by `unique_id`, allowing parallel processing across different time series.
+
+3. 
**Distributed Execution**: Each partition is processed independently using the standard StatsForecast logic. + +4. **Result Aggregation**: Results are collected and returned in the same format as the input (Spark/Dask/Ray DataFrame). + +## Supported Backends + +- **Apache Spark**: For large-scale distributed processing +- **Dask**: For flexible distributed computing with Python +- **Ray**: For modern distributed machine learning workloads + +## Notes + +- Ensure your cluster has sufficient resources for the number of time series and models +- The `unique_id` column should be string type for distributed operations +- Use `.compute()` on Dask DataFrames to materialize results +- Use `.show()` or `.collect()` on Spark DataFrames to view results + +## See Also + +- [Core StatsForecast Methods](core.html) +- [Distributed Computing Examples](https://github.com/Nixtla/statsforecast/tree/main/experiments/ray) +- [Fugue Documentation](https://fugue-tutorials.readthedocs.io/) diff --git a/docs/src/core/models.html.md b/docs/src/core/models.html.md new file mode 100644 index 000000000..06ed70b0d --- /dev/null +++ b/docs/src/core/models.html.md @@ -0,0 +1,670 @@ +--- +description: >- + Models currently supported by StatsForecast +output-file: models.html +title: Models +--- + +StatsForecast offers a wide variety of statistical forecasting models grouped into the following categories: + +- **Auto Forecast**: Automatic forecasting tools that search for the best parameters and select the best possible model. Useful for large collections of univariate time series. Includes: AutoARIMA, AutoETS, AutoTheta, AutoCES, AutoMFLES, AutoTBATS. + +- **ARIMA Family**: AutoRegressive Integrated Moving Average models for capturing autocorrelations in time series data. + +- **Exponential Smoothing**: Uses weighted averages of past observations where weights decrease exponentially into the past. Suitable for data with clear trend and/or seasonality. + +- **Baseline Models**: Classical models for establishing baselines: HistoricAverage, Naive, RandomWalkWithDrift, SeasonalNaive, WindowAverage, SeasonalWindowAverage. + +- **Sparse or Intermittent**: Models suited for series with very few non-zero observations: ADIDA, CrostonClassic, CrostonOptimized, CrostonSBA, IMAPA, TSB. + +- **Multiple Seasonalities**: Models suited for signals with more than one clear seasonality. Useful for low-frequency data like electricity and logs: MSTL, MFLES, TBATS. + +- **Theta Models**: Fit two theta lines to a deseasonalized time series using different techniques: Theta, OptimizedTheta, DynamicTheta, DynamicOptimizedTheta. + +- **ARCH/GARCH Family**: Models for time series exhibiting non-constant volatility over time. Commonly used in finance. + +- **Machine Learning**: Wrapper for scikit-learn models to be used with StatsForecast. 
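+
+All of these classes expose the same `fit`/`predict` interface on a single series. As a minimal sketch (assuming only the methods documented in the sections below and the `AirPassengers` array from `statsforecast.utils`):
+
+```python
+from statsforecast.models import AutoETS
+from statsforecast.utils import AirPassengers as ap
+
+# Fit a single model on a 1D array of observations
+model = AutoETS(season_length=12)
+model.fit(ap)
+
+# predict returns a dict whose 'mean' entry holds the point forecasts
+preds = model.predict(h=12)
+print(preds['mean'])
+```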
+ +## Automatic Forecasting + +### AutoARIMA + +::: statsforecast.models.AutoARIMA + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### AutoETS + +::: statsforecast.models.AutoETS + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### AutoCES + +::: statsforecast.models.AutoCES + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### AutoTheta + +::: statsforecast.models.AutoTheta + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### AutoMFLES + +::: statsforecast.models.AutoMFLES + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### AutoTBATS + +::: statsforecast.models.AutoTBATS + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +## ARIMA Family + +### ARIMA + +::: statsforecast.models.ARIMA + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### AutoRegressive + +::: statsforecast.models.AutoRegressive + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +## Exponential Smoothing + +### SimpleExponentialSmoothing + +::: statsforecast.models.SimpleExponentialSmoothing + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### SimpleExponentialSmoothingOptimized + +::: statsforecast.models.SimpleExponentialSmoothingOptimized + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### SeasonalExponentialSmoothing + +::: statsforecast.models.SeasonalExponentialSmoothing + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### SeasonalExponentialSmoothingOptimized + +::: statsforecast.models.SeasonalExponentialSmoothingOptimized + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### Holt + +::: statsforecast.models.Holt + options: + show_source: true + heading_level: 4 + members: + - __init__ + +### HoltWinters + +::: statsforecast.models.HoltWinters + options: + show_source: true + heading_level: 4 + members: + - __init__ + +## Baseline Models + +### HistoricAverage + +::: statsforecast.models.HistoricAverage + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### Naive + +::: statsforecast.models.Naive + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### RandomWalkWithDrift + +::: statsforecast.models.RandomWalkWithDrift + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### SeasonalNaive + +::: statsforecast.models.SeasonalNaive + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### WindowAverage + +::: 
statsforecast.models.WindowAverage + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### SeasonalWindowAverage + +::: statsforecast.models.SeasonalWindowAverage + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +## Sparse or Intermittent Models + +### ADIDA + +::: statsforecast.models.ADIDA + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### CrostonClassic + +::: statsforecast.models.CrostonClassic + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### CrostonOptimized + +::: statsforecast.models.CrostonOptimized + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### CrostonSBA + +::: statsforecast.models.CrostonSBA + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### IMAPA + +::: statsforecast.models.IMAPA + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### TSB + +::: statsforecast.models.TSB + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +## Multiple Seasonalities + +### MSTL + +::: statsforecast.models.MSTL + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### MFLES + +::: statsforecast.models.MFLES + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### TBATS + +::: statsforecast.models.TBATS + options: + show_source: true + heading_level: 4 + members: + - __init__ + +## Theta Family + +### Theta + +::: statsforecast.models.Theta + options: + show_source: true + heading_level: 4 + members: + - __init__ + +### OptimizedTheta + +::: statsforecast.models.OptimizedTheta + options: + show_source: true + heading_level: 4 + members: + - __init__ + +### DynamicTheta + +::: statsforecast.models.DynamicTheta + options: + show_source: true + heading_level: 4 + members: + - __init__ + +### DynamicOptimizedTheta + +::: statsforecast.models.DynamicOptimizedTheta + options: + show_source: true + heading_level: 4 + members: + - __init__ + +## ARCH/GARCH Family + +### GARCH + +::: statsforecast.models.GARCH + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### ARCH + +::: statsforecast.models.ARCH + options: + show_source: true + heading_level: 4 + members: + - __init__ + +## Machine Learning + +### SklearnModel + +::: statsforecast.models.SklearnModel + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +## Fallback Models + +These models are used as fallbacks when other models fail during forecasting. 
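+
+A minimal sketch of wiring one in through the `fallback_model` argument of `StatsForecast` (described elsewhere in these docs as "a model to be used if a model fails"); the data and model choices here are illustrative:
+
+```python
+from statsforecast import StatsForecast
+from statsforecast.models import AutoARIMA, ZeroModel
+from statsforecast.utils import generate_series
+
+df = generate_series(n_series=5, freq='D')
+
+# If AutoARIMA fails on a series, that series gets ZeroModel's forecasts instead
+sf = StatsForecast(
+    models=[AutoARIMA(season_length=7)],
+    freq='D',
+    fallback_model=ZeroModel(),
+)
+forecasts = sf.forecast(df=df, h=7)
+```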
+ +### ConstantModel + +::: statsforecast.models.ConstantModel + options: + show_source: true + heading_level: 4 + members: + - __init__ + - fit + - predict + - predict_in_sample + - forecast + +### ZeroModel + +::: statsforecast.models.ZeroModel + options: + show_source: true + heading_level: 4 + members: + - __init__ + +### NaNModel + +::: statsforecast.models.NaNModel + options: + show_source: true + heading_level: 4 + members: + - __init__ + +## Usage Examples + +### Basic Model Usage + +```python +from statsforecast import StatsForecast +from statsforecast.models import AutoARIMA, Naive +from statsforecast.utils import generate_series + +# Generate example data +df = generate_series(n_series=10) + +# Create StatsForecast instance with models +sf = StatsForecast( + models=[ + AutoARIMA(season_length=7), + Naive() + ], + freq='D' +) + +# Forecast +forecasts = sf.forecast(df=df, h=7) +``` + +### Using Multiple Models + +```python +from statsforecast import StatsForecast +from statsforecast.models import ( + AutoARIMA, + AutoETS, + SeasonalNaive, + Theta, + HistoricAverage +) + +# Combine multiple models for comparison +models = [ + AutoARIMA(season_length=12), + AutoETS(season_length=12), + SeasonalNaive(season_length=12), + Theta(season_length=12), + HistoricAverage() +] + +sf = StatsForecast(models=models, freq='M', n_jobs=-1) +forecasts = sf.forecast(df=df, h=12, level=[80, 95]) +``` + +### Model with Prediction Intervals + +```python +from statsforecast import StatsForecast +from statsforecast.models import AutoARIMA +from statsforecast.utils import ConformalIntervals + +# Create model with conformal prediction intervals +model = AutoARIMA( + season_length=12, + prediction_intervals=ConformalIntervals(n_windows=2, h=12), + alias='ConformalAutoARIMA' +) + +sf = StatsForecast(models=[model], freq='M') +forecasts = sf.forecast(df=df, h=12, level=[80, 95]) +``` + +### Sparse/Intermittent Data + +```python +from statsforecast import StatsForecast +from statsforecast.models import ( + CrostonOptimized, + ADIDA, + IMAPA, + TSB +) + +# Models specialized for sparse/intermittent data +sparse_models = [ + CrostonOptimized(), + ADIDA(), + IMAPA(), + TSB(alpha_d=0.2, alpha_p=0.2) +] + +sf = StatsForecast(models=sparse_models, freq='D') +forecasts = sf.forecast(df=sparse_df, h=30) +``` + +### Multiple Seasonalities + +```python +from statsforecast import StatsForecast +from statsforecast.models import MSTL, AutoTBATS + +# For data with multiple seasonal patterns +models = [ + MSTL(season_length=[24, 168]), # Hourly with daily and weekly seasonality + AutoTBATS(season_length=[24, 168]) +] + +sf = StatsForecast(models=models, freq='H') +forecasts = sf.forecast(df=hourly_df, h=168) +``` + +### ARCH/GARCH for Volatility + +```python +from statsforecast import StatsForecast +from statsforecast.models import GARCH, ARCH + +# Models for financial data with volatility +volatility_models = [ + GARCH(p=1, q=1), + ARCH(p=1) +] + +sf = StatsForecast(models=volatility_models, freq='D') +forecasts = sf.forecast(df=financial_df, h=30) +``` + +### Using Scikit-learn Models + +```python +from statsforecast import StatsForecast +from statsforecast.models import SklearnModel +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import Ridge + +# Wrap scikit-learn models +models = [ + SklearnModel(RandomForestRegressor(n_estimators=100), alias='RF'), + SklearnModel(Ridge(alpha=1.0), alias='Ridge') +] + +sf = StatsForecast(models=models, freq='D') +forecasts = sf.forecast(df=df, h=14) +``` + 
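+### Comparing Models with Cross-Validation
+
+Choosing between candidates usually comes down to fitting several models and comparing their cross-validated errors. A minimal sketch (the evaluation step assumes the `evaluate` helper and `rmse` loss from `utilsforecast`, a StatsForecast dependency):
+
+```python
+from statsforecast import StatsForecast
+from statsforecast.models import AutoETS, SeasonalNaive
+from statsforecast.utils import generate_series
+from utilsforecast.evaluation import evaluate
+from utilsforecast.losses import rmse
+
+df = generate_series(n_series=5, freq='D')
+
+sf = StatsForecast(
+    models=[AutoETS(season_length=7), SeasonalNaive(season_length=7)],
+    freq='D',
+)
+
+# cv_df has columns: unique_id, ds, cutoff, y, AutoETS, SeasonalNaive
+cv_df = sf.cross_validation(df=df, h=7, n_windows=3)
+
+# RMSE per series and model; drop the cutoff column so only model columns remain
+summary = evaluate(cv_df.drop(columns='cutoff'), metrics=[rmse])
+print(summary.groupby('metric').mean(numeric_only=True))
+```
+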
+## Model Selection Tips
+
+- **For automatic selection**: Start with `AutoARIMA` or `AutoETS`
+- **For baseline comparison**: Use `Naive`, `SeasonalNaive`, or `HistoricAverage`
+- **For seasonal data**: Use models with a `season_length` parameter
+- **For sparse data**: Use the Croston family or ADIDA
+- **For multiple seasonalities**: Use MSTL or TBATS
+- **For volatile data**: Use GARCH or ARCH
+- **For ensemble approaches**: Combine multiple models and compare performance
+
+## References
+
+For detailed information on the statistical models and algorithms, please refer to the [source code](https://github.com/Nixtla/statsforecast/blob/main/python/statsforecast/models.py) and the original academic papers referenced in the docstrings.
diff --git a/docs/src/feature_engineering.html.md b/docs/src/feature_engineering.html.md
new file mode 100644
index 000000000..3842c76d0
--- /dev/null
+++ b/docs/src/feature_engineering.html.md
@@ -0,0 +1,50 @@
+---
+description: Generate features for downstream models
+output-file: feature_engineering.html
+title: Feature engineering
+---
+
+::: statsforecast.feature_engineering.mstl_decomposition
+
+```python
+import pandas as pd
+import polars as pl
+from utilsforecast.losses import smape
+
+from statsforecast.feature_engineering import mstl_decomposition
+from statsforecast.models import MSTL
+from statsforecast.utils import generate_series
+
+series = generate_series(10, freq='D')
+series['unique_id'] = series['unique_id'].astype('int64')
+```
+
+```python
+horizon = 14
+model = MSTL(season_length=7)
+series = series.sample(frac=1.0)  # shuffle rows; the input does not need to be sorted
+train_df, X_df = mstl_decomposition(series, model, 'D', horizon)
+```
+
+```python
+series_pl = generate_series(10, freq='D', engine='polars')
+series_pl = series_pl.with_columns(unique_id=pl.col('unique_id').cast(pl.Int64))
+train_df_pl, X_df_pl = mstl_decomposition(series_pl, model, '1d', horizon)
+```
+
+```python
+pd.testing.assert_series_equal(
+    train_df.groupby('unique_id')['ds'].max() + pd.offsets.Day(),
+    X_df.groupby('unique_id')['ds'].min()
+)
+assert X_df.shape[0] == train_df['unique_id'].nunique() * horizon
+pd.testing.assert_frame_equal(train_df, train_df_pl.to_pandas())
+pd.testing.assert_frame_equal(X_df, X_df_pl.to_pandas())
+with_estimate = train_df_pl.with_columns(estimate=pl.col('trend') + pl.col('seasonal'))
+assert smape(with_estimate, models=['estimate'])['estimate'].mean() < 0.1
+```
+
+```python
+model = MSTL(season_length=[7, 28])
+train_df, X_df = mstl_decomposition(series, model, 'D', horizon)
+assert train_df.columns.intersection(X_df.columns).tolist() == ['unique_id', 'ds', 'trend', 'seasonal7', 'seasonal28']
+```
diff --git a/docs/to_mdx.py b/docs/to_mdx.py
index f95afb84c..4236b5644 100644
--- a/docs/to_mdx.py
+++ b/docs/to_mdx.py
@@ -1,27 +1,60 @@
+import argparse
import re
from pathlib import Path

+from mkdocstrings_parser import MkDocstringsParser
+
comment_pat = re.compile(r"<!--.*?-->", re.DOTALL)
anchor_pat = re.compile(r"<a[^>]*>(.*?)</a>")
output_path = Path("docs/mintlify")

-# process docs
-for file in Path("docs").glob("*.md"):
-    text = file.read_text()
-    text = comment_pat.sub("", text)
-    text = anchor_pat.sub("", text)
-    module_name = ".".join(file.name.split(".")[1:-1])
-    output_file = output_path / (module_name + ".mdx")
-    output_file.write_text(text)
+def process_files(input_dir):
+    """Process files with MkDocstrings parser, then clean with regex"""
+    # Step 1: Use MkDocstrings parser to generate initial MDX files
+    parser = MkDocstringsParser()
+    for file in Path(input_dir).rglob("*.md"):
+        folder_path = Path(input_dir) / "mintlify" / 
Path(*file.parent.parts[1:])
+        folder_path.mkdir(parents=True, exist_ok=True)
+        output_file = str(folder_path / file.with_suffix(".mdx").name)
+        print(f"Processing {file} -> {output_file}")
+        parser.process_file(str(file), output_file)
+
+    # Step 2: Clean up the generated MDX files with regex patterns
+    # (rglob so MDX files written into nested folders above are also cleaned)
+    for mdx_file in (Path(input_dir) / "mintlify").rglob("*.mdx"):
+        if mdx_file.name == "index.mdx":  # Skip index.mdx as it's handled separately
+            continue
+        print(f"Cleaning up {mdx_file}")
+        text = mdx_file.read_text()
+        text = comment_pat.sub("", text)
+        text = anchor_pat.sub("", text)
+        mdx_file.write_text(text)

-header = """---
+
+def copy_readme():
+    """Copy README.md to index.html.mdx with the proper header"""
+    header = """---
description: Lightning fast forecasting with statistical and econometric models
title: "Statistical ⚑️ Forecast"
---

"""
-readme_text = Path("README.md").read_text()
-# replace url with .
-readme_text = re.sub(r"https?://nixtlaverse\.nixtla\.io/", "./", readme_text)
-readme_text = header + readme_text
-(output_path / "index.mdx").write_text(readme_text)
+    readme_text = Path("README.md").read_text()
+    # Skip the first 22 lines
+    lines = readme_text.split('\n')
+    readme_text = '\n'.join(lines[22:])
+    readme_text = header + readme_text
+    (output_path / "index.html.mdx").write_text(readme_text)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Process markdown files to MDX format")
+    parser.add_argument(
+        "input_dir", nargs="?", default="docs", help="Input directory (default: docs)"
+    )
+    args = parser.parse_args()
+
+    # Step 1: Process files with MkDocstrings parser, then clean with regex
+    process_files(args.input_dir)
+
+    # Step 2: Always copy the README
+    copy_readme()
diff --git a/nbs/.gitignore b/nbs/.gitignore
index a1889977c..4da429bd9 100644
--- a/nbs/.gitignore
+++ b/nbs/.gitignore
@@ -1,3 +1,4 @@
/.quarto/
-lightning_logs/
\ No newline at end of file
+lightning_logs/
+**/*.quarto_ipynb
diff --git a/nbs/docs/getting-started/2_Getting_Started_complete.ipynb b/nbs/docs/getting-started/getting_Started_complete.ipynb
similarity index 99%
rename from nbs/docs/getting-started/2_Getting_Started_complete.ipynb
rename to nbs/docs/getting-started/getting_Started_complete.ipynb
index e85bccea3..9b9b68917 100644
--- a/nbs/docs/getting-started/2_Getting_Started_complete.ipynb
+++ b/nbs/docs/getting-started/getting_Started_complete.ipynb
@@ -36,13 +36,13 @@
  "metadata": {},
  "source": [
    ":::{.callout-warning collapse=\"true\"}\n",
-    "## Prerequesites\n",
+    "## Prerequisites\n",
    "This Guide assumes basic familiarity with StatsForecast. For a minimal example visit the [Quick Start](./1_Getting_Started_short).\n",
    ":::\n",
    "\n",
-    "Follow this article for a step to step guide on building a production-ready forecasting pipeline for multiple time series. \n",
+    "Follow this article for a step-by-step guide on building a production-ready forecasting pipeline for multiple time series. \n",
    "\n",
-    "During this guide you will gain familiary with the core `StatsForecast`class and some relevant methods like `StatsForecast.plot`, `StatsForecast.forecast` and `StatsForecast.cross_validation.`\n",
+    "During this guide you will gain familiarity with the core `StatsForecast` class and some relevant methods like `StatsForecast.plot`, `StatsForecast.forecast` and `StatsForecast.cross_validation`.\n",
    "\n",
    "We will use a classical benchmarking dataset from the M4 competition. The dataset includes time series from different domains like finance, economy and sales.
In this example, we will use a subset of the Hourly dataset. \n",
    "\n",
@@ -98,7 +98,7 @@
    "\n",
    "* The `unique_id` (string, int or category) represents an identifier for the series. \n",
    "\n",
-    "* The `ds` (datestamp or int) column should be either an integer indexing time or a datestampe ideally like YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp.\n",
+    "* The `ds` (datestamp or int) column should be either an integer indexing time or a datestamp, ideally like YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp.\n",
    "\n",
    "* The `y` (numeric) represents the measurement we wish to forecast. The target column needs to be renamed to `y` if it has a different column name.\n",
    "\n",
@@ -242,7 +242,7 @@
  "metadata": {},
  "source": [
    ":::{.callout-note}\n",
-    "The `StatsForecast.plot` method uses Plotly as a defaul engine. You can change to MatPlotLib by setting `engine=\"matplotlib\"`. \n",
+    "The `StatsForecast.plot` method uses Plotly as the default engine. You can change to Matplotlib by setting `engine=\"matplotlib\"`. \n",
    ":::"
  ]
 },
@@ -313,7 +313,7 @@
    "\n",
    "* `SeasonalNaive`: Memory Efficient Seasonal Naive predictions. Ref: `SeasonalNaive`\n",
    "\n",
-    "* `HistoricAverage`: arthimetic mean. Ref: `HistoricAverage`.\n",
+    "* `HistoricAverage`: arithmetic mean. Ref: `HistoricAverage`.\n",
    "\n",
    "* `DynamicOptimizedTheta`: The theta family of models has been shown to perform well in various datasets such as M3. Models the deseasonalized time series. Ref: `DynamicOptimizedTheta`."
  ]
 },
@@ -366,7 +366,7 @@
    "\n",
    "* `freq`: a string indicating the frequency of the data. (See [pandas available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).)\n",
    "\n",
-    "* `n_jobs`: n_jobs: int, number of jobs used in the parallel processing, use -1 for all cores.\n",
+    "* `n_jobs`: int, number of jobs used in parallel processing; use -1 for all cores.\n",
    "\n",
    "* `fallback_model`: a model to be used if a model fails. \n",
    "\n",
@@ -417,7 +417,7 @@
  "metadata": {},
  "source": [
    ":::{.callout-note}\n",
-    "The `forecast` method is compatible with distributed clusters, so it does not store any model parameters. If you want to store parameters for every model you can use the `fit` and `predict` methods. However, those methods are not defined for distrubed engines like Spark, Ray or Dask.\n",
+    "The `forecast` method is compatible with distributed clusters, so it does not store any model parameters. If you want to store parameters for every model you can use the `fit` and `predict` methods. However, those methods are not defined for distributed engines like Spark, Ray or Dask.\n",
    ":::"
  ]
 },
@@ -704,7 +704,7 @@
    "## Evaluate the model's performance\n",
    "\n",
    "\n",
-    "In previous steps, we've taken our historical data to predict the future. However, to asses its accuracy we would also like to know how the model would have performed in the past. To assess the accuracy and robustness of your models on your data perform Cross-Validation.\n",
+    "In previous steps, we've taken our historical data to predict the future. However, to assess its accuracy we would also like to know how the model would have performed in the past. To assess the accuracy and robustness of your models on your data, perform Cross-Validation.\n",
    "\n",
    "With time series data, **Cross Validation** is done by defining a sliding window across the historical data and predicting the period following it. 
This form of cross-validation allows us to arrive at a better estimation of our model's predictive abilities across a wider range of temporal instances while also keeping the data in the training set contiguous as is required by our models.\n",
    "\n",
@@ -775,7 +775,7 @@
    "\n",
    "* `ds`: datestamp or temporal index \n",
    "\n",
-    "* `cutoff`: the last datestamp or temporal index for the `n_windows.` If `n_windows=1`, then one unique cuttoff value, if `n_windows=2` then two unique cutoff values. \n",
+    "* `cutoff`: the last datestamp or temporal index for the `n_windows`. If `n_windows=1`, then one unique cutoff value; if `n_windows=2`, then two unique cutoff values. \n",
    "\n",
    "* `y`: true value \n",
    "\n",
@@ -1339,4 +1339,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/nbs/docs/getting-started/3_Getting_Started_complete_polars.ipynb b/nbs/docs/getting-started/getting_Started_complete_polars.ipynb
similarity index 99%
rename from nbs/docs/getting-started/3_Getting_Started_complete_polars.ipynb
rename to nbs/docs/getting-started/getting_Started_complete_polars.ipynb
index e72249a1f..3146e2738 100644
--- a/nbs/docs/getting-started/3_Getting_Started_complete_polars.ipynb
+++ b/nbs/docs/getting-started/getting_Started_complete_polars.ipynb
@@ -33,13 +33,13 @@
  "metadata": {},
  "source": [
    ":::{.callout-warning collapse=\"true\"}\n",
-    "## Prerequesites\n",
+    "## Prerequisites\n",
    "This Guide assumes basic familiarity with StatsForecast. For a minimal example visit the [Quick Start](./Getting_Started_short)\n",
    ":::\n",
    "\n",
-    "Follow this article for a step to step guide on building a production-ready forecasting pipeline for multiple time series. \n",
+    "Follow this article for a step-by-step guide on building a production-ready forecasting pipeline for multiple time series. \n",
    "\n",
-    "During this guide you will gain familiary with the core `StatsForecast`class and some relevant methods like `StatsForecast.plot`, `StatsForecast.forecast` and `StatsForecast.cross_validation.`\n",
+    "During this guide you will gain familiarity with the core `StatsForecast` class and some relevant methods like `StatsForecast.plot`, `StatsForecast.forecast` and `StatsForecast.cross_validation`.\n",
    "\n",
    "We will use a classical benchmarking dataset from the M4 competition. The dataset includes time series from different domains like finance, economy and sales. In this example, we will use a subset of the Hourly dataset. \n",
    "\n",
@@ -80,7 +80,7 @@
  "source": [
    "## Install libraries\n",
    "\n",
-    "We assume you have StatsForecast already installed. Check this guide for instructions on [how to install StatsForecast](./Installation../getting-started/0_Installation)."
+    "We assume you have StatsForecast already installed. Check this guide for instructions on [how to install StatsForecast](./installation.html)."
  ]
 },
{
@@ -95,7 +95,7 @@
    "\n",
    "* The `unique_id` (string, int or category) represents an identifier for the series. \n",
    "\n",
-    "* The `ds` (datestamp or int) column should be either an integer indexing time or a datestampe ideally like YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp.\n",
+    "* The `ds` (datestamp or int) column should be either an integer indexing time or a datestamp, ideally like YYYY-MM-DD for a date or YYYY-MM-DD HH:MM:SS for a timestamp.\n",
    "\n",
    "* The `y` (numeric) represents the measurement we wish to forecast. \n",
    "\n",
@@ -267,7 +267,7 @@
    "\n",
    "* `SeasonalNaive`: Memory Efficient Seasonal Naive predictions. 
Ref: `SeasonalNaive`\n", "\n", - "* `HistoricAverage`: arthimetic mean. Ref: `HistoricAverage`.\n", + "* `HistoricAverage`: arithmetic mean. Ref: `HistoricAverage`.\n", "\n", "* `DynamicOptimizedTheta`: The theta family of models has been shown to perform well in various datasets such as M3. Models the deseasonalized time series. Ref: `DynamicOptimizedTheta`." ] }, @@ -320,7 +320,7 @@ "\n", "* `freq`: a string indicating the frequency of the data. (See [panda's available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).) This is also available with Polars.\n", "\n", - "* `n_jobs`: n_jobs: int, number of jobs used in the parallel processing, use -1 for all cores.\n", + "* `n_jobs`: int, number of jobs used in parallel processing; use -1 for all cores.\n", "\n", "* `fallback_model`: a model to be used if a model fails. \n", "\n", @@ -372,7 +372,7 @@ "metadata": {}, "source": [ ":::{.callout-note}\n", - "The `forecast` method is compatible with distributed clusters, so it does not store any model parameters. If you want to store parameters for every model you can use the `fit` and `predict` methods. However, those methods are not defined for distrubed engines like Spark, Ray or Dask.\n", + "The `forecast` method is compatible with distributed clusters, so it does not store any model parameters. If you want to store parameters for every model you can use the `fit` and `predict` methods. However, those methods are not defined for distributed engines like Spark, Ray or Dask.\n", ":::" ] }, @@ -443,7 +443,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Plot the results of 8 randon series using the `StatsForecast.plot` method. " + "Plot the results of 8 random series using the `StatsForecast.plot` method. " ] }, { @@ -525,7 +525,7 @@ "## Evaluate the model's performance\n", "\n", "\n", - "In previous steps, we've taken our historical data to predict the future. However, to asses its accuracy we would also like to know how the model would have performed in the past. To assess the accuracy and robustness of your models on your data perform Cross-Validation.\n", + "In previous steps, we've taken our historical data to predict the future. However, to assess its accuracy we would also like to know how the model would have performed in the past. To assess the accuracy and robustness of your models on your data, perform Cross-Validation.\n", "\n", "With time series data, **Cross Validation** is done by defining a sliding window across the historical data and predicting the period following it. This form of cross-validation allows us to arrive at a better estimation of our model's predictive abilities across a wider range of temporal instances while also keeping the data in the training set contiguous as is required by our models.\n", "\n", @@ -596,7 +596,7 @@ "\n", "* `ds`: datestamp or temporal index \n", "\n", - "* `cutoff`: the last datestamp or temporal index for the `n_windows.` If `n_windows=1`, then one unique cuttoff value, if `n_windows=2` then two unique cutoff values. \n", + "* `cutoff`: the last datestamp or temporal index for the `n_windows`. If `n_windows=1`, then one unique cutoff value; if `n_windows=2`, then two unique cutoff values. 
\n", "\n", "* `y`: true value \n", "\n", @@ -946,4 +946,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/nbs/docs/getting-started/1_Getting_Started_short.ipynb b/nbs/docs/getting-started/getting_Started_short.ipynb similarity index 100% rename from nbs/docs/getting-started/1_Getting_Started_short.ipynb rename to nbs/docs/getting-started/getting_Started_short.ipynb diff --git a/nbs/docs/getting-started/0_Installation.ipynb b/nbs/docs/getting-started/installation.ipynb similarity index 100% rename from nbs/docs/getting-started/0_Installation.ipynb rename to nbs/docs/getting-started/installation.ipynb diff --git a/nbs/docs/how-to-guides/00_Automatic_Forecasting.ipynb b/nbs/docs/how-to-guides/Automatic_Forecasting.ipynb similarity index 98% rename from nbs/docs/how-to-guides/00_Automatic_Forecasting.ipynb rename to nbs/docs/how-to-guides/Automatic_Forecasting.ipynb index c239dff3b..73c17b7a2 100644 --- a/nbs/docs/how-to-guides/00_Automatic_Forecasting.ipynb +++ b/nbs/docs/how-to-guides/Automatic_Forecasting.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "source": [ "## 1. Install statsforecast and load data\n", - "Use pip to install statsforecast and load Air Passangers dataset as an example" + "Use pip to install statsforecast and load Air Passengers dataset as an example" ] }, { @@ -87,7 +87,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 3. Instatiate the class\n", + "## 3. Instantiate the class\n", "Instantiate the StatsForecast class with the appropriate parameters" ] }, diff --git a/nbs/docs/how-to-guides/Exogenous.ipynb b/nbs/docs/how-to-guides/Exogenous.ipynb index 390e22256..5399f124b 100644 --- a/nbs/docs/how-to-guides/Exogenous.ipynb +++ b/nbs/docs/how-to-guides/Exogenous.ipynb @@ -17,7 +17,7 @@ "source": [ "::: {.callout-warning collapse=\"true\"}\n", "\n", - "## Prerequesites\n", + "## Prerequisites\n", "\n", "This tutorial assumes basic familiarity with StatsForecast. For a minimal example visit the [Quick Start](../getting-started/1_Getting_Started_short)\n", ":::" @@ -30,7 +30,7 @@ "source": [ "## Introduction \n", "\n", - "**Exogenous regressors** are variables that can affect the values of a time series. They may not be directly related to the variable that is beging forecasted, but they can still have an impact on it. Examples of exogenous regressors are weather data, economic indicators, or promotional sales. They are typically collected from external sources and by incorporating them into a forecasting model, they can improve the accuracy of our predictions. \n", + "**Exogenous regressors** are variables that can affect the values of a time series. They may not be directly related to the variable that is being forecasted, but they can still have an impact on it. Examples of exogenous regressors are weather data, economic indicators, or promotional sales. They are typically collected from external sources and by incorporating them into a forecasting model, they can improve the accuracy of our predictions. \n", "\n", "By the end of this tutorial, you'll have a good understanding of how to incorporate exogenous regressors into [StatsForecast](../../models)'s models. Furthermore, you'll see how to evaluate their performance and decide whether or not they can help enhance the forecast. 
" ] @@ -137,7 +137,7 @@ "id": "4481392d-59a9-4179-b28b-7179a551bb8b", "metadata": {}, "source": [ - "We can plot the sales of this product-store combination with the `statsforecast.plot` method from the [StatsForecast](../../core#class-statsforecast) class. This method has multiple parameters, and the requiered ones to generate the plots in this notebook are explained below. \n", + "We can plot the sales of this product-store combination with the `statsforecast.plot` method from the [StatsForecast](../../core#class-statsforecast) class. This method has multiple parameters, and the required ones to generate the plots in this notebook are explained below. \n", "\n", "- `df`: A pandas dataframe with columns [`unique_id`, `ds`, `y`].\n", "- `forecasts_df`: A pandas dataframe with columns [`unique_id`, `ds`] and models.\n", @@ -616,7 +616,7 @@ "id": "f2133e1d-5b6e-4a91-8df3-97d6869b1559", "metadata": {}, "source": [ - "To generate the forecast, we'll use [AutoARIMA](https://nixtlaverse.nixtla.io/statsforecast/docs/models/AutoARIMA), which is one of the models available in StatsForecast that allows exogenous regressors. To use this model, we first need to import it from `statsforecast.models` and then we need to instatiate it. Given that we're working with daily data, we need to set `season_length = 7`. " + "To generate the forecast, we'll use [AutoARIMA](https://nixtlaverse.nixtla.io/statsforecast/docs/models/AutoARIMA), which is one of the models available in StatsForecast that allows exogenous regressors. To use this model, we first need to import it from `statsforecast.models` and then we need to instantiate it. Given that we're working with daily data, we need to set `season_length = 7`. " ] }, { @@ -961,7 +961,7 @@ "id": "7de253b8-7502-41f2-87b1-032624bd9363", "metadata": {}, "source": [ - "To check whether the exogenous regressors were useful or not, we need to generate the forecast again, now without them. To do this, we simple pass the dataframe wihtout exogenous variables to the `forecast` method. Notice that the data only includes `unique_id`, `ds`, and `y`. The `forecast` method no longer requieres the future values of the exogenous regressors `X_df`. " + "To check whether the exogenous regressors were useful or not, we need to generate the forecast again, now without them. To do this, we simply pass the dataframe without exogenous variables to the `forecast` method. Notice that the data only includes `unique_id`, `ds`, and `y`. The `forecast` method no longer requires the future values of the exogenous regressors `X_df`. " ] }, { diff --git a/nbs/docs/how-to-guides/migrating_R.qmd b/nbs/docs/how-to-guides/migrating_R.qmd deleted file mode 100644 index 352be78f5..000000000 --- a/nbs/docs/how-to-guides/migrating_R.qmd +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: Migrating from R ---- - -## 🚧 We are working on this site. -This site is currently in development. If you are particularly interested in this section, please open a GitHub Issue, and we will prioritize it. \ No newline at end of file diff --git a/nbs/docs/how-to-guides/numba_cache.ipynb b/nbs/docs/how-to-guides/numba_cache.ipynb index 6ecebc2c4..1649e6ac2 100644 --- a/nbs/docs/how-to-guides/numba_cache.ipynb +++ b/nbs/docs/how-to-guides/numba_cache.ipynb @@ -15,7 +15,7 @@ "id": "3981bf93-72a4-4457-b361-d851e426e15b", "metadata": {}, "source": [ - "`statsforecast` makes heavy use of [numba](https://numba.pydata.org/) to speed up several critical functions that estimate model parameters. 
This comes at a cost though, which is that the functions have to be [JIT compiled](https://en.wikipedia.org/wiki/Just-in-time_compilation) the first time they're run, which can be expensive. Once a function has ben JIT compiled, subsequent calls are significantly faster. One problem is that this compilation is saved (by default) on a per-session basis.\n", + "`statsforecast` makes heavy use of [numba](https://numba.pydata.org/) to speed up several critical functions that estimate model parameters. This comes at a cost though, which is that the functions have to be [JIT compiled](https://en.wikipedia.org/wiki/Just-in-time_compilation) the first time they're run, which can be expensive. Once a function has been JIT compiled, subsequent calls are significantly faster. One problem is that this compilation is saved (by default) on a per-session basis.\n", "\n", "In order to mitigate the compilation overhead numba offers the option to cache the function compiled code to a file, which can be then reused across sessions, and even copied over to different machines that share the same CPU characteristics ([more info](https://numba.pydata.org/numba-doc/latest/developer/caching.html#cache-sharing)).\n", "\n", diff --git a/nbs/docs/tutorials/AnomalyDetection.ipynb b/nbs/docs/tutorials/AnomalyDetection.ipynb index 9b13ff98b..0533568f3 100644 --- a/nbs/docs/tutorials/AnomalyDetection.ipynb +++ b/nbs/docs/tutorials/AnomalyDetection.ipynb @@ -17,7 +17,7 @@ "source": [ "::: {.callout-warning collapse=\"true\"}\n", "\n", - "## Prerequesites\n", + "## Prerequisites\n", "\n", "This tutorial assumes basic familiarity with StatsForecast. For a minimal example visit the [Quick Start](../getting-started/1_Getting_Started_short)\n", ":::" @@ -229,7 +229,7 @@ "id": "992e59e2-4b8e-4ae5-b905-455e834ee930", "metadata": {}, "source": [ - "The input to StatsForecast is always a data frame in [long format](https://www.theanalysisfactor.com/wide-and-long-data/) with three columns: `unique_id`, `df` and `y`. \n", + "The input to StatsForecast is always a data frame in [long format](https://www.theanalysisfactor.com/wide-and-long-data/) with three columns: `unique_id`, `ds` and `y`. \n", "\n", "- `unique_id`: (string, int or category) A unique identifier for the series. \n", "- `ds`: (timestamp or int) A timestamp in format YYYY-MM-DD or YYYY-MM-DD HH:MM:SS or an integer indexing time. \n", @@ -371,7 +371,7 @@ "id": "2de21a33-c11f-4b3f-a413-e5a62255b6f0", "metadata": {}, "source": [ - "We'll now predict the next 48 hours. To do this, we'll use the `forecast` method, which requieres the following arguments: \n", + "We'll now predict the next 48 hours. To do this, we'll use the `forecast` method, which requires the following arguments: \n", "\n", "- `df`: The dataframe with the training data.\n", "- `h`: The forecasting horizon. \n", diff --git a/nbs/docs/tutorials/ConformalPrediction.ipynb b/nbs/docs/tutorials/ConformalPrediction.ipynb index c45e22c4c..9e1d4a247 100644 --- a/nbs/docs/tutorials/ConformalPrediction.ipynb +++ b/nbs/docs/tutorials/ConformalPrediction.ipynb @@ -273,7 +273,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can plot these series using the `plot_series` function from the utilsforecast library. Thisfunctionmethod has multiple parameters, and the required ones to generate the plots in this notebook are explained below. \n", + "We can plot these series using the `plot_series` function from the utilsforecast library. 
This function has multiple parameters, and the required ones to generate the plots in this notebook are explained below. \n", "\n", "- `df`: A `pandas` dataframe with columns [`unique_id`, `ds`, `y`]. \n", "- `forecasts_df`: A `pandas` dataframe with columns [`unique_id`, `ds`] and models. \n", diff --git a/nbs/docs/tutorials/CrossValidation.ipynb b/nbs/docs/tutorials/CrossValidation.ipynb index 99a35a208..2d8b72260 100644 --- a/nbs/docs/tutorials/CrossValidation.ipynb +++ b/nbs/docs/tutorials/CrossValidation.ipynb @@ -15,7 +15,7 @@ "source": [ "::: {.callout-warning collapse=\"true\"}\n", "\n", - "## Prerequesites\n", + "## Prerequisites\n", "\n", "This tutorial assumes basic familiarity with StatsForecast. For a minimal example visit the [Quick Start](../getting-started/1_Getting_Started_short)\n", ":::" ] }, @@ -97,7 +97,7 @@ "metadata": {}, "outputs": [], "source": [ - "from statsforecast import StatsForecast # required to instantiate StastForecast object and use cross-validation method" + "from statsforecast import StatsForecast # required to instantiate StatsForecast object and use cross-validation method" ] }, { @@ -276,7 +276,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For this example, we'll use StastForecast [AutoETS](../models/AutoETS). We first need to import it from `statsforecast.models` and then we need to instantiate a new `StatsForecast` object. " + "For this example, we'll use StatsForecast [AutoETS](../models/AutoETS). We first need to import it from `statsforecast.models` and then we need to instantiate a new `StatsForecast` object. " ] }, { @@ -287,7 +287,7 @@ "\n", "- models: a list of models. Select the models you want from [models](../../models/) and import them.\n", "- freq: a string indicating the frequency of the data. See [panda’s available frequencies.](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)\n", - "- n_jobs: n_jobs: int, number of jobs used in the parallel processing, use -1 for all cores.\n", + "- n_jobs: int, number of jobs used in parallel processing; use -1 for all cores.\n", "\n", "Any settings are passed into the constructor. Then you call its fit method and pass in the historical data frame `df`. " ] }, @@ -545,7 +545,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can now compute the accuracy of the forecast using an appropiate accuracy metric. Here we'll use the [Root Mean Squared Error (RMSE).](https://en.wikipedia.org/wiki/Root-mean-square_deviation)." + "We can now compute the accuracy of the forecast using an appropriate accuracy metric. Here we'll use the [Root Mean Squared Error (RMSE)](https://en.wikipedia.org/wiki/Root-mean-square_deviation)." ] }, { diff --git a/nbs/docs/tutorials/GARCH_tutorial.ipynb b/nbs/docs/tutorials/GARCH_tutorial.ipynb index 1636eb17b..9e80c7d5d 100644 --- a/nbs/docs/tutorials/GARCH_tutorial.ipynb +++ b/nbs/docs/tutorials/GARCH_tutorial.ipynb @@ -19,7 +19,7 @@ "source": [ "::: {.callout-warning collapse=\"true\"}\n", "\n", - "## Prerequesites\n", + "## Prerequisites\n", "\n", "This tutorial assumes basic familiarity with StatsForecast. For a minimal example visit the [Quick Start](../getting-started/1_Getting_Started_short)\n", ":::" ] }, @@ -1058,7 +1058,7 @@ "id": "1c4054b3-d433-4156-b1ba-3f10d5ee7609", "metadata": {}, "source": [ - "Time series cross-validation is a method for evaluating how a model would have performed in the past. It works by defining a sliding window across the historical data and predicting the period following it. 
Here we'll use StatsForercast's `cross-validation` method to determine the most accurate model for the S&P 500 and the companies selected. \n", + "Time series cross-validation is a method for evaluating how a model would have performed in the past. It works by defining a sliding window across the historical data and predicting the period following it. Here we'll use StatsForecast's `cross_validation` method to determine the most accurate model for the S&P 500 and the companies selected. \n", "\n", "This method takes the following arguments: \n", "\n", @@ -1091,7 +1091,7 @@ "id": "03270241-e962-44aa-b1bc-32b1cbfb5715", "metadata": {}, "source": [ - "The `cv_df` object ia a dataframe with the following columns: \n", + "The `cv_df` object is a dataframe with the following columns: \n", "\n", "- `unique_id`: series identifier.\n", "- `ds`: datestamp or temporal index\n", @@ -1546,7 +1546,7 @@ "id": "481d21b2-a71e-4836-8ed5-b8091ca24f04", "metadata": {}, "source": [ - "Hence, the most accurate model to describe the logarithmic returns of Apple's stock is an GARCH(2, 1), for Amazon's stock is a GARCH(2,2), and so on. " + "Hence, the most accurate model to describe the logarithmic returns of Apple's stock is a GARCH(2,1); for Amazon's stock, it is a GARCH(2,2); and so on. " ] }, { @@ -1564,7 +1564,7 @@ "id": "75adaec0-6660-4d9a-8772-aebb1fba86d8", "metadata": {}, "source": [ - "We can now generate a forecast for the next quarter. To do this, we'll use the `forecast` method, which requieres the following arguments:\n", + "We can now generate a forecast for the next quarter. To do this, we'll use the `forecast` method, which requires the following arguments:\n", "\n", "- `h`: (int) The forecasting horizon.\n", "- `level`: (list[float]) The confidence levels of the prediction intervals\n", @@ -1797,7 +1797,7 @@ "id": "d3961240-086d-4f26-a44a-171654883e7e", "metadata": {}, "source": [ - "With the results of the previous section, we can choose the best model for the S&P 500 and the companies selected. Some of the plots are shown below. Notice that we're using somo additional arguments in the `plot` method: \n", + "With the results of the previous section, we can choose the best model for the S&P 500 and the companies selected. Some of the plots are shown below. Notice that we're using some additional arguments in the `plot` method: \n", "\n", "- `level`: (list[int]) The confidence levels for the prediction intervals (this was already defined). \n", "- `unique_ids`: (list[str, int or category]) The ids to plot. \n", diff --git a/nbs/docs/tutorials/StatisticalNeuralMethods.ipynb b/nbs/docs/tutorials/StatisticalNeuralMethods.ipynb index 3102429a6..79b5b2750 100644 --- a/nbs/docs/tutorials/StatisticalNeuralMethods.ipynb +++ b/nbs/docs/tutorials/StatisticalNeuralMethods.ipynb @@ -31305,7 +31305,7 @@ "id": "944aed25-a9ac-4b41-8049-b0331574279f", "metadata": {}, "source": [ - "`StatsForecast` receives a list of models to fit each time series. Since we are dealing with Daily data, it would be benefitial to use 7 as seasonality." + "`StatsForecast` receives a list of models to fit each time series. Since we are dealing with daily data, it would be beneficial to use 7 as the seasonality." 
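Concretely, that advice translates to something like the sketch below (the particular model mix is an illustrative assumption; the notebook's own list may differ):

```python
from statsforecast import StatsForecast
from statsforecast.models import AutoETS, SeasonalNaive

season_length = 7  # weekly seasonality for daily data

sf = StatsForecast(
    models=[
        AutoETS(season_length=season_length),
        SeasonalNaive(season_length=season_length),
    ],
    freq="D",   # daily frequency
    n_jobs=-1,  # use all available cores
)
```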
] }, { @@ -59919,11 +59919,11 @@ "id": "c14d8e9b-8df6-473f-950b-ab0fbd68b99e", "metadata": {}, "source": [ - "The crossvaldation_df object is a new data frame that includes the following columns:\n", + "The cross_validation_df object is a new data frame that includes the following columns:\n", "\n", "- `unique_id` index: (If you dont like working with index just run forecasts_cv_df.resetindex())\n", "- `ds`: datestamp or temporal index\n", - "- `cutoff`: the last datestamp or temporal index for the n_windows. If n_windows=1, then one unique cuttoff value, if n_windows=2 then two unique cutoff values.\n", + "- `cutoff`: the last datestamp or temporal index for the n_windows. If n_windows=1, then one unique cutoff value; if n_windows=2, then two unique cutoff values.\n", "- `y`: true value\n", "- `\"model\"`: columns with the model’s name and fitted value." ] }, @@ -60241,11 +60241,11 @@ "id": "a7b5f0c5-b119-4e55-887d-6f2449822d76", "metadata": {}, "source": [ - "The crossvaldation_df object is a new data frame that includes the following columns:\n", + "The cross_validation_df object is a new data frame that includes the following columns:\n", "\n", "- `unique_id` index: (If you dont like working with index just run forecasts_cv_df.resetindex())\n", "- `ds`: datestamp or temporal index\n", - "- `cutoff`: the last datestamp or temporal index for the n_windows. If n_windows=1, then one unique cuttoff value, if n_windows=2 then two unique cutoff values.\n", + "- `cutoff`: the last datestamp or temporal index for the n_windows. If n_windows=1, then one unique cutoff value; if n_windows=2, then two unique cutoff values.\n", "- `y`: true value\n", "- `\"model\"`: columns with the model’s name and fitted value." ] }, @@ -75761,7 +75761,7 @@ "id": "144a88a4-3fed-48f9-b414-0ab6ff0e5912", "metadata": {}, "source": [ - "Let's cleate a `dask` client." + "Let's create a `dask` client." ] }, { diff --git a/nbs/src/core/models_intro.qmd b/nbs/src/core/models_intro.qmd index 715656e59..0ff58a8b1 100644 --- a/nbs/src/core/models_intro.qmd +++ b/nbs/src/core/models_intro.qmd @@ -8,10 +8,10 @@ Automatic forecasting tools search for the best parameters and select the best p |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values | |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:| -|[`AutoARIMA`](../../models#class-autoarima)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`AutoETS`](../../models#class-autoets)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`AutoCES`](../../models#class-autoces)|βœ…|βœ…|βœ…|βœ…|| -|[`AutoTheta`](../../models#class-autotheta)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`AutoARIMA`](../../models.html#autoarima)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`AutoETS`](../../models.html#autoets)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`AutoCES`](../../models.html#autoces)|βœ…|βœ…|βœ…|βœ…|| +|[`AutoTheta`](../../models.html#autotheta)|βœ…|βœ…|βœ…|βœ…|βœ…| ## ARIMA Family @@ -19,8 +19,8 @@ These models exploit the existing autocorrelations in the time series. 
|Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values | |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:| -|[`ARIMA`](../../models#class-arima)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`AutoRegressive`](../../models#class-autoregressive)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`ARIMA`](../../models.html#arima)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`AutoRegressive`](../../models.html#autoregressive)|βœ…|βœ…|βœ…|βœ…|βœ…| ## Theta Family @@ -28,10 +28,10 @@ Fit two theta lines to a deseasonalized time series, using different techniques |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values | |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:| -|[`Theta`](../../models#class-theta)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`OptimizedTheta`](../../models#class-optimizedtheta)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`DynamicTheta`](../../models#class-dynamictheta)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`DynamicOptimizedTheta`](../../models#class-dynamicoptimizedtheta)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`Theta`](../../models.html#theta)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`OptimizedTheta`](../../models.html#optimizedtheta)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`DynamicTheta`](../../models.html#dynamictheta)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`DynamicOptimizedTheta`](../../models.html#dynamicoptimizedtheta)|βœ…|βœ…|βœ…|βœ…|βœ…| ## Multiple Seasonalities @@ -39,7 +39,7 @@ Suited for signals with more than one clear seasonality. Useful for low-frequenc |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values | |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:| -|[`MSTL`](../../models#class-mstl)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`MSTL`](../../models.html#mstl)|βœ…|βœ…|βœ…|βœ…|βœ…| ## GARCH and ARCH Models @@ -47,8 +47,8 @@ Suited for modeling time series that exhibit non-constant volatility over time. |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values | |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:| -|[`GARCH`](../../models#class-garch)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`ARCH`](../../models#class-arch)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`GARCH`](../../models.html#garch)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`ARCH`](../../models.html#arch)|βœ…|βœ…|βœ…|βœ…|βœ…| ## Baseline Models @@ -56,12 +56,12 @@ Classical models for establishing baseline. 
|Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values | |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:| -|[`HistoricAverage`](../../models#class-historicaverage)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`Naive`](../../models#class-naive)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`RandomWalkWithDrift`](../../models#class-randomwalkwithdrift)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`SeasonalNaive`](../../models#class-seasonalnaive)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`WindowAverage`](../../models#class-windowaverage)|βœ…||||| -|[`SeasonalWindowAverage`](../../models#class-seasonalwindowaverage)|βœ…||||| +|[`HistoricAverage`](../../models.html#historicaverage)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`Naive`](../../models.html#naive)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`RandomWalkWithDrift`](../../models.html#randomwalkwithdrift)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`SeasonalNaive`](../../models.html#seasonalnaive)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`WindowAverage`](../../models.html#windowaverage)|βœ…||||| +|[`SeasonalWindowAverage`](../../models.html#seasonalwindowaverage)|βœ…||||| ## Exponential Smoothing @@ -69,10 +69,10 @@ Uses a weighted average of all past observations where the weights decrease expo |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values | |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:| -|[`SimpleExponentialSmoothing`](../../models#class-simpleexponentialsmoothing)|βœ…||||| -|[`SimpleExponentialSmoothingOptimized`](../../models#class-simpleexponentialsmoothingoptimized)|βœ…||||| -|[`Holt`](../../models#class-holt)|βœ…|βœ…|βœ…|βœ…|βœ…| -|[`HoltWinters`](../../models#class-holtwinters)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`SimpleExponentialSmoothing`](../../models.html#simpleexponentialsmoothing)|βœ…||||| +|[`SimpleExponentialSmoothingOptimized`](../../models.html#simpleexponentialsmoothingoptimized)|βœ…||||| +|[`Holt`](../../models.html#holt)|βœ…|βœ…|βœ…|βœ…|βœ…| +|[`HoltWinters`](../../models.html#holtwinters)|βœ…|βœ…|βœ…|βœ…|βœ…| ## Sparse or Intermittent @@ -80,12 +80,12 @@ Suited for series with very few non-zero observations |Model | Point Forecast | Probabilistic Forecast | Insample fitted values | Probabilistic fitted values | |:------|:-------------:|:----------------------:|:---------------------:|:----------------------------:| -|[`ADIDA`](../../models#class-adida)|βœ…||||| -|[`CrostonClassic`](../../models#class-crostonclassic)|βœ…||||| -|[`CrostonOptimized`](../../models#class-crostonoptimized)|βœ…||||| -|[`CrostonSBA`](../../models#class-crostonsba)|βœ…||||| -|[`IMAPA`](../../models#class-imapa)|βœ…||||| -|[`TSB`](../../models#class-tsb)|βœ…||||| +|[`ADIDA`](../../models.html#adida)|βœ…||||| +|[`CrostonClassic`](../../models.html#crostonclassic)|βœ…||||| +|[`CrostonOptimized`](../../models.html#crostonoptimized)|βœ…||||| +|[`CrostonSBA`](../../models.html#crostonsba)|βœ…||||| +|[`IMAPA`](../../models.html#imapa)|βœ…||||| +|[`TSB`](../../models.html#tsb)|βœ…||||| diff --git a/pyproject.toml b/pyproject.toml index c5c1f9061..9b9d5895a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,11 @@ ray = [ spark = ["fugue[spark]>=0.8.1"] plotly = ["plotly", "plotly-resampler"] polars = ["polars[numpy]"] +docs = [ + "griffe2md", + "pyyaml", + "mkdocstrings-parser@git+https://github.com/Nixtla/mkdocstrings-parser.git", +] all = [ "dask<=2024.12.1", "fugue[dask]>=0.8.1", diff --git a/python/statsforecast/core.py b/python/statsforecast/core.py index 5a06d3d0a..373e7b9d8 
100644 --- a/python/statsforecast/core.py +++ b/python/statsforecast/core.py @@ -472,41 +472,16 @@ def _get_n_jobs(n_groups, n_jobs): return min(n_groups, actual_n_jobs) -_param_descriptions = { - "freq": """freq (str or int): Frequency of the data. Must be a valid pandas or polars offset alias, or an integer.""", - "df": """df (pandas or polars DataFrame): DataFrame with ids, times, targets and exogenous.""", - "fallback_model": """fallback_model (Any, optional): Model to be used if a model fails. - Only works with the `forecast` and `cross_validation` methods. Defaults to None.""", - "id_col": """id_col (str): Column that identifies each serie. Defaults to 'unique_id'.""", - "time_col": """time_col (str): Column that identifies each timestep, its values can be timestamps or integers. Defaults to 'ds'.""", - "target_col": """target_col (str): Column that contains the target. Defaults to 'y'.""", - "h": """h (int): Forecast horizon.""", - "X_df": """X_df (pandas or polars DataFrame, optional): DataFrame with ids, times and future exogenous. Defaults to None.""", - "level": """level (List[float], optional): Confidence levels between 0 and 100 for prediction intervals. Defaults to None.""", - "prediction_intervals": """prediction_intervals (ConformalIntervals, optional): Configuration to calibrate prediction intervals (Conformal Prediction). Defaults to None.""", - "fitted": """fitted (bool): Store in-sample predictions. Defaults to False.""", - "n_jobs": """n_jobs (int): Number of jobs used in the parallel processing, use -1 for all cores. Defaults to 1.""", - "verbose": """verbose (bool): Prints TQDM progress bar when `n_jobs=1`. Defaults to True.""", - "models": """models (List[Any]): List of instantiated objects models.StatsForecast.""", - "n_windows": """n_windows (int): Number of windows used for cross validation. Defaults to 1.""", - "step_size": """step_size (int): Step size between each window. Defaults to 1.""", - "test_size": """test_size (int, optional): Length of test size. If passed, set `n_windows=None`. Defaults to None.""", - "input_size": """input_size (int, optional): Input size for each window, if not none rolled windows. Defaults to None.""", - "refit": """refit (bool or int): Wether or not refit the model for each window. - If int, train the models every `refit` windows. Defaults to True.""", -} - - class _StatsForecast: """The `StatsForecast` class allows you to efficiently fit multiple `StatsForecast` models - for large sets of time series. It operates on a DataFrame `df` with at least three columns - ids, times and targets. + for large sets of time series. It operates on a DataFrame `df` with at least three columns: + ids, times, and targets. - The class has memory-efficient `StatsForecast.forecast` method that avoids storing partial - model outputs. While the `StatsForecast.fit` and `StatsForecast.predict` methods with + The class has a memory-efficient `StatsForecast.forecast` method that avoids storing partial + model outputs, while the `StatsForecast.fit` and `StatsForecast.predict` methods with the Scikit-learn interface store the fitted models. - The `StatsForecast` class offers parallelization utilities with Dask, Spark and Ray back-ends. + The `StatsForecast` class offers parallelization utilities with Dask, Spark, and Ray back-ends. See distributed computing example [here](https://github.com/Nixtla/statsforecast/tree/main/experiments/ray). 
""" @@ -518,15 +493,29 @@ def __init__( fallback_model: Optional[Any] = None, verbose: bool = False, ): - """Train statistical models. + """Initialize StatsForecast with models and configuration. + + The StatsForecast class enables efficient fitting and forecasting of multiple + statistical models across large sets of time series. It provides memory-efficient + methods, parallel processing support, and integrates with distributed computing + frameworks like Dask, Spark, and Ray. Args: - {models} - {freq} - {n_jobs} - {df} - {fallback_model} - {verbose} + models (List[Any]): List of instantiated StatsForecast model objects. + Each model should implement the forecast interface. Models must have + unique names, which can be set using the `alias` parameter. + freq (str or int): Frequency of the time series data. Must be a valid + pandas or polars offset alias (e.g., 'D' for daily, 'M' for monthly, + 'H' for hourly), or an integer representing the number of observations + per cycle. + n_jobs (int, optional): Number of jobs to use for parallel processing. + Use -1 to utilize all available CPU cores. + fallback_model (Any, optional): Model to use when a primary model fails during + fitting or forecasting. Only works with the `forecast` and `cross_validation` + methods. If None, exceptions from failing models will be raised. + verbose (bool, optional): If True, displays TQDM progress bar during single-job + execution (when n_jobs=1). + """ # TODO @fede: needed for residuals, think about it later self.models = models @@ -536,8 +525,6 @@ def __init__( self.fallback_model = fallback_model self.verbose = verbose - __init__.__doc__ = __init__.__doc__.format(**_param_descriptions) # type: ignore[union-attr] - def _validate_model_names(self): # Some test models don't have alias names = [getattr(model, "alias", lambda: None) for model in self.models] @@ -613,20 +600,29 @@ def fit( time_col: str = "ds", target_col: str = "y", ): - """Fit statistical models. + """Fit statistical models to time series data. - Fit `models` to a large set of time series from DataFrame `df` - and store fitted models for later inspection. + Fits all models specified in the constructor to each time series in the input + DataFrame. The fitted models are stored internally and can be used later with + the `predict` method. This follows the scikit-learn fit/predict interface. Args: - {df} - {prediction_intervals} - {id_col} - {time_col} - {target_col} + df (DataFrame): Input DataFrame containing time series data. + Must have columns for series identifiers, timestamps, and target values. + Can optionally include exogenous features. + prediction_intervals (ConformalIntervals, optional): Configuration for + calibrating prediction intervals using Conformal Prediction. If provided, + the models will be prepared to generate prediction intervals. + id_col (str, optional): Name of the column containing unique identifiers for + each time series. + time_col (str, optional): Name of the column containing timestamps or time + indices. Values can be timestamps (datetime) or integers. + target_col (str, optional): Name of the column containing the target variable + to forecast. Returns: - StatsForecast: Returns with stored `StatsForecast` fitted `models`. + StatsForecast (StatsForecast): Returns self with fitted models stored in the `fitted_` attribute. + This allows for method chaining. 
""" self._prepare_fit( df=df, id_col=id_col, time_col=time_col, target_col=target_col @@ -641,8 +637,6 @@ def fit( self.fitted_ = self._fit_parallel() return self - fit.__doc__ = fit.__doc__.format(**_param_descriptions) # type: ignore[union-attr] - def _make_future_df(self, h: int): start_dates = ufp.offset_times(self.last_dates, freq=self.freq, n=1) dates = ufp.time_ranges(start_dates, freq=self.freq, periods=h) @@ -685,19 +679,28 @@ def predict( h: int, X_df: Optional[DataFrame] = None, level: Optional[List[int]] = None, - ): - """Predict statistical models. + ) -> DataFrame: + """Generate forecasts using previously fitted models. - Use stored fitted `models` to predict large set of time series from DataFrame `df`. + Uses the models fitted via the `fit` method to generate predictions for the + specified forecast horizon. This follows the scikit-learn fit/predict interface. Args: - {h} - {X_df} - {level} + h (int): Forecast horizon, the number of time steps ahead to predict. + X_df (DataFrame, optional): DataFrame containing + future exogenous variables. Required if any models use exogenous features. + Must have the same structure as training data and include future values for + all time series and forecast horizon. + level (List[float], optional): Confidence levels between 0 and 100 for + prediction intervals (e.g., [80, 95] for 80% and 95% intervals). + If provided with models configured for prediction intervals, the output + will include lower and upper bounds. Returns: - pandas or polars DataFrame: DataFrame with `models` columns for point predictions and probabilistic - predictions for all fitted `models`. + DataFrame with forecasts for each model. + Contains the series identifiers, future timestamps, and one column per model + with point predictions. If `level` is specified, includes additional columns + for prediction interval bounds (e.g., 'model-lo-95', 'model-hi-95'). """ if not hasattr(self, "fitted_"): raise ValueError("You must call the fit method before calling predict.") @@ -722,8 +725,6 @@ def predict( fcsts_df[cols] = fcsts return fcsts_df - predict.__doc__ = predict.__doc__.format(**_param_descriptions) # type: ignore[union-attr] - def fit_predict( self, h: int, @@ -735,27 +736,36 @@ def fit_predict( time_col: str = "ds", target_col: str = "y", ) -> DataFrame: - """Fit and Predict with statistical models. - - This method avoids memory burden due from object storage. - It is analogous to Scikit-Learn `fit_predict` without storing information. - It requires the forecast horizon `h` in advance. + """Fit models and generate predictions in a single step. - In contrast to `StatsForecast.forecast` this method stores partial models outputs. + Combines the `fit` and `predict` methods in a single operation. The fitted models + are stored internally in the `fitted_` attribute for later use, making this method + suitable when you need both training and immediate predictions. Args: - {h} - {df} - {X_df} - {level} - {prediction_intervals} - {id_col} - {time_col} - {target_col} + h (int): Forecast horizon, the number of time steps ahead to predict. + df (DataFrame): Input DataFrame containing time series + data. Must have columns for series identifiers, timestamps, and target values. + Can optionally include exogenous features. + X_df (DataFrame, optional): DataFrame containing + future exogenous variables. Required if any models use exogenous features. + Must include future values for all time series and forecast horizon. 
+ level (List[float], optional): Confidence levels between 0 and 100 for + prediction intervals (e.g., [80, 95]). Required if `prediction_intervals` + is specified. + prediction_intervals (ConformalIntervals, optional): Configuration for + calibrating prediction intervals using Conformal Prediction. + id_col (str, optional): Name of the column containing unique identifiers for + each time series. + time_col (str, optional): Name of the column containing timestamps or time + indices. Values can be timestamps (datetime) or integers. + target_col (str, optional): Name of the column containing the target variable + to forecast. Returns: - pandas or polars DataFrame: DataFrame with `models` columns for point predictions and probabilistic - predictions for all fitted `models`. + DataFrame with forecasts containing series + identifiers, future timestamps, and predictions from each model. Includes + prediction intervals if `level` is specified. """ self._prepare_fit( df=df, id_col=id_col, time_col=time_col, target_col=target_col @@ -780,8 +790,6 @@ def fit_predict( fcsts_df[cols] = fcsts return fcsts_df - fit_predict.__doc__ = fit_predict.__doc__.format(**_param_descriptions) # type: ignore[union-attr] - def forecast( self, h: int, @@ -794,26 +802,38 @@ def forecast( time_col: str = "ds", target_col: str = "y", ) -> DataFrame: - """Memory Efficient predictions. + """Generate forecasts with memory-efficient model training. - This method avoids memory burden due from object storage. - It is analogous to Scikit-Learn `fit_predict` without storing information. - It requires the forecast horizon `h` in advance. + This is the primary forecasting method that trains models and generates predictions + without storing fitted model objects. It is more memory-efficient than `fit_predict` + when you don't need to inspect or reuse the fitted models. Models are trained and + used for forecasting within each time series, then discarded. Args: - {h} - {df} - {X_df} - {level} - {fitted} - {prediction_intervals} - {id_col} - {time_col} - {target_col} + h (int): Forecast horizon, the number of time steps ahead to predict. + df (DataFrame): Input DataFrame containing time series + data. Must have columns for series identifiers, timestamps, and target values. + Can optionally include exogenous features for training. + X_df (DataFrame, optional): DataFrame containing + future exogenous variables. Required if any models use exogenous features. + Must include future values for all time series and forecast horizon. + level (List[float], optional): Confidence levels between 0 and 100 for + prediction intervals (e.g., [80, 95]). + fitted (bool, optional): If True, stores in-sample (fitted) predictions which + can be retrieved using `forecast_fitted_values()`. + prediction_intervals (ConformalIntervals, optional): Configuration for + calibrating prediction intervals using Conformal Prediction. + id_col (str, optional): Name of the column containing unique identifiers for + each time series. + time_col (str, optional): Name of the column containing timestamps or time + indices. Values can be timestamps (datetime) or integers. + target_col (str, optional): Name of the column containing the target variable + to forecast. Returns: - pandas or polars DataFrame: DataFrame with `models` columns for point predictions and probabilistic - predictions for all fitted `models`. + DataFrame with forecasts containing series + identifiers, future timestamps, and predictions from each model. 
Includes + prediction intervals if `level` is specified. """ self.__dict__.pop("fcst_fitted_values_", None) self._prepare_fit( @@ -851,19 +871,18 @@ def forecast( self.forecast_times_ = res_fcsts["times"] return fcsts_df - forecast.__doc__ = forecast.__doc__.format(**_param_descriptions) # type: ignore[union-attr] - def forecast_fitted_values(self): - """Access insample predictions. + """Retrieve in-sample predictions from the forecast method. - After executing `StatsForecast.forecast`, you can access the insample - prediction values for each model. To get them, you need to pass `fitted=True` - to the `StatsForecast.forecast` method and then use the - `StatsForecast.forecast_fitted_values` method. + Returns the fitted (in-sample) predictions generated during the last call to + `forecast()`. These are the model's predictions on the training data, useful + for assessing model fit quality and identifying patterns in residuals. Returns: - pandas.DataFrame | polars.DataFrame: DataFrame with insample `models` columns for point predictions and probabilistic - predictions for all fitted `models`. + pandas.DataFrame or polars.DataFrame: DataFrame containing in-sample predictions + with columns for series identifiers, timestamps, target values, and fitted + predictions from each model. Includes prediction intervals if they were + requested during forecasting. """ if not hasattr(self, "fcst_fitted_values_"): raise Exception("Please run `forecast` method using `fitted=True`") @@ -895,33 +914,47 @@ def cross_validation( time_col: str = "ds", target_col: str = "y", ) -> DataFrame: - """Temporal Cross-Validation. - - Efficiently fits a list of `StatsForecast` - models through multiple training windows, in either chained or rolled manner. + """Perform temporal cross-validation for model evaluation. - `StatsForecast.models`' speed allows to overcome this evaluation technique - high computational costs. Temporal cross-validation provides better model's - generalization measurements by increasing the test's length and diversity. + Evaluates model performance across multiple time windows using a time series + cross-validation approach. This method trains models on expanding or rolling + windows and generates forecasts for each validation period, providing robust + assessment of forecast accuracy and generalization. Args: - {h} - {df} - {n_windows} - {step_size} - {test_size} - {input_size} - {level} - {fitted} - {refit} - {prediction_intervals} - {id_col} - {time_col} - {target_col} + h (int): Forecast horizon for each validation window. + df (pandas.DataFrame or polars.DataFrame): Input DataFrame containing time series + data with columns for series identifiers, timestamps, and target values. + n_windows (int, optional): Number of validation windows to create. Cannot be + specified together with `test_size`. + step_size (int, optional): Number of time steps between consecutive validation + windows. Smaller values create overlapping windows. + test_size (int, optional): Total size of the test period. If provided, `n_windows` + is computed automatically. Overrides `n_windows` if specified. + input_size (int, optional): Maximum number of training observations to use for + each window. If None, uses expanding windows with all available history. + If specified, uses rolling windows of fixed size. + level (List[float], optional): Confidence levels between 0 and 100 for + prediction intervals (e.g., [80, 95]). 
+ fitted (bool, optional): If True, stores in-sample predictions for each window, + accessible via `cross_validation_fitted_values()`. + refit (bool or int, optional): Controls model refitting frequency. If True, + refits models for every window. If False, fits once and uses the forward + method. If an integer n, refits every n windows. Models must implement the + `forward` method when refit is not True. + prediction_intervals (ConformalIntervals, optional): Configuration for + calibrating prediction intervals using Conformal Prediction. Requires + `level` to be specified. + id_col (str, optional): Name of the column containing unique identifiers for + each time series. + time_col (str, optional): Name of the column containing timestamps or time + indices. + target_col (str, optional): Name of the column containing the target variable. Returns: - pandas or polars DataFrame: DataFrame with insample `models` columns for point predictions and probabilistic - predictions for all fitted `models`. + DataFrame with cross-validation results + including series identifiers, cutoff dates (last training observation), + forecast dates, actual values, and predictions from each model for all windows. """ if n_windows is None and test_size is None: raise ValueError("you must define `n_windows` or `test_size`") @@ -1006,19 +1039,18 @@ def cross_validation( ) return fcsts_df - cross_validation.__doc__ = cross_validation.__doc__.format(**_param_descriptions) # type: ignore[union-attr] - def cross_validation_fitted_values(self) -> DataFrame: - """Access insample cross validated predictions. + """Retrieve in-sample predictions from cross-validation. - After executing `StatsForecast.cross_validation`, you can access the insample - prediction values for each model and window. To get them, you need to pass `fitted=True` - to the `StatsForecast.cross_validation` method and then use the - `StatsForecast.cross_validation_fitted_values` method. + Returns the fitted (in-sample) predictions for each cross-validation window. + These are the model's predictions on the training data for each window, + useful for analyzing how model fit changes across different training periods. Returns: - pandas or polars DataFrame: DataFrame with insample `models` columns for point predictions - and probabilistic predictions for all fitted `models`. + pandas.DataFrame or polars.DataFrame: DataFrame containing in-sample predictions + for each cross-validation window. Includes columns for series identifiers, + timestamps, cutoff dates (last training observation of each window), actual + values, and fitted predictions from each model. """ if not hasattr(self, "cv_fitted_values_"): raise Exception("Please run `cross_validation` method using `fitted=True`") @@ -1224,25 +1256,44 @@ def plot( target_col: str = "y", resampler_kwargs: Optional[Dict] = None, ): - """Plot forecasts and insample values. + """Visualize time series data with forecasts and prediction intervals. + + Creates plots showing historical data, forecasts, and optional prediction intervals + for time series. Supports multiple plotting engines and interactive visualization. Args: - {df} - forecasts_df (pandas or polars DataFrame, optional): DataFrame ids, times and models. Defaults to None. - unique_ids (list of str, optional): ids to plot. If None, they're selected randomly. Defaults to None. - plot_random (bool): Select time series to plot randomly. Defaults to True. - models (List[str], optional): List of models to plot. Defaults to None. 
- level (List[float], optional): List of prediction intervals to plot if paseed. Defaults to None. - max_insample_length (int, optional): Max number of train/insample observations to be plotted. Defaults to None. - plot_anomalies (bool): Plot anomalies for each prediction interval. Defaults to False. - engine (str): Library used to plot. 'plotly', 'plotly-resampler' or 'matplotlib'. Defaults to 'matplotlib'. - {id_col} - {time_col} - {target_col} - resampler_kwargs (dict): Kwargs to be passed to plotly-resampler constructor. - For further custumization ("show_dash") call the method, - store the plotting object and add the extra arguments to - its `show_dash` method. + df (DataFrame): Input DataFrame containing historical + time series data with columns for series identifiers, timestamps, and target values. + forecasts_df (DataFrame, optional): DataFrame with forecast + results from `forecast()` or `cross_validation()`. Should contain series identifiers, + timestamps, and model predictions. + unique_ids (List[str] or numpy.ndarray, optional): Specific series identifiers to plot. + If None and `plot_random` is True, series are selected randomly. + plot_random (bool, optional): Whether to randomly select series to plot when + `unique_ids` is not specified. + models (List[str], optional): Names of specific models to include in the plot. + If None, plots all models present in `forecasts_df`. + level (List[float], optional): Confidence levels to plot as shaded regions around + forecasts (e.g., [80, 95]). Only applicable if prediction intervals are present + in `forecasts_df`. + max_insample_length (int, optional): Maximum number of historical observations to + display. Useful for focusing on recent history when series are long. + plot_anomalies (bool, optional): If True, highlights observations that fall outside + prediction intervals as anomalies. + engine (str, optional): Plotting library to use. Options are 'matplotlib' (static plots), + 'plotly' (interactive plots), or 'plotly-resampler' (interactive with downsampling + for large datasets). + id_col (str, optional): Name of the column containing series identifiers. + time_col (str, optional): Name of the column containing timestamps. + target_col (str, optional): Name of the column containing the target variable. + resampler_kwargs (Dict, optional): Additional keyword arguments passed to the + plotly-resampler constructor when `engine='plotly-resampler'`. For further + customization (e.g., 'show_dash'), call this method, store the returned object, + and add arguments to its `show_dash` method. + + Returns: + Plotting object from the selected engine (matplotlib Figure, plotly Figure, or + FigureResampler object), which can be further customized or displayed. """ from utilsforecast.plotting import plot_series @@ -1272,15 +1323,22 @@ def save( max_size: Optional[str] = None, trim: bool = False, ): - """Function that will save StatsForecast class with certain settings to make it - reproducible. + """Save the StatsForecast instance to disk using pickle. + + Serializes the StatsForecast object including all fitted models and configuration + to a file for later use. The saved object can be loaded with the `load()` method + to restore the exact state for making predictions. Args: - path (str or pathlib.Path, optional): Path of the file to be saved. If `None` will create one in the current - directory using the current UTC timestamp. Defaults to None. - max_size (str, optional): StatsForecast object should not exceed this size. 
- Available byte naming: ['B', 'KB', 'MB', 'GB']. Defaults to None. - trim (bool): Delete any attributes not needed for inference. Defaults to False. + path (str or pathlib.Path, optional): File path where the object will be saved. + If None, creates a filename in the current directory using the format + 'StatsForecast_YYYY-MM-DD_HH-MM-SS.pkl' with the current UTC timestamp. + max_size (str, optional): Maximum allowed size for the serialized object. + Should be specified as a number followed by a unit: 'B', 'KB', 'MB', or 'GB' + (e.g., '100MB', '1.5GB'). If the object exceeds this size, an OSError is raised. + trim (bool, optional): If True, removes fitted values from `forecast()` and + `cross_validation()` before saving to reduce file size. These values are + not needed for generating new predictions. """ # Will be used to find the size of the fitted models # Never expecting anything higher than GB (even that's a lot') @@ -1350,14 +1408,19 @@ def _get_cap_size(self, max_size, bytes_hmap): @staticmethod def load(path: Union[Path, str]): - """ - Automatically loads the model into ready StatsForecast. + """Load a previously saved StatsForecast instance from disk. + + Deserializes a StatsForecast object that was saved using the `save()` method, + restoring all fitted models and configuration. The loaded object is ready to + generate predictions immediately. Args: - path (str or pathlib.Path): Path to saved StatsForecast file. + path (str or pathlib.Path): File path to the saved StatsForecast pickle file. + Must point to a file created by the `save()` method. Returns: - StatsForecast: Previously saved StatsForecast + StatsForecast: The deserialized StatsForecast instance with all fitted models + and configuration restored, ready for prediction. """ if not Path(path).exists(): raise ValueError("Specified path does not exist, check again and retry.") @@ -1368,9 +1431,6 @@ def __repr__(self): return f"StatsForecast(models=[{','.join(map(repr, self.models))}])" -_StatsForecast.plot.__doc__ = _StatsForecast.plot.__doc__.format(**_param_descriptions) # type: ignore[union-attr] - - class ParallelBackend: def forecast( self, @@ -1464,7 +1524,40 @@ def forecast( id_col: str = "unique_id", time_col: str = "ds", target_col: str = "y", - ): + ) -> DataFrame: + """Generate forecasts with memory-efficient model training. + + This is the primary forecasting method that trains models and generates predictions + without storing fitted model objects. It is more memory-efficient than `fit_predict` + when you don't need to inspect or reuse the fitted models. Models are trained and + used for forecasting within each time series, then discarded. + + Args: + h (int): Forecast horizon, the number of time steps ahead to predict. + df (DataFrame): Input DataFrame containing time series + data. Must have columns for series identifiers, timestamps, and target values. + Can optionally include exogenous features for training. + X_df (DataFrame, optional): DataFrame containing + future exogenous variables. Required if any models use exogenous features. + Must include future values for all time series and forecast horizon. + level (List[float], optional): Confidence levels between 0 and 100 for + prediction intervals (e.g., [80, 95]). + fitted (bool, optional): If True, stores in-sample (fitted) predictions which + can be retrieved using `forecast_fitted_values()`. + prediction_intervals (ConformalIntervals, optional): Configuration for + calibrating prediction intervals using Conformal Prediction. 
+ id_col (str, optional): Name of the column containing unique identifiers for
+ each time series.
+ time_col (str, optional): Name of the column containing timestamps or time
+ indices. Values can be timestamps (datetime) or integers.
+ target_col (str, optional): Name of the column containing the target variable
+ to forecast.
+
+ Returns:
+ DataFrame with forecasts containing series
+ identifiers, future timestamps, and predictions from each model. Includes
+ prediction intervals if `level` is specified.
+ """
if prediction_intervals is not None and level is None:
raise ValueError(
"You must specify `level` when using `prediction_intervals`"
@@ -1521,7 +1614,49 @@ def cross_validation(
id_col: str = "unique_id",
time_col: str = "ds",
target_col: str = "y",
- ):
+ ) -> DataFrame:
+ """Perform temporal cross-validation for model evaluation.
+
+ Evaluates model performance across multiple time windows using a time series
+ cross-validation approach. This method trains models on expanding or rolling
+ windows and generates forecasts for each validation period, providing robust
+ assessment of forecast accuracy and generalization.
+
+ Args:
+ h (int): Forecast horizon for each validation window.
+ df (DataFrame): Input DataFrame containing time series
+ data with columns for series identifiers, timestamps, and target values.
+ n_windows (int, optional): Number of validation windows to create. Cannot be
+ specified together with `test_size`.
+ step_size (int, optional): Number of time steps between consecutive validation
+ windows. Smaller values create overlapping windows.
+ test_size (int, optional): Total size of the test period. If provided, `n_windows`
+ is computed automatically from it.
+ input_size (int, optional): Maximum number of training observations to use for
+ each window. If None, uses expanding windows with all available history.
+ If specified, uses rolling windows of fixed size.
+ level (List[float], optional): Confidence levels between 0 and 100 for
+ prediction intervals (e.g., [80, 95]).
+ fitted (bool, optional): If True, stores in-sample predictions for each window,
+ accessible via `cross_validation_fitted_values()`.
+ refit (bool or int, optional): Controls model refitting frequency. If True,
+ refits models for every window. If False, fits once and uses the `forward`
+ method. If an integer n, refits every n windows. Models must implement the
+ `forward` method when refit is not True.
+ prediction_intervals (ConformalIntervals, optional): Configuration for
+ calibrating prediction intervals using Conformal Prediction. Requires
+ `level` to be specified.
+ id_col (str, optional): Name of the column containing unique identifiers for
+ each time series.
+ time_col (str, optional): Name of the column containing timestamps or time
+ indices.
+ target_col (str, optional): Name of the column containing the target variable.
+
+ Returns:
+ DataFrame with cross-validation results
+ including series identifiers, cutoff dates (last training observation),
+ forecast dates, actual values, and predictions from each model for all windows.
+ """ if self._is_native(df=df): return super().cross_validation( h=h, diff --git a/python/statsforecast/distributed/fugue.py b/python/statsforecast/distributed/fugue.py index a7cec0e09..a1b7bdf04 100644 --- a/python/statsforecast/distributed/fugue.py +++ b/python/statsforecast/distributed/fugue.py @@ -15,7 +15,6 @@ from statsforecast.core import ( ParallelBackend, - _param_descriptions, _StatsForecast, make_backend, ) @@ -301,18 +300,18 @@ def forecast( `core.StatsForecast`'s forecast to efficiently fit a list of StatsForecast models. Args: - {df} - {freq} - {models} - {fallback_model} - {X_df} - {h} - {level} - {fitted} - {prediction_intervals} - {id_col} - {time_col} - {target_col} + df (DataFrame): Input DataFrame containing time series data. Must have columns for series identifiers, timestamps, and target values. Can optionally include exogenous features. + freq (str or int): Frequency of the time series data. Must be a valid pandas or polars offset alias (e.g., 'D' for daily, 'M' for monthly, 'H' for hourly), or an integer representing the number of observations per cycle. + models (List[Any]): List of instantiated StatsForecast model objects. Each model should implement the forecast interface. Models must have unique names, which can be set using the `alias` parameter. + fallback_model (Any, optional): Model to use when a primary model fails during fitting or forecasting. Only works with the `forecast` and `cross_validation` methods. If None, exceptions from failing models will be raised. + X_df (DataFrame, optional): DataFrame containing future exogenous variables. Required if any models use exogenous features. Must include future values for all time series and forecast horizon. + h (int): Forecast horizon, the number of time steps ahead to predict. + level (List[float], optional): Confidence levels between 0 and 100 for prediction intervals (e.g., [80, 95] for 80% and 95% intervals). + fitted (bool, optional): If True, stores in-sample (fitted) predictions which can be retrieved using `forecast_fitted_values()`. + prediction_intervals (ConformalIntervals, optional): Configuration for calibrating prediction intervals using Conformal Prediction. + id_col (str, optional): Name of the column containing unique identifiers for each time series. Defaults to 'unique_id'. + time_col (str, optional): Name of the column containing timestamps or time indices. Values can be timestamps (datetime) or integers. Defaults to 'ds'. + target_col (str, optional): Name of the column containing the target variable to forecast. Defaults to 'y'. Returns: pandas.DataFrame: DataFrame with `models` columns for point predictions and probabilistic predictions for all fitted `models` @@ -376,7 +375,6 @@ def forecast( ) return res - forecast.__doc__ = forecast.__doc__.format(**_param_descriptions) # type: ignore[union-attr] def forecast_fitted_values(self): """Retrieve in-sample predictions""" @@ -464,22 +462,22 @@ def cross_validation( and diversity. Args: - {df} - {freq} - {models} - {fallback_model} - {h} - {n_windows} - {step_size} - {test_size} - {input_size} - {level} - {refit} - {fitted} - {prediction_intervals} - {id_col} - {time_col} - {target_col} + df (DataFrame): Input DataFrame containing time series data with columns for series identifiers, timestamps, and target values. + freq (str or int): Frequency of the time series data. Must be a valid pandas or polars offset alias (e.g., 'D' for daily, 'M' for monthly, 'H' for hourly), or an integer representing the number of observations per cycle. 
+ models (List[Any]): List of instantiated StatsForecast model objects. Each model should implement the forecast interface. Models must have unique names, which can be set using the `alias` parameter.
+ fallback_model (Any, optional): Model to use when a primary model fails during fitting or forecasting. Only works with the `forecast` and `cross_validation` methods. If None, exceptions from failing models will be raised.
+ h (int): Forecast horizon for each validation window.
+ n_windows (int, optional): Number of validation windows to create. Cannot be specified together with `test_size`.
+ step_size (int, optional): Number of time steps between consecutive validation windows. Smaller values create overlapping windows.
+ test_size (int, optional): Total size of the test period. If provided, `n_windows` is computed automatically from it.
+ input_size (int, optional): Maximum number of training observations to use for each window. If None, uses expanding windows with all available history. If specified, uses rolling windows of fixed size.
+ level (List[float], optional): Confidence levels between 0 and 100 for prediction intervals (e.g., [80, 95]).
+ refit (bool or int, optional): Controls model refitting frequency. If True, refits models for every window. If False, fits once and uses the `forward` method. If an integer n, refits every n windows. Models must implement the `forward` method when refit is not True.
+ fitted (bool, optional): If True, stores in-sample predictions for each window, accessible via `cross_validation_fitted_values()`.
+ prediction_intervals (ConformalIntervals, optional): Configuration for calibrating prediction intervals using Conformal Prediction. Requires `level` to be specified.
+ id_col (str, optional): Name of the column containing unique identifiers for each time series. Defaults to 'unique_id'.
+ time_col (str, optional): Name of the column containing timestamps or time indices. Defaults to 'ds'.
+ target_col (str, optional): Name of the column containing the target variable. Defaults to 'y'.
Returns:
pandas.DataFrame: DataFrame, with `models` columns for point predictions and probabilistic predictions for all fitted `models`.
@@ -524,7 +522,6 @@ def cross_validation(
**self._transform_kwargs,
)
- cross_validation.__doc__ = cross_validation.__doc__.format(**_param_descriptions)  # type: ignore[union-attr]
@make_backend.candidate(lambda obj, *args, **kwargs: isinstance(obj, ExecutionEngine))
diff --git a/python/statsforecast/models.py b/python/statsforecast/models.py
index 17dbc2f46..a91f3e2e8 100644
--- a/python/statsforecast/models.py
+++ b/python/statsforecast/models.py
@@ -2583,7 +2583,10 @@ def __init__(
Also known as mean method. Uses a simple average of all past observations.
Assuming there are $t$ observations, the one-step forecast is given by:
- $$\hat{y}_{t+1} = \frac{1}{t} \sum_{j=1}^t y_j$$
+
+ ``` math
+ \hat{y}_{t+1} = \frac{1}{t} \sum_{j=1}^t y_j
+ ```
References:
- [Rob J. Hyndman and George Athanasopoulos (2018). "Forecasting principles and practice, Simple Methods"](https://otexts.com/fpp3/simple-methods.html).
@@ -2923,7 +2926,9 @@ def __init__(
A variation of the naive method allows the forecasts to change
over time. The amount of change, called drift, is the average change seen in the historical data.
- $$\hat{y}_{t+1} = y_t+\frac{1}{t-1}\sum_{j=1}^t (y_j-y_{j-1}) = y_t+ \frac{y_t-y_1}{t-1}$$
+ ``` math
+ \hat{y}_{t+1} = y_t + \frac{1}{t-1}\sum_{j=2}^t (y_j - y_{j-1}) = y_t + \frac{y_t - y_1}{t-1}
+ ```
From the previous equation, we can see that this is equivalent to extrapolating a line between
the first and the last observation.
@@ -3803,12 +3808,15 @@ def __init__(
alias: str = "CrostonClassic",
prediction_intervals: Optional[ConformalIntervals] = None,
):
- """CrostonClassic model.
+ r"""CrostonClassic model.
A method to forecast time series that exhibit intermittent demand.
It decomposes the original time series into a non-zero demand size $z_t$ and
inter-demand intervals $p_t$. Then the forecast is given by:
- $$\hat{y}_t = \\frac{\hat{z}_t}{\hat{p}_t}$$
+
+ ``` math
+ \hat{y}_t = \frac{\hat{z}_t}{\hat{p}_t}
+ ```
where $\hat{z}_t$ and $\hat{p}_t$ are forecasted using SES. The smoothing parameter
of both components is set equal to 0.1
@@ -3981,12 +3989,15 @@ def __init__(
alias: str = "CrostonOptimized",
prediction_intervals: Optional[ConformalIntervals] = None,
):
- """CrostonOptimized model.
+ r"""CrostonOptimized model.
A method to forecast time series that exhibit intermittent demand.
It decomposes the original time series into a non-zero demand size $z_t$ and
inter-demand intervals $p_t$. Then the forecast is given by:
- $$\hat{y}_t = \\frac{\hat{z}_t}{\hat{p}_t}$$
+
+ ``` math
+ \hat{y}_t = \frac{\hat{z}_t}{\hat{p}_t}
+ ```
A variation of the classic Croston's method where the smoothing parameter is optimally
selected from the range $[0.1,0.3]$. Both the non-zero demand $z_t$ and the inter-demand
@@ -4129,16 +4140,22 @@ def __init__(
alias: str = "CrostonSBA",
prediction_intervals: Optional[ConformalIntervals] = None,
):
- """CrostonSBA model.
+ r"""CrostonSBA model.
A method to forecast time series that exhibit intermittent demand.
It decomposes the original time series into a non-zero demand size $z_t$ and
inter-demand intervals $p_t$. Then the forecast is given by:
- $$\hat{y}_t = \\frac{\hat{z}_t}{\hat{p}_t}$$
+
+ ``` math
+ \hat{y}_t = \frac{\hat{z}_t}{\hat{p}_t}
+ ```
A variation of the classic Croston's method that uses a debiasing factor, so that the
forecast is given by:
- $$\hat{y}_t = 0.95 \\frac{\hat{z}_t}{\hat{p}_t}$$
+
+ ``` math
+ \hat{y}_t = 0.95 \frac{\hat{z}_t}{\hat{p}_t}
+ ```
References:
- [Croston, J. D. (1972). Forecasting and stock control for intermittent demands. Journal of the Operational Research Society, 23(3), 289-303.](https://link.springer.com/article/10.1057/jors.1972.50).
@@ -4465,21 +4482,23 @@ def __init__(
alias: str = "TSB",
prediction_intervals: Optional[ConformalIntervals] = None,
):
- """TSB model.
+ r"""TSB model.
Teunter-Syntetos-Babai: A modification of Croston's method that replaces the inter-demand
intervals with the demand probability $d_t$, which is defined as follows.
- $$
- d_t = \\begin{cases}
- 1 & \\text{if demand occurs at time t} \\\\
- 0 & \\text{otherwise.}
+ ``` math
+ d_t = \begin{cases}
+ 1 & \text{if demand occurs at time t} \\
+ 0 & \text{otherwise.}
\end{cases}
- $$
+ ```
Hence, the forecast is given by
- $$\hat{y}_t= \hat{d}_t\hat{z_t}$$
+ ``` math
+ \hat{y}_t = \hat{d}_t \hat{z}_t
+ ```
Both $d_t$ and $z_t$ are forecasted using SES. The smoothing parameters of each may differ,
like in the optimized Croston's method.
@@ -5270,11 +5289,16 @@ class GARCH(_TS):
A method for modeling time series that exhibit non-constant volatility over time.
The GARCH model assumes that at time $t$, $y_t$ is given by:
- $$y_t = v_t \sigma_t$$
+ ``` math
+ y_t = v_t \sigma_t
+ ```
+
with
- $$\sigma_t^2 = w + \sum_{i=1}^p a_i y_{t-i}^2 + \sum_{j=1}^q b_j \sigma_{t-j}^2$$.
+ ``` math
+ \sigma_t^2 = w + \sum_{i=1}^p a_i y_{t-i}^2 + \sum_{j=1}^q b_j \sigma_{t-j}^2.
+ ```
Here $v_t$ is a sequence of iid random variables with zero mean and unit variance.
The coefficients $w$, $a_i$, $i=1,...,p$, and $b_j$, $j=1,...,q$ must satisfy the
following conditions:
@@ -5436,15 +5460,15 @@ class ARCH(GARCH):
A particular case of the GARCH(p,q) model where $q=0$. It assumes that at time $t$, $y_t$
is given by:
- $$
+ ``` math
y_t = \epsilon_t \sigma_t
- $$
+ ```
with
- $$
+ ``` math
\sigma_t^2 = w + \sum_{i=1}^p a_i y_{t-i}^2
- $$.
+ ```
Here $\epsilon_t$ is a sequence of iid random variables with zero mean and unit variance. The
coefficients $w$ and $a_i$, $i=1,...,p$ must be nonnegative and $\sum_{k=1}^p a_k < 1$.
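To make the GARCH/ARCH recursions above concrete, here is a minimal simulation sketch in plain NumPy that implements exactly the two displayed equations. The function name `simulate_garch` and the coefficient values are illustrative assumptions, not library code; statsforecast's `GARCH`/`ARCH` models estimate $w$, $a_i$, and $b_j$ from data rather than taking them as inputs.

```python
import numpy as np

def simulate_garch(n, w, a, b, seed=0):
    """Simulate y_t = v_t * sigma_t with
    sigma_t^2 = w + sum_i a_i * y_{t-i}^2 + sum_j b_j * sigma_{t-j}^2."""
    rng = np.random.default_rng(seed)
    p, q = len(a), len(b)
    y = np.zeros(n)
    # Start sigma^2 at the unconditional variance w / (1 - sum(a) - sum(b))
    sigma2 = np.full(n, w / (1.0 - sum(a) - sum(b)))
    for t in range(max(p, q), n):
        sigma2[t] = (
            w
            + sum(a[i] * y[t - 1 - i] ** 2 for i in range(p))
            + sum(b[j] * sigma2[t - 1 - j] for j in range(q))
        )
        y[t] = rng.standard_normal() * np.sqrt(sigma2[t])
    return y

# GARCH(1,1) with w=0.2, a_1=0.3, b_1=0.5 (a_1 + b_1 < 1 keeps the variance finite)
returns = simulate_garch(500, w=0.2, a=[0.3], b=[0.5])
# ARCH(1) is the special case q=0, i.e. b=[]
arch_returns = simulate_garch(500, w=0.2, a=[0.3], b=[])
```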
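The drift equation earlier in this diff is a telescoping identity: the mean of the consecutive differences collapses to $(y_t - y_1)/(t-1)$, the slope of the line through the first and last observations. A quick self-contained check with made-up numbers:

```python
import numpy as np

y = np.array([3.0, 5.0, 4.0, 8.0, 7.0, 11.0])  # any series works
t = len(y)

drift = np.diff(y).mean()                    # (1/(t-1)) * sum of (y_j - y_{j-1})
via_sum = y[-1] + drift                      # y_t plus the average change
via_line = y[-1] + (y[-1] - y[0]) / (t - 1)  # extrapolate the first-to-last line

assert np.isclose(via_sum, via_line)         # identical by telescoping
```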
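Taken together, the rewritten `forecast`, `cross_validation`, `plot`, `save`, and `load` docstrings describe one workflow. Below is a minimal sketch of that workflow, assuming the public statsforecast API; the model choices, window settings, and the file name `sf.pkl` are arbitrary.

```python
from statsforecast import StatsForecast
from statsforecast.models import HistoricAverage, RandomWalkWithDrift
from statsforecast.utils import generate_series

# Synthetic data with columns unique_id, ds, y
df = generate_series(n_series=3, freq="D")

sf = StatsForecast(models=[HistoricAverage(), RandomWalkWithDrift()], freq="D")

# Memory-efficient path: trains, forecasts, and discards models per series
fcst_df = sf.forecast(df=df, h=12, level=[80, 95])

# Temporal cross-validation: 3 windows of h=12 steps each
cv_df = sf.cross_validation(df=df, h=12, n_windows=3, step_size=12)

# Plot history plus forecasts with shaded 80%/95% intervals
fig = StatsForecast.plot(df, fcst_df, level=[80, 95], max_insample_length=60)

# Round-trip the instance through disk
sf.save("sf.pkl", trim=True)
sf_restored = StatsForecast.load("sf.pkl")
```

Per the docstrings above, passing `fitted=True` to `forecast()` would additionally make the in-sample predictions available through `forecast_fitted_values()`.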