From 767499b99c793ea8172d2357a7130bba1f68474c Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 26 Oct 2023 13:48:38 +0200
Subject: [PATCH] ENH Mention scaling behavior of binning and splines (#739)

Co-authored-by: ArturoAmorQ <arturo.amor-quiroz@polytechnique.edu>
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
---
 ...dels_feature_engineering_classification.py | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/python_scripts/linear_models_feature_engineering_classification.py b/python_scripts/linear_models_feature_engineering_classification.py
index 9fd203f34..12a2997da 100644
--- a/python_scripts/linear_models_feature_engineering_classification.py
+++ b/python_scripts/linear_models_feature_engineering_classification.py
@@ -235,7 +235,10 @@ def plot_decision_boundary(model, title=None):
 # %%
 from sklearn.preprocessing import KBinsDiscretizer
 
-classifier = make_pipeline(KBinsDiscretizer(n_bins=5), LogisticRegression())
+classifier = make_pipeline(
+    KBinsDiscretizer(n_bins=5, encode="onehot"),  # already the default params
+    LogisticRegression(),
+)
 classifier
 
 # %%
@@ -279,15 +282,20 @@ def plot_decision_boundary(model, title=None):
 # We can see that the decision boundary is now smooth, and while it favors
 # axis-aligned decision rules when extrapolating in low density regions, it can
 # adopt a more curvy decision boundary in the high density regions.
-#
-# Note however, that the number of knots is a hyperparameter that needs to be
-# tuned. If we use too few knots, the model would underfit the data, as shown on
-# the moons dataset. If we use too many knots, the model would overfit the data.
-#
 # However, as for the binning transformation, the model still fails to separate
 # the data for the XOR dataset, irrespective of the number of knots, for the
 # same reasons: **the spline transformation is a feature-wise transformation**
 # and thus **cannot capture interactions** between features.
+#
+# Keep in mind that the number of knots is a hyperparameter that needs to be
+# tuned. If we use too few knots, the model underfits the data, as shown on
+# the moons dataset. If we use too many knots, the model overfits the data.
+#
+# ```{note}
+# `KBinsDiscretizer(encode="onehot")` and `SplineTransformer` do not require
+# additional scaling. Indeed, they can replace the scaling step for numerical
+# features: they both create features with values in the [0, 1] range.
+# ```
 
 # %% [markdown]
 #