From 5db388f19ac2fc759d481725166cedffc7a8f53a Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Mon, 29 Apr 2024 12:00:02 +0200 Subject: [PATCH] Add comment on scaling --- python_scripts/cross_validation_grouping.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python_scripts/cross_validation_grouping.py b/python_scripts/cross_validation_grouping.py index 3a95a825b..57cb4f302 100644 --- a/python_scripts/cross_validation_grouping.py +++ b/python_scripts/cross_validation_grouping.py @@ -19,6 +19,15 @@ # %% [markdown] # We create a model consisting of a logistic regression classifier with a # preprocessor to scale the data. +# +# ```{note} +# Here we use a `MinMaxScaler` as we know that each pixel's gray-scale value +# always lies between 0 (white) and 16 (black). This makes `MinMaxScaler` more +# suited in this case than `StandardScaler`, as some pixels consistently have +# low variance (pixels at the borders might almost always be zero if most +# digits are centered in the image). For such pixels, `StandardScaler` can +# produce very large values, since it divides by a tiny standard deviation. +# ``` # %% from sklearn.preprocessing import MinMaxScaler