506 ccp reoptimize inference #507

Merged
24 changes: 18 additions & 6 deletions examples/classification/4-tutorials/plot_ccp_class_tutorial.py
@@ -3,12 +3,13 @@
Tutorial: Conditional CP for classification
============================================

We will use a synthetic toy dataset for the tutorial of the CCP method, and
its comparison with the other methods available in MAPIE. The CCP method
The tutorial will explain how to use the CCP method for classification
and will compare it with the other methods available in MAPIE. The CCP method
implements the method described in the Gibbs et al. (2023) paper [1].

In this tutorial, the classifier will be
:class:`~sklearn.linear_model.LogisticRegression`.
We will use a synthetic toy dataset.

We will compare the CCP method (using
:class:`~mapie.futur.split.SplitCPRegressor`,
@@ -21,6 +22,13 @@
predicted softmax, to keep all the classes above the threshold
(``alpha`` is ``1 - target coverage``).
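
Concretely, for a single sample, the prediction set is built like this (an
illustrative sketch, not MAPIE's internals; the threshold value is made up):

import numpy as np
softmax = np.array([0.05, 0.40, 0.10, 0.30, 0.15])  # predicted probabilities
threshold = 0.12  # hypothetical value; in practice calibrated from the scores
prediction_set = np.where(softmax >= threshold)[0]  # -> classes 1, 3 and 4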

Warning:
In this tutorial, we use ``unsafe_approximation=True`` to have a faster
computation (because Read The Docs examples require fast computation).
This mode uses an approximation which makes the inference (``predict``)
faster, but induces a small miscoverage. It is recommended not to use it, or
to be very careful and empirically check the coverage on a test set, as
sketched below.
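
If you do enable it, a quick empirical check on a test set could look like
this (a sketch: ``y_ps_test`` is assumed to be the boolean prediction-set
array of shape ``(n_samples, n_classes, 1)`` returned by ``predict``):

import numpy as np
# y_ps_test, y_test: assumed test-set prediction sets and true labels
in_set = y_ps_test[np.arange(len(y_test)), y_test, 0]
print(in_set.mean())  # should stay close to 1 - ALPHA despite the approximation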

[1] Isaac Gibbs, John J. Cherian, and Emmanuel J. Candès,
"Conformal Prediction With Conditional Guarantees",
`arXiv <https://arxiv.org/abs/2305.12616>`_, 2023.
@@ -45,6 +53,7 @@
np.random.seed(random_state)

ALPHA = 0.2
UNSAFE_APPROXIMATION = True
N_CLASSES = 5

##############################################################################
@@ -88,7 +97,7 @@ def generate_data(seed=1, n_train=2000, n_calib=2000, n_test=2000, ):
# Let's visualize the data and its distribution


x_train, y_train, *_ = generate_data(seed=None, n_train=2000)
x_train, y_train, *_ = generate_data(seed=None, n_train=1000)

for c in range(N_CLASSES):
plt.scatter(x_train[y_train == c, 0], x_train[y_train == c, 1],
@@ -103,8 +112,9 @@ def generate_data(seed=1, n_train=2000, n_calib=2000, n_test=2000, ):


def run_exp(
mapies, names, alpha, n_train=2000, n_calib=2000,
n_test=2000, grid_step=100, plot=True, seed=1, max_display=2000
mapies, names, alpha,
n_train=1000, n_calib=1000, n_test=1000,
grid_step=100, plot=True, seed=1, max_display=2000
):
(
x_train, y_train, x_calib, y_calib, x_test, y_test
@@ -148,7 +158,9 @@ def run_exp(
mapie.fit(
np.vstack([x_train, x_calib]), np.hstack([y_train, y_calib])
)
_, y_ps_test = mapie.predict(x_test)
_, y_ps_test = mapie.predict(
x_test, unsafe_approximation=UNSAFE_APPROXIMATION
)
if plot:
y_pred_mesh, y_ps_mesh = mapie.predict(X_test_mesh)
else:
@@ -20,6 +20,12 @@
MAPIE gives the same results as [1], and that the bounds of the PIs are
obtained.

It is important to note that we are checking here whether the adaptivity
property of the prediction intervals is well obtained. However, the paper does
these computations with the full conformal prediction approach, whereas we
implemented the faster but more conservative split method. Thus, the results
may vary a little (a rough width-based check is sketched below).

[1] Isaac Gibbs, John J. Cherian, Emmanuel J. Candès (2023).
Conformal Prediction With Conditional Guarantees
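
A rough way to check this adaptivity (a sketch with hypothetical names, not
the paper's code): compute the per-sample interval widths and see whether
they track the known noise level of the synthetic data.

import numpy as np
# y_pi: hypothetical (n_samples, 2, 1) intervals; true_noise_level: known noise
widths = y_pi[:, 1, 0] - y_pi[:, 0, 0]
print(np.corrcoef(widths, true_noise_level)[0, 1])  # should be clearly positive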

@@ -352,22 +358,10 @@ def plot_results(X_test, y_test, n_trials=10,
# 5. Reproduce experiment and results
# -----------------------------------------------------------------------------

plot_results(X_test, y_test, 50, experiment="Groups")
plot_results(X_test, y_test, 20, experiment="Groups")

plot_results(X_test, y_test, 50, experiment="Shifts")
plot_results(X_test, y_test, 20, experiment="Shifts")


##############################################################################
# We successfully reproduced the experiment of the Gibbs et al. paper [1].

##############################################################################
# 6. Variant of the experiments: let's compare what is comparable
# -----------------------------------------------------------------------------
#
# In the paper, the proposed method (used with non-symmetrical PIs) is compared
# to the split method with symmetrical PIs. Let's compare it to the split CP
# with non-symmetrical PIs, to have a fair comparison (a standalone sketch of
# the two settings follows).
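#
# For intuition, here is a standalone sketch of the two settings (illustrative
# only: plain empirical quantiles, without the finite-sample corrections the
# actual methods use; ``alpha`` and the residuals below are made up).

rng = np.random.default_rng(0)
alpha = 0.1  # hypothetical target miscoverage for this sketch
res = rng.normal(0, 1, 1000)  # mock calibration residuals
q_sym = np.quantile(np.abs(res), 1 - alpha)  # symmetrical: one quantile
q_lo, q_hi = np.quantile(res, [alpha / 2, 1 - alpha / 2])  # non-symmetrical
# symmetrical PI: [y_pred - q_sym, y_pred + q_sym]
# non-symmetrical PI: [y_pred + q_lo, y_pred + q_hi]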

plot_results(X_test, y_test, 50, experiment="Groups", split_sym=False)

plot_results(X_test, y_test, 50, experiment="Shifts", split_sym=False)
59 changes: 42 additions & 17 deletions examples/regression/4-tutorials/plot_ccp_tutorial.py
@@ -3,16 +3,14 @@
Tutorial: Conditional CP for regression
============================================

We will use a synthetic toy dataset for the tutorial of the CCP method, and
its comparison with the other methods available in MAPIE. The CCP method
The tutorial will explain how to use the CCP method, and
will compare it with the other methods available in MAPIE. The CCP method
implements the method described in the Gibbs et al. (2023) paper [1].

We will see in this tutorial how to use the method. It has many advantages:

- It is model agnostic (it doesn't depend on the model but only on the
  predictions, unlike `CQR`)
- It uses the `split` approach (it requires a calibration set, but is very fast
  at inference time, unlike the `CV` approach)
- It can create very adaptive intervals (with a varying width which truly
  reflects the model uncertainty)
- while providing a coverage guarantee on all sub-groups of interest
  (a quick empirical check is sketched just below this list)
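
As a rough illustration of the sub-group guarantee (a sketch with hypothetical
``X_test``, ``y_test`` and a MAPIE-style ``y_pi`` array of shape
``(n_samples, 2, 1)``; not the method itself):

import numpy as np
# X_test, y_test, y_pi: hypothetical arrays for illustration
covered = (y_pi[:, 0, 0] <= y_test) & (y_test <= y_pi[:, 1, 0])
subgroup = X_test[:, 0] > 3  # hypothetical sub-group of interest
print("marginal coverage:", covered.mean())
print("sub-group coverage:", covered[subgroup].mean())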
@@ -24,8 +22,11 @@
- The adaptivity depends on the calibrator we use: it can be difficult to
  choose the correct calibrator, with the best parameters (this tutorial will
  try to help you with this task).
- If the inference is very fast, the calibration phase can be very long,
depending on the complexity of your calibrator
- The calibration, and even more so the inference, is much slower than for the
  other methods. We can reduce the inference time using
  ``unsafe_approximation=True``, but we then lose the theoretical guarantees
  and risk a small miscoverage
  (even if, most of the time, the coverage is achieved).

Conclusion on the method:

@@ -35,7 +36,8 @@

----

In this tutorial, the estimator will be :class:`~sklearn.pipeline.Pipeline`
In this tutorial, we will use a synthetic toy dataset.
The estimator will be :class:`~sklearn.pipeline.Pipeline`
with :class:`~sklearn.preprocessing.PolynomialFeatures` and
:class:`~sklearn.linear_model.LinearRegression` (or
:class:`~sklearn.linear_model.QuantileRegressor` for CQR).
@@ -51,6 +53,14 @@

Recall that the ``alpha`` is ``1 - target coverage``.

Warning:

In this tutorial, we use ``unsafe_approximation=True`` to have a faster
computation (because Read The Docs examples require fast computation).
This mode uses an approximation which makes the inference (``predict``)
faster, but induces a small miscoverage. It is recommended not to use it, or
to be very careful and empirically check the coverage on a test set, as
sketched below.
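
If you do enable it, a quick empirical check on a test set could look like
this (a sketch using MAPIE's ``regression_coverage_score``; ``y_pi`` is the
``(n_samples, 2, 1)`` array returned by ``predict``):

from mapie.metrics import regression_coverage_score
# y_test, y_pi: assumed test-set labels and predicted intervals
coverage = regression_coverage_score(y_test, y_pi[:, 0, 0], y_pi[:, 1, 0])
print(coverage)  # should stay close to 1 - ALPHA despite the approximation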

Comment on lines +56 to +63 (Collaborator Author):
I needed to temporarily use the old method (with unsafe_approximation=True), which doesn't have the theoretical guarantees; otherwise the documentation build timed out. I will need to change the dataset to get faster computation, or turn this .py tutorial in the doc into a true notebook, which is not run during the documentation build.
Indeed, showing the deprecated method, which doesn't have guarantees, in the first tutorial is not a good idea.

[1] Isaac Gibbs, John J. Cherian, and Emmanuel J. Candès,
"Conformal Prediction With Conditional Guarantees",
`arXiv <https://arxiv.org/abs/2305.12616>`_, 2023.
@@ -78,6 +88,7 @@
np.random.seed(random_state)

ALPHA = 0.1
UNSAFE_APPROXIMATION = True

##############################################################################
# 1. Data generation
@@ -90,8 +101,8 @@
# - between 0 and 5: normal distribution with a noise value which
# increases with ``x``
#
# We are going to use 3000 samples for training, 3000 for calibration and
# 20 000 for testing (to have an accurate conditional coverage).
# We are going to use 5000 samples for training, 5000 for calibration and
# 5000 for testing.


def x_sinx(x):
@@ -123,7 +134,7 @@ def get_1d_data_with_heteroscedastic_noise(
return X.reshape(-1, 1), y, true_pi


def generate_data(n_train=6000, n_test=20000, noise=0.8, power=2):
def generate_data(n_train=10000, n_test=5000, noise=0.8, power=2):
X, y, true_pi = get_1d_data_with_heteroscedastic_noise(
x_sinx, -1, 5, n_train + n_test, noise, power)
indexes = list(range(len(X)))
@@ -428,7 +439,9 @@ def plot_evaluation(titles, y_pis, X_test, y_test):
mapie_ccp = SplitCPRegressor(estimator, calibrator=GaussianCCP(),
alpha=ALPHA, cv=cv)
mapie_ccp.fit(X_train, y_train)
y_pred_ccp, y_pi_ccp = mapie_ccp.predict(X_test)
y_pred_ccp, y_pi_ccp = mapie_ccp.predict(
X_test, unsafe_approximation=UNSAFE_APPROXIMATION
)

# ================== PLOT ==================
mapies = [mapie_split, mapie_cv, mapie_cqr, mapie_ccp]
@@ -533,19 +546,25 @@ def plot_evaluation(titles, y_pis, X_test, y_test):
mapie_ccp_1 = SplitCPRegressor(estimator, calibrator=calibrator_gauss1,
cv=cv, alpha=ALPHA)
mapie_ccp_1.fit(X_train, y_train)
y_pred_ccp_1, y_pi_ccp_1 = mapie_ccp_1.predict(X_test)
y_pred_ccp_1, y_pi_ccp_1 = mapie_ccp_1.predict(
X_test, unsafe_approximation=UNSAFE_APPROXIMATION
)

# ================== CCP 2 ==================
mapie_ccp_2 = SplitCPRegressor(estimator, calibrator=calibrator_gauss2,
cv=cv, alpha=ALPHA)
mapie_ccp_2.fit(X_train, y_train)
y_pred_ccp_2, y_pi_ccp_2 = mapie_ccp_2.predict(X_test)
y_pred_ccp_2, y_pi_ccp_2 = mapie_ccp_2.predict(
X_test, unsafe_approximation=UNSAFE_APPROXIMATION
)

# ================== CCP 3 ==================
mapie_ccp_3 = SplitCPRegressor(estimator, calibrator=calibrator_gauss3,
cv=cv, alpha=ALPHA)
mapie_ccp_3.fit(X_train, y_train)
y_pred_ccp_3, y_pi_ccp_3 = mapie_ccp_3.predict(X_test)
y_pred_ccp_3, y_pi_ccp_3 = mapie_ccp_3.predict(
X_test, unsafe_approximation=UNSAFE_APPROXIMATION
)


mapies = [mapie_split, mapie_cv, mapie_cqr,
@@ -605,19 +624,25 @@ def plot_evaluation(titles, y_pis, X_test, y_test):
mapie_ccp_1 = SplitCPRegressor(estimator, calibrator=calibrator1,
cv=cv, alpha=ALPHA)
mapie_ccp_1.fit(X_train, y_train)
y_pred_ccp_1, y_pi_ccp_1 = mapie_ccp_1.predict(X_test)
y_pred_ccp_1, y_pi_ccp_1 = mapie_ccp_1.predict(
X_test, unsafe_approximation=UNSAFE_APPROXIMATION
)

# ================== CCP 2 ==================
mapie_ccp_2 = SplitCPRegressor(estimator, calibrator=calibrator2,
cv=cv, alpha=ALPHA)
mapie_ccp_2.fit(X_train, y_train)
y_pred_ccp_2, y_pi_ccp_2 = mapie_ccp_2.predict(X_test)
y_pred_ccp_2, y_pi_ccp_2 = mapie_ccp_2.predict(
X_test, unsafe_approximation=UNSAFE_APPROXIMATION
)

# ================== CCP 3 ==================
mapie_ccp_3 = SplitCPRegressor(estimator, calibrator=calibrator3,
cv=cv, alpha=ALPHA)
mapie_ccp_3.fit(X_train, y_train)
y_pred_ccp_3, y_pi_ccp_3 = mapie_ccp_3.predict(X_test)
y_pred_ccp_3, y_pi_ccp_3 = mapie_ccp_3.predict(
X_test, unsafe_approximation=UNSAFE_APPROXIMATION
)

mapies = [mapie_split, mapie_cv, mapie_cqr,
mapie_ccp_1, mapie_ccp_2, mapie_ccp_3]