From a63f71f59e8ad9f005a4916a70020e6357d7cbd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20Dupr=C3=A9?= Date: Tue, 28 May 2024 11:31:53 +0200 Subject: [PATCH] Fix for multidimensional gaussian process (#1097) * Extend CI to test with onnxruntime==1.18.0 (#1093) * Extend CI to test with onnxruntime==1.18.0 Signed-off-by: Xavier Dupre * update doc Signed-off-by: Xavier Dupre * simplify pipelines Signed-off-by: Xavier Dupre * rename master into main Signed-off-by: Xavier Dupre * action Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * example Signed-off-by: Xavier Dupre * remove benchmark Signed-off-by: Xavier Dupre * doc Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * fix unittest Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre --------- Signed-off-by: Xavier Dupre * Increase last supported opset to 19 in readme (#1087) See: https://github.com/onnx/sklearn-onnx/blob/d2029c1a9752f62a63fc5c4447b4d9fe75e8fe39/skl2onnx/__init__.py#L12 Signed-off-by: XiangRongLin <41164160+XiangRongLin@users.noreply.github.com> Signed-off-by: Xavier Dupre * Fix the converters for scikit-learn==1.5.0 (#1095) * Extend CI to test with onnxruntime==1.18.0 Signed-off-by: Xavier Dupre * update doc Signed-off-by: Xavier Dupre * simplify pipelines Signed-off-by: Xavier Dupre * rename master into main Signed-off-by: Xavier 
Dupre * action Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * update CI Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * example Signed-off-by: Xavier Dupre * remove benchmark Signed-off-by: Xavier Dupre * doc Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * fix unittest Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * fix ci Signed-off-by: Xavier Dupre * fix title Signed-off-by: Xavier Dupre * fix disc Signed-off-by: Xavier Dupre * better ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * linear Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * fix two unit tests Signed-off-by: Xavier Dupre * fix PLSRegression Signed-off-by: Xavier Dupre * fix version Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * ci Signed-off-by: Xavier Dupre * precision Signed-off-by: Xavier Dupre * fix unit test Signed-off-by: Xavier Dupre --------- Signed-off-by: Xavier Dupre * StringNormalizer drops strings when they only contain stop words (#1031) * investigate Signed-off-by: Xavier Dupre * fix unit tests Signed-off-by: Xavier Dupre --------- Signed-off-by: Xavier Dupre * Fix for gaussian process Signed-off-by: Xavier Dupre * add test for issue 1073 Signed-off-by: Xavier Dupre * Fix multi dimensional GaussianRegressor Signed-off-by: Xavier Dupre * doc Signed-off-by: Xavier Dupre --------- Signed-off-by: Xavier Dupre Signed-off-by: XiangRongLin 
<41164160+XiangRongLin@users.noreply.github.com> Co-authored-by: XiangRongLin <41164160+XiangRongLin@users.noreply.github.com> Co-authored-by: Liberty Askew Co-authored-by: dreivmeister --- CHANGELOGS.md | 2 ++ .../operator_converters/gaussian_process.py | 17 +++++----- ...test_sklearn_gaussian_process_regressor.py | 31 ++++++++++++++----- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/CHANGELOGS.md b/CHANGELOGS.md index 4abe085bd..9ac4dab96 100644 --- a/CHANGELOGS.md +++ b/CHANGELOGS.md @@ -2,6 +2,8 @@ ## 1.17.0 (development) +* Fix for multidimensional gaussian process + [#1097](https://github.com/onnx/sklearn-onnx/pull/1097) * Minor fixes to support scikit-learn==1.5.0 [#1095](https://github.com/onnx/sklearn-onnx/pull/1095) * Fix the conversion of pipeline including pipelines, diff --git a/skl2onnx/operator_converters/gaussian_process.py b/skl2onnx/operator_converters/gaussian_process.py index 886eac706..988894763 100644 --- a/skl2onnx/operator_converters/gaussian_process.py +++ b/skl2onnx/operator_converters/gaussian_process.py @@ -126,12 +126,12 @@ def convert_gaussian_process_regressor( if len(mean_y.shape) == 1: mean_y = mean_y.reshape(mean_y.shape + (1,)) - if not hasattr(op, "_y_train_std") or op._y_train_std == 1: + if not hasattr(op, "_y_train_std") or np.all(op._y_train_std == 1): if isinstance(y_mean_b, (np.float32, np.float64)): y_mean_b = np.array([y_mean_b]) if isinstance(mean_y, (np.float32, np.float64)): mean_y = np.array([mean_y]) - y_mean = OnnxAdd(y_mean_b, mean_y, op_version=opv) + y_mean = OnnxAdd(y_mean_b, mean_y.T, op_version=opv) else: # A bug was fixed in 0.23 and it changed # the predictions when return_std is True. 
@@ -145,13 +145,13 @@ def convert_gaussian_process_regressor( if isinstance(mean_y, (np.float32, np.float64)): mean_y = np.array([mean_y]) y_mean = OnnxAdd( - OnnxMul(y_mean_b, var_y, op_version=opv), mean_y, op_version=opv + OnnxMul(y_mean_b, var_y.T, op_version=opv), mean_y.T, op_version=opv ) y_mean.set_onnx_name_prefix("gpr") y_mean_reshaped = OnnxReshapeApi13( y_mean, - np.array([-1, 1], dtype=np.int64), + np.array([-1, mean_y.shape[0]], dtype=np.int64), op_version=opv, output_names=out[:1], ) @@ -192,12 +192,15 @@ def convert_gaussian_process_regressor( # y_var[y_var_negative] = 0.0 ys0_var = OnnxMax(ys_var, np.array([0], dtype=dtype), op_version=opv) - if hasattr(op, "_y_train_std") and op._y_train_std != 1: + if hasattr(op, "_y_train_std"): # y_var = y_var * self._y_train_std**2 - ys0_var = OnnxMul(ys0_var, var_y**2, op_version=opv) + ys0_var = OnnxMul( + ys0_var, (op._y_train_std**2).astype(dtype), op_version=opv + ) # var = np.sqrt(ys0_var) - var = OnnxSqrt(ys0_var, output_names=out[1:], op_version=opv) + var = OnnxSqrt(ys0_var, op_version=opv) + var = OnnxTranspose(var, output_names=out[1:], op_version=opv) var.set_onnx_name_prefix("gprv") outputs.append(var) diff --git a/tests/test_sklearn_gaussian_process_regressor.py b/tests/test_sklearn_gaussian_process_regressor.py index a55a34750..ca4ca85b5 100644 --- a/tests/test_sklearn_gaussian_process_regressor.py +++ b/tests/test_sklearn_gaussian_process_regressor.py @@ -24,13 +24,7 @@ WhiteKernel, ) from sklearn.model_selection import train_test_split - -try: - # scikit-learn >= 0.22 - from sklearn.utils._testing import ignore_warnings -except ImportError: - # scikit-learn < 0.22 - from sklearn.utils.testing import ignore_warnings +from sklearn.utils._testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning from skl2onnx.common.data_types import FloatTensorType, DoubleTensorType from skl2onnx import to_onnx @@ -1019,6 +1013,9 @@ def 
test_gpr_rbf_fitted_return_std_exp_sine_squared_false(self): model_onnx, verbose=False, basename="SklearnGaussianProcessExpSineSquaredStdF-Out0-Dec3", + # operator MatMul gets replaced by FusedMatMul but onnxruntime does not check + # the availability of the kernel for double. + disable_optimisation=True, ) self.check_outputs( gp, @@ -1418,6 +1415,7 @@ def test_x_issue_789_cdist(self): pipe.predict(vx1.astype(np.float64)).ravel(), pred[0].ravel() ) + @ignore_warnings(category=ConvergenceWarning) def test_white_kernel_float(self): X, y = make_friedman2(n_samples=500, noise=0, random_state=0) tx1, vx1, ty1, vy1 = train_test_split(X, y) @@ -1436,6 +1434,7 @@ def test_white_kernel_float(self): gpr.predict(vx1.astype(np.float32)).ravel(), pred[0].ravel(), rtol=1e-3 ) + @ignore_warnings(category=ConvergenceWarning) def test_white_kernel_double(self): X, y = make_friedman2(n_samples=500, noise=0, random_state=0) tx1, vx1, ty1, vy1 = train_test_split(X, y) @@ -1498,6 +1497,23 @@ def test_kernel_white_kernel(self): m2 = ker(x, x) assert_almost_equal(m2, m1, decimal=5) + def test_issue_1073_multidimension_process(self): + # multioutput gpr + n_samples, n_features, n_targets = 1000, 8, 3 + X, y = make_regression(n_samples, n_features, n_targets=n_targets) + tx1, vx1, ty1, vy1 = train_test_split(X, y) + model = GaussianProcessRegressor() + model.fit(tx1, ty1) + initial_type = [("data_in", DoubleTensorType([None, X.shape[1]]))] + onx = to_onnx(model, initial_types=initial_type, target_opset=_TARGET_OPSET_) + sess = InferenceSession( + onx.SerializeToString(), providers=["CPUExecutionProvider"] + ) + pred = sess.run(None, {"data_in": vx1.astype(np.float64)}) + assert_almost_equal( + model.predict(vx1.astype(np.float64)).ravel(), pred[0].ravel() + ) + if __name__ == "__main__": # import logging @@ -1505,4 +1521,5 @@ def test_kernel_white_kernel(self): # log.setLevel(logging.DEBUG) # logging.basicConfig(level=logging.DEBUG) # 
TestSklearnGaussianProcessRegressor().test_kernel_white_kernel() + # TestSklearnGaussianProcessRegressor().test_issue_1073_multidimension_process() unittest.main(verbosity=2)