Fix test related to tfidf in CI (#1039)

* minor changes
  Signed-off-by: Xavier Dupre <[email protected]>
* disable test until a new version is released
  Signed-off-by: Xavier Dupre <[email protected]>
* fix import issue
  Signed-off-by: Xavier Dupre <[email protected]>
* update CI
  Signed-off-by: Xavier Dupre <[email protected]>
* fix code for further version
  Signed-off-by: Xavier Dupre <[email protected]>
* update tests
  Signed-off-by: Xavier Dupre <[email protected]>
* unit tests
  Signed-off-by: Xavier Dupre <[email protected]>
* backend
  Signed-off-by: Xavier Dupre <[email protected]>
* fix tests
  Signed-off-by: Xavier Dupre <[email protected]>
* fix last unit test
  Signed-off-by: Xavier Dupre <[email protected]>
* disable ut
  Signed-off-by: Xavier Dupre <[email protected]>
* disable unit test with onnx<1.16
  Signed-off-by: Xavier Dupre <[email protected]>

---------

Signed-off-by: Xavier Dupre <[email protected]>
onnx · Nov 5, 2023 · 0374eea · 0374eea
1 parent a551a44
commit 0374eea
Show file tree

Hide file tree

Showing 23 changed files with 270 additions and 31 deletions.
diff --git a/.azure-pipelines/linux-conda-CI.yml b/.azure-pipelines/linux-conda-CI.yml
@@ -19,7 +19,7 @@ jobs:
         python.version: '3.11'
         numpy.version: '>=1.21.1'
         scipy.version: '>=1.7.0'
-        onnx.version: '-i https://test.pypi.org/simple/ onnx==1.15.0rc2'
+        onnx.version: 'onnx==1.15.0'  # '-i https://test.pypi.org/simple/ onnx==1.15.0rc2'
         onnx.target_opset: ''
         onnxrt.version: 'onnxruntime==1.16.1'
         sklearn.version: '>=1.3.1'
@@ -275,24 +275,40 @@ jobs:
       fi
     displayName: 'install onnx'
 
+  - script: |
+      pip show onnx
+    displayName: 'onnx version'
+
   - script: |
       pip install $(onnxrt.version)
     displayName: 'install onnxruntime'
 
+  - script: |
+      pip show onnx
+    displayName: 'onnx version'
+
   - script: |
       if [ '$(onnxcc.version)' == 'git' ]
       then
-        pip install git+https://github.com/microsoft/onnxconverter-common
+        pip install git+https://github.com/microsoft/onnxconverter-common --no-deps
       else
-        pip install onnxconverter-common$(onnxcc.version)
+        pip install onnxconverter-common$(onnxcc.version) --no-deps
       fi
     displayName: 'install onnxconverter-common'
 
+  - script: |
+      pip show onnx
+    displayName: 'onnx version'
+
   - script: |
       pip install -r requirements.txt
       pip install -r requirements-dev.txt
     displayName: 'install requirements'
 
+  - script: |
+      pip show onnx
+    displayName: 'onnx version'
+
   - script: |
       pip install -e .
     displayName: 'install'

diff --git a/.azure-pipelines/win32-conda-CI.yml b/.azure-pipelines/win32-conda-CI.yml
@@ -15,7 +15,7 @@ jobs:
     matrix:
       Py310-Onnx150-Rt161-Skl131:
         python.version: '3.11'
-        onnx.version: '-i https://test.pypi.org/simple/ onnx==1.15.0rc2'
+        onnx.version: 'onnx==1.15.0'  # '-i https://test.pypi.org/simple/ onnx==1.15.0rc2'
         onnx.target_opset: ''
         numpy.version: 'numpy>=1.22.3'
         scipy.version: 'scipy'
@@ -181,7 +181,7 @@ jobs:
 
   - script: |
       call activate skl2onnxEnvironment
-      pip install $(onnxcc.version)
+      pip install $(onnxcc.version) --no-deps
     displayName: 'install onnxconverter-common'
 
   - script: |

diff --git a/skl2onnx/operator_converters/gradient_boosting.py b/skl2onnx/operator_converters/gradient_boosting.py
@@ -39,7 +39,10 @@ def convert_sklearn_gradient_boosting_classifier(
     transform = "LOGISTIC" if op.n_classes_ == 2 else "SOFTMAX"
     if op.init == "zero":
         loss = op._loss if hasattr(op, "_loss") else op.loss_
-        base_values = np.zeros(loss.K)
+        if hasattr(loss, "K"):
+            base_values = np.zeros(loss.K)
+        else:
+            base_values = np.zeros(1)
     elif op.init is None:
         if hasattr(op.estimators_[0, 0], "n_features_in_"):
             # sklearn >= 1.2
@@ -142,7 +145,10 @@ def convert_sklearn_gradient_boosting_regressor(
 
     if op.init == "zero":
         loss = op._loss if hasattr(op, "_loss") else op.loss_
-        cst = np.zeros(loss.K)
+        if hasattr(loss, "K"):
+            cst = np.zeros(loss.K)
+        else:
+            cst = np.zeros(1)
     elif op.init is None:
         # constant_ was introduced in scikit-learn 0.21.
         if hasattr(op.init_, "constant_"):

diff --git a/tests/test_sklearn_bagging_converter.py b/tests/test_sklearn_bagging_converter.py
@@ -3,6 +3,7 @@
 
 import unittest
 import packaging.version as pv
+import onnx
 import onnxruntime
 
 try:
@@ -351,6 +352,10 @@ def test_bagging_regressor_sgd(self):
             X, model, model_onnx, basename="SklearnBaggingRegressorSGD-Dec4"
         )
 
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="Fixed issue in more recent versions",
+    )
     @ignore_warnings(category=FutureWarning)
     def test_bagging_regressor_gradient_boosting(self):
         model, X = fit_regression_model(

diff --git a/tests/test_sklearn_count_vectorizer_converter.py b/tests/test_sklearn_count_vectorizer_converter.py
@@ -5,7 +5,9 @@
 """
 import unittest
 import sys
+import packaging.version as pv
 import numpy
+import onnx
 from sklearn.feature_extraction.text import CountVectorizer
 from skl2onnx import convert_sklearn
 from skl2onnx.common.data_types import StringTensorType
@@ -14,6 +16,10 @@
 
 class TestSklearnCountVectorizer(unittest.TestCase):
     @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     def test_model_count_vectorizer11(self):
         corpus = numpy.array(
             [
@@ -36,6 +42,10 @@ def test_model_count_vectorizer11(self):
             corpus, vect, model_onnx, basename="SklearnCountVectorizer11-OneOff-SklCol"
         )
 
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
     def test_model_count_vectorizer22(self):
         corpus = numpy.array(
@@ -59,6 +69,10 @@ def test_model_count_vectorizer22(self):
             corpus, vect, model_onnx, basename="SklearnCountVectorizer22-OneOff-SklCol"
         )
 
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
     def test_model_count_vectorizer12(self):
         corpus = numpy.array(
@@ -82,6 +96,10 @@ def test_model_count_vectorizer12(self):
             corpus, vect, model_onnx, basename="SklearnCountVectorizer12-OneOff-SklCol"
         )
 
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
     def test_model_count_vectorizer13(self):
         corpus = numpy.array(
@@ -105,6 +123,10 @@ def test_model_count_vectorizer13(self):
             corpus, vect, model_onnx, basename="SklearnCountVectorizer13-OneOff-SklCol"
         )
 
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
     def test_model_count_vectorizer_binary(self):
         corpus = numpy.array(
@@ -131,6 +153,10 @@ def test_model_count_vectorizer_binary(self):
             basename="SklearnCountVectorizerBinary-OneOff-SklCol",
         )
 
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
     def test_model_count_vectorizer11_locale(self):
         corpus = numpy.array(

diff --git a/tests/test_sklearn_count_vectorizer_converter_bug.py b/tests/test_sklearn_count_vectorizer_converter_bug.py
@@ -4,14 +4,20 @@
 Tests scikit-learn's count vectorizer converter.
 """
 import unittest
+import packaging.version as pv
 import numpy
+import onnx
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from skl2onnx import convert_sklearn
 from skl2onnx.common.data_types import StringTensorType
 from test_utils import dump_data_and_model, TARGET_OPSET
 
 
 class TestSklearnCountVectorizerBug(unittest.TestCase):
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
     def test_model_count_vectorizer_custom_tokenizer(self):
         corpus = numpy.array(
@@ -47,6 +53,10 @@ def test_model_count_vectorizer_custom_tokenizer(self):
             basename="SklearnTfidfVectorizer11CustomTokenizer-OneOff-SklCol",
         )
 
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     @unittest.skipIf(TARGET_OPSET < 10, reason="not available")
     def test_model_count_vectorizer_wrong_ngram(self):
         corpus = numpy.array(

diff --git a/tests/test_sklearn_documentation.py b/tests/test_sklearn_documentation.py
@@ -7,7 +7,9 @@
 import unittest
 import urllib.error
 import warnings
+import packaging.version as pv
 import numpy as np
+import onnx
 
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.datasets import fetch_20newsgroups
@@ -73,6 +75,10 @@ class TestSklearnDocumentation(unittest.TestCase):
     @unittest.skipIf(
         TARGET_OPSET < 10, reason="Encoding issue fixed in a later version"
     )
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     def test_pipeline_tfidf(self):
         categories = ["alt.atheism", "talk.religion.misc"]
         try:
@@ -112,6 +118,10 @@ def test_pipeline_tfidf(self):
     @unittest.skipIf(
         TARGET_OPSET < 10, reason="Encoding issue fixed in a later version"
     )
+    @unittest.skipIf(
+        pv.Version(onnx.__version__) < pv.Version("1.16.0"),
+        reason="ReferenceEvaluator does not support tfidf with strings",
+    )
     def test_pipeline_tfidf_pipeline_minmax(self):
         categories = ["alt.atheism", "talk.religion.misc"]
         try:

diff --git a/tests/test_sklearn_glm_regressor_converter.py b/tests/test_sklearn_glm_regressor_converter.py
@@ -4,6 +4,7 @@
 
 import unittest
 import packaging.version as pv
+import onnx
 import numpy
 from numpy.testing import assert_almost_equal
 
@@ -54,6 +55,12 @@
 ort_version = ort_version.split("+")[0]
 skl_version = ".".join(sklearn_version.split(".")[:2])
 
+BACKEND = (
+    "onnxruntime"
+    if pv.Version(onnx.__version__) < pv.Version("1.16.0")
+    else "onnx;onnxruntime"
+)
+
 
 class TestGLMRegressorConverter(unittest.TestCase):
     @ignore_warnings(category=(FutureWarning, ConvergenceWarning))
@@ -778,6 +785,7 @@ def test_model_ransac_regressor_tree(self):
             model_onnx,
             verbose=False,
             basename="SklearnRANSACRegressorTree-Dec3",
+            backend=BACKEND,
         )
 
     @ignore_warnings(category=(FutureWarning, ConvergenceWarning))

diff --git a/tests/test_sklearn_gradient_boosting_converters.py b/tests/test_sklearn_gradient_boosting_converters.py
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 
-
 import packaging.version as pv
 import unittest
 import numpy as np
+import onnx
 from pandas import DataFrame
 from sklearn import __version__ as skl_version
 from sklearn.datasets import make_classification
@@ -30,6 +30,12 @@
 ort_version = ort_version.split("+")[0]
 skl_version = skl_version.split("+")[0]
 
+BACKEND = (
+    "onnxruntime"
+    if pv.Version(onnx.__version__) < pv.Version("1.16.0")
+    else "onnx;onnxruntime"
+)
+
 
 class TestSklearnGradientBoostingModels(unittest.TestCase):
     @unittest.skipIf(
@@ -215,7 +221,11 @@ def test_gradient_boosting_regressor_ls_loss(self):
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X, model, model_onnx, basename="SklearnGradientBoostingRegressionLsLoss"
+            X,
+            model,
+            model_onnx,
+            basename="SklearnGradientBoostingRegressionLsLoss",
+            backend=BACKEND,
         )
 
     @unittest.skipIf(
@@ -233,7 +243,11 @@ def test_gradient_boosting_regressor_lad_loss(self):
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X, model, model_onnx, basename="SklearnGradientBoostingRegressionLadLoss"
+            X,
+            model,
+            model_onnx,
+            basename="SklearnGradientBoostingRegressionLadLoss",
+            backend=BACKEND,
         )
 
     def test_gradient_boosting_regressor_huber_loss(self):
@@ -248,7 +262,11 @@ def test_gradient_boosting_regressor_huber_loss(self):
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X, model, model_onnx, basename="SklearnGradientBoostingRegressionHuberLoss"
+            X,
+            model,
+            model_onnx,
+            basename="SklearnGradientBoostingRegressionHuberLoss",
+            backend=BACKEND,
         )
 
     def test_gradient_boosting_regressor_quantile_loss(self):
@@ -267,6 +285,7 @@ def test_gradient_boosting_regressor_quantile_loss(self):
             model,
             model_onnx,
             basename="SklearnGradientBoostingRegressionQuantileLoss-Dec4",
+            backend=BACKEND,
         )
 
     def test_gradient_boosting_regressor_int(self):
@@ -300,6 +319,7 @@ def test_gradient_boosting_regressor_zero_init(self):
             model,
             model_onnx,
             basename="SklearnGradientBoostingRegressionZeroInit-Dec4",
+            backend=BACKEND,
         )
 
     @unittest.skipIf(

diff --git a/tests/test_sklearn_grid_search_cv_converter.py b/tests/test_sklearn_grid_search_cv_converter.py
@@ -61,10 +61,15 @@ def test_grid_search_multiclass_float(self):
             "GridSearchCV",
             [("input", FloatTensorType([None, X.shape[1]]))],
             target_opset=TARGET_OPSET,
+            options={"zipmap": False},
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X, model, model_onnx, basename="SklearnGridSearchMulticlassFloat"
+            X,
+            model,
+            model_onnx,
+            basename="SklearnGridSearchMulticlassFloat",
+            backend="onnxruntime",
         )
 
     def test_grid_search_binary_int(self):

diff --git a/tests/test_sklearn_nearest_neighbour_converter.py b/tests/test_sklearn_nearest_neighbour_converter.py
@@ -145,7 +145,7 @@ def test_model_knn_regressor(self):
 
     @unittest.skipIf(dont_test_radius(), reason="not available")
     @unittest.skipIf(
-        pv.Version(ort_version) < pv.Version("1.8.0"), reason="produces nan values"
+        pv.Version(ort_version) < pv.Version("1.15.0"), reason="produces nan values"
     )
     @ignore_warnings(category=DeprecationWarning)
     def test_model_knn_regressor_radius(self):

diff --git a/tests/test_sklearn_one_vs_rest_classifier_converter.py b/tests/test_sklearn_one_vs_rest_classifier_converter.py
@@ -409,7 +409,11 @@ def test_ovr_classification_float_binary_ensemble(self):
         )
         self.assertIsNotNone(model_onnx)
         dump_data_and_model(
-            X, model, model_onnx, basename="SklearnOVRClassificationFloatBinEnsemble"
+            X,
+            model,
+            model_onnx,
+            basename="SklearnOVRClassificationFloatBinEnsemble",
+            backend="onnxruntime",
         )
 
     @ignore_warnings(category=warnings_to_skip)