Skip to content

Commit

Permalink
Fix test related to tfidf in CI (#1039)
Browse files (browse the repository at this point in the history)
* minor changes

Signed-off-by: Xavier Dupre <[email protected]>

* disable test until a new version is released

Signed-off-by: Xavier Dupre <[email protected]>

* fix import issue

Signed-off-by: Xavier Dupre <[email protected]>

* update CI

Signed-off-by: Xavier Dupre <[email protected]>

* fix code for further version

Signed-off-by: Xavier Dupre <[email protected]>

* update tests

Signed-off-by: Xavier Dupre <[email protected]>

* unit tests

Signed-off-by: Xavier Dupre <[email protected]>

* backend

Signed-off-by: Xavier Dupre <[email protected]>

* fix tests

Signed-off-by: Xavier Dupre <[email protected]>

* fix last unit test

Signed-off-by: Xavier Dupre <[email protected]>

* disable ut

Signed-off-by: Xavier Dupre <[email protected]>

* disable unit test with onnx<1.16

Signed-off-by: Xavier Dupre <[email protected]>

---------

Signed-off-by: Xavier Dupre <[email protected]>
  • Loading branch information
xadupre authored Nov 5, 2023
1 parent a551a44 commit 0374eea
Show file tree
Hide file tree
Showing 23 changed files with 270 additions and 31 deletions.
22 changes: 19 additions & 3 deletions .azure-pipelines/linux-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
python.version: '3.11'
numpy.version: '>=1.21.1'
scipy.version: '>=1.7.0'
onnx.version: '-i https://test.pypi.org/simple/ onnx==1.15.0rc2'
onnx.version: 'onnx==1.15.0' # '-i https://test.pypi.org/simple/ onnx==1.15.0rc2'
onnx.target_opset: ''
onnxrt.version: 'onnxruntime==1.16.1'
sklearn.version: '>=1.3.1'
Expand Down Expand Up @@ -275,24 +275,40 @@ jobs:
fi
displayName: 'install onnx'
- script: |
pip show onnx
displayName: 'onnx version'
- script: |
pip install $(onnxrt.version)
displayName: 'install onnxruntime'
- script: |
pip show onnx
displayName: 'onnx version'
- script: |
if [ '$(onnxcc.version)' == 'git' ]
then
pip install git+https://github.com/microsoft/onnxconverter-common
pip install git+https://github.com/microsoft/onnxconverter-common --no-deps
else
pip install onnxconverter-common$(onnxcc.version)
pip install onnxconverter-common$(onnxcc.version) --no-deps
fi
displayName: 'install onnxconverter-common'
- script: |
pip show onnx
displayName: 'onnx version'
- script: |
pip install -r requirements.txt
pip install -r requirements-dev.txt
displayName: 'install requirements'
- script: |
pip show onnx
displayName: 'onnx version'
- script: |
pip install -e .
displayName: 'install'
Expand Down
4 changes: 2 additions & 2 deletions .azure-pipelines/win32-conda-CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
matrix:
Py310-Onnx150-Rt161-Skl131:
python.version: '3.11'
onnx.version: '-i https://test.pypi.org/simple/ onnx==1.15.0rc2'
onnx.version: 'onnx==1.15.0' # '-i https://test.pypi.org/simple/ onnx==1.15.0rc2'
onnx.target_opset: ''
numpy.version: 'numpy>=1.22.3'
scipy.version: 'scipy'
Expand Down Expand Up @@ -181,7 +181,7 @@ jobs:
- script: |
call activate skl2onnxEnvironment
pip install $(onnxcc.version)
pip install $(onnxcc.version) --no-deps
displayName: 'install onnxconverter-common'
- script: |
Expand Down
10 changes: 8 additions & 2 deletions skl2onnx/operator_converters/gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ def convert_sklearn_gradient_boosting_classifier(
transform = "LOGISTIC" if op.n_classes_ == 2 else "SOFTMAX"
if op.init == "zero":
loss = op._loss if hasattr(op, "_loss") else op.loss_
base_values = np.zeros(loss.K)
if hasattr(loss, "K"):
base_values = np.zeros(loss.K)
else:
base_values = np.zeros(1)
elif op.init is None:
if hasattr(op.estimators_[0, 0], "n_features_in_"):
# sklearn >= 1.2
Expand Down Expand Up @@ -142,7 +145,10 @@ def convert_sklearn_gradient_boosting_regressor(

if op.init == "zero":
loss = op._loss if hasattr(op, "_loss") else op.loss_
cst = np.zeros(loss.K)
if hasattr(loss, "K"):
cst = np.zeros(loss.K)
else:
cst = np.zeros(1)
elif op.init is None:
# constant_ was introduced in scikit-learn 0.21.
if hasattr(op.init_, "constant_"):
Expand Down
5 changes: 5 additions & 0 deletions tests/test_sklearn_bagging_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import unittest
import packaging.version as pv
import onnx
import onnxruntime

try:
Expand Down Expand Up @@ -351,6 +352,10 @@ def test_bagging_regressor_sgd(self):
X, model, model_onnx, basename="SklearnBaggingRegressorSGD-Dec4"
)

@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="Fixed issue in more recent versions",
)
@ignore_warnings(category=FutureWarning)
def test_bagging_regressor_gradient_boosting(self):
model, X = fit_regression_model(
Expand Down
26 changes: 26 additions & 0 deletions tests/test_sklearn_count_vectorizer_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
"""
import unittest
import sys
import packaging.version as pv
import numpy
import onnx
from sklearn.feature_extraction.text import CountVectorizer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType
Expand All @@ -14,6 +16,10 @@

class TestSklearnCountVectorizer(unittest.TestCase):
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
def test_model_count_vectorizer11(self):
corpus = numpy.array(
[
Expand All @@ -36,6 +42,10 @@ def test_model_count_vectorizer11(self):
corpus, vect, model_onnx, basename="SklearnCountVectorizer11-OneOff-SklCol"
)

@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer22(self):
corpus = numpy.array(
Expand All @@ -59,6 +69,10 @@ def test_model_count_vectorizer22(self):
corpus, vect, model_onnx, basename="SklearnCountVectorizer22-OneOff-SklCol"
)

@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer12(self):
corpus = numpy.array(
Expand All @@ -82,6 +96,10 @@ def test_model_count_vectorizer12(self):
corpus, vect, model_onnx, basename="SklearnCountVectorizer12-OneOff-SklCol"
)

@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer13(self):
corpus = numpy.array(
Expand All @@ -105,6 +123,10 @@ def test_model_count_vectorizer13(self):
corpus, vect, model_onnx, basename="SklearnCountVectorizer13-OneOff-SklCol"
)

@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer_binary(self):
corpus = numpy.array(
Expand All @@ -131,6 +153,10 @@ def test_model_count_vectorizer_binary(self):
basename="SklearnCountVectorizerBinary-OneOff-SklCol",
)

@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer11_locale(self):
corpus = numpy.array(
Expand Down
10 changes: 10 additions & 0 deletions tests/test_sklearn_count_vectorizer_converter_bug.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,20 @@
Tests scikit-learn's count vectorizer converter.
"""
import unittest
import packaging.version as pv
import numpy
import onnx
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import StringTensorType
from test_utils import dump_data_and_model, TARGET_OPSET


class TestSklearnCountVectorizerBug(unittest.TestCase):
@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer_custom_tokenizer(self):
corpus = numpy.array(
Expand Down Expand Up @@ -47,6 +53,10 @@ def test_model_count_vectorizer_custom_tokenizer(self):
basename="SklearnTfidfVectorizer11CustomTokenizer-OneOff-SklCol",
)

@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
@unittest.skipIf(TARGET_OPSET < 10, reason="not available")
def test_model_count_vectorizer_wrong_ngram(self):
corpus = numpy.array(
Expand Down
10 changes: 10 additions & 0 deletions tests/test_sklearn_documentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import unittest
import urllib.error
import warnings
import packaging.version as pv
import numpy as np
import onnx

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.datasets import fetch_20newsgroups
Expand Down Expand Up @@ -73,6 +75,10 @@ class TestSklearnDocumentation(unittest.TestCase):
@unittest.skipIf(
TARGET_OPSET < 10, reason="Encoding issue fixed in a later version"
)
@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
def test_pipeline_tfidf(self):
categories = ["alt.atheism", "talk.religion.misc"]
try:
Expand Down Expand Up @@ -112,6 +118,10 @@ def test_pipeline_tfidf(self):
@unittest.skipIf(
TARGET_OPSET < 10, reason="Encoding issue fixed in a later version"
)
@unittest.skipIf(
pv.Version(onnx.__version__) < pv.Version("1.16.0"),
reason="ReferenceEvaluator does not support tfidf with strings",
)
def test_pipeline_tfidf_pipeline_minmax(self):
categories = ["alt.atheism", "talk.religion.misc"]
try:
Expand Down
8 changes: 8 additions & 0 deletions tests/test_sklearn_glm_regressor_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import unittest
import packaging.version as pv
import onnx
import numpy
from numpy.testing import assert_almost_equal

Expand Down Expand Up @@ -54,6 +55,12 @@
ort_version = ort_version.split("+")[0]
skl_version = ".".join(sklearn_version.split(".")[:2])

BACKEND = (
"onnxruntime"
if pv.Version(onnx.__version__) < pv.Version("1.16.0")
else "onnx;onnxruntime"
)


class TestGLMRegressorConverter(unittest.TestCase):
@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
Expand Down Expand Up @@ -778,6 +785,7 @@ def test_model_ransac_regressor_tree(self):
model_onnx,
verbose=False,
basename="SklearnRANSACRegressorTree-Dec3",
backend=BACKEND,
)

@ignore_warnings(category=(FutureWarning, ConvergenceWarning))
Expand Down
28 changes: 24 additions & 4 deletions tests/test_sklearn_gradient_boosting_converters.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: Apache-2.0


import packaging.version as pv
import unittest
import numpy as np
import onnx
from pandas import DataFrame
from sklearn import __version__ as skl_version
from sklearn.datasets import make_classification
Expand All @@ -30,6 +30,12 @@
ort_version = ort_version.split("+")[0]
skl_version = skl_version.split("+")[0]

BACKEND = (
"onnxruntime"
if pv.Version(onnx.__version__) < pv.Version("1.16.0")
else "onnx;onnxruntime"
)


class TestSklearnGradientBoostingModels(unittest.TestCase):
@unittest.skipIf(
Expand Down Expand Up @@ -215,7 +221,11 @@ def test_gradient_boosting_regressor_ls_loss(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X, model, model_onnx, basename="SklearnGradientBoostingRegressionLsLoss"
X,
model,
model_onnx,
basename="SklearnGradientBoostingRegressionLsLoss",
backend=BACKEND,
)

@unittest.skipIf(
Expand All @@ -233,7 +243,11 @@ def test_gradient_boosting_regressor_lad_loss(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X, model, model_onnx, basename="SklearnGradientBoostingRegressionLadLoss"
X,
model,
model_onnx,
basename="SklearnGradientBoostingRegressionLadLoss",
backend=BACKEND,
)

def test_gradient_boosting_regressor_huber_loss(self):
Expand All @@ -248,7 +262,11 @@ def test_gradient_boosting_regressor_huber_loss(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X, model, model_onnx, basename="SklearnGradientBoostingRegressionHuberLoss"
X,
model,
model_onnx,
basename="SklearnGradientBoostingRegressionHuberLoss",
backend=BACKEND,
)

def test_gradient_boosting_regressor_quantile_loss(self):
Expand All @@ -267,6 +285,7 @@ def test_gradient_boosting_regressor_quantile_loss(self):
model,
model_onnx,
basename="SklearnGradientBoostingRegressionQuantileLoss-Dec4",
backend=BACKEND,
)

def test_gradient_boosting_regressor_int(self):
Expand Down Expand Up @@ -300,6 +319,7 @@ def test_gradient_boosting_regressor_zero_init(self):
model,
model_onnx,
basename="SklearnGradientBoostingRegressionZeroInit-Dec4",
backend=BACKEND,
)

@unittest.skipIf(
Expand Down
7 changes: 6 additions & 1 deletion tests/test_sklearn_grid_search_cv_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,15 @@ def test_grid_search_multiclass_float(self):
"GridSearchCV",
[("input", FloatTensorType([None, X.shape[1]]))],
target_opset=TARGET_OPSET,
options={"zipmap": False},
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X, model, model_onnx, basename="SklearnGridSearchMulticlassFloat"
X,
model,
model_onnx,
basename="SklearnGridSearchMulticlassFloat",
backend="onnxruntime",
)

def test_grid_search_binary_int(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sklearn_nearest_neighbour_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def test_model_knn_regressor(self):

@unittest.skipIf(dont_test_radius(), reason="not available")
@unittest.skipIf(
pv.Version(ort_version) < pv.Version("1.8.0"), reason="produces nan values"
pv.Version(ort_version) < pv.Version("1.15.0"), reason="produces nan values"
)
@ignore_warnings(category=DeprecationWarning)
def test_model_knn_regressor_radius(self):
Expand Down
6 changes: 5 additions & 1 deletion tests/test_sklearn_one_vs_rest_classifier_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,11 @@ def test_ovr_classification_float_binary_ensemble(self):
)
self.assertIsNotNone(model_onnx)
dump_data_and_model(
X, model, model_onnx, basename="SklearnOVRClassificationFloatBinEnsemble"
X,
model,
model_onnx,
basename="SklearnOVRClassificationFloatBinEnsemble",
backend="onnxruntime",
)

@ignore_warnings(category=warnings_to_skip)
Expand Down
Loading

0 comments on commit 0374eea

Please sign in to comment.