Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Array API dispatching #2096

Draft
wants to merge 30 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8bedde1
ENH: array api dispatching
samir-nasibli Oct 2, 2024
b11fcf3
Deselect some scikit-learn Array API tests
samir-nasibli Oct 4, 2024
467634a
Merge branch 'intel:main' into enh/array_api_dispatching
samir-nasibli Oct 4, 2024
31030f7
Merge branch 'intel:main' into enh/array_api_dispatching
samir-nasibli Oct 8, 2024
943796e
deselect more tests
samir-nasibli Oct 8, 2024
ef42daa
deselect more tests
samir-nasibli Oct 8, 2024
3bc755d
disabled tests for
samir-nasibli Oct 8, 2024
76f1876
fix the deselection comment
samir-nasibli Oct 8, 2024
ce0b8e1
disabled test for Ridge regression
samir-nasibli Oct 8, 2024
404e8c0
Disabled tests and added comment
samir-nasibli Oct 8, 2024
ced43bf
ENH: Array API dispatching
samir-nasibli Oct 8, 2024
968365f
Merge branch 'intel:main' into enh/array_api_dispatching_testing
samir-nasibli Oct 9, 2024
c395d03
Revert adding dpctl into Array API conformance testing
samir-nasibli Oct 9, 2024
9271479
Merge branch 'enh/array_api_dispatching_testing' of https://github.co…
samir-nasibli Oct 9, 2024
5784c25
minor refactoring onedal _array_api
samir-nasibli Oct 9, 2024
8d7f664
add tests
samir-nasibli Oct 9, 2024
63d8f30
addressed memory usage tests
samir-nasibli Oct 9, 2024
6bd0280
Address some array api test fails
samir-nasibli Oct 9, 2024
90411e7
linting
samir-nasibli Oct 9, 2024
2b7bbc5
addressed test_get_namespace
samir-nasibli Oct 9, 2024
b7b8f03
adding test case for validate_data check with Array API inputs
samir-nasibli Oct 9, 2024
169009d
minor refactoring
samir-nasibli Oct 9, 2024
9ca118c
addressed test_patch_map_match fail
samir-nasibli Oct 9, 2024
7ddcf40
Added docstrings for get_namespace
samir-nasibli Oct 9, 2024
ec90d43
docstrings for Array API tests
samir-nasibli Oct 9, 2024
6e7e547
updated minimal scikit-learn version for Array API dispatching
samir-nasibli Oct 9, 2024
e5db839
updated minimal scikit-learn version for Array API dispatching in _de…
samir-nasibli Oct 9, 2024
f99a92b
fix test test_get_namespace_with_config_context
samir-nasibli Oct 9, 2024
8844f0e
Merge branch 'intel:main' into enh/array_api_dispatching_testing
samir-nasibli Oct 10, 2024
3771fc2
refactor onedal/datatypes/_data_conversion.py
samir-nasibli Oct 11, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions deselected_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,26 @@
# will exclude deselection in versions 0.18.1, and 0.18.2 only.

deselected_tests:
# Array API support
# sklearnex functional Array API support does not guarantee namespace consistency for the estimator's array attributes.
- decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_input_and_values-array_api_strict-None-None]
- decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_input_and_values-array_api_strict-None-None]
- decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh')-check_array_api_get_precision-array_api_strict-None-None]
- decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,svd_solver='covariance_eigh',whiten=True)-check_array_api_get_precision-array_api_strict-None-None]
- linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_attributes-array_api_strict-None-None]
- linear_model/tests/test_ridge.py::test_ridge_array_api_compliance[Ridge(solver='svd')-check_array_api_input_and_values-array_api_strict-None-None]
# `train_test_split` inconsistency for Array API inputs.
- model_selection/tests/test_split.py::test_array_api_train_test_split[True-None-array_api_strict-None-None]
- model_selection/tests/test_split.py::test_array_api_train_test_split[True-stratify1-array_api_strict-None-None]
- model_selection/tests/test_split.py::test_array_api_train_test_split[False-None-array_api_strict-None-None]
# PCA. Array API functionally supported for all factorizations. power_iteration_normalizer=["LU", "QR"]
- decomposition/tests/test_pca.py::test_array_api_error_and_warnings_on_unsupported_params
# PCA. InvalidParameterError: The 'M' parameter of randomized_svd must be an instance of 'numpy.ndarray' or a sparse matrix.
- decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_input_and_values-array_api_strict-None-None]
- decomposition/tests/test_pca.py::test_pca_array_api_compliance[PCA(n_components=2,power_iteration_normalizer='QR',random_state=0,svd_solver='randomized')-check_array_api_get_precision-array_api_strict-None-None]
# Ridge regression. Array API functionally supported for all solvers. Not raising error for non-svd solvers.
- linear_model/tests/test_ridge.py::test_array_api_error_and_warnings_for_solver_parameter[array_api_strict]

# 'kulsinski' distance was deprecated in scipy 1.11 but still marked as supported in scikit-learn < 1.3
- neighbors/tests/test_neighbors.py::test_kneighbors_brute_backend[float64-kulsinski] <1.3
- neighbors/tests/test_neighbors.py::test_radius_neighbors_brute_backend[kulsinski] <1.3
Expand Down
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ xgboost==2.1.1
lightgbm==4.5.0
catboost==1.2.7 ; python_version < '3.11' # TODO: Remove 3.11 condition when catboost supports numpy 2.0
shap==0.46.0
array-api-compat==1.8.0
array-api-strict==2.0.1
44 changes: 44 additions & 0 deletions sklearnex/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ def get_patch_map_core(preview=False):
from ._config import get_config as get_config_sklearnex
from ._config import set_config as set_config_sklearnex

# TODO:
# check the version of skl.
if sklearn_check_version("1.4"):
import sklearn.utils._array_api as _array_api_module

if sklearn_check_version("1.2.1"):
from .utils.parallel import _FuncWrapper as _FuncWrapper_sklearnex
else:
Expand Down Expand Up @@ -165,6 +170,15 @@ def get_patch_map_core(preview=False):
from .svm import NuSVC as NuSVC_sklearnex
from .svm import NuSVR as NuSVR_sklearnex

# TODO:
# check the version of skl.
if sklearn_check_version("1.4"):
from .utils._array_api import _convert_to_numpy as _convert_to_numpy_sklearnex
from .utils._array_api import get_namespace as get_namespace_sklearnex
from .utils._array_api import (
yield_namespace_device_dtype_combinations as yield_namespace_device_dtype_combinations_sklearnex,
)

# DBSCAN
mapping.pop("dbscan")
mapping["dbscan"] = [[(cluster_module, "DBSCAN", DBSCAN_sklearnex), None]]
Expand Down Expand Up @@ -440,6 +454,36 @@ def get_patch_map_core(preview=False):
mapping["_funcwrapper"] = [
[(parallel_module, "_FuncWrapper", _FuncWrapper_sklearnex), None]
]
# TODO:
# check the version of skl.
if sklearn_check_version("1.4"):
# Necessary for array_api support
mapping["get_namespace"] = [
[
(
_array_api_module,
"get_namespace",
get_namespace_sklearnex,
),
None,
]
]
mapping["_convert_to_numpy"] = [
[
(_array_api_module, "_convert_to_numpy", _convert_to_numpy_sklearnex),
None,
]
]
mapping["yield_namespace_device_dtype_combinations"] = [
[
(
_array_api_module,
"yield_namespace_device_dtype_combinations",
yield_namespace_device_dtype_combinations_sklearnex,
),
None,
]
]
return mapping


Expand Down
133 changes: 106 additions & 27 deletions sklearnex/utils/_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,47 +16,124 @@

"""Tools to support array_api."""

import itertools

import numpy as np

from daal4py.sklearn._utils import sklearn_check_version
from onedal.utils._array_api import _get_sycl_namespace
from onedal.utils._array_api import _asarray, _get_sycl_namespace

# TODO:
# check the version of skl.
samir-nasibli marked this conversation as resolved.
Show resolved Hide resolved
if sklearn_check_version("1.2"):
from sklearn.utils._array_api import get_namespace as sklearn_get_namespace
from sklearn.utils._array_api import _convert_to_numpy as _sklearn_convert_to_numpy

from onedal._device_offload import dpctl_available, dpnp_available

if dpctl_available:
import dpctl.tensor as dpt

if dpnp_available:
import dpnp

_NUMPY_NAMESPACE_NAMES = {"numpy", "array_api_compat.numpy"}

def get_namespace(*arrays):
"""Get namespace of arrays.

Introspect `arrays` arguments and return their common Array API
compatible namespace object, if any. NumPy 1.22 and later can
construct such containers using the `numpy.array_api` namespace
for instance.
def yield_namespaces(include_numpy_namespaces=True):
    """Yield the names of supported Array API namespaces.

    This is meant to be used for testing purposes only.

    Parameters
    ----------
    include_numpy_namespaces : bool, default=True
        If True, also yield numpy namespaces.

    Yields
    ------
    array_namespace : str
        The name of an Array API namespace.
    """
    for array_namespace in [
        # The following is used to test the array_api_compat wrapper when
        # array_api_dispatch is enabled: in particular, the arrays used in the
        # tests are regular numpy arrays without any "device" attribute.
        "numpy",
        # Stricter NumPy-based Array API implementation. The
        # array_api_strict.Array instances always have a dummy "device" attribute.
        "array_api_strict",
        "dpctl.tensor",
        "cupy",
        "torch",
    ]:
        if not include_numpy_namespaces and array_namespace in _NUMPY_NAMESPACE_NAMES:
            continue
        yield array_namespace


def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True):
    """Yield supported (namespace, device, dtype) tuples for testing.

    Use this to test that an estimator works with all combinations.

    Parameters
    ----------
    include_numpy_namespaces : bool, default=True
        If True, also yield numpy namespaces.

    Yields
    ------
    array_namespace : str
        The name of the Array API namespace.

    device : str or None
        The name of the device on which to allocate the arrays. Can be None
        to indicate that the default value should be used.

    dtype_name : str or None
        The name of the data type to use for arrays. Can be None to indicate
        that the default value should be used.
    """
    for array_namespace in yield_namespaces(
        include_numpy_namespaces=include_numpy_namespaces
    ):
        if array_namespace == "torch":
            for device, dtype in itertools.product(
                ("cpu", "cuda"), ("float64", "float32")
            ):
                yield array_namespace, device, dtype
            # MPS (Apple silicon) only supports float32.
            yield array_namespace, "mps", "float32"
        elif array_namespace == "dpctl.tensor":
            for device, dtype in itertools.product(
                ("cpu", "gpu"), ("float64", "float32")
            ):
                yield array_namespace, device, dtype
        else:
            yield array_namespace, None, None


def _convert_to_numpy(array, xp):
    """Convert `array` into a NumPy ndarray on the CPU.

    Parameters
    ----------
    array : array-like
        The array to convert.
    xp : module
        The Array API namespace of `array`.

    Returns
    -------
    numpy.ndarray
        The converted array, allocated on the host.
    """
    xp_name = xp.__name__

    if dpctl_available and xp_name == "dpctl.tensor":
        return dpt.to_numpy(array)
    elif dpnp_available and isinstance(array, dpnp.ndarray):
        return dpnp.asnumpy(array)
    elif sklearn_check_version("1.2"):
        # Defer to scikit-learn's own converter for all other namespaces
        # (torch, cupy, array_api_strict, ...).
        return _sklearn_convert_to_numpy(array, xp)
    else:
        return _asarray(array, xp)

or:

with sklearn.config_context(array_api_dispatch=True):
# your code here
def get_namespace(*arrays, remove_none=True, remove_types=(str,), xp=None):
    """Get namespace of arrays.

    Returns the namespace of SYCL-related arrays which define the
    ``__sycl_usm_array_interface__`` attribute regardless of Array API
    support or the ``array_api_dispatch`` configuration. Otherwise defers to
    scikit-learn's ``get_namespace`` (scikit-learn >= 1.2) or falls back to
    NumPy.

    Parameters
    ----------
    *arrays : array objects
        Array objects whose common namespace is inspected.

    remove_none : bool, default=True
        Whether to ignore None objects passed in `arrays`.

    remove_types : tuple or list, default=(str,)
        Types to ignore in `arrays`.

    xp : module, default=None
        Precomputed array namespace module. When passed, skips array
        inspection (forwarded to scikit-learn's ``get_namespace``).

    Returns
    -------
    namespace : module
        Namespace shared by array objects.

    is_array_api_compliant : bool
        True if the arrays are containers that implement the Array API spec.
    """

    sycl_type, xp_sycl_namespace, is_array_api_compliant = _get_sycl_namespace(*arrays)

    if sycl_type:
        return xp_sycl_namespace, is_array_api_compliant
    elif sklearn_check_version("1.2"):
        return sklearn_get_namespace(
            *arrays, remove_none=remove_none, remove_types=remove_types, xp=xp
        )
    else:
        return np, False
Loading