Add model server raw deployment smoke test and ability to provide multiple default queries based on inference type #81

Merged Jan 2, 2025

28 commits:
b362382
Create size-labeler.yml
rnetser Dec 18, 2024
3c6a875
Delete .github/workflows/size-labeler.yml
rnetser Dec 18, 2024
ccb63af
Merge branch 'main' of github.com:rnetser/opendatahub-tests
rnetser Dec 24, 2024
da0c898
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 25, 2024
94a82ec
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 26, 2024
c0c82dd
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 27, 2024
5feb447
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 30, 2024
19b9c56
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
e22ac1a
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
56ab9c5
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
5a17f03
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
ef5fe65
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
1875a44
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 1, 2025
840d442
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 2, 2025
b6b2372
add bge-large-en-v1.5-caikit raw deployment smoke test
rnetser Jan 2, 2025
bf365be
add bge-large-en-v1.5-caikit raw deployment smoke test
rnetser Jan 2, 2025
88c0545
add infer data
rnetser Jan 2, 2025
1d67045
add infer data
rnetser Jan 2, 2025
9dcfcd1
add infer data
rnetser Jan 2, 2025
8b1ce99
add multi responses
rnetser Jan 2, 2025
c8d1eb7
fix queries
rnetser Jan 2, 2025
d5d075a
fix default query and update doc
rnetser Jan 2, 2025
aca061a
fix default query and update doc
rnetser Jan 2, 2025
c40931c
fix default query and update doc
rnetser Jan 2, 2025
aca20ba
fix default query and update doc
rnetser Jan 2, 2025
82192a6
fix default query and update doc
rnetser Jan 2, 2025
4c26f95
fix default query and update doc
rnetser Jan 2, 2025
dc6202d
fix key name
rnetser Jan 2, 2025
5 changes: 4 additions & 1 deletion CONTRIBUTING.md
@@ -43,9 +43,11 @@ To add a new runtime, you need to:
1. Add a new file under [manifests](utilities/manifests) directory.
2. Add `<runtime>_INFERENCE_CONFIG` dict with:
```code
"default_query_model": {
"support_multi_default_queries": True|False, # Optioanl, if set to True, `default_query_model` should contains a dict with corresponding inference_type
"default_query_model": {
"query_input": <default query to be sent to the model>,
"query_output": <expected output>,
"use_regex": True|False, # Optional, if set to True, `query_output` should be a regex
},
"<query type, for example: all-tokens>": {
"<protocol, for example HTTP>": {
@@ -60,3 +62,4 @@ To add a new runtime, you need to:
```
3. Add a new entry to [ModelInferenceRuntime](utilities.constants.ModelInferenceRuntime)
4. Add the new entry to [Mapping](utilities.constants.ModelInferenceRuntime.MAPPING)
5. See [caikit_standalone](utilities/manifests/caikit_standalone.py) for a real example, or the hypothetical sketch below
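
For orientation, a hypothetical config using the multi-default-query layout might look like the sketch below. The runtime name, inference types, queries, and outputs are invented for illustration, and the protocol-level keys are omitted because they are collapsed in the diff above:

```python
# Hypothetical <runtime>_INFERENCE_CONFIG sketch -- all names and values
# below are illustrative, not taken from the repository's manifests.
MY_RUNTIME_INFERENCE_CONFIG = {
    # When True, "default_query_model" maps inference types to queries
    # instead of holding a single query.
    "support_multi_default_queries": True,
    "default_query_model": {
        "embedding": {
            "query_input": "What is the boiling point of nitrogen?",
            "query_output": r'\{"values": .*\}',
            "use_regex": True,  # query_output is matched as a regex
        },
        "rerank": {
            "query_input": "Which document mentions nitrogen?",
            "query_output": '{"score": 0.98}',
        },
    },
    "all-tokens": {
        "HTTP": {
            # Protocol-level request/response fields are collapsed in the
            # diff excerpt above, so they are elided here.
            ...
        },
    },
}
```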
10 changes: 5 additions & 5 deletions tests/model_serving/model_server/authentication/conftest.py
@@ -16,8 +16,8 @@
from pyhelper_utils.shell import run_command
from pytest_testconfig import config as py_config

from utilities.infra import create_isvc_view_role, create_ns, s3_endpoint_secret
from tests.model_serving.model_server.utils import create_isvc, get_pods_by_isvc_label
from utilities.infra import create_isvc_view_role, create_ns, get_pods_by_isvc_label, s3_endpoint_secret
from tests.model_serving.model_server.utils import create_isvc
from utilities.constants import (
KServeDeploymentType,
ModelFormat,
@@ -199,17 +199,17 @@ def http_s3_caikit_serverless_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
http_s3_caikit_serving_runtime: ServingRuntime,
http_s3_caikit_tgis_serving_runtime: ServingRuntime,
s3_models_storage_uri: str,
http_model_service_account: ServiceAccount,
) -> InferenceService:
with create_isvc(
client=admin_client,
name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}",
namespace=model_namespace.name,
runtime=http_s3_caikit_serving_runtime.name,
runtime=http_s3_caikit_tgis_serving_runtime.name,
storage_uri=s3_models_storage_uri,
model_format=http_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name,
model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name,
deployment_mode=KServeDeploymentType.SERVERLESS,
model_service_account=http_model_service_account.name,
enable_auth=True,
@@ -1,8 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import ModelFormat, ModelStoragePath, Protocols, ModelInferenceRuntime
from utilities.inference_utils import Inference

@@ -1,6 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import verify_inference_response
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import ModelFormat, ModelStoragePath, Protocols, ModelInferenceRuntime
from utilities.inference_utils import Inference

95 changes: 0 additions & 95 deletions tests/model_serving/model_server/authentication/utils.py

This file was deleted.

3 changes: 2 additions & 1 deletion tests/model_serving/model_server/conftest.py
@@ -68,7 +68,8 @@ def http_model_service_account(admin_client: DynamicClient, models_endpoint_s3_s


@pytest.fixture(scope="class")
def http_s3_caikit_serving_runtime(
def http_s3_caikit_tgis_serving_runtime(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
) -> ServingRuntime:
6 changes: 2 additions & 4 deletions tests/model_serving/model_server/model_car/test_oci_image.py
@@ -1,9 +1,7 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import get_pods_by_isvc_label
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.infra import get_pods_by_isvc_label
from utilities.constants import ModelName, Protocols, ModelInferenceRuntime
from utilities.inference_utils import Inference

@@ -1,8 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
ModelFormat,
ModelVersion,
@@ -31,7 +29,7 @@
],
indirect=True,
)
class TestOpenVINO:
class TestONNXServerless:
@pytest.mark.smoke
@pytest.mark.jira("RHOAIENG-9045")
def test_serverless_onnx_rest_inference(self, ovms_serverless_inference_service):
@@ -1,8 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
ModelAndFormat,
ModelFormat,
@@ -33,7 +31,7 @@
],
indirect=True,
)
class TestOpenVINO:
class TestOpenVINOServerless:
@pytest.mark.smoke
@pytest.mark.polarion("ODS-2626")
def test_serverless_openvino_rest_inference(self, ovms_serverless_inference_service):
@@ -1,8 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
ModelStoragePath,
Protocols,
@@ -22,7 +20,7 @@
],
indirect=True,
)
class TestOpenVINO:
class TestOpenVINOModelMesh:
@pytest.mark.smoke
@pytest.mark.polarion("ODS-2053", "ODS-2054")
def test_model_mesh_openvino_rest_inference(self, http_s3_openvino_model_mesh_inference_service):
68 changes: 59 additions & 9 deletions tests/model_serving/model_server/raw_deployment/conftest.py
@@ -8,25 +8,26 @@
from ocp_resources.serving_runtime import ServingRuntime

from tests.model_serving.model_server.utils import create_isvc
from utilities.constants import KServeDeploymentType, ModelFormat, Protocols
from utilities.constants import KServeDeploymentType, ModelInferenceRuntime, Protocols, RuntimeTemplates
from utilities.serving_runtime import ServingRuntimeFromTemplate


@pytest.fixture(scope="class")
def http_s3_caikit_raw_inference_service(
def http_s3_caikit_tgis_raw_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
http_s3_caikit_serving_runtime: ServingRuntime,
http_s3_caikit_tgis_serving_runtime: ServingRuntime,
s3_models_storage_uri: str,
http_model_service_account: ServiceAccount,
) -> InferenceService:
isvc_kwargs = {
"client": admin_client,
"name": f"{Protocols.HTTP}-{ModelFormat.CAIKIT}",
"name": request.param["name"],
"namespace": model_namespace.name,
"runtime": http_s3_caikit_serving_runtime.name,
"runtime": http_s3_caikit_tgis_serving_runtime.name,
"storage_uri": s3_models_storage_uri,
"model_format": http_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name,
"model_format": http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name,
"model_service_account": http_model_service_account.name,
"deployment_mode": KServeDeploymentType.RAW_DEPLOYMENT,
}
@@ -44,15 +45,64 @@ def http_s3_caikit_raw_inference_service(

@pytest.fixture()
def patched_http_s3_caikit_raw_isvc_visibility_label(
request: FixtureRequest, admin_client: DynamicClient, http_s3_caikit_raw_inference_service: InferenceService
request: FixtureRequest, admin_client: DynamicClient, http_s3_caikit_tgis_raw_inference_service: InferenceService
) -> InferenceService:
with ResourceEditor(
patches={
http_s3_caikit_raw_inference_service: {
http_s3_caikit_tgis_raw_inference_service: {
"metadata": {
"labels": {"networking.kserve.io/visibility": request.param["visibility"]},
}
}
}
):
yield http_s3_caikit_raw_inference_service
yield http_s3_caikit_tgis_raw_inference_service


@pytest.fixture(scope="class")
def http_s3_caikit_standalone_serving_runtime(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
) -> ServingRuntime:
with ServingRuntimeFromTemplate(
client=admin_client,
name=f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME}",
namespace=model_namespace.name,
template_name=RuntimeTemplates.CAIKIT_STANDALONE_SERVING,
multi_model=False,
enable_http=True,
enable_grpc=False,
) as model_runtime:
yield model_runtime


@pytest.fixture(scope="class")
def http_s3_caikit_standalone_raw_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
http_s3_caikit_standalone_serving_runtime: ServingRuntime,
s3_models_storage_uri: str,
http_model_service_account: ServiceAccount,
) -> InferenceService:
isvc_kwargs = {
"client": admin_client,
"name": request.param["name"],
"namespace": model_namespace.name,
"runtime": http_s3_caikit_standalone_serving_runtime.name,
"storage_uri": s3_models_storage_uri,
"model_format": http_s3_caikit_standalone_serving_runtime.instance.spec.supportedModelFormats[0].name,
"model_service_account": http_model_service_account.name,
"deployment_mode": KServeDeploymentType.RAW_DEPLOYMENT,
}

enable_auth = False

if hasattr(request, "param"):
enable_auth = request.param.get("enable-auth")

isvc_kwargs["enable_auth"] = enable_auth

with create_isvc(**isvc_kwargs) as isvc:
yield isvc
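
As a usage illustration, here is a minimal sketch of a test consuming the new fixture via indirect parametrization. The `name` and `enable-auth` keys match what the fixture reads from `request.param`, but the model name, class name, and assertion are invented:

```python
import pytest


@pytest.mark.parametrize(
    "http_s3_caikit_standalone_raw_inference_service",
    [{"name": "bge-large-en-caikit", "enable-auth": False}],  # illustrative values
    indirect=True,
)
class TestCaikitStandaloneRawSketch:
    def test_isvc_created(self, http_s3_caikit_standalone_raw_inference_service):
        # ocp_resources resources expose an `exists` property; this only
        # checks that the raw-deployment InferenceService was created.
        assert http_s3_caikit_standalone_raw_inference_service.exists
```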