Add model server raw deployment smoke test and ability to provide multiple default queries based on inference type #81

Merged Jan 2, 2025

28 commits:
b362382
Create size-labeler.yml
rnetser Dec 18, 2024
3c6a875
Delete .github/workflows/size-labeler.yml
rnetser Dec 18, 2024
ccb63af
Merge branch 'main' of github.com:rnetser/opendatahub-tests
rnetser Dec 24, 2024
da0c898
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 25, 2024
94a82ec
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 26, 2024
c0c82dd
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 27, 2024
5feb447
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 30, 2024
19b9c56
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
e22ac1a
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
56ab9c5
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
5a17f03
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
ef5fe65
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Dec 31, 2024
1875a44
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 1, 2025
840d442
Merge branch 'main' of https://github.com/opendatahub-io/opendatahub-…
rnetser Jan 2, 2025
b6b2372
add bge-large-en-v1.5-caikit raw deployment smoke test
rnetser Jan 2, 2025
bf365be
add bge-large-en-v1.5-caikit raw deployment smoke test
rnetser Jan 2, 2025
88c0545
add infer data
rnetser Jan 2, 2025
1d67045
add infer data
rnetser Jan 2, 2025
9dcfcd1
add infer data
rnetser Jan 2, 2025
8b1ce99
add multi responses
rnetser Jan 2, 2025
c8d1eb7
fix queries
rnetser Jan 2, 2025
d5d075a
fix default query and update doc
rnetser Jan 2, 2025
aca061a
fix default query and update doc
rnetser Jan 2, 2025
c40931c
fix default query and update doc
rnetser Jan 2, 2025
aca20ba
fix default query and update doc
rnetser Jan 2, 2025
82192a6
fix default query and update doc
rnetser Jan 2, 2025
4c26f95
fix default query and update doc
rnetser Jan 2, 2025
dc6202d
fix key name
rnetser Jan 2, 2025
5 changes: 4 additions & 1 deletion CONTRIBUTING.md
@@ -43,9 +43,11 @@ To add a new runtime, you need to:
1. Add a new file under [manifests](utilities/manifests) directory.
2. Add `<runtime>_INFERENCE_CONFIG` dict with:
```code
"default_query_model": {
"support_multi_default_queries": True|False, # Optioanl, if set to True, `default_query_model` should contains a dict with corresponding inference_type
"default_query_model": {
"query_input": <default query to be sent to the model>,
"query_output": <expected output>,
"use_regex": True|False, # Optional, if set to True, `query_output` should be a regex
},
"<query type, for example: all-tokens>": {
"<protocol, for example HTTP>": {
@@ -60,3 +62,4 @@ To add a new runtime, you need to:
```
3. Add a new entry to [ModelInferenceRuntime](utilities.constants.ModelInferenceRuntime)
4. Add the new entry to [Mapping](utilities.constants.ModelInferenceRuntime.MAPPING)
5. See [caikit_standalone](utilities/manifests/caikit_standalone.py) for a real example, or the hypothetical sketch below
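
For orientation, a hypothetical config using the multi-default-query layout might look like the sketch below. The runtime name, inference types, queries, and outputs are invented for illustration, and the protocol-level keys are omitted because they are collapsed in the diff above:

```python
# Hypothetical <runtime>_INFERENCE_CONFIG sketch -- all names and values
# below are illustrative, not taken from the repository's manifests.
MY_RUNTIME_INFERENCE_CONFIG = {
    # When True, "default_query_model" maps inference types to queries
    # instead of holding a single query.
    "support_multi_default_queries": True,
    "default_query_model": {
        "embedding": {
            "query_input": "What is the boiling point of nitrogen?",
            "query_output": r'\{"values": .*\}',
            "use_regex": True,  # query_output is matched as a regex
        },
        "rerank": {
            "query_input": "Which document mentions nitrogen?",
            "query_output": '{"score": 0.98}',
        },
    },
    "all-tokens": {
        "HTTP": {
            # Protocol-level request/response fields are collapsed in the
            # diff excerpt above, so they are elided here.
            ...
        },
    },
}
```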
10 changes: 5 additions & 5 deletions tests/model_serving/model_server/authentication/conftest.py
@@ -16,8 +16,8 @@
from pyhelper_utils.shell import run_command
from pytest_testconfig import config as py_config

from utilities.infra import create_isvc_view_role, create_ns, s3_endpoint_secret
from tests.model_serving.model_server.utils import create_isvc, get_pods_by_isvc_label
from utilities.infra import create_isvc_view_role, create_ns, get_pods_by_isvc_label, s3_endpoint_secret
from tests.model_serving.model_server.utils import create_isvc
from utilities.constants import (
KServeDeploymentType,
ModelFormat,
@@ -199,17 +199,17 @@ def http_s3_caikit_serverless_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
http_s3_caikit_serving_runtime: ServingRuntime,
http_s3_caikit_tgis_serving_runtime: ServingRuntime,
s3_models_storage_uri: str,
http_model_service_account: ServiceAccount,
) -> InferenceService:
with create_isvc(
client=admin_client,
name=f"{Protocols.HTTP}-{ModelFormat.CAIKIT}",
namespace=model_namespace.name,
runtime=http_s3_caikit_serving_runtime.name,
runtime=http_s3_caikit_tgis_serving_runtime.name,
storage_uri=s3_models_storage_uri,
model_format=http_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name,
model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name,
deployment_mode=KServeDeploymentType.SERVERLESS,
model_service_account=http_model_service_account.name,
enable_auth=True,
@@ -1,8 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import ModelFormat, ModelStoragePath, Protocols, ModelInferenceRuntime
from utilities.inference_utils import Inference

@@ -1,6 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import verify_inference_response
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import ModelFormat, ModelStoragePath, Protocols, ModelInferenceRuntime
from utilities.inference_utils import Inference

95 changes: 0 additions & 95 deletions tests/model_serving/model_server/authentication/utils.py

This file was deleted.

3 changes: 2 additions & 1 deletion tests/model_serving/model_server/conftest.py
@@ -68,7 +68,8 @@ def http_model_service_account(admin_client: DynamicClient, models_endpoint_s3_s


@pytest.fixture(scope="class")
def http_s3_caikit_serving_runtime(
def http_s3_caikit_tgis_serving_runtime(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
) -> ServingRuntime:
6 changes: 2 additions & 4 deletions tests/model_serving/model_server/model_car/test_oci_image.py
@@ -1,9 +1,7 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import get_pods_by_isvc_label
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.infra import get_pods_by_isvc_label
from utilities.constants import ModelName, Protocols, ModelInferenceRuntime
from utilities.inference_utils import Inference

@@ -1,8 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
ModelFormat,
ModelVersion,
@@ -31,7 +29,7 @@
],
indirect=True,
)
class TestOpenVINO:
class TestONNXServerless:
@pytest.mark.smoke
@pytest.mark.jira("RHOAIENG-9045")
def test_serverless_onnx_rest_inference(self, ovms_serverless_inference_service):
@@ -1,8 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
ModelAndFormat,
ModelFormat,
@@ -33,7 +31,7 @@
],
indirect=True,
)
class TestOpenVINO:
class TestOpenVINOServerless:
@pytest.mark.smoke
@pytest.mark.polarion("ODS-2626")
def test_serverless_openvino_rest_inference(self, ovms_serverless_inference_service):
@@ -1,8 +1,6 @@
import pytest

from tests.model_serving.model_server.authentication.utils import (
verify_inference_response,
)
from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
ModelStoragePath,
Protocols,
@@ -22,7 +20,7 @@
],
indirect=True,
)
class TestOpenVINO:
class TestOpenVINOModelMesh:
@pytest.mark.smoke
@pytest.mark.polarion("ODS-2053", "ODS-2054")
def test_model_mesh_openvino_rest_inference(self, http_s3_openvino_model_mesh_inference_service):
68 changes: 59 additions & 9 deletions tests/model_serving/model_server/raw_deployment/conftest.py
@@ -8,25 +8,26 @@
from ocp_resources.serving_runtime import ServingRuntime

from tests.model_serving.model_server.utils import create_isvc
from utilities.constants import KServeDeploymentType, ModelFormat, Protocols
from utilities.constants import KServeDeploymentType, ModelInferenceRuntime, Protocols, RuntimeTemplates
from utilities.serving_runtime import ServingRuntimeFromTemplate


@pytest.fixture(scope="class")
def http_s3_caikit_raw_inference_service(
def http_s3_caikit_tgis_raw_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
http_s3_caikit_serving_runtime: ServingRuntime,
http_s3_caikit_tgis_serving_runtime: ServingRuntime,
s3_models_storage_uri: str,
http_model_service_account: ServiceAccount,
) -> InferenceService:
isvc_kwargs = {
"client": admin_client,
"name": f"{Protocols.HTTP}-{ModelFormat.CAIKIT}",
"name": request.param["name"],
"namespace": model_namespace.name,
"runtime": http_s3_caikit_serving_runtime.name,
"runtime": http_s3_caikit_tgis_serving_runtime.name,
"storage_uri": s3_models_storage_uri,
"model_format": http_s3_caikit_serving_runtime.instance.spec.supportedModelFormats[0].name,
"model_format": http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name,
"model_service_account": http_model_service_account.name,
"deployment_mode": KServeDeploymentType.RAW_DEPLOYMENT,
}
@@ -44,15 +45,64 @@ def http_s3_caikit_raw_inference_service(

@pytest.fixture()
def patched_http_s3_caikit_raw_isvc_visibility_label(
request: FixtureRequest, admin_client: DynamicClient, http_s3_caikit_raw_inference_service: InferenceService
request: FixtureRequest, admin_client: DynamicClient, http_s3_caikit_tgis_raw_inference_service: InferenceService
) -> InferenceService:
with ResourceEditor(
patches={
http_s3_caikit_raw_inference_service: {
http_s3_caikit_tgis_raw_inference_service: {
"metadata": {
"labels": {"networking.kserve.io/visibility": request.param["visibility"]},
}
}
}
):
yield http_s3_caikit_raw_inference_service
yield http_s3_caikit_tgis_raw_inference_service


@pytest.fixture(scope="class")
def http_s3_caikit_standalone_serving_runtime(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
) -> ServingRuntime:
with ServingRuntimeFromTemplate(
client=admin_client,
name=f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME}",
namespace=model_namespace.name,
template_name=RuntimeTemplates.CAIKIT_STANDALONE_SERVING,
multi_model=False,
enable_http=True,
enable_grpc=False,
) as model_runtime:
yield model_runtime


@pytest.fixture(scope="class")
def http_s3_caikit_standalone_raw_inference_service(
request: FixtureRequest,
admin_client: DynamicClient,
model_namespace: Namespace,
http_s3_caikit_standalone_serving_runtime: ServingRuntime,
s3_models_storage_uri: str,
http_model_service_account: ServiceAccount,
) -> InferenceService:
isvc_kwargs = {
"client": admin_client,
"name": request.param["name"],
"namespace": model_namespace.name,
"runtime": http_s3_caikit_standalone_serving_runtime.name,
"storage_uri": s3_models_storage_uri,
"model_format": http_s3_caikit_standalone_serving_runtime.instance.spec.supportedModelFormats[0].name,
"model_service_account": http_model_service_account.name,
"deployment_mode": KServeDeploymentType.RAW_DEPLOYMENT,
}

enable_auth = False

if hasattr(request, "param"):
enable_auth = request.param.get("enable-auth")

isvc_kwargs["enable_auth"] = enable_auth

with create_isvc(**isvc_kwargs) as isvc:
yield isvc
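
As a usage illustration, here is a minimal sketch of a test consuming the new fixture via indirect parametrization. The `name` and `enable-auth` keys match what the fixture reads from `request.param`, but the model name, class name, and assertion are invented:

```python
import pytest


@pytest.mark.parametrize(
    "http_s3_caikit_standalone_raw_inference_service",
    [{"name": "bge-large-en-caikit", "enable-auth": False}],  # illustrative values
    indirect=True,
)
class TestCaikitStandaloneRawSketch:
    def test_isvc_created(self, http_s3_caikit_standalone_raw_inference_service):
        # ocp_resources resources expose an `exists` property; this only
        # checks that the raw-deployment InferenceService was created.
        assert http_s3_caikit_standalone_raw_inference_service.exists
```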