Skip to content

Commit

Permalink
[model server] Refactor fixtures to re-use code (#88)
Browse files Browse the repository at this point in the history
* Create size-labeler.yml

* Delete .github/workflows/size-labeler.yml

* re-use fixtures

* re-use fixtures

* fix fixture name

* fix kwargs names

* fix wait replicas model mesh

* fix onnx response

* fix onnx response

* fix caikit expected, remove spaces
  • Loading branch information
rnetser authored Jan 8, 2025
1 parent 0183c30 commit c7bd165
Show file tree
Hide file tree
Showing 11 changed files with 168 additions and 183 deletions.
38 changes: 27 additions & 11 deletions tests/model_serving/model_server/authentication/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,27 +108,25 @@ def http_view_role(
def http_role_binding(
    admin_client: DynamicClient,
    http_view_role: Role,
    model_service_account: ServiceAccount,
    http_s3_caikit_serverless_inference_service: InferenceService,
) -> RoleBinding:
    """Bind the HTTP view Role to the shared model ServiceAccount.

    The inference-service fixture is requested (although unused directly) so the
    binding is created only after the ISVC exists and is torn down before it.

    Yields:
        RoleBinding: class-scoped binding granting the SA view access.
    """
    # NOTE(review): scraped diff interleaved the pre/post versions of this
    # fixture; this is the post-change form using the shared
    # `model_service_account` fixture instead of the old HTTP-prefixed one.
    with RoleBinding(
        client=admin_client,
        namespace=model_service_account.namespace,
        name=f"{Protocols.HTTP}-{model_service_account.name}-view",
        role_ref_name=http_view_role.name,
        role_ref_kind=http_view_role.kind,
        subjects_kind=model_service_account.kind,
        subjects_name=model_service_account.name,
    ) as rb:
        yield rb


@pytest.fixture(scope="class")
def http_inference_token(model_service_account: ServiceAccount, http_role_binding: RoleBinding) -> str:
    """Mint a short-lived bearer token for the model ServiceAccount via `oc create token`.

    Requests `http_role_binding` so the SA already has view permissions when the
    token is used.  Returns the token with surrounding whitespace stripped
    (run_command returns a tuple; index 1 is stdout).
    """
    return run_command(
        command=shlex.split(f"oc create token -n {model_service_account.namespace} {model_service_account.name}")
    )[1].strip()


Expand Down Expand Up @@ -201,7 +199,7 @@ def http_s3_caikit_serverless_inference_service(
model_namespace: Namespace,
http_s3_caikit_tgis_serving_runtime: ServingRuntime,
s3_models_storage_uri: str,
http_model_service_account: ServiceAccount,
model_service_account: ServiceAccount,
) -> InferenceService:
with create_isvc(
client=admin_client,
Expand All @@ -211,7 +209,7 @@ def http_s3_caikit_serverless_inference_service(
storage_uri=s3_models_storage_uri,
model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name,
deployment_mode=KServeDeploymentType.SERVERLESS,
model_service_account=http_model_service_account.name,
model_service_account=model_service_account.name,
enable_auth=True,
) as isvc:
yield isvc
Expand Down Expand Up @@ -301,3 +299,21 @@ def unprivileged_s3_caikit_serverless_inference_service(
model_service_account=unprivileged_model_service_account.name,
) as isvc:
yield isvc


@pytest.fixture(scope="class")
def http_s3_caikit_tgis_serving_runtime(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> ServingRuntime:
    """Class-scoped Caikit TGIS ServingRuntime created from its template.

    The runtime is single-model (multi_model=False) with HTTP enabled and
    gRPC disabled; it is deleted again when the class finishes.
    """
    runtime_kwargs = {
        "client": admin_client,
        "name": f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}",
        "namespace": model_namespace.name,
        "template_name": RuntimeTemplates.CAIKIT_TGIS_SERVING,
        "multi_model": False,
        "enable_http": True,
        "enable_grpc": False,
    }
    with ServingRuntimeFromTemplate(**runtime_kwargs) as model_runtime:
        yield model_runtime
78 changes: 50 additions & 28 deletions tests/model_serving/model_server/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.cluster_service_version import ClusterServiceVersion
from ocp_resources.inference_service import InferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.secret import Secret
from ocp_resources.service_account import ServiceAccount
from ocp_resources.serving_runtime import ServingRuntime

from utilities.constants import Protocols, ModelInferenceRuntime, RuntimeTemplates
from tests.model_serving.model_server.utils import create_isvc
from utilities.infra import s3_endpoint_secret
from utilities.serving_runtime import ServingRuntimeFromTemplate

Expand Down Expand Up @@ -57,50 +58,71 @@ def models_endpoint_s3_secret(

# HTTP model serving
@pytest.fixture(scope="class")
def model_service_account(admin_client: DynamicClient, models_endpoint_s3_secret: Secret) -> ServiceAccount:
    """Class-scoped ServiceAccount referencing the S3 endpoint Secret.

    Created in the Secret's namespace so inference services can mount the
    bucket credentials.  Yields the live SA; it is deleted on teardown.
    """
    # NOTE(review): scraped diff interleaved old (`http_model_service_account`)
    # and new lines; this is the renamed, protocol-agnostic post-change form.
    with ServiceAccount(
        client=admin_client,
        namespace=models_endpoint_s3_secret.namespace,
        name="models-bucket-sa",
        secrets=[{"name": models_endpoint_s3_secret.name}],
    ) as sa:
        yield sa


@pytest.fixture(scope="class")
def http_s3_caikit_tgis_serving_runtime(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> ServingRuntime:
    """Class-scoped Caikit TGIS ServingRuntime created from template.

    HTTP is enabled and gRPC disabled; single-model serving (multi_model=False).
    `request` is accepted for fixture-signature parity but not read here.
    Yields the runtime; it is removed when the class scope ends.
    """
    with ServingRuntimeFromTemplate(
        client=admin_client,
        name=f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}",
        namespace=model_namespace.name,
        template_name=RuntimeTemplates.CAIKIT_TGIS_SERVING,
        multi_model=False,  # one model per runtime instance
        enable_http=True,
        enable_grpc=False,
    ) as model_runtime:
        yield model_runtime


@pytest.fixture(scope="class")
def serving_runtime_from_template(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ServingRuntime, Any, Any]:
    """Generic ServingRuntime fixture driven by indirect parametrization.

    Required request.param keys: "name", "template-name", "multi-model".
    Optional keys "enable-http" / "enable-grpc" are forwarded only when present,
    so the template defaults apply otherwise.
    """
    runtime_kwargs = {
        "client": admin_client,
        "name": request.param["name"],
        "namespace": model_namespace.name,
        "template_name": request.param["template-name"],
        "multi_model": request.param["multi-model"],
    }

    # BUG FIX: the walrus must be parenthesized.  Without parentheses,
    # `x := get(...) is not None` binds the *comparison result* (a bool) to x,
    # so a parametrized `"enable-http": False` would be forwarded as True.
    if (enable_http := request.param.get("enable-http")) is not None:
        runtime_kwargs["enable_http"] = enable_http

    if (enable_grpc := request.param.get("enable-grpc")) is not None:
        runtime_kwargs["enable_grpc"] = enable_grpc

    with ServingRuntimeFromTemplate(**runtime_kwargs) as model_runtime:
        yield model_runtime


@pytest.fixture(scope="class")
def ci_s3_storage_uri(request: FixtureRequest, ci_s3_bucket_name: str) -> str:
    """Return the s3:// URI (trailing slash included) for the parametrized model directory."""
    model_dir = request.param["model-dir"]
    return f"s3://{ci_s3_bucket_name}/{model_dir}/"


@pytest.fixture(scope="class")
def s3_models_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    serving_runtime_from_template: ServingRuntime,
    s3_models_storage_uri: str,
    model_service_account: ServiceAccount,
) -> InferenceService:
    """Generic S3-backed InferenceService driven by indirect parametrization.

    Required request.param keys: "name", "deployment-mode".  Optional
    "enable-auth" toggles token auth on the ISVC (defaults to False).
    The model format is taken from the runtime's first supportedModelFormats
    entry.
    """
    isvc_kwargs = {
        "client": admin_client,
        "name": request.param["name"],
        "namespace": model_namespace.name,
        "runtime": serving_runtime_from_template.name,
        "storage_uri": s3_models_storage_uri,
        "model_format": serving_runtime_from_template.instance.spec.supportedModelFormats[0].name,
        "model_service_account": model_service_account.name,
        "deployment_mode": request.param["deployment-mode"],
    }

    enable_auth = False

    if hasattr(request, "param"):
        # BUG FIX: .get() without a default returns None when "enable-auth"
        # is absent, which would pass enable_auth=None to create_isvc instead
        # of a boolean; keep False as the explicit default.
        enable_auth = request.param.get("enable-auth", False)

    isvc_kwargs["enable_auth"] = enable_auth

    with create_isvc(**isvc_kwargs) as isvc:
        yield isvc
108 changes: 0 additions & 108 deletions tests/model_serving/model_server/raw_deployment/conftest.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
KServeDeploymentType,
ModelName,
ModelStoragePath,
Protocols,
ModelInferenceRuntime,
RuntimeTemplates,
)

pytestmark = pytest.mark.usefixtures("valid_aws_config")
Expand All @@ -15,45 +17,49 @@
@pytest.mark.raw_deployment
@pytest.mark.jira("RHOAIENG-11749")
@pytest.mark.parametrize(
"model_namespace, s3_models_storage_uri, http_s3_caikit_standalone_raw_inference_service",
"model_namespace, s3_models_storage_uri, serving_runtime_from_template, s3_models_inference_service",
[
pytest.param(
{"name": "raw-deployment-caikit-bge"},
{"model-dir": ModelStoragePath.EMBEDDING_MODEL},
{"name": "bge-large-en-caikit"},
{
"name": f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME}",
"template-name": RuntimeTemplates.CAIKIT_STANDALONE_SERVING,
"multi-model": False,
"enable-http": True,
},
{"name": "bge-large-en-caikit", "deployment-mode": KServeDeploymentType.RAW_DEPLOYMENT},
)
],
indirect=True,
)
class TestBgeLargeEnCaikit:
def test_caikit_bge_large_en_embedding_raw_internal_route(self, http_s3_caikit_standalone_raw_inference_service):
def test_caikit_bge_large_en_embedding_raw_internal_route(self, s3_models_inference_service):
"""Test Caikit bge-large-en embedding model inference using internal route"""
verify_inference_response(
inference_service=http_s3_caikit_standalone_raw_inference_service,
inference_service=s3_models_inference_service,
runtime=ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME,
inference_type="embedding",
protocol=Protocols.HTTP,
model_name=ModelName.CAIKIT_BGE_LARGE_EN,
use_default_query=True,
)

def test_caikit_bge_large_en_rerank_raw_internal_route(self, http_s3_caikit_standalone_raw_inference_service):
def test_caikit_bge_large_en_rerank_raw_internal_route(self, s3_models_inference_service):
"""Test Caikit bge-large-en rerank model inference using internal route"""
verify_inference_response(
inference_service=http_s3_caikit_standalone_raw_inference_service,
inference_service=s3_models_inference_service,
runtime=ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME,
inference_type="rerank",
protocol=Protocols.HTTP,
model_name=ModelName.CAIKIT_BGE_LARGE_EN,
use_default_query=True,
)

def test_caikit_bge_large_en_sentence_similarity_raw_internal_route(
self, http_s3_caikit_standalone_raw_inference_service
):
def test_caikit_bge_large_en_sentence_similarity_raw_internal_route(self, s3_models_inference_service):
"""Test Caikit bge-large-en sentence-similarity model inference using internal route"""
verify_inference_response(
inference_service=http_s3_caikit_standalone_raw_inference_service,
inference_service=s3_models_inference_service,
runtime=ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME,
inference_type="sentence-similarity",
protocol=Protocols.HTTP,
Expand Down
Empty file.
23 changes: 23 additions & 0 deletions tests/model_serving/model_server/routes/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.inference_service import InferenceService
from ocp_resources.resource import ResourceEditor


@pytest.fixture()
def patched_http_s3_caikit_raw_isvc_visibility_label(
    request: FixtureRequest,
    admin_client: DynamicClient,
    s3_models_inference_service: InferenceService,
) -> InferenceService:
    """Temporarily apply the `networking.kserve.io/visibility` label to the ISVC.

    The label value comes from request.param["visibility"]; ResourceEditor
    reverts the patch when the fixture's scope exits.
    """
    visibility_patch = {
        s3_models_inference_service: {
            "metadata": {
                "labels": {"networking.kserve.io/visibility": request.param["visibility"]},
            }
        }
    }
    with ResourceEditor(patches=visibility_patch):
        yield s3_models_inference_service
Loading

0 comments on commit c7bd165

Please sign in to comment.