Skip to content

Commit

Permalink
[model server] Refactor fixtures to re-use code (#88)
Browse files Browse the repository at this point in the history
* Create size-labeler.yml

* Delete .github/workflows/size-labeler.yml

* re-use fixtures

* re-use fixtures

* fix fixture name

* fix kwargs names

* fix wait replicas model mesh

* fix onnx response

* fix onnx response

* fix caikit expected, remove spaces
  • Loading branch information
rnetser authored Jan 8, 2025
1 parent 0183c30 commit c7bd165
Show file tree
Hide file tree
Showing 11 changed files with 168 additions and 183 deletions.
38 changes: 27 additions & 11 deletions tests/model_serving/model_server/authentication/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,27 +108,25 @@ def http_view_role(
def http_role_binding(
    admin_client: DynamicClient,
    http_view_role: Role,
    model_service_account: ServiceAccount,
    http_s3_caikit_serverless_inference_service: InferenceService,
) -> RoleBinding:
    """Bind the HTTP view Role to the shared model ServiceAccount.

    The inference-service fixture is requested (although unused directly) so the
    binding is created only after the ISVC exists and is torn down before it.

    Yields:
        RoleBinding: class-scoped binding granting the SA view access.
    """
    # NOTE(review): scraped diff interleaved the pre/post versions of this
    # fixture; this is the post-change form using the shared
    # `model_service_account` fixture instead of the old HTTP-prefixed one.
    with RoleBinding(
        client=admin_client,
        namespace=model_service_account.namespace,
        name=f"{Protocols.HTTP}-{model_service_account.name}-view",
        role_ref_name=http_view_role.name,
        role_ref_kind=http_view_role.kind,
        subjects_kind=model_service_account.kind,
        subjects_name=model_service_account.name,
    ) as rb:
        yield rb


@pytest.fixture(scope="class")
def http_inference_token(model_service_account: ServiceAccount, http_role_binding: RoleBinding) -> str:
    """Mint a short-lived bearer token for the model ServiceAccount via `oc create token`.

    Requests `http_role_binding` so the SA already has view permissions when the
    token is used.  Returns the token with surrounding whitespace stripped
    (run_command returns a tuple; index 1 is stdout).
    """
    return run_command(
        command=shlex.split(f"oc create token -n {model_service_account.namespace} {model_service_account.name}")
    )[1].strip()


Expand Down Expand Up @@ -201,7 +199,7 @@ def http_s3_caikit_serverless_inference_service(
model_namespace: Namespace,
http_s3_caikit_tgis_serving_runtime: ServingRuntime,
s3_models_storage_uri: str,
http_model_service_account: ServiceAccount,
model_service_account: ServiceAccount,
) -> InferenceService:
with create_isvc(
client=admin_client,
Expand All @@ -211,7 +209,7 @@ def http_s3_caikit_serverless_inference_service(
storage_uri=s3_models_storage_uri,
model_format=http_s3_caikit_tgis_serving_runtime.instance.spec.supportedModelFormats[0].name,
deployment_mode=KServeDeploymentType.SERVERLESS,
model_service_account=http_model_service_account.name,
model_service_account=model_service_account.name,
enable_auth=True,
) as isvc:
yield isvc
Expand Down Expand Up @@ -301,3 +299,21 @@ def unprivileged_s3_caikit_serverless_inference_service(
model_service_account=unprivileged_model_service_account.name,
) as isvc:
yield isvc


@pytest.fixture(scope="class")
def http_s3_caikit_tgis_serving_runtime(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> ServingRuntime:
    """Class-scoped Caikit TGIS ServingRuntime created from its template.

    The runtime is single-model (multi_model=False) with HTTP enabled and
    gRPC disabled; it is deleted again when the class finishes.
    """
    runtime_kwargs = {
        "client": admin_client,
        "name": f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}",
        "namespace": model_namespace.name,
        "template_name": RuntimeTemplates.CAIKIT_TGIS_SERVING,
        "multi_model": False,
        "enable_http": True,
        "enable_grpc": False,
    }
    with ServingRuntimeFromTemplate(**runtime_kwargs) as model_runtime:
        yield model_runtime
78 changes: 50 additions & 28 deletions tests/model_serving/model_server/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.cluster_service_version import ClusterServiceVersion
from ocp_resources.inference_service import InferenceService
from ocp_resources.namespace import Namespace
from ocp_resources.secret import Secret
from ocp_resources.service_account import ServiceAccount
from ocp_resources.serving_runtime import ServingRuntime

from utilities.constants import Protocols, ModelInferenceRuntime, RuntimeTemplates
from tests.model_serving.model_server.utils import create_isvc
from utilities.infra import s3_endpoint_secret
from utilities.serving_runtime import ServingRuntimeFromTemplate

Expand Down Expand Up @@ -57,50 +58,71 @@ def models_endpoint_s3_secret(

# HTTP model serving
@pytest.fixture(scope="class")
def model_service_account(admin_client: DynamicClient, models_endpoint_s3_secret: Secret) -> ServiceAccount:
    """Class-scoped ServiceAccount referencing the S3 endpoint Secret.

    Created in the Secret's namespace so inference services can mount the
    bucket credentials.  Yields the live SA; it is deleted on teardown.
    """
    # NOTE(review): scraped diff interleaved old (`http_model_service_account`)
    # and new lines; this is the renamed, protocol-agnostic post-change form.
    with ServiceAccount(
        client=admin_client,
        namespace=models_endpoint_s3_secret.namespace,
        name="models-bucket-sa",
        secrets=[{"name": models_endpoint_s3_secret.name}],
    ) as sa:
        yield sa


@pytest.fixture(scope="class")
def http_s3_caikit_tgis_serving_runtime(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> ServingRuntime:
    """Class-scoped Caikit TGIS ServingRuntime created from template.

    HTTP is enabled and gRPC disabled; single-model serving (multi_model=False).
    `request` is accepted for fixture-signature parity but not read here.
    Yields the runtime; it is removed when the class scope ends.
    """
    with ServingRuntimeFromTemplate(
        client=admin_client,
        name=f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_TGIS_RUNTIME}",
        namespace=model_namespace.name,
        template_name=RuntimeTemplates.CAIKIT_TGIS_SERVING,
        multi_model=False,  # one model per runtime instance
        enable_http=True,
        enable_grpc=False,
    ) as model_runtime:
        yield model_runtime


@pytest.fixture(scope="class")
def serving_runtime_from_template(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
) -> Generator[ServingRuntime, Any, Any]:
    """Generic ServingRuntime fixture driven by indirect parametrization.

    Required request.param keys: "name", "template-name", "multi-model".
    Optional keys "enable-http" / "enable-grpc" are forwarded only when present,
    so the template defaults apply otherwise.
    """
    runtime_kwargs = {
        "client": admin_client,
        "name": request.param["name"],
        "namespace": model_namespace.name,
        "template_name": request.param["template-name"],
        "multi_model": request.param["multi-model"],
    }

    # BUG FIX: the walrus must be parenthesized.  Without parentheses,
    # `x := get(...) is not None` binds the *comparison result* (a bool) to x,
    # so a parametrized `"enable-http": False` would be forwarded as True.
    if (enable_http := request.param.get("enable-http")) is not None:
        runtime_kwargs["enable_http"] = enable_http

    if (enable_grpc := request.param.get("enable-grpc")) is not None:
        runtime_kwargs["enable_grpc"] = enable_grpc

    with ServingRuntimeFromTemplate(**runtime_kwargs) as model_runtime:
        yield model_runtime


@pytest.fixture(scope="class")
def ci_s3_storage_uri(request: FixtureRequest, ci_s3_bucket_name: str) -> str:
    """Return the s3:// URI (trailing slash included) for the parametrized model directory."""
    model_dir = request.param["model-dir"]
    return f"s3://{ci_s3_bucket_name}/{model_dir}/"


@pytest.fixture(scope="class")
def s3_models_inference_service(
    request: FixtureRequest,
    admin_client: DynamicClient,
    model_namespace: Namespace,
    serving_runtime_from_template: ServingRuntime,
    s3_models_storage_uri: str,
    model_service_account: ServiceAccount,
) -> InferenceService:
    """Generic S3-backed InferenceService driven by indirect parametrization.

    Required request.param keys: "name", "deployment-mode".  Optional
    "enable-auth" toggles token auth on the ISVC (defaults to False).
    The model format is taken from the runtime's first supportedModelFormats
    entry.
    """
    isvc_kwargs = {
        "client": admin_client,
        "name": request.param["name"],
        "namespace": model_namespace.name,
        "runtime": serving_runtime_from_template.name,
        "storage_uri": s3_models_storage_uri,
        "model_format": serving_runtime_from_template.instance.spec.supportedModelFormats[0].name,
        "model_service_account": model_service_account.name,
        "deployment_mode": request.param["deployment-mode"],
    }

    enable_auth = False

    if hasattr(request, "param"):
        # BUG FIX: .get() without a default returns None when "enable-auth"
        # is absent, which would pass enable_auth=None to create_isvc instead
        # of a boolean; keep False as the explicit default.
        enable_auth = request.param.get("enable-auth", False)

    isvc_kwargs["enable_auth"] = enable_auth

    with create_isvc(**isvc_kwargs) as isvc:
        yield isvc
108 changes: 0 additions & 108 deletions tests/model_serving/model_server/raw_deployment/conftest.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@

from tests.model_serving.model_server.utils import verify_inference_response
from utilities.constants import (
KServeDeploymentType,
ModelName,
ModelStoragePath,
Protocols,
ModelInferenceRuntime,
RuntimeTemplates,
)

pytestmark = pytest.mark.usefixtures("valid_aws_config")
Expand All @@ -15,45 +17,49 @@
@pytest.mark.raw_deployment
@pytest.mark.jira("RHOAIENG-11749")
@pytest.mark.parametrize(
"model_namespace, s3_models_storage_uri, http_s3_caikit_standalone_raw_inference_service",
"model_namespace, s3_models_storage_uri, serving_runtime_from_template, s3_models_inference_service",
[
pytest.param(
{"name": "raw-deployment-caikit-bge"},
{"model-dir": ModelStoragePath.EMBEDDING_MODEL},
{"name": "bge-large-en-caikit"},
{
"name": f"{Protocols.HTTP}-{ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME}",
"template-name": RuntimeTemplates.CAIKIT_STANDALONE_SERVING,
"multi-model": False,
"enable-http": True,
},
{"name": "bge-large-en-caikit", "deployment-mode": KServeDeploymentType.RAW_DEPLOYMENT},
)
],
indirect=True,
)
class TestBgeLargeEnCaikit:
def test_caikit_bge_large_en_embedding_raw_internal_route(self, http_s3_caikit_standalone_raw_inference_service):
def test_caikit_bge_large_en_embedding_raw_internal_route(self, s3_models_inference_service):
"""Test Caikit bge-large-en embedding model inference using internal route"""
verify_inference_response(
inference_service=http_s3_caikit_standalone_raw_inference_service,
inference_service=s3_models_inference_service,
runtime=ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME,
inference_type="embedding",
protocol=Protocols.HTTP,
model_name=ModelName.CAIKIT_BGE_LARGE_EN,
use_default_query=True,
)

def test_caikit_bge_large_en_rerank_raw_internal_route(self, http_s3_caikit_standalone_raw_inference_service):
def test_caikit_bge_large_en_rerank_raw_internal_route(self, s3_models_inference_service):
"""Test Caikit bge-large-en rerank model inference using internal route"""
verify_inference_response(
inference_service=http_s3_caikit_standalone_raw_inference_service,
inference_service=s3_models_inference_service,
runtime=ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME,
inference_type="rerank",
protocol=Protocols.HTTP,
model_name=ModelName.CAIKIT_BGE_LARGE_EN,
use_default_query=True,
)

def test_caikit_bge_large_en_sentence_similarity_raw_internal_route(
self, http_s3_caikit_standalone_raw_inference_service
):
def test_caikit_bge_large_en_sentence_similarity_raw_internal_route(self, s3_models_inference_service):
"""Test Caikit bge-large-en sentence-similarity model inference using internal route"""
verify_inference_response(
inference_service=http_s3_caikit_standalone_raw_inference_service,
inference_service=s3_models_inference_service,
runtime=ModelInferenceRuntime.CAIKIT_STANDALONE_RUNTIME,
inference_type="sentence-similarity",
protocol=Protocols.HTTP,
Expand Down
Empty file.
23 changes: 23 additions & 0 deletions tests/model_serving/model_server/routes/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest
from _pytest.fixtures import FixtureRequest
from kubernetes.dynamic import DynamicClient
from ocp_resources.inference_service import InferenceService
from ocp_resources.resource import ResourceEditor


@pytest.fixture()
def patched_http_s3_caikit_raw_isvc_visibility_label(
    request: FixtureRequest,
    admin_client: DynamicClient,
    s3_models_inference_service: InferenceService,
) -> InferenceService:
    """Temporarily apply the `networking.kserve.io/visibility` label to the ISVC.

    The label value comes from request.param["visibility"]; ResourceEditor
    reverts the patch when the fixture's scope exits.
    """
    visibility_patch = {
        s3_models_inference_service: {
            "metadata": {
                "labels": {"networking.kserve.io/visibility": request.param["visibility"]},
            }
        }
    }
    with ResourceEditor(patches=visibility_patch):
        yield s3_models_inference_service
Loading

0 comments on commit c7bd165

Please sign in to comment.