generated from RedHatQE/python-template-repository
-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
317 additions
and
147 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 0 additions & 27 deletions
27
tests/model_serving/model_server/multi_node/test_multi_node.py
This file was deleted.
Oops, something went wrong.
56 changes: 56 additions & 0 deletions
56
tests/model_serving/model_server/multi_node/test_nvidia_multi_node.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import pytest | ||
|
||
from tests.model_serving.model_server.multi_node.utils import verify_nvidia_gpu_status, verify_ray_status | ||
from tests.model_serving.model_server.utils import verify_inference_response | ||
from utilities.constants import ModelInferenceRuntime, Protocols, StorageClassName | ||
|
||
# Skip every test in this module unless the cluster has GPU nodes and an NFS storage class.
pytestmark = pytest.mark.usefixtures("skip_if_no_gpu_nodes", "skip_if_no_nfs_storage_class")
|
||
|
||
@pytest.mark.parametrize(
    "model_namespace, models_bucket_downloaded_model_data, model_pvc, "
    "serving_runtime_from_template, multi_node_inference_service",
    [
        pytest.param(
            {"name": "gpu-multi-node"},
            {"model-dir": "granite-8b-code-base"},
            {
                # Multi-node serving requires a PVC all predictor pods can mount — RWX on NFS.
                "access-modes": "ReadWriteMany",
                "storage-class-name": StorageClassName.NFS,
                "pvc-size": "40Gi",
            },
            {
                "name": "granite-runtime",
                "template-name": "vllm-multinode-runtime-template",
                "multi-model": False,
            },
            {"name": "multi-vllm"},
        )
    ],
    indirect=True,
)
class TestMultiNode:
    """Multi-node vLLM model-serving tests on NVIDIA GPU nodes.

    All fixtures are parametrized indirectly above: a namespace, a model
    downloaded from a bucket onto an RWX NFS PVC, a serving runtime created
    from the vLLM multinode template, and a multi-node InferenceService.
    """

    def test_multi_node_ray_status(self, multi_node_predictor_pods_scope_class):
        """Test multi node ray status"""
        verify_ray_status(pods=multi_node_predictor_pods_scope_class)

    def test_multi_node_nvidia_gpu_status(self, multi_node_predictor_pods_scope_class):
        """Test multi node nvidia gpu status"""
        # Fixed copy-pasted docstring ("ray status"); this checks GPU memory usage
        # on the first (head) predictor pod only.
        verify_nvidia_gpu_status(pod=multi_node_predictor_pods_scope_class[0])

    def test_multi_node_default_config(self, serving_runtime_from_template, multi_node_predictor_pods_scope_class):
        """Test multi node inference service with default config"""
        runtime_worker_spec = serving_runtime_from_template.instance.spec.workerSpec

        if runtime_worker_spec.tensorParallelSize != 1 or runtime_worker_spec.pipelineParallelSize != 2:
            # Fixed: original message was missing the f-string prefix and the closing
            # brace, so the worker spec was never interpolated into the failure output.
            pytest.fail(f"Multinode runtime default worker spec is not as expected, {runtime_worker_spec}")

    def test_multi_node_basic_inference(self, multi_node_inference_service):
        """Test multi node basic inference"""
        verify_inference_response(
            inference_service=multi_node_inference_service,
            runtime=ModelInferenceRuntime.VLLM_RUNTIME,
            inference_type="completions",
            protocol=Protocols.HTTP,
            use_default_query=True,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import re | ||
import shlex | ||
from typing import Dict, List | ||
|
||
from ocp_resources.pod import Pod | ||
|
||
|
||
def verify_ray_status(pods: List[Pod]) -> None:
    """Run ``ray status`` in every predictor pod and assert the cluster is healthy.

    Per pod, the command output is parsed for the Active / Pending / GPU
    sections; a pod contributes a failure entry when the number of active
    nodes differs from the pod count, when any node is pending, or when the
    used/total GPU counts disagree.  A single assert at the end reports all
    collected failures (with the last command output for context).
    """
    status_cmd = shlex.split("ray status")
    failures: Dict[str, List[str]] = {}
    output = None

    for pod in pods:
        output = pod.execute(command=status_cmd)
        section_match = re.search(
            r"Active:\n(?P<active>.*)\nPending:\n(?P<pending>.*)\nRecent.*CPU\n(?P<gpu>.*)GPU",
            output,
            re.IGNORECASE | re.DOTALL,
        )
        if not section_match:
            continue

        sections = section_match.groupdict()
        pod_failures: List[str] = []

        # Every pod in the cluster should appear as an active ray node.
        if len(sections["active"].split("\n")) != len(pods):
            pod_failures.append("Wrong number of active nodes")

        if "no pending nodes" not in sections["pending"]:
            pod_failures.append("Some nodes are pending")

        # GPU section is formatted as "<used>/<total>"; both sides must match.
        gpu_counts = sections["gpu"].strip().split("/")
        if gpu_counts and gpu_counts[0] != gpu_counts[1]:
            pod_failures.append("Wrong number of GPUs")

        if pod_failures:
            failures[pod.name] = pod_failures

    assert not failures, f"Failure in ray status check: {failures}, {output}"
|
||
|
||
def verify_nvidia_gpu_status(pod: Pod) -> None:
    """Assert that the given pod's GPU is actually in use.

    Runs ``nvidia-smi`` to query used GPU memory and raises ``ValueError``
    when no memory figure can be parsed from the output, or when the parsed
    figure is 0 (i.e. the model did not load onto the GPU).
    """
    res = pod.execute(command=shlex.split("nvidia-smi --query-gpu=memory.used --format=csv"))
    mem_regex = re.search(r"(\d+)", res)

    if not mem_regex:
        raise ValueError(f"Could not find memory usage in response, {res}")

    # Cleaned up: the original `elif mem_regex and ...` re-tested a condition
    # already guaranteed truthy after the guard above.
    if int(mem_regex.group(1)) == 0:
        raise ValueError(f"GPU memory is not used, {res}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.