From 850e53f99db100aae7e202f136803fd9dd0ee7d6 Mon Sep 17 00:00:00 2001
From: tarukumar <93319437+tarukumar@users.noreply.github.com>
Date: Mon, 5 Aug 2024 17:52:26 +0530
Subject: [PATCH] Add support for rhelai granite-7b-redhat-lab (#1690)

Signed-off-by: Tarun Kumar
---
 .../1007__model_serving_llm_models.robot      | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/1007__model_serving_llm_models.robot b/ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/1007__model_serving_llm_models.robot
index 025fbe4e0..e10f6bd98 100644
--- a/ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/1007__model_serving_llm_models.robot
+++ b/ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/1007__model_serving_llm_models.robot
@@ -1180,6 +1180,66 @@ Verify User Can Serve And Query Granite-7b Speculative Decoding Using Draft Mode
     ...    AND
     ...    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"    Terminate Process    llm-query-process    kill=true
 
+Verify User Can Serve And Query RHEL AI Granite-7b-redhat-lab Model
+    [Documentation]    Basic test for preparing, deploying, and querying an LLM model
+    ...    on KServe using the vLLM runtime
+    [Tags]    RHOAIENG-10155    VLLM
+    Setup Test Variables    model_name=granite-7b-lab    use_pvc=${USE_PVC}    use_gpu=${USE_GPU}
+    ...    kserve_mode=${KSERVE_MODE}    model_path=granite-7b-redhat-lab
+    Set Project And Runtime    runtime=${RUNTIME_NAME}    namespace=${test_namespace}
+    ...    download_in_pvc=${DOWNLOAD_IN_PVC}    model_name=${model_name}    protocol=${PROTOCOL}
+    ...    storage_size=40Gi    model_path=${model_path}
+    ${requests}=    Create Dictionary    memory=40Gi
+    IF    "${OVERLAY}" != "${EMPTY}"
+        ${overlays}=    Create List    ${OVERLAY}
+    ELSE
+        ${overlays}=    Create List
+    END
+    Compile Inference Service YAML    isvc_name=${model_name}
+    ...    sa_name=${EMPTY}
+    ...    model_storage_uri=${storage_uri}
+    ...    model_format=${MODEL_FORMAT}    serving_runtime=${RUNTIME_NAME}
+    ...    limits_dict=${limits}    requests_dict=${requests}    kserve_mode=${KSERVE_MODE}
+    ...    overlays=${overlays}
+    Deploy Model Via CLI    isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH}
+    ...    namespace=${test_namespace}
+    Wait For Model KServe Deployment To Be Ready    label_selector=serving.kserve.io/inferenceservice=${model_name}
+    ...    namespace=${test_namespace}    runtime=${RUNTIME_NAME}    timeout=900s
+    ${pod_name}=    Get Pod Name    namespace=${test_namespace}    label_selector=serving.kserve.io/inferenceservice=${model_name}
+    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"
+    ...    Start Port-forwarding    namespace=${test_namespace}    pod_name=${pod_name}
+    IF    "${RUNTIME_NAME}" == "tgis-runtime" or "${KSERVE_MODE}" == "RawDeployment"
+        Set Test Variable    ${RUNTIME_NAME}    tgis-runtime
+        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
+        ...    inference_type=all-tokens    n_times=1    protocol=${PROTOCOL}
+        ...    namespace=${test_namespace}    query_idx=0    validate_response=${FALSE}    # temp
+        ...    port_forwarding=${use_port_forwarding}
+        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
+        ...    inference_type=streaming    n_times=1    protocol=${PROTOCOL}
+        ...    namespace=${test_namespace}    query_idx=0    validate_response=${FALSE}
+        ...    port_forwarding=${use_port_forwarding}
+        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
+        ...    inference_type=model-info    n_times=0
+        ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+        ...    port_forwarding=${use_port_forwarding}
+        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}
+        ...    inference_type=tokenize    n_times=0    query_idx=0
+        ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+        ...    port_forwarding=${use_port_forwarding}
+    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
+        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
+        ...    inference_type=chat-completions    n_times=1    query_idx=12
+        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
+        ...    inference_type=completions    n_times=1    query_idx=11
+        ...    namespace=${test_namespace}    string_check_only=${TRUE}
+    END
+    [Teardown]    Run Keywords
+    ...    Clean Up Test Project    test_ns=${test_namespace}
+    ...    isvc_names=${models_names}    wait_prj_deletion=${FALSE}
+    ...    kserve_mode=${KSERVE_MODE}
+    ...    AND
+    ...    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"    Terminate Process    llm-query-process    kill=true
 
 *** Keywords ***
 Suite Setup
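
Reviewer note (not part of the patch): below is a minimal sketch of how the new
test case could be selected and run through Robot Framework's public Python
entry point, robot.run. The tag and the variable names are taken from the diff
above; the variable values shown are illustrative assumptions, not defaults
defined by this patch or by ods-ci.

    # Illustrative sketch only. Selects the new test by its RHOAIENG-10155 tag
    # and supplies the global variables the test body reads (USE_PVC, USE_GPU,
    # KSERVE_MODE, RUNTIME_NAME, PROTOCOL, DOWNLOAD_IN_PVC, MODEL_FORMAT,
    # OVERLAY). Values are example choices, not project defaults.
    from robot import run

    run(
        "ods_ci/tests/Tests/1000__model_serving/1007__model_serving_llm/"
        "1007__model_serving_llm_models.robot",
        include=["RHOAIENG-10155"],        # tag set on the new test case
        variable=[
            "USE_PVC:True",
            "USE_GPU:True",
            "KSERVE_MODE:RawDeployment",   # or Serverless
            "RUNTIME_NAME:vllm-runtime",   # or tgis-runtime
            "PROTOCOL:grpc",
            "DOWNLOAD_IN_PVC:True",
            "MODEL_FORMAT:vLLM",
            "OVERLAY:",                    # empty -> no overlay list entries
        ],
    )

Choosing KSERVE_MODE and RUNTIME_NAME decides which branch of the new test's
IF/ELSE IF runs: tgis-runtime or RawDeployment exercises the all-tokens,
streaming, model-info, and tokenize queries over port-forwarding, while
vllm-runtime with Serverless exercises the chat-completions and completions
queries over HTTP.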