From 7d49d0ba4d5ead016be9dd74553343a560782889 Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Wed, 7 Aug 2024 08:01:31 +0200 Subject: [PATCH] Make memory request tunable for pvc download pod (#1695) Signed-off-by: lugi0 --- .../Resources/CLI/ModelServing/llm.resource | 15 ++++++++++----- .../Files/llm/download_model_in_pvc.yaml | 2 +- .../ODH/ODHDashboard/ODHModelServing.resource | 3 +++ ...06__model_serving_ovms_on_kserve_cli.robot | 19 ++++++++++++------- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource index 5490dcd46..46d34c6ca 100644 --- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource +++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource @@ -68,12 +68,14 @@ Set Project And Runtime [Arguments] ${namespace} ${enable_metrics}=${FALSE} ${runtime}=caikit-tgis-runtime ${protocol}=grpc ... ${access_key_id}=${S3.AWS_ACCESS_KEY_ID} ${access_key}=${S3.AWS_SECRET_ACCESS_KEY} ... ${endpoint}=${MODELS_BUCKET.ENDPOINT} ${verify_ssl}=${TRUE} - ... ${download_in_pvc}=${FALSE} ${storage_size}=70Gi ${model_name}=${NONE} ${model_path}=${model_name} ${download_timeout}=600s + ... ${download_in_pvc}=${FALSE} ${storage_size}=70Gi ${model_name}=${NONE} + ... ${model_path}=${model_name} ${download_timeout}=600s ${memory_request}=40Gi Set Up Test OpenShift Project test_ns=${namespace} IF ${download_in_pvc} - Create PVC And Download Model From S3 model_name=${model_name} namespace=${namespace} bucket_name=${MODELS_BUCKET.NAME} - ... use_https=${USE_BUCKET_HTTPS} download_timeout=${download_timeout} - ... storage_size=${storage_size} model_path=${model_path} + Create PVC And Download Model From S3 model_name=${model_name} namespace=${namespace} + ... bucket_name=${MODELS_BUCKET.NAME} use_https=${USE_BUCKET_HTTPS} + ... download_timeout=${download_timeout} storage_size=${storage_size} model_path=${model_path} + ... memory_request=${memory_request} ELSE Create Secret For S3-Like Buckets endpoint=${endpoint} ... region=${MODELS_BUCKET.REGION} namespace=${namespace} @@ -734,7 +736,7 @@ Clean Up Test Project Create PVC And Download Model From S3 [Arguments] ${model_name} ${bucket_name} ... ${use_https} ${namespace} ${storage_size} - ... ${model_path} ${download_timeout}=500s + ... ${model_path} ${memory_request} ${download_timeout}=500s Set Log Level NONE Set Test Variable ${model_name} Set Test Variable ${bucket_name} @@ -743,10 +745,13 @@ Create PVC And Download Model From S3 Set Test Variable ${namespace} Set Test Variable ${storage_size} Set Test Variable ${model_path} + Set Test Variable ${memory_request} Set Log Level INFO Create File From Template ${DOWNLOAD_PVC_FILEPATH} ${DOWNLOAD_PVC_FILLED_FILEPATH} ${rc} ${out}= Run And Return Rc And Output oc -n ${namespace} apply -f ${DOWNLOAD_PVC_FILLED_FILEPATH} Should Be Equal As Integers ${rc} ${0} + # No reason to keep this file around once it's applied + Remove File ${DOWNLOAD_PVC_FILLED_FILEPATH} Run Keyword And Continue On Failure Wait For Pods To Be Ready label_selector=name=download-${model_name} namespace=${namespace} Wait For Pods To Succeed label_selector=name=download-${model_name} namespace=${namespace} ... timeout=${download_timeout} diff --git a/ods_ci/tests/Resources/Files/llm/download_model_in_pvc.yaml b/ods_ci/tests/Resources/Files/llm/download_model_in_pvc.yaml index 7bab4db7d..c102acae7 100644 --- a/ods_ci/tests/Resources/Files/llm/download_model_in_pvc.yaml +++ b/ods_ci/tests/Resources/Files/llm/download_model_in_pvc.yaml @@ -33,7 +33,7 @@ spec: containers: - resources: requests: - memory: 40Gi + memory: ${memory_request} name: download-model imagePullPolicy: IfNotPresent image: quay.io/modh/kserve-storage-initializer@sha256:330af2d517b17dbf0cab31beba13cdbe7d6f4b9457114dea8f8485a011e3b138 diff --git a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource index 224fc61ad..42136f03c 100644 --- a/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource +++ b/ods_ci/tests/Resources/Page/ODH/ODHDashboard/ODHModelServing.resource @@ -312,6 +312,9 @@ Get Model Inference END ${inference_output}= Run ${curl_cmd} + # Passes if file does not exist, cleans up otherwise. No point keeping these after executing the curl call. + Remove File openshift_ca_istio_knative.crt + Remove File openshift_ca.crt RETURN ${inference_output} Verify Model Inference diff --git a/ods_ci/tests/Tests/1000__model_serving/1006__model_serving_ovms_on_kserve_cli.robot b/ods_ci/tests/Tests/1000__model_serving/1006__model_serving_ovms_on_kserve_cli.robot index f61a2678e..b4eca9ad3 100644 --- a/ods_ci/tests/Tests/1000__model_serving/1006__model_serving_ovms_on_kserve_cli.robot +++ b/ods_ci/tests/Tests/1000__model_serving/1006__model_serving_ovms_on_kserve_cli.robot @@ -10,7 +10,7 @@ Test Tags KServe-OVMS *** Variables *** ${TEST_NS}= ovmsmodel -${RUNTIME_NAME}= ovms-runtime +${RUNTIME_NAME}= ovms-runtime ${USE_PVC}= ${TRUE} ${DOWNLOAD_IN_PVC}= ${TRUE} ${USE_GPU}= ${FALSE} @@ -36,8 +36,8 @@ Verify User Can Serve And Query ovms Model ... kserve_mode=${KSERVE_MODE} Set Project And Runtime runtime=${RUNTIME_NAME} protocol=${PROTOCOL} namespace=${test_namespace} ... download_in_pvc=${DOWNLOAD_IN_PVC} model_name=${model_name} - ... storage_size=5Gi - ${requests}= Create Dictionary memory=5Gi + ... storage_size=100Mi memory_request=100Mi + ${requests}= Create Dictionary memory=1Gi Compile Inference Service YAML isvc_name=${model_name} ... sa_name=${EMPTY} ... model_storage_uri=${storage_uri} @@ -45,16 +45,21 @@ Verify User Can Serve And Query ovms Model ... limits_dict=${limits} requests_dict=${requests} kserve_mode=${KSERVE_MODE} Deploy Model Via CLI isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH} ... namespace=${test_namespace} + # File is not needed anymore after applying + Remove File ${INFERENCESERVICE_FILLED_FILEPATH} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name} ... namespace=${test_namespace} - ${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name} - ${service_port}= Extract Service Port service_name=${model_name}-predictor protocol=TCP namespace=${test_namespace} + ${pod_name}= Get Pod Name namespace=${test_namespace} + ... label_selector=serving.kserve.io/inferenceservice=${model_name} + ${service_port}= Extract Service Port service_name=${model_name}-predictor protocol=TCP + ... namespace=${test_namespace} Run Keyword If "${KSERVE_MODE}"=="RawDeployment" ... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name} local_port=${service_port} ... remote_port=${service_port} process_alias=ovms-process Verify Model Inference With Retries model_name=${model_name} inference_input=${INFERENCE_INPUT} - ... expected_inference_output=${EXPECTED_INFERENCE_OUTPUT} project_title=${test_namespace} deployment_mode="Cli" kserve_mode=${KSERVE_MODE} - ... service_port=${service_port} end_point=/v2/models/${model_name}/infer retries=10 + ... expected_inference_output=${EXPECTED_INFERENCE_OUTPUT} project_title=${test_namespace} + ... deployment_mode="Cli" kserve_mode=${KSERVE_MODE} service_port=${service_port} + ... end_point=/v2/models/${model_name}/infer retries=10 [Teardown] Run Keywords ... Clean Up Test Project test_ns=${test_namespace}