Watsonx-serving: keep working on the scale test (openshift-psap#14)

kpouget committed Aug 25, 2023
2 parents 690108f + 61b2518 commit 931a4f6

Showing 15 changed files with 362 additions and 181 deletions.
@@ -0,0 +1,32 @@
# Auto-generated file, do not edit manually ...
# Toolbox generate command: repo generate_ansible_default_settings
# Source component: Watsonx_Serving.deploy_model

# the namespace in which the model should be deployed
# Mandatory value
watsonx_serving_deploy_model_namespace:

# the name to give to the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_name:

# the image of the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_image:

# the resource request of the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_resource_request:

# the name to give to the inference service
# Mandatory value
watsonx_serving_deploy_model_inference_service_name:

# [S3] URI where the model is stored
# Mandatory value
watsonx_serving_deploy_model_storage_uri:

# name of the service account to use for running the Pod
# Mandatory value
watsonx_serving_deploy_model_sa_name:
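These defaults are normally filled in through the toolbox rather than edited by hand. As a hedged sketch (the exact entry point is an assumption, mirroring the from_config pattern used elsewhere in this commit), the role could be driven from the test configuration like this:

from common import run

# Hypothetical invocation, not part of this commit: from_config resolves the
# "watsonx_serving deploy_model" arguments from command_args.yml.j2, so the
# mandatory values above do not have to be passed manually.
run.run("./run_toolbox.py from_config watsonx_serving deploy_model")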

Empty file.
@@ -0,0 +1,3 @@
---
dependencies:
- role: check_deps
125 changes: 125 additions & 0 deletions roles/watsonx_serving/watsonx_serving_deploy_model/tasks/main.yml
@@ -0,0 +1,125 @@
---
- name: Create the src directory
  file:
    path: "{{ artifact_extra_logs_dir }}/src"
    state: directory
    mode: '0755'

- name: Create the artifacts directory
  file:
    path: "{{ artifact_extra_logs_dir }}/artifacts"
    state: directory
    mode: '0755'

# SMMR

- name: Delete the tracking timestamps
  command:
    oc delete cm -ltopsail.time-tracking -n {{ watsonx_serving_deploy_model_namespace }}

- name: Save timestamp
  shell: |
    NAME=start-deploy-model
    oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
    oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}

- name: Prepare the SMMR
  block:
  - name: Patch the SMMR
    command: |
      oc patch smmr/default \
        -n istio-system \
        --type=json \
        -p="[{'op': 'add', 'path': '/spec/members/-', 'value': \"{{ watsonx_serving_deploy_model_namespace }}\"}]"
    register: patch_smmr_cmd
    failed_when: false

  - name: Check that the namespace is already registered
    when: patch_smmr_cmd.rc != 0
    shell:
      set -o pipefail;
      oc get smmr/default -n istio-system -ojsonpath={.spec.members} | jq .[] -r
    register: smmr_members_cmd
    failed_when: watsonx_serving_deploy_model_namespace not in smmr_members_cmd.stdout_lines

  - name: Wait for the namespace to be registered
    shell:
      set -o pipefail;
      oc get smmr/default
        -n istio-system
        -ojsonpath={.status.configuredMembers}
        | jq '. | index("{{ watsonx_serving_deploy_model_namespace }}")'
    register: smmr_registered_namespace_cmd
    retries: 60
    delay: 10
    until: smmr_registered_namespace_cmd.stdout != "null"

  - name: Save timestamp
    shell: |
      NAME=smmr-registered-namespace
      oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
      oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}
  always:
  - name: Capture the SMMR resource
    shell:
      oc get smmr/default
        -n istio-system
        -oyaml
        > {{ artifact_extra_logs_dir }}/artifacts/istio-system_smmr-default.yaml

# Serving Runtime

- name: Prepare the ServingRuntime template
  template:
    src: "{{ serving_runtime_template }}"
    dest: "{{ artifact_extra_logs_dir }}/src/serving_runtime.yaml"
    mode: '0400'

- name: Create the ServingRuntime
  command:
    oc apply -f "{{ artifact_extra_logs_dir }}/src/serving_runtime.yaml"

# Inference Service

- name: Prepare the InferenceService template
  template:
    src: "{{ inference_service_template }}"
    dest: "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
    mode: '0400'

- name: Create the InferenceService
  command:
    oc apply -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"

- name: Wait for the InferenceService to be loaded, and capture its state
  block:
  - name: Wait for the InferenceService to be loaded
    shell:
      set -o pipefail;
      oc get -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
        -ojsonpath={.status.modelStatus.states.targetModelState}
    register: inference_service_state_cmd
    # wait up to 20 minutes (240 retries x 5s delay)
    retries: 240
    delay: 5
    until: inference_service_state_cmd.stdout == "Loaded"

  - name: Save timestamp
    shell: |
      NAME=inference-service-loaded
      oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
      oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}
  always:
  - name: Capture the state of the InferenceService resource
    shell:
      oc get -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
        -oyaml
        > {{ artifact_extra_logs_dir }}/artifacts/inference_service.yaml

- name: Save the timestamp configmaps
  shell:
    oc get cm -oyaml
      -ltopsail.time-tracking=yes
      -n {{ watsonx_serving_deploy_model_namespace }}
      > {{ artifact_extra_logs_dir }}/artifacts/time_tracking_cm.yaml
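The three topsail.time-tracking configmaps created above (start-deploy-model, smmr-registered-namespace, inference-service-loaded) make it possible to reconstruct how long each deployment phase took. A minimal post-processing sketch, assuming the time_tracking_cm.yaml artifact saved by the last task (the helper itself is not part of this commit):

import datetime
import yaml

def phase_durations(path="time_tracking_cm.yaml"):
    """Print the elapsed time between consecutive time-tracking configmaps."""
    with open(path) as f:
        cm_list = yaml.safe_load(f)  # an oc-generated v1/List of ConfigMaps

    # Map configmap name -> creationTimestamp (RFC 3339, e.g. 2023-08-25T10:00:00Z)
    stamps = {
        item["metadata"]["name"]: datetime.datetime.strptime(
            item["metadata"]["creationTimestamp"], "%Y-%m-%dT%H:%M:%SZ")
        for item in cm_list["items"]
    }

    order = ["start-deploy-model", "smmr-registered-namespace", "inference-service-loaded"]
    for first, second in zip(order, order[1:]):
        print(f"{first} -> {second}: {stamps[second] - stamps[first]}")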
@@ -0,0 +1,17 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    serving.knative.openshift.io/enablePassthrough: "true"
    sidecar.istio.io/inject: "true"
    sidecar.istio.io/rewriteAppHTTPProbers: "true"
  name: {{ watsonx_serving_deploy_model_inference_service_name }}
  namespace: {{ watsonx_serving_deploy_model_namespace }}
spec:
  predictor:
    serviceAccountName: {{ watsonx_serving_deploy_model_sa_name }}
    model:
      modelFormat:
        name: caikit
      runtime: {{ watsonx_serving_deploy_model_serving_runtime_name }}
      storageUri: {{ watsonx_serving_deploy_model_storage_uri }}
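The playbook above waits on .status.modelStatus.states.targetModelState until the model reports Loaded. Outside of Ansible, an equivalent poller might look like this sketch (the function name and defaults are illustrative; it assumes a logged-in oc client):

import subprocess
import time

def wait_for_model_loaded(name, namespace, retries=240, delay=5):
    """Poll the InferenceService until its target model reports 'Loaded'."""
    jsonpath = "-ojsonpath={.status.modelStatus.states.targetModelState}"
    for _ in range(retries):
        state = subprocess.run(
            ["oc", "get", f"inferenceservice/{name}", "-n", namespace, jsonpath],
            capture_output=True, text=True).stdout.strip()
        if state == "Loaded":
            return
        time.sleep(delay)
    raise TimeoutError(f"InferenceService {name} did not reach the Loaded state")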
@@ -0,0 +1,25 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: {{ watsonx_serving_deploy_model_serving_runtime_name }}
  namespace: {{ watsonx_serving_deploy_model_namespace }}
spec:
  containers:
  - env:
    - name: RUNTIME_LOCAL_MODELS_DIR
      value: /mnt/models
    image: {{ watsonx_serving_deploy_model_serving_runtime_image }}
    name: kserve-container
    ports:
    # Note: KServe only allows a single port; this is the gRPC port. Subject to change in the future.
    - containerPort: 8085
      name: h2c
      protocol: TCP
    resources:
      requests:
        {{ watsonx_serving_deploy_model_serving_runtime_resource_request }}
  multiModel: false
  supportedModelFormats:
  # Note: this currently *only* supports caikit-format models
  - autoSelect: true
    name: caikit
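Since the resource_request setting is a mapping, Jinja renders it as a dict-style flow mapping under requests:, which YAML accepts. A quick way to sanity-check the rendered manifest outside Ansible (a sketch assuming the jinja2 and PyYAML packages, with an illustrative namespace):

import jinja2
import yaml

with open("templates/serving_runtime.yaml.j2") as f:
    template = jinja2.Template(f.read())

rendered = template.render(
    watsonx_serving_deploy_model_serving_runtime_name="caikit-runtime",
    watsonx_serving_deploy_model_namespace="watsonx-scale-test",  # illustrative
    watsonx_serving_deploy_model_serving_runtime_image="quay.io/opendatahub/caikit-tgis-serving:stable",
    watsonx_serving_deploy_model_serving_runtime_resource_request={"cpu": 4, "memory": "8Gi"},
)

# safe_load raises if the rendered document is not valid YAML
manifest = yaml.safe_load(rendered)
print(manifest["spec"]["containers"][0]["resources"]["requests"])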
@@ -0,0 +1,2 @@
serving_runtime_template: templates/serving_runtime.yaml.j2
inference_service_template: templates/inference_service.yaml.j2
31 changes: 30 additions & 1 deletion testing/common/prepare_user_pods.py
@@ -1,5 +1,8 @@
import os
import logging
import pathlib
import base64
import yaml

from common import env, config, run, sizing

@@ -135,4 +138,30 @@ def prepare_user_pods(namespace):
    secret_name = config.ci_artifacts.get_config("secrets.dir.name")
    secret_env_key = config.ci_artifacts.get_config("secrets.dir.env_key")

    run.run(f"oc create secret generic {secret_name} --from-file=$(echo ${secret_env_key}/* | tr ' ' ,) -n {namespace} --dry-run=client -oyaml | oc apply -f-")

    # Build the Secret manifest with a client-side dry-run; the appended /dev/null
    # entry produces a dummy "null" key that is dropped below.
    secret_yaml_str = run.run(f"oc create secret generic {secret_name} --from-file=$(find ${secret_env_key}/* -maxdepth 1 -not -type d | tr '\\n' ,)/dev/null -n {namespace} --dry-run=client -oyaml", capture_stdout=True).stdout

    # Inject the AWS credentials, base64-encoded as required by the Secret 'data' field
    with open(pathlib.Path(os.environ[secret_env_key]) / ".awscred", "rb") as f:
        file_content = f.read()
    base64_secret = base64.b64encode(file_content).decode("ascii")

    secret_yaml = yaml.safe_load(secret_yaml_str)
    secret_yaml["data"][".awscred"] = base64_secret
    del secret_yaml["data"]["null"]

    save_and_create("secret.yaml", yaml.dump(secret_yaml), namespace, is_secret=True)

    run.run(f"oc describe secret {secret_name} -n {namespace} > {env.ARTIFACT_DIR}/secret_{secret_name}.descr")


def save_and_create(name, content, namespace, is_secret=False):
    file_path = pathlib.Path("/tmp") / name if is_secret \
        else env.ARTIFACT_DIR / "src" / name

    try:
        with open(file_path, "w") as f:
            print(content, file=f)

        with open(file_path) as f:
            run.run(f"oc apply -f- -n {namespace}", stdin_file=f)
    finally:
        if is_secret:
            file_path.unlink(missing_ok=True)
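For reference, a usage sketch (illustrative manifest and namespace): non-secret manifests are written under ARTIFACT_DIR/src and kept as artifacts, while secrets go to /tmp and are removed as soon as they are applied.

# Illustrative only: apply a trivial ConfigMap manifest through save_and_create.
manifest_str = """
apiVersion: v1
kind: ConfigMap
metadata:
  name: topsail-demo
"""

save_and_create("demo_cm.yaml", manifest_str, "my-namespace")  # kept in ARTIFACT_DIR/src
save_and_create("demo_secret.yaml", manifest_str, "my-namespace", is_secret=True)  # written to /tmp, then deleted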
15 changes: 15 additions & 0 deletions testing/watsonx-serving/command_args.yml.j2
@@ -128,3 +128,18 @@ local_ci run_multi/scale:

  git_pull: null #refs/pull/716/head
  capture_prom_db: "{{ tests.capture_prom }}"

#
# Test WatsonX Serving scale: run one
#

watsonx_serving deploy_model:
  namespace: {{ tests.scale.namespace }}
  sa_name: {{ watsonx_serving.sa_name }}

  serving_runtime_name: {{ watsonx_serving.serving_runtime.name }}
  serving_runtime_image: {{ watsonx_serving.serving_runtime.image }}
  serving_runtime_resource_request: {{ watsonx_serving.serving_runtime.resource_request }}

  inference_service_name: {{ watsonx_serving.inference_service.name }}
  storage_uri: {{ watsonx_serving.inference_service.storage_uri }}
12 changes: 11 additions & 1 deletion testing/watsonx-serving/config.yaml
@@ -34,6 +34,7 @@ secrets:
  s3_ldap_password_file: s3_ldap.passwords
  keep_cluster_password_file: get_cluster.password
  brew_registry_redhat_io_token_file: brew.registry.redhat.io.token
  aws_cred: .awscred
clusters:
  create:
    type: single # can be: single, ocp, managed
@@ -126,12 +127,21 @@ base_image:
  bucket_name: watsonx-serving-scale-test-bucket

watsonx_serving:
  sa_name: sa
  serving_runtime:
    name: caikit-runtime
    image: quay.io/opendatahub/caikit-tgis-serving:stable
    resource_request:
      cpu: 4
      memory: 8Gi

  storage_config:
    name: storage-config
    region: us-east-1
    endpoint: s3.amazonaws.com
    use_https: 1
  inference_service:
    name: caikit-isvc
    storage_uri: "s3://psap-watsonx-models/flan-t5-small-caikit/"
tests:
  mode: scale
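The new keys are read back through the config.ci_artifacts.get_config accessor used throughout the test code; for instance (a sketch of the dotted-path lookups, matching the get_config calls visible elsewhere in this commit):

from common import config

sa_name = config.ci_artifacts.get_config("watsonx_serving.sa_name")
runtime_image = config.ci_artifacts.get_config("watsonx_serving.serving_runtime.image")
storage_uri = config.ci_artifacts.get_config("watsonx_serving.inference_service.storage_uri")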

38 changes: 0 additions & 38 deletions testing/watsonx-serving/poc/deploy-minio.sh

This file was deleted.

3 changes: 1 addition & 2 deletions testing/watsonx-serving/poc/prepare.sh
@@ -91,11 +91,10 @@ git checkout FETCH_HEAD

git show --no-patch | tee $ARTIFACT_DIR/caikit-tgis-serving.commit

cp ${TOPSAIL_DIR}/testing/watsonx-serving/poc/{kserve-install.sh,deploy-minio.sh} \
cp ${TOPSAIL_DIR}/testing/watsonx-serving/poc/kserve-install.sh \
   scripts/install/

cp ${TOPSAIL_DIR}/testing/watsonx-serving/poc/deploy-model.sh \
   scripts/test/

bash -ex scripts/install/kserve-install.sh
bash -ex scripts/install/deploy-minio.sh
14 changes: 11 additions & 3 deletions testing/watsonx-serving/prepare_watsonx_serving.py
@@ -42,6 +42,14 @@ def prepare():
    if config.ci_artifacts.get_config("clusters.sutest.compute.dedicated"):
        # this is required to properly create the namespace used to preload the image
        test_namespace = config.ci_artifacts.get_config("tests.scale.namespace")
        test_scale.prepare_user_namespace(test_namespace, register_namespace_smmr=False)

        run.run("./run_toolbox.py from_config cluster preload_image --prefix sutest --suffix watsonx-serving-runtime")
        test_scale.prepare_user_namespace(test_namespace)

        RETRIES = 3
        for i in range(RETRIES):
            try:
                run.run("./run_toolbox.py from_config cluster preload_image --prefix sutest --suffix watsonx-serving-runtime")
                break
            except Exception:
                logging.warning(f"Watsonx Serving Runtime image preloading try #{i + 1}/{RETRIES} failed :/")
                if i == RETRIES - 1:
                    raise