Watsonx-serving: keep working on the scale test (openshift-psap#14)

kpouget committed Aug 25, 2023
2 parents 690108f + 61b2518 commit 931a4f6

Showing 15 changed files with 362 additions and 181 deletions.
@@ -0,0 +1,32 @@
# Auto-generated file, do not edit manually ...
# Toolbox generate command: repo generate_ansible_default_settings
# Source component: Watsonx_Serving.deploy_model

# the namespace in which the model should be deployed
# Mandatory value
watsonx_serving_deploy_model_namespace:

# the name to give to the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_name:

# the image of the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_image:

# the resource request of the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_resource_request:

# the name to give to the inference service
# Mandatory value
watsonx_serving_deploy_model_inference_service_name:

# [S3] URI where the model is stored
# Mandatory value
watsonx_serving_deploy_model_storage_uri:

# name of the service account to use for running the Pod
# Mandatory value
watsonx_serving_deploy_model_sa_name:
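These defaults are normally filled in through the toolbox rather than edited by hand. As a hedged sketch (the exact entry point is an assumption, mirroring the from_config pattern used elsewhere in this commit), the role could be driven from the test configuration like this:

from common import run

# Hypothetical invocation, not part of this commit: from_config resolves the
# "watsonx_serving deploy_model" arguments from command_args.yml.j2, so the
# mandatory values above do not have to be passed manually.
run.run("./run_toolbox.py from_config watsonx_serving deploy_model")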

Empty file.
@@ -0,0 +1,3 @@
---
dependencies:
- role: check_deps
125 changes: 125 additions & 0 deletions roles/watsonx_serving/watsonx_serving_deploy_model/tasks/main.yml
@@ -0,0 +1,125 @@
---
- name: Create the src directory
  file:
    path: "{{ artifact_extra_logs_dir }}/src"
    state: directory
    mode: '0755'

- name: Create the artifacts directory
  file:
    path: "{{ artifact_extra_logs_dir }}/artifacts"
    state: directory
    mode: '0755'

# SMMR

- name: Delete the tracking timestamps
  command:
    oc delete cm -ltopsail.time-tracking -n {{ watsonx_serving_deploy_model_namespace }}

- name: Save timestamp
  shell: |
    NAME=start-deploy-model
    oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
    oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}

- name: Prepare the SMMR
  block:
  - name: Patch the SMMR
    command: |
      oc patch smmr/default \
        -n istio-system \
        --type=json \
        -p="[{'op': 'add', 'path': '/spec/members/-', 'value': \"{{ watsonx_serving_deploy_model_namespace }}\"}]"
    register: patch_smmr_cmd
    failed_when: false

  - name: Check that the namespace is already registered
    when: patch_smmr_cmd.rc != 0
    shell:
      set -o pipefail;
      oc get smmr/default -n istio-system -ojsonpath={.spec.members} | jq .[] -r
    register: smmr_members_cmd
    failed_when: watsonx_serving_deploy_model_namespace not in smmr_members_cmd.stdout_lines

  - name: Wait for the namespace to be registered
    shell:
      set -o pipefail;
      oc get smmr/default
        -n istio-system
        -ojsonpath={.status.configuredMembers}
        | jq '. | index("{{ watsonx_serving_deploy_model_namespace }}")'
    register: smmr_registered_namespace_cmd
    retries: 60
    delay: 10
    until: smmr_registered_namespace_cmd.stdout != "null"

  - name: Save timestamp
    shell: |
      NAME=smmr-registered-namespace
      oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
      oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}
  always:
  - name: Capture the SMMR resource
    shell:
      oc get smmr/default
        -n istio-system
        -oyaml
        > {{ artifact_extra_logs_dir }}/artifacts/istio-system_smmr-default.yaml

# Serving Runtime

- name: Prepare the ServingRuntime template
  template:
    src: "{{ serving_runtime_template }}"
    dest: "{{ artifact_extra_logs_dir }}/src/serving_runtime.yaml"
    mode: '0400'

- name: Create the ServingRuntime
  command:
    oc apply -f "{{ artifact_extra_logs_dir }}/src/serving_runtime.yaml"

# Inference Service

- name: Prepare the InferenceService template
  template:
    src: "{{ inference_service_template }}"
    dest: "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
    mode: '0400'

- name: Create the InferenceService
  command:
    oc apply -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"

- name: Wait for the InferenceService to be loaded, and capture its state
  block:
  - name: Wait for the InferenceService to be loaded
    shell:
      set -o pipefail;
      oc get -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
        -ojsonpath={.status.modelStatus.states.targetModelState}
    register: inference_service_state_cmd
    # wait up to 20 minutes (240 retries x 5s delay)
    retries: 240
    delay: 5
    until: inference_service_state_cmd.stdout == "Loaded"

  - name: Save timestamp
    shell: |
      NAME=inference-service-loaded
      oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
      oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}
  always:
  - name: Capture the state of the InferenceService resource
    shell:
      oc get -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
        -oyaml
        > {{ artifact_extra_logs_dir }}/artifacts/inference_service.yaml

- name: Save the timestamp configmaps
  shell:
    oc get cm -oyaml
      -ltopsail.time-tracking=yes
      -n {{ watsonx_serving_deploy_model_namespace }}
      > {{ artifact_extra_logs_dir }}/artifacts/time_tracking_cm.yaml
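The three topsail.time-tracking configmaps created above (start-deploy-model, smmr-registered-namespace, inference-service-loaded) make it possible to reconstruct how long each deployment phase took. A minimal post-processing sketch, assuming the time_tracking_cm.yaml artifact saved by the last task (the helper itself is not part of this commit):

import datetime
import yaml

def phase_durations(path="time_tracking_cm.yaml"):
    """Print the elapsed time between consecutive time-tracking configmaps."""
    with open(path) as f:
        cm_list = yaml.safe_load(f)  # an oc-generated v1/List of ConfigMaps

    # Map configmap name -> creationTimestamp (RFC 3339, e.g. 2023-08-25T10:00:00Z)
    stamps = {
        item["metadata"]["name"]: datetime.datetime.strptime(
            item["metadata"]["creationTimestamp"], "%Y-%m-%dT%H:%M:%SZ")
        for item in cm_list["items"]
    }

    order = ["start-deploy-model", "smmr-registered-namespace", "inference-service-loaded"]
    for first, second in zip(order, order[1:]):
        print(f"{first} -> {second}: {stamps[second] - stamps[first]}")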
@@ -0,0 +1,17 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  annotations:
    serving.knative.openshift.io/enablePassthrough: "true"
    sidecar.istio.io/inject: "true"
    sidecar.istio.io/rewriteAppHTTPProbers: "true"
  name: {{ watsonx_serving_deploy_model_inference_service_name }}
  namespace: {{ watsonx_serving_deploy_model_namespace }}
spec:
  predictor:
    serviceAccountName: {{ watsonx_serving_deploy_model_sa_name }}
    model:
      modelFormat:
        name: caikit
      runtime: {{ watsonx_serving_deploy_model_serving_runtime_name }}
      storageUri: {{ watsonx_serving_deploy_model_storage_uri }}
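The playbook above waits on .status.modelStatus.states.targetModelState until the model reports Loaded. Outside of Ansible, an equivalent poller might look like this sketch (the function name and defaults are illustrative; it assumes a logged-in oc client):

import subprocess
import time

def wait_for_model_loaded(name, namespace, retries=240, delay=5):
    """Poll the InferenceService until its target model reports 'Loaded'."""
    jsonpath = "-ojsonpath={.status.modelStatus.states.targetModelState}"
    for _ in range(retries):
        state = subprocess.run(
            ["oc", "get", f"inferenceservice/{name}", "-n", namespace, jsonpath],
            capture_output=True, text=True).stdout.strip()
        if state == "Loaded":
            return
        time.sleep(delay)
    raise TimeoutError(f"InferenceService {name} did not reach the Loaded state")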
@@ -0,0 +1,25 @@
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: {{ watsonx_serving_deploy_model_serving_runtime_name }}
  namespace: {{ watsonx_serving_deploy_model_namespace }}
spec:
  containers:
  - env:
    - name: RUNTIME_LOCAL_MODELS_DIR
      value: /mnt/models
    image: {{ watsonx_serving_deploy_model_serving_runtime_image }}
    name: kserve-container
    ports:
    # Note: KServe only allows a single port; this is the gRPC port. Subject to change in the future.
    - containerPort: 8085
      name: h2c
      protocol: TCP
    resources:
      requests:
        {{ watsonx_serving_deploy_model_serving_runtime_resource_request }}
  multiModel: false
  supportedModelFormats:
  # Note: this currently *only* supports caikit-format models
  - autoSelect: true
    name: caikit
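Since the resource_request setting is a mapping, Jinja renders it as a dict-style flow mapping under requests:, which YAML accepts. A quick way to sanity-check the rendered manifest outside Ansible (a sketch assuming the jinja2 and PyYAML packages, with an illustrative namespace):

import jinja2
import yaml

with open("templates/serving_runtime.yaml.j2") as f:
    template = jinja2.Template(f.read())

rendered = template.render(
    watsonx_serving_deploy_model_serving_runtime_name="caikit-runtime",
    watsonx_serving_deploy_model_namespace="watsonx-scale-test",  # illustrative
    watsonx_serving_deploy_model_serving_runtime_image="quay.io/opendatahub/caikit-tgis-serving:stable",
    watsonx_serving_deploy_model_serving_runtime_resource_request={"cpu": 4, "memory": "8Gi"},
)

# safe_load raises if the rendered document is not valid YAML
manifest = yaml.safe_load(rendered)
print(manifest["spec"]["containers"][0]["resources"]["requests"])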
@@ -0,0 +1,2 @@
serving_runtime_template: templates/serving_runtime.yaml.j2
inference_service_template: templates/inference_service.yaml.j2
31 changes: 30 additions & 1 deletion testing/common/prepare_user_pods.py
@@ -1,5 +1,8 @@
import os
import logging
import pathlib
import base64
import yaml

from common import env, config, run, sizing

@@ -135,4 +138,30 @@ def prepare_user_pods(namespace):
    secret_name = config.ci_artifacts.get_config("secrets.dir.name")
    secret_env_key = config.ci_artifacts.get_config("secrets.dir.env_key")

    run.run(f"oc create secret generic {secret_name} --from-file=$(echo ${secret_env_key}/* | tr ' ' ,) -n {namespace} --dry-run=client -oyaml | oc apply -f-")

    # Build the Secret manifest with a client-side dry-run; the appended /dev/null
    # entry produces a dummy "null" key that is dropped below.
    secret_yaml_str = run.run(f"oc create secret generic {secret_name} --from-file=$(find ${secret_env_key}/* -maxdepth 1 -not -type d | tr '\\n' ,)/dev/null -n {namespace} --dry-run=client -oyaml", capture_stdout=True).stdout

    # Inject the AWS credentials, base64-encoded as required by the Secret 'data' field
    with open(pathlib.Path(os.environ[secret_env_key]) / ".awscred", "rb") as f:
        file_content = f.read()
    base64_secret = base64.b64encode(file_content).decode("ascii")

    secret_yaml = yaml.safe_load(secret_yaml_str)
    secret_yaml["data"][".awscred"] = base64_secret
    del secret_yaml["data"]["null"]

    save_and_create("secret.yaml", yaml.dump(secret_yaml), namespace, is_secret=True)

    run.run(f"oc describe secret {secret_name} -n {namespace} > {env.ARTIFACT_DIR}/secret_{secret_name}.descr")


def save_and_create(name, content, namespace, is_secret=False):
    file_path = pathlib.Path("/tmp") / name if is_secret \
        else env.ARTIFACT_DIR / "src" / name

    try:
        with open(file_path, "w") as f:
            print(content, file=f)

        with open(file_path) as f:
            run.run(f"oc apply -f- -n {namespace}", stdin_file=f)
    finally:
        if is_secret:
            file_path.unlink(missing_ok=True)
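For reference, a usage sketch (illustrative manifest and namespace): non-secret manifests are written under ARTIFACT_DIR/src and kept as artifacts, while secrets go to /tmp and are removed as soon as they are applied.

# Illustrative only: apply a trivial ConfigMap manifest through save_and_create.
manifest_str = """
apiVersion: v1
kind: ConfigMap
metadata:
  name: topsail-demo
"""

save_and_create("demo_cm.yaml", manifest_str, "my-namespace")  # kept in ARTIFACT_DIR/src
save_and_create("demo_secret.yaml", manifest_str, "my-namespace", is_secret=True)  # written to /tmp, then deleted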
15 changes: 15 additions & 0 deletions testing/watsonx-serving/command_args.yml.j2
@@ -128,3 +128,18 @@ local_ci run_multi/scale:

  git_pull: null #refs/pull/716/head
  capture_prom_db: "{{ tests.capture_prom }}"

#
# Test WatsonX Serving scale: run one
#

watsonx_serving deploy_model:
  namespace: {{ tests.scale.namespace }}
  sa_name: {{ watsonx_serving.sa_name }}

  serving_runtime_name: {{ watsonx_serving.serving_runtime.name }}
  serving_runtime_image: {{ watsonx_serving.serving_runtime.image }}
  serving_runtime_resource_request: {{ watsonx_serving.serving_runtime.resource_request }}

  inference_service_name: {{ watsonx_serving.inference_service.name }}
  storage_uri: {{ watsonx_serving.inference_service.storage_uri }}
12 changes: 11 additions & 1 deletion testing/watsonx-serving/config.yaml
@@ -34,6 +34,7 @@ secrets:
  s3_ldap_password_file: s3_ldap.passwords
  keep_cluster_password_file: get_cluster.password
  brew_registry_redhat_io_token_file: brew.registry.redhat.io.token
  aws_cred: .awscred
clusters:
  create:
    type: single # can be: single, ocp, managed
@@ -126,12 +127,21 @@ base_image:
  bucket_name: watsonx-serving-scale-test-bucket

watsonx_serving:
  sa_name: sa
  serving_runtime:
    name: caikit-runtime
    image: quay.io/opendatahub/caikit-tgis-serving:stable
    resource_request:
      cpu: 4
      memory: 8Gi

  storage_config:
    name: storage-config
    region: us-east-1
    endpoint: s3.amazonaws.com
    use_https: 1
  inference_service:
    name: caikit-isvc
    storage_uri: "s3://psap-watsonx-models/flan-t5-small-caikit/"
tests:
  mode: scale
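The new keys are read back through the config.ci_artifacts.get_config accessor used throughout the test code; for instance (a sketch of the dotted-path lookups, matching the get_config calls visible elsewhere in this commit):

from common import config

sa_name = config.ci_artifacts.get_config("watsonx_serving.sa_name")
runtime_image = config.ci_artifacts.get_config("watsonx_serving.serving_runtime.image")
storage_uri = config.ci_artifacts.get_config("watsonx_serving.inference_service.storage_uri")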

38 changes: 0 additions & 38 deletions testing/watsonx-serving/poc/deploy-minio.sh

This file was deleted.

3 changes: 1 addition & 2 deletions testing/watsonx-serving/poc/prepare.sh
@@ -91,11 +91,10 @@ git checkout FETCH_HEAD

git show --no-patch | tee $ARTIFACT_DIR/caikit-tgis-serving.commit

cp ${TOPSAIL_DIR}/testing/watsonx-serving/poc/{kserve-install.sh,deploy-minio.sh} \
cp ${TOPSAIL_DIR}/testing/watsonx-serving/poc/kserve-install.sh \
   scripts/install/

cp ${TOPSAIL_DIR}/testing/watsonx-serving/poc/deploy-model.sh \
   scripts/test/

bash -ex scripts/install/kserve-install.sh
bash -ex scripts/install/deploy-minio.sh
14 changes: 11 additions & 3 deletions testing/watsonx-serving/prepare_watsonx_serving.py
@@ -42,6 +42,14 @@ def prepare():
    if config.ci_artifacts.get_config("clusters.sutest.compute.dedicated"):
        # this is required to properly create the namespace used to preload the image
        test_namespace = config.ci_artifacts.get_config("tests.scale.namespace")
        test_scale.prepare_user_namespace(test_namespace, register_namespace_smmr=False)

        run.run("./run_toolbox.py from_config cluster preload_image --prefix sutest --suffix watsonx-serving-runtime")
        test_scale.prepare_user_namespace(test_namespace)

        RETRIES = 3
        for i in range(RETRIES):
            try:
                run.run("./run_toolbox.py from_config cluster preload_image --prefix sutest --suffix watsonx-serving-runtime")
                break
            except Exception:
                logging.warning(f"Watsonx Serving Runtime image preloading try #{i + 1}/{RETRIES} failed :/")
                if i == RETRIES - 1:
                    raise