Added TrustyAI components + default payloadProcessor value for MM configuration #315

Open · wants to merge 2 commits into base: master
21 changes: 21 additions & 0 deletions kfdef/kfctl_trustyai.yaml
@@ -0,0 +1,21 @@
apiVersion: kfdef.apps.kubeflow.org/v1
kind: KfDef
metadata:
  name: odh-trustyai
  namespace: opendatahub
spec:
  applications:
    - kustomizeConfig:
        repoRef:
          name: manifests
          path: odh-common
      name: odh-common
    - kustomizeConfig:
        repoRef:
          name: manifests
          path: trustyai-service
      name: trustyai
  repos:
    - name: manifests
      uri: https://github.com/opendatahub-io/odh-manifests/tarball/master
  version: master
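Assuming the OpenDataHub operator is already installed and the `opendatahub` namespace exists, this KfDef can be applied directly; a sketch:

```
# Create the TrustyAI KfDef; the operator reconciles the listed applications
oc apply -f kfdef/kfctl_trustyai.yaml -n opendatahub
```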
1 change: 1 addition & 0 deletions model-mesh/default/config-defaults.yaml
@@ -13,6 +13,7 @@
# limitations under the License.
# These are the system defaults which users can override with a user config
podsPerRuntime: 2
payloadProcessors: ""
headlessService: true
modelMeshImage:
  name: $(odh-modelmesh)
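The new `payloadProcessors` key defaults to empty, so no inference payloads are forwarded unless a value is supplied. A minimal sketch of wiring it to the TrustyAI consumer endpoint named in the README below; the user-override ConfigMap name `model-serving-config` and its `config.yaml` key follow ModelMesh's user-config convention and are assumptions here:

```
apiVersion: v1
kind: ConfigMap
metadata:
  name: model-serving-config  # assumed user-override ConfigMap name
data:
  config.yaml: |
    # forward inference payloads to the TrustyAI consumer endpoint
    payloadProcessors: "http://trustyai-service/consumer/kserve/v2"
```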
3 changes: 3 additions & 0 deletions model-mesh/internal/base/deployment.yaml.tmpl
@@ -60,6 +60,9 @@ spec:
        - name: MM_SERVICE_NAME
          value: {{.ServiceName}}
        # External endpoint for a payload processing service
        - name: MM_PAYLOAD_PROCESSORS
          value: {{.PayloadProcessors}}
        # External gRPC port of the service, should match ports.containerPort
        - name: MM_SVC_GRPC_PORT
          value: "{{.Port}}"
        - name: KUBE_POD_NAME
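With `payloadProcessors` set, the template above renders into the serving deployment roughly as follows (a sketch using the TrustyAI endpoint from the README; the actual value comes from the ConfigMap):

```
- name: MM_PAYLOAD_PROCESSORS
  value: http://trustyai-service/consumer/kserve/v2
```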
136 changes: 136 additions & 0 deletions tests/basictests/trustyai.sh
@@ -0,0 +1,136 @@
#!/bin/bash

source $TEST_DIR/common

MY_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"`)

source ${MY_DIR}/../util
RESOURCEDIR="${MY_DIR}/../resources"

TEST_USER=${OPENSHIFT_TESTUSER_NAME:-"admin"} #Username used to login to the ODH Dashboard
TEST_PASS=${OPENSHIFT_TESTUSER_PASS:-"admin"} #Password used to login to the ODH Dashboard
OPENSHIFT_OAUTH_ENDPOINT="https://$(oc get route -n openshift-authentication oauth-openshift -o json | jq -r '.spec.host')"
MM_NAMESPACE="${ODHPROJECT}-model"


os::test::junit::declare_suite_start "$MY_SCRIPT"

function check_trustyai_resources() {
header "Checking that TrustyAI resources have spun up"
oc project $ODHPROJECT
os::cmd::try_until_text "oc get deployment modelmesh-controller" "modelmesh-controller" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get deployment trustyai-service" "trustyai-service" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get route trustyai-service-route" "trustyai-service-route" $odhdefaulttimeout $odhdefaultinterval

oc wait --for=condition=Ready $(oc get pod -o name | grep trustyai) --timeout=${odhdefaulttimeout}ms
}

function deploy_model() {
header "Deploying model into ModelMesh"
oc new-project $MM_NAMESPACE || true
os::cmd::expect_success "oc project $MM_NAMESPACE"
oc label namespace $MM_NAMESPACE "modelmesh-enabled=true" --overwrite=true || echo "Failed to apply modelmesh-enabled label."
os::cmd::expect_success "oc apply -f ${RESOURCEDIR}/trustyai/secret.yaml -n ${MM_NAMESPACE}"
os::cmd::expect_success "oc apply -f ${RESOURCEDIR}/trustyai/odh-mlserver-0.x.yaml -n ${MM_NAMESPACE}"
os::cmd::expect_success "oc apply -f ${RESOURCEDIR}/trustyai/model.yaml -n ${MM_NAMESPACE}"
}

function check_mm_resources() {
header "Checking that ModelMesh resources have spun up"
oc project $MM_NAMESPACE
os::cmd::try_until_text "oc get route example-sklearn-isvc" "example-sklearn-isvc" $odhdefaulttimeout $odhdefaultinterval
INFER_ROUTE=$(oc get route example-sklearn-isvc --template={{.spec.host}}{{.spec.path}})
os::cmd::try_until_text "oc get pod | grep modelmesh-serving" "5/5" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "curl -k https://$INFER_ROUTE/infer -d @${RESOURCEDIR}/trustyai/data.json" "example-sklearn-isvc"
}

function check_communication(){
header "Check communication between TrustyAI and ModelMesh"
oc project $MM_NAMESPACE

# send some data to modelmesh
os::cmd::expect_success_and_text "curl -k https://$INFER_ROUTE/infer -d @${RESOURCEDIR}/trustyai/data.json" "model_name"
oc project ${ODHPROJECT}
os::cmd::try_until_text "oc logs $(oc get pods -o name | grep trustyai-service)" "Received partial input payload" $odhdefaulttimeout $odhdefaultinterval
}

function generate_data(){
header "Generate some data for TrustyAI (this will take a sec)"
oc project $MM_NAMESPACE

# send a bunch of random data to the model
# bash's $RANDOM is an integer in [0, 32767]; integer division by 128 maps it into [0, 255]
for i in {1..500};
do
DATA=$(sed "s/\[40.83, 3.5, 0.5, 0\]/\[$(($RANDOM % 2)),$(($RANDOM / 128)),$(($RANDOM / 128)), $(($RANDOM / 128)) \]/" ${RESOURCEDIR}/trustyai/data.json)
curl -k https://$INFER_ROUTE/infer -d "$DATA" >/dev/null 2>&1
done
}

function schedule_and_check_request(){
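# Statistical Parity Difference (SPD) compares favorable-outcome rates between groups:
#   SPD = P(output-0 = favorable | input-0 unprivileged) - P(output-0 = favorable | input-0 privileged)
# A value near 0 indicates parity. The request below asks TrustyAI to track this
# metric; the trustyai_spd gauge checked afterwards exposes it for Prometheus.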
header "Create a metric request and confirm calculation"
oc project $ODHPROJECT
TRUSTY_ROUTE=$(oc get route/trustyai-service-route --template={{.spec.host}})

os::cmd::expect_success_and_text "curl --location http://$TRUSTY_ROUTE/metrics/spd/request \
--header 'Content-Type: application/json' \
--data '{
\"modelId\": \"example-sklearn-isvc\",
\"protectedAttribute\": \"input-0\",
\"favorableOutcome\": {
\"type\": \"INT64\",
\"value\": 0.0
},
\"outcomeName\": \"output-0\",
\"privilegedAttribute\": {
\"type\": \"DOUBLE\",
\"value\": 0.0
},
\"unprivilegedAttribute\": {
\"type\": \"DOUBLE\",
\"value\": 1.0
}
}'" "requestId"
os::cmd::try_until_text "curl http://$TRUSTY_ROUTE/q/metrics" "trustyai_spd"
}


function test_prometheus_scraping(){
header "Ensure metrics are in Prometheus"
oc adm policy add-role-to-user view -n ${ODHPROJECT} --rolebinding-name "view-$TEST_USER" $TEST_USER
# Grab a bearer token for the test user via the challenging OAuth client
TESTUSER_BEARER_TOKEN="$(curl -kiL -u $TEST_USER:$TEST_PASS -H 'X-CSRF-Token: xxx' $OPENSHIFT_OAUTH_ENDPOINT'/oauth/authorize?response_type=token&client_id=openshift-challenging-client' | grep -oP 'access_token=\K[^&]*')"
MODEL_MONITORING_ROUTE=$(oc get route -n ${ODHPROJECT} odh-model-monitoring --template={{.spec.host}})
os::cmd::try_until_text "curl -k --location -g --request GET 'https://'$MODEL_MONITORING_ROUTE'//api/v1/query?query=trustyai_spd' -H 'Authorization: Bearer '$TESTUSER_BEARER_TOKEN)" "value" $odhdefaulttimeout $odhdefaultinterval
}

function teardown_trustyai_test() {
header "Cleaning up the TrustyAI test"
oc project $ODHPROJECT

REQUEST_ID="$(curl http://$TRUSTY_ROUTE/metrics/spd/requests | jq -r '.requests[0].id')"

os::cmd::expect_success_and_text "curl -X DELETE --location http://$TRUSTY_ROUTE/metrics/spd/request \
-H 'Content-Type: application/json' \
-d '{
\"requestId\": \"'"$REQUEST_ID"'\"
}'" "Removed"
os::cmd::expect_success "oc delete -f ${RESOURCEDIR}/modelmesh/service_account.yaml"

oc project $MM_NAMESPACE
os::cmd::expect_success "oc delete -f ${RESOURCEDIR}/trustyai/secret.yaml"
os::cmd::expect_success "oc delete -f ${RESOURCEDIR}/trustyai/odh-mlserver-0.x.yaml"
os::cmd::expect_success "oc delete -f ${RESOURCEDIR}/trustyai/model.yaml"
os::cmd::expect_success "oc delete project $MM_NAMESPACE"

}


deploy_model
check_mm_resources
check_communication
generate_data
schedule_and_check_request
teardown_trustyai_test


os::test::junit::declare_suite_end
10 changes: 10 additions & 0 deletions tests/resources/trustyai/data.json
@@ -0,0 +1,10 @@
{
  "inputs": [
    {
      "name": "predict",
      "shape": [1, 4],
      "datatype": "FP64",
      "data": [40.83, 3.5, 0.5, 0]
    }
  ]
}
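For reference, a successful v2 inference call with this payload returns JSON of roughly this shape (the test script greps for `model_name` in it; the values below are illustrative, not captured output):

```
{
  "model_name": "example-sklearn-isvc__isvc-0000000000",
  "outputs": [
    { "name": "output-0", "datatype": "INT64", "shape": [1], "data": [0] }
  ]
}
```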
13 changes: 13 additions & 0 deletions tests/resources/trustyai/model.yaml
@@ -0,0 +1,13 @@
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: example-sklearn-isvc
  annotations:
    serving.kserve.io/deploymentMode: ModelMesh
spec:
  predictor:
    model:
      modelFormat:
        name: sklearn
      runtime: mlserver-0.x
      storageUri: "https://github.com/trustyai-explainability/trustyai-explainability/raw/main/explainability-service/demo/models/model.joblib?raw=true"
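Once applied, readiness can be checked on the InferenceService itself; a sketch, where `opendatahub-model` stands in for the `$MM_NAMESPACE` the test script derives from `$ODHPROJECT`:

```
# The isvc becomes Ready once ModelMesh has pulled and loaded the model
oc get inferenceservice example-sklearn-isvc -n opendatahub-model
```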
77 changes: 77 additions & 0 deletions tests/resources/trustyai/odh-mlserver-0.x.yaml
@@ -0,0 +1,77 @@
# Copyright 2021 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: mlserver-0.x
  annotations:
    enable-route: "true"
  labels:
    name: modelmesh-serving-mlserver-0.x-SR
spec:
  supportedModelFormats:
    - name: sklearn
      version: "0" # v0.23.1
      autoSelect: true
    - name: xgboost
      version: "1" # v1.1.1
      autoSelect: true
    - name: lightgbm
      version: "3" # v3.2.1
      autoSelect: true

  protocolVersions:
    - grpc-v2
  multiModel: true

  grpcEndpoint: "port:8085"
  grpcDataEndpoint: "port:8001"

  containers:
    - name: mlserver
      image: quay.io/opendatahub/mlserver:0.5.2
      env:
        - name: MLSERVER_MODELS_DIR
          value: "/models/_mlserver_models/"
        - name: MLSERVER_GRPC_PORT
          value: "8001"
        # The default value for the HTTP port is 8080, which conflicts with
        # MMesh's Litelinks port
        - name: MLSERVER_HTTP_PORT
          value: "8002"
        - name: MLSERVER_LOAD_MODELS_AT_STARTUP
          value: "false"
        # Set a dummy model name via environment so that MLServer doesn't
        # error on a RepositoryIndex call when no models exist
        - name: MLSERVER_MODEL_NAME
          value: dummy-model-fixme
        # Set the server address to localhost to ensure MLServer only listens inside the pod
        - name: MLSERVER_HOST
          value: "127.0.0.1"
        # Increase the gRPC max message size to support larger payloads
        # Unlimited because it will be restricted at the model mesh layer
        - name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH
          value: "-1"
      resources:
        requests:
          cpu: 500m
          memory: 1Gi
        limits:
          cpu: "5"
          memory: 1Gi
  builtInAdapter:
    serverType: mlserver
    runtimeManagementPort: 8001
    memBufferBytes: 134217728
    modelLoadingTimeoutMillis: 90000
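Once this ServingRuntime is applied in a modelmesh-enabled namespace, the controller brings up the serving pods the test waits on (the `5/5` container check); a quick sketch to confirm registration, assuming the same model namespace as above:

```
# List registered runtimes; mlserver-0.x should appear with multi-model support
oc get servingruntime -n opendatahub-model
```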
38 changes: 38 additions & 0 deletions tests/resources/trustyai/secret.yaml
@@ -0,0 +1,38 @@
---
apiVersion: v1
kind: Secret
metadata:
  name: aws-connection-minio-data-connection
  labels:
    opendatahub.io/dashboard: 'true'
    opendatahub.io/managed: 'true'
  annotations:
    opendatahub.io/connection-type: s3
    openshift.io/display-name: Minio Data Connection
data:
  AWS_ACCESS_KEY_ID: VEhFQUNDRVNTS0VZ
  AWS_DEFAULT_REGION: dXMtc291dGg=
  AWS_S3_BUCKET: bW9kZWxtZXNoLWV4YW1wbGUtbW9kZWxz
  AWS_S3_ENDPOINT: aHR0cDovL21pbmlvOjkwMDA=
  AWS_SECRET_ACCESS_KEY: VEhFU0VDUkVUS0VZ
type: Opaque
---
apiVersion: v1
kind: Secret
metadata:
  name: model-serving-etcd
stringData:
  etcd_connection: |
    {
      "endpoints": "http://etcd:2379",
      "root_prefix": "modelmesh-serving",
      "userid": "root",
      "password": "<etcd_password>"
    }
---
apiVersion: v1
kind: Secret
metadata:
  name: etcd-passwords
stringData:
  root: <etcd_password>
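The `data` values in the first Secret are base64-encoded demo placeholders rather than real credentials; for example:

```
echo 'VEhFQUNDRVNTS0VZ' | base64 -d         # -> THEACCESSKEY
echo 'aHR0cDovL21pbmlvOjkwMDA=' | base64 -d # -> http://minio:9000
```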
5 changes: 5 additions & 0 deletions tests/setup/kfctl_openshift.yaml
@@ -57,6 +57,11 @@ spec:
        name: manifests
        path: data-science-pipelines
      name: data-science-pipelines
    - kustomizeConfig:
        repoRef:
          name: manifests
          path: trustyai-service
      name: trustyai
  repos:
    - name: manifests
      uri: https://github.com/red-hat-data-services/odh-manifests/tarball/master
32 changes: 32 additions & 0 deletions trustyai-service/README.md
@@ -0,0 +1,32 @@
# TrustyAI Service

TrustyAI is a service that provides fairness metrics for models served by ModelMesh.


### Installation process

The following steps install TrustyAI alongside ModelMesh as part of an OpenDataHub install:

1. Install the OpenDataHub operator
2. Create a KfDef that includes the model-mesh component with the odh-model-controller overlay.
3. Set the `payloadProcessors` value within the `model-serving-config-defaults` ConfigMap
to `http://trustyai-service/consumer/kserve/v2` (a sketch of this edit follows the KfDef example below)
4. Create a TrustyAI KfDef:
```
apiVersion: kfdef.apps.kubeflow.org/v1
kind: KfDef
metadata:
  name: trustyai
spec:
  applications:
    - kustomizeConfig:
        repoRef:
          name: manifests
          path: trustyai-service
      name: trustyai
  repos:
    - name: manifests
      uri: https://github.com/red-hat-data-services/odh-manifests/tarball/master
  version: master
```
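For step 3, one way to set the default is to edit the ConfigMap in place; a sketch, assuming ModelMesh runs in the `opendatahub` namespace:

```
oc edit configmap model-serving-config-defaults -n opendatahub
# then set the key added by this change:
#   payloadProcessors: "http://trustyai-service/consumer/kserve/v2"
```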

11 changes: 11 additions & 0 deletions trustyai-service/base/kustomization.yaml
@@ -0,0 +1,11 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
commonLabels:
  app: trustyai
  app.kubernetes.io/part-of: trustyai
resources:
  - ../default
  - ../servicemonitors
  - trustyai-configmap.yaml
  - route.yaml
  - pvc.yaml
11 changes: 11 additions & 0 deletions trustyai-service/base/pvc.yaml
@@ -0,0 +1,11 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: trustyai-service-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  volumeMode: Filesystem