Skip to content

Commit

Permalink
Experiment
Browse files Browse the repository at this point in the history
Signed-off-by: Thanawan Atchariyachanvanit <[email protected]>
  • Loading branch information
thanawan-atc committed Jul 7, 2023
1 parent 13149e8 commit d08be5a
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 0 deletions.
77 changes: 77 additions & 0 deletions .ci/run-model-tracing
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env bash
#
# Entrypoint to run model autotracing.
#
# Usage: run-model-tracing [CLUSTER] [SECURE_INTEGRATION] [OPENSEARCH_VERSION]
#
# Steps performed:
#   1. Export the tracing parameters and default environment variables.
#   2. Source imports.sh and launch run-opensearch.sh with DETACH=true.
#   3. Build the opensearch-project/opensearch-py-ml image from
#      .ci/Dockerfile.client.
#   4. Run model_auto_tracing.py inside a container created from that image.
#   5. Copy the model-tracing directory out of the container and remove it.

set -e

# Placeholder tracing parameters.
# NOTE(review): positional arguments after the third are not read by this
# script, so these placeholder values are always the ones handed to
# model_auto_tracing.py -- confirm whether they should come from "$4" onwards.
export MODEL_ID="dummy_model_id"
export MODEL_VERSION="dummy_model_version"
export TRACING_FORMAT="dummy_model_format"
export EMBEDDING_DIMENSIONS=""
export POOLING_MODE="dummy_pooling_mode"

# Default environment variables
#export TEST_SUITE="${TEST_SUITE:=oss}"
export PYTHON_VERSION="${PYTHON_VERSION:=3.9}"
export PANDAS_VERSION=${PANDAS_VERSION-1.5.0}
export PYTHON_CONNECTION_CLASS="${PYTHON_CONNECTION_CLASS:=Urllib3HttpConnection}"
export CLUSTER="${1:-opensearch}"
export SECURE_INTEGRATION="${2:-true}"
export OPENSEARCH_VERSION="${3:-latest}"
#export IS_DOC="${4:-false}"
if [[ "$SECURE_INTEGRATION" == "true" ]]; then
  export OPENSEARCH_URL_EXTENSION="https"
else
  export OPENSEARCH_URL_EXTENSION="http"
fi

export IS_UNRELEASED=false


echo -e "\033[1m>>>>> Unreleased is $IS_UNRELEASED >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"
# Quote every path expansion so a checkout path containing spaces still works.
script_path=$(dirname "$(realpath -s "$0")")
echo -e "$script_path"

# NOTE(review): opensearch_url, external_opensearch_url, network_name,
# STACK_VERSION and TEST_SUITE are not assigned anywhere in this script;
# presumably imports.sh provides them -- verify, since `set -u` below aborts
# on any unset variable.
source "$script_path/imports.sh"
set -euo pipefail

echo -e "\033[1m>>>>> Start server container >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"
DETACH=true bash "$script_path/run-opensearch.sh"

echo -e "\033[1m>>>>> Tracing the Model >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"

echo -e "\033[34;1mINFO:\033[0m URL ${opensearch_url}\033[0m"
echo -e "\033[34;1mINFO:\033[0m EXTERNAL OS URL ${external_opensearch_url}\033[0m"
echo -e "\033[34;1mINFO:\033[0m VERSION ${OPENSEARCH_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m TEST_SUITE ${TEST_SUITE}\033[0m"
echo -e "\033[34;1mINFO:\033[0m PYTHON_VERSION ${PYTHON_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m PYTHON_CONNECTION_CLASS ${PYTHON_CONNECTION_CLASS}\033[0m"
echo -e "\033[34;1mINFO:\033[0m PANDAS_VERSION ${PANDAS_VERSION}\033[0m"

echo -e "\033[1m>>>>> Build [opensearch-project/opensearch-py-ml container] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"

docker build \
  --file .ci/Dockerfile.client \
  --tag opensearch-project/opensearch-py-ml \
  --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
  .

echo -e "\033[1m>>>>> Run [opensearch-project/opensearch-py-ml container] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"

# The python3 command must directly follow the image name: a blank line after
# a trailing backslash ends the docker command, which would make python3 run
# on the host instead of inside the container.
# EMBEDDING_DIMENSIONS and POOLING_MODE are deliberately left unquoted so that
# an empty value disappears and the flag is passed without a value, rather
# than with an empty-string value.
docker run \
  --network="${network_name}" \
  --env "STACK_VERSION=${STACK_VERSION}" \
  --env "OPENSEARCH_URL=${opensearch_url}" \
  --env "OPENSEARCH_VERSION=${OPENSEARCH_VERSION}" \
  --env "TEST_SUITE=${TEST_SUITE}" \
  --env "PYTHON_CONNECTION_CLASS=${PYTHON_CONNECTION_CLASS}" \
  --env "TEST_TYPE=server" \
  --name opensearch-py-ml-model-tracing-runner \
  opensearch-project/opensearch-py-ml \
  python3 model_auto_tracing.py ${MODEL_ID} ${MODEL_VERSION} ${TRACING_FORMAT} -ed ${EMBEDDING_DIMENSIONS} -pm ${POOLING_MODE}

# Copy the model-tracing directory out of the (now exited) container.
docker cp opensearch-py-ml-model-tracing-runner:/code/opensearch-py-ml/model-tracing/ ./model-tracing/

docker rm opensearch-py-ml-model-tracing-runner

46 changes: 46 additions & 0 deletions .github/workflows/model_uploader.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Manually triggered workflow that runs the model auto-tracing entrypoint
# (.ci/run-model-tracing) with the cluster settings from the job matrix and
# the model parameters supplied as workflow_dispatch inputs.
name: Model Auto-tracing & Uploading
on:
  workflow_dispatch:
    inputs:
      model_id:
        description: "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)"
        required: true
        type: string
      model_version:
        description: "Model version number (e.g. 1.0.1)"
        required: true
        type: string
      tracing_format:
        description: "Model format for auto-tracing"
        required: true
        type: choice
        options:
          - "TORCH_SCRIPT"
          - "ONNX"
          - "Both"
      embedding_dimension:
        description: "(Optional) You can add the embedding dimension of the model here if it does not exist in original config.json file. Else, it will use 768."
        required: false
        # workflow_dispatch inputs have no `int` type; `number` is the
        # numeric input type.
        type: number
      pooling_mode:
        description: "(Optional) You can specify the pooling mode of the model here if it does not exist in original config.json file."
        required: false
        type: string

jobs:
  model-tracing:
    name: model-tracing
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        cluster: ["opensearch"]
        secured: ["true"]
        entry:
          - { opensearch_version: 2.7.0 }

    steps:
      - name: Checkout
        uses: actions/checkout@v2
      # The input names referenced here must match the keys declared under
      # `inputs` exactly; a misspelled name silently expands to an empty string.
      - name: Autotracing ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
        run: "./.ci/run-model-tracing ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} ${{ github.event.inputs.model_id }} ${{ github.event.inputs.model_version }} ${{ github.event.inputs.tracing_format }} -ed ${{ github.event.inputs.embedding_dimension }} -pm ${{ github.event.inputs.pooling_mode }}"
36 changes: 36 additions & 0 deletions model_auto_tracing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import argparse
import os
import opensearchpy
from opensearchpy import OpenSearch
import numpy as np
import sys
import os
import json
import warnings

def main(args: argparse.Namespace) -> None:
    """Print the parsed command-line arguments to standard output.

    Args:
        args: The namespace produced by the argument parser in the
            ``__main__`` section of this script.
    """
    print(args)

if __name__ == "__main__":
    # Install "ignore" filters for selected warning categories and message
    # patterns before parsing arguments.
    for _category in (DeprecationWarning, FutureWarning):
        warnings.filterwarnings("ignore", category=_category)
    for _pattern in (
        "Unverified HTTPS request",
        "TracerWarning: torch.tensor",
        "using SSL with verify_certs=False is insecure.",
    ):
        warnings.filterwarnings("ignore", message=_pattern)

    parser = argparse.ArgumentParser(description=__doc__)

    # Required positional arguments, registered in command-line order.
    for _name, _help in (
        (
            "model_id",
            "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)",
        ),
        ("model_version", "Model version number (e.g. 1.0.1)"),
        ("tracing_format", "Model format for auto-tracing"),
    ):
        parser.add_argument(_name, type=str, help=_help)

    # Optional flags: nargs="?" lets each flag appear without a value, in
    # which case (as when the flag is absent) the parsed value is None.
    _optional_value = {"nargs": "?", "default": None, "const": None}
    parser.add_argument(
        "-ed",
        "--embedding_dimension",
        type=int,
        help="Embedding dimension of the model to use if it does not exist in original config.json",
        **_optional_value,
    )
    parser.add_argument(
        "-pm",
        "--pooling_mode",
        type=str,
        help="Pooling mode if it does not exist in original config.json",
        **_optional_value,
    )

    main(parser.parse_args())

0 comments on commit d08be5a

Please sign in to comment.