# Model Upload Workflow: Tracing-Uploading-Releasing #120

# Manually dispatched workflow. The inputs declared below are read by the jobs
# through the `github.event.inputs.*` context.
name: Model Auto-tracing & Uploading
on:
  # Step 1: Initiate the workflow
  workflow_dispatch:
    inputs:
      # Required: identifier of the model to trace and upload.
      model_id:
        description: "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)"
        required: true
        type: string
      # Required: version string recorded for the model.
      model_version:
        description: "Model version number (e.g. 1.0.1)"
        required: true
        type: string
      # Required: which traced format(s) to produce.
      tracing_format:
        description: "Model format for auto-tracing (torch_script/onnx)"
        required: true
        type: choice
        options:
          - "BOTH"
          - "TORCH_SCRIPT"
          - "ONNX"
      # Optional override; left empty when the value should not be overridden.
      embedding_dimension:
        description: "(Optional) Embedding Dimension (Specify here if it does not exist in original config.json file, or you want to overwrite it.)"
        required: false
        # `int` is not a valid workflow_dispatch input type; the numeric type
        # is spelled `number`.
        type: number
      # Optional override; the empty first option means "no override".
      pooling_mode:
        description: "(Optional) Pooling Mode (Specify here if it does not exist in original config.json file or you want to overwrite it.)"
        required: false
        type: choice
        options:
          - ""
          - "CLS"
          - "MEAN"
          - "MAX"
          - "MEAN_SQRT_LEN"

jobs:
# # Step 2: Check if the model already exists in the model hub
# checking-out-model-hub:
# runs-on: 'ubuntu-latest'
# permissions:
# id-token: write
# contents: read
# steps:
# - name: Checkout Repository
# uses: actions/checkout@v3
# - name: Set Up Python
# uses: actions/setup-python@v2
# with:
# python-version: '3.x'
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
# role-session-name: checking-out-model-hub
# - name: Check if TORCH_SCRIPT Model Exists
# if: github.event.inputs.tracing_format == 'TORCH_SCRIPT' || github.event.inputs.tracing_format == 'BOTH'
# run: |
# TORCH_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
# ${{ github.event.inputs.model_id }} ${{ github.event.inputs.model_version }} TORCH_SCRIPT)
# aws s3api head-object --bucket opensearch-exp --key $TORCH_FILE_PATH > /dev/null 2>&1 || TORCH_MODEL_NOT_EXIST=true
# if [[ -z $TORCH_MODEL_NOT_EXIST ]];
# then
# echo "TORCH_SCRIPT Model already exists on model hub."
# exit 1
# fi
# - name: Check if ONNX Model Exists
# if: github.event.inputs.tracing_format == 'ONNX' || github.event.inputs.tracing_format == 'BOTH'
# run: |
# ONNX_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
# ${{ github.event.inputs.model_id }} ${{ github.event.inputs.model_version }} ONNX)
# aws s3api head-object --bucket opensearch-exp --key $ONNX_FILE_PATH > /dev/null 2>&1 || ONNX_MODEL_NOT_EXIST=true
# if [[ -z $ONNX_MODEL_NOT_EXIST ]];
# then
# echo "ONNX Model already exists on model hub."
# exit 1;
# fi
# # Step 3: Trace the model, Verify the embeddings & Upload the model files as artifacts
# model-auto-tracing:
# needs: checking-out-model-hub
# name: model-auto-tracing
# runs-on: ubuntu-latest
# permissions:
# id-token: write
# contents: read
# strategy:
# matrix:
# cluster: ["opensearch"]
# secured: ["true"]
# entry:
# - { opensearch_version: 2.7.0 }
# steps:
# - name: Checkout
# uses: actions/checkout@v2
# - name: Export Arguments
# run: |
# echo "MODEL_ID=${{ github.event.inputs.model_id }}" >> $GITHUB_ENV
# echo "MODEL_VERSION=${{ github.event.inputs.model_version }}" >> $GITHUB_ENV
# echo "TRACING_FORMAT=${{ github.event.inputs.tracing_format }}" >> $GITHUB_ENV
# echo "EMBEDDING_DIMENSION=${{ github.event.inputs.embedding_dimension }}" >> $GITHUB_ENV
# echo "POOLING_MODE=${{ github.event.inputs.pooling_mode }}" >> $GITHUB_ENV
# - name: Autotracing ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
# run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} trace"
# - name: Upload Artifact
# uses: actions/upload-artifact@v3
# with:
# name: upload
# path: ./upload/
# retention-days: 5
# if-no-files-found: error
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
# role-session-name: model-auto-tracing
# - name: Dryrun model uploading
# id: dryrun_model_uploading
# run: |
# aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/ --dryrun
# dryrun_output=$(aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/ --dryrun)
# echo "dryrun_output<<EOF" >> $GITHUB_OUTPUT
# echo "${dryrun_output@E}" >> $GITHUB_OUTPUT
# echo "EOF" >> $GITHUB_OUTPUT
# echo "${dryrun_output@E}"
# outputs:
# dryrun_output: ${{ steps.dryrun_model_uploading.outputs.dryrun_output }}
# # Step 4: Ask for manual approval from the CODEOWNERS
# manual-approval:
# needs: model-auto-tracing
# runs-on: 'ubuntu-latest'
# permissions:
# issues: write
# steps:
# - name: Checkout Repository
# uses: actions/checkout@v3
# - name: Get Approvers
# id: get_approvers
# run: |
# echo "approvers=$(cat .github/CODEOWNERS | grep @ | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> $GITHUB_OUTPUT
# - name: Create Issue Body
# id: create_issue_body
# run: |
# embedding_dimension=${{ github.event.inputs.embedding_dimension }}
# pooling_mode=${{ github.event.inputs.pooling_mode }}
# issue_body="Please approve or deny opensearch-py-ml model uploading:
# ========= Workflow Details ==========
# - Workflow Name: ${{ github.workflow }}
# - Workflow Initiator: @${{ github.actor }}
# ========= Model Information =========
# - Model ID: ${{ github.event.inputs.model_id }}
# - Model Version: ${{ github.event.inputs.model_version }}
# - Tracing Format: ${{ github.event.inputs.tracing_format }}
# - Embedding Dimension: ${embedding_dimension:-Default}
# - Pooling Mode: ${pooling_mode:-Default}
# ===== Dry Run of Model Uploading =====
# ${{ needs.model-auto-tracing.outputs.dryrun_output }}"
# echo "issue_body<<EOF" >> $GITHUB_OUTPUT
# echo "${issue_body@E}" >> $GITHUB_OUTPUT
# echo "EOF" >> $GITHUB_OUTPUT
# echo "${issue_body@E}"
# - uses: trstringer/manual-approval@v1
# with:
# secret: ${{ github.TOKEN }}
# approvers: ${{ steps.get_approvers.outputs.approvers }}
# minimum-approvals: 1
# issue-title: "Upload Model to OpenSearch Model Hub (${{ github.event.inputs.model_id }})"
# issue-body: ${{ steps.create_issue_body.outputs.issue_body }}
# exclude-workflow-initiator-as-approver: false
# # Step 5: Download the artifacts & Upload it to the S3 bucket
# model-uploading:
# needs: manual-approval
# runs-on: 'ubuntu-latest'
# permissions:
# id-token: write
# contents: read
# steps:
# - name: Download Artifact
# uses: actions/download-artifact@v2
# with:
# name: upload
# path: ./upload/
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
# role-session-name: model-uploading
# - name: Copy Files to the Bucket
# id: copying_to_bucket
# run: |
# aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/
# echo "upload_time=$(TZ='America/Los_Angeles' date "+%Y-%m-%d %T")" >> $GITHUB_OUTPUT
# outputs:
# upload_time: ${{ steps.copying_to_bucket.outputs.upload_time }}
# Step 6: Update MODEL_UPLOAD_HISTORY.md & supported_models.json
  # Runs the history-update script with the dispatch inputs and opens a pull
  # request containing the regenerated history files.
  history-update:
    # needs: model-uploading
    runs-on: 'ubuntu-latest'
    permissions:
      id-token: write
      contents: write
      # Required by peter-evans/create-pull-request to open the pull request.
      pull-requests: write
    # Serialize runs of this workflow so concurrent runs do not race on the
    # same history files / PR branch.
    concurrency: ${{ github.workflow }}-concurrency
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set Up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install Packages
        run: python -m pip install mdutils
      - name: Update MODEL_UPLOAD_HISTORY.md
        # Inputs are handed to the script through environment variables rather
        # than interpolated into the shell text, so unusual characters in an
        # input cannot alter the command line.
        # The upload timestamp (-t, produced by the model-uploading job) is not
        # passed while that job is disabled.
        env:
          MODEL_ID: ${{ github.event.inputs.model_id }}
          MODEL_VERSION: ${{ github.event.inputs.model_version }}
          TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
          EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
          POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
          UPLOADER: ${{ github.actor }}
        run: |
          args=("$MODEL_ID" "$MODEL_VERSION" "$TRACING_FORMAT")
          # Only pass the optional flags when a value was supplied; an empty
          # value would otherwise make `-ed` swallow the following flag.
          # NOTE(review): assumes the script treats an omitted -ed/-pm as
          # "use the default" - confirm against the script's argument parser.
          if [[ -n "$EMBEDDING_DIMENSION" ]]; then
            args+=(-ed "$EMBEDDING_DIMENSION")
          fi
          if [[ -n "$POOLING_MODE" ]]; then
            args+=(-pm "$POOLING_MODE")
          fi
          args+=(-u "$UPLOADER")
          python utils/model_uploader/update_models_upload_history_md.py "${args[@]}"
      - name: Create PR
        uses: peter-evans/create-pull-request@v5
        id: commit
        with:
          # Action inputs are kebab-case; `signoff: true` already adds the
          # Signed-off-by trailer, so no extra commit options are needed.
          commit-message: 'GitHub Actions Workflow - Update MODEL_UPLOAD_HISTORY.md (${{ github.event.inputs.model_id }})'
          signoff: true
          branch: workflow-nox-cleanup-pr  # Need to change before merge
          title: 'Upload Model to OpenSearch Model Hub: `${{ github.event.inputs.model_id }}` (v.${{ github.event.inputs.model_version }})'
          labels: ModelUpload
          # `add-paths` limits the commit to these files (`path` is the
          # repository location, not a file list).
          add-paths: |
            utils/model_uploader/upload_history/MODEL_UPLOAD_HISTORY.md
            utils/model_uploader/upload_history/supported_models.json