# Model Upload Workflow: Tracing-Uploading-Releasing #110

name: Model Auto-tracing & Uploading
on:
  # Step 1: Initiate the workflow
  # The workflow is started manually; the inputs below are filled in by the
  # person who dispatches it.
  workflow_dispatch:
    inputs:
      model_id:
        description: "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)"
        required: true
        type: string
      model_version:
        description: "Model version number (e.g. 1.0.1)"
        required: true
        type: string
      tracing_format:
        description: "Model format for auto-tracing (torch_script/onnx)"
        required: true
        type: choice
        options:
          - "BOTH"
          - "TORCH_SCRIPT"
          - "ONNX"
      embedding_dimension:
        description: "(Optional) Embedding Dimension (Specify here if it does not exist in original config.json file, or you want to overwrite it.)"
        required: false
        # `int` is not one of the accepted workflow_dispatch input types
        # (boolean, choice, number, environment, string); `number` is the
        # numeric one.
        type: number
      pooling_mode:
        description: "(Optional) Pooling Mode (Specify here if it does not exist in original config.json file or you want to overwrite it.)"
        required: false
        type: choice
        options:
          # The empty option lets the initiator leave the pooling mode unset.
          - ""
          - "CLS"
          - "MEAN"
          - "MAX"
          - "MEAN_SQRT_LEN"
jobs:
# Step 2: Check if the model already exists in the model hub
# The job fails (exit 1) when an object for the requested model/version/format
# is already present in the bucket, which stops the dependent jobs.
checking-out-model-hub:
  runs-on: 'ubuntu-latest'
  permissions:
    id-token: write
    contents: read
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v3
    - name: Set Up Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.x'
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
        role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
        role-session-name: checking-out-model-hub
    - name: Check if TORCH_SCRIPT Model Exists
      if: github.event.inputs.tracing_format == 'TORCH_SCRIPT' || github.event.inputs.tracing_format == 'BOTH'
      # Inputs are handed over through `env` instead of being expanded inline,
      # so their contents are never parsed as shell syntax.
      env:
        DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
        DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
      run: |
        TORCH_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
          "$DISPATCH_MODEL_ID" "$DISPATCH_MODEL_VERSION" TORCH_SCRIPT)
        # Any head-object failure (not only a missing key) is treated as
        # "model does not exist", matching the previous behaviour.
        if aws s3api head-object --bucket opensearch-exp --key "$TORCH_FILE_PATH" > /dev/null 2>&1; then
          echo "TORCH_SCRIPT Model already exists on model hub."
          exit 1
        fi
    - name: Check if ONNX Model Exists
      if: github.event.inputs.tracing_format == 'ONNX' || github.event.inputs.tracing_format == 'BOTH'
      env:
        DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
        DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
      run: |
        ONNX_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
          "$DISPATCH_MODEL_ID" "$DISPATCH_MODEL_VERSION" ONNX)
        if aws s3api head-object --bucket opensearch-exp --key "$ONNX_FILE_PATH" > /dev/null 2>&1; then
          # Message fixed: this branch reports the ONNX model, not TORCH_SCRIPT.
          echo "ONNX Model already exists on model hub."
          exit 1
        fi
# Step 3: Trace the model, Verify the embeddings & Upload the model files as artifacts
# Exposes the `aws s3 sync --dryrun` listing as the job output `dryrun_output`.
model-auto-tracing:
  needs: checking-out-model-hub
  name: model-auto-tracing
  runs-on: ubuntu-latest
  permissions:
    id-token: write
    contents: read
  strategy:
    matrix:
      cluster: ["opensearch"]
      secured: ["true"]
      entry:
        - opensearch_version: "2.7.0"
  outputs:
    dryrun_output: ${{ steps.dryrun_model_uploading.outputs.dryrun_output }}
  steps:
    - name: Checkout
      # Same major version as the other checkout steps in this workflow.
      uses: actions/checkout@v3
    - name: Export Arguments
      # Inputs are handed over through `env` instead of being expanded inline,
      # so their contents are never parsed as shell syntax. The variables
      # written to GITHUB_ENV keep their original names for the later steps.
      env:
        DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
        DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
        DISPATCH_TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
        DISPATCH_EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
        DISPATCH_POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
      run: |
        {
          echo "MODEL_ID=${DISPATCH_MODEL_ID}"
          echo "MODEL_VERSION=${DISPATCH_MODEL_VERSION}"
          echo "TRACING_FORMAT=${DISPATCH_TRACING_FORMAT}"
          echo "EMBEDDING_DIMENSION=${DISPATCH_EMBEDDING_DIMENSION}"
          echo "POOLING_MODE=${DISPATCH_POOLING_MODE}"
        } >> "$GITHUB_ENV"
    - name: Autotracing ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
      run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} trace"
    - name: Upload Artifact
      uses: actions/upload-artifact@v3
      with:
        name: upload
        path: ./upload/
        retention-days: 5
        if-no-files-found: error
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
        role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
        role-session-name: model-auto-tracing
    - name: Dryrun model uploading
      id: dryrun_model_uploading
      run: |
        # Run the dry run once and reuse the captured text; the listing is
        # still printed to the log by the final echo.
        dryrun_output=$(aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/ --dryrun)
        # Multi-line values are written to GITHUB_OUTPUT with a heredoc-style
        # delimiter.
        {
          echo "dryrun_output<<EOF"
          echo "${dryrun_output@E}"
          echo "EOF"
        } >> "$GITHUB_OUTPUT"
        echo "${dryrun_output@E}"
# Step 4: Ask for manual approval from the CODEOWNERS
# Opens an approval issue whose body summarises the dispatch inputs and the
# dry-run listing produced by the model-auto-tracing job.
manual-approval:
  needs: model-auto-tracing
  runs-on: 'ubuntu-latest'
  permissions:
    issues: write
    # Listing any permission sets every unlisted one to `none`, so the read
    # access used by the checkout step is granted explicitly.
    contents: read
  steps:
    - name: Checkout Repository
      uses: actions/checkout@v3
    - name: Get Approvers
      id: get_approvers
      run: |
        # Keep the CODEOWNERS lines containing '@', strip '*' and spaces, turn
        # each '@' into ',' and drop the first ',' to get a comma-separated list.
        echo "approvers=$(grep @ .github/CODEOWNERS | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> "$GITHUB_OUTPUT"
    - name: Create Issue Body
      id: create_issue_body
      # All workflow values are handed over through `env` instead of being
      # expanded inline, so their contents are never parsed as shell syntax.
      env:
        WORKFLOW_NAME: ${{ github.workflow }}
        WORKFLOW_INITIATOR: ${{ github.actor }}
        DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
        DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
        DISPATCH_TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
        DISPATCH_EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
        DISPATCH_POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
        DRYRUN_OUTPUT: ${{ needs.model-auto-tracing.outputs.dryrun_output }}
      run: |
        # Unset optional inputs are shown as "Default" in the issue.
        issue_body="Please approve or deny opensearch-py-ml model uploading:
        ========= Workflow Details ==========
        - Workflow Name: ${WORKFLOW_NAME}
        - Workflow Initiator: @${WORKFLOW_INITIATOR}
        ========= Model Information =========
        - Model ID: ${DISPATCH_MODEL_ID}
        - Model Version: ${DISPATCH_MODEL_VERSION}
        - Tracing Format: ${DISPATCH_TRACING_FORMAT}
        - Embedding Dimension: ${DISPATCH_EMBEDDING_DIMENSION:-Default}
        - Pooling Mode: ${DISPATCH_POOLING_MODE:-Default}
        ===== Dry Run of Model Uploading =====
        ${DRYRUN_OUTPUT}"
        {
          echo "issue_body<<EOF"
          echo "${issue_body@E}"
          echo "EOF"
        } >> "$GITHUB_OUTPUT"
        echo "${issue_body@E}"
    - uses: trstringer/manual-approval@v1
      with:
        secret: ${{ github.TOKEN }}
        approvers: ${{ steps.get_approvers.outputs.approvers }}
        minimum-approvals: 1
        issue-title: "Upload Model to OpenSearch Model Hub (${{ github.event.inputs.model_id }})"
        issue-body: ${{ steps.create_issue_body.outputs.issue_body }}
        exclude-workflow-initiator-as-approver: false
# Step 5: Download the artifacts & Upload it to the S3 bucket
# Exposes the time of the sync as the job output `upload_time`.
model-uploading:
  needs: manual-approval
  runs-on: 'ubuntu-latest'
  permissions:
    id-token: write
    contents: read
  outputs:
    upload_time: ${{ steps.copying_to_bucket.outputs.upload_time }}
  steps:
    - name: Download Artifact
      # Same major version as the upload-artifact action that produced the
      # `upload` artifact earlier in this workflow.
      uses: actions/download-artifact@v3
      with:
        name: upload
        path: ./upload/
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
        role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
        role-session-name: model-uploading
    - name: Copy Files to the Bucket
      id: copying_to_bucket
      run: |
        aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/
        # The timestamp is recorded in the America/Los_Angeles time zone.
        echo "upload_time=$(TZ='America/Los_Angeles' date "+%Y-%m-%d %T")" >> "$GITHUB_OUTPUT"
# Step 6: Update MODEL_UPLOAD_HISTORY.md & supported_models.json
# Runs the history script with the dispatch inputs and commits the two
# regenerated files.
history-update:
  needs: model-uploading
  runs-on: 'ubuntu-latest'
  permissions:
    id-token: write
    contents: write
  # Runs of this job share one concurrency group per workflow.
  concurrency: ${{ github.workflow }}-concurrency
  steps:
    - name: Checkout
      # Same major version as the other checkout steps in this workflow.
      uses: actions/checkout@v3
    - name: Set Up Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.x'
    - name: Install Packages
      run: python -m pip install mdutils
    - name: Update MODEL_UPLOAD_HISTORY.md
      # Inputs are handed over through `env` instead of being expanded inline,
      # so their contents are never parsed as shell syntax.
      env:
        DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
        DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
        DISPATCH_TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
        DISPATCH_EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
        DISPATCH_POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
        UPLOADER: ${{ github.actor }}
        UPLOAD_TIME: ${{ needs.model-uploading.outputs.upload_time }}
      run: |
        # Only pass -ed / -pm when a value was supplied; an empty optional
        # input would otherwise leave a flag with no value on the command line.
        # NOTE(review): assumes the script treats an omitted flag the same as
        # an empty one - confirm against update_models_upload_history_md.py.
        optional_args=()
        if [[ -n "$DISPATCH_EMBEDDING_DIMENSION" ]]; then
          optional_args+=(-ed "$DISPATCH_EMBEDDING_DIMENSION")
        fi
        if [[ -n "$DISPATCH_POOLING_MODE" ]]; then
          optional_args+=(-pm "$DISPATCH_POOLING_MODE")
        fi
        python utils/model_uploader/update_models_upload_history_md.py \
          "$DISPATCH_MODEL_ID" \
          "$DISPATCH_MODEL_VERSION" \
          "$DISPATCH_TRACING_FORMAT" \
          "${optional_args[@]}" \
          -u "$UPLOADER" -t "$UPLOAD_TIME"
    - name: Commit Updates
      uses: stefanzweifel/git-auto-commit-action@v4
      id: commit
      with:
        commit_message: 'GitHub Actions Workflow - Update MODEL_UPLOAD_HISTORY.md (${{ github.event.inputs.model_id }})'
        commit_options: '--signoff'
        repository: ./utils/model_uploader/upload_history
        file_pattern: MODEL_UPLOAD_HISTORY.md supported_models.json