Skip to content

Model Upload Workflow: Tracing-Uploading-Releasing #34

Model Upload Workflow: Tracing-Uploading-Releasing

Model Upload Workflow: Tracing-Uploading-Releasing #34

name: Model Auto-tracing & Uploading
# Manually triggered only: the person starting the run supplies every input
# below through the "Run workflow" form.
on:
  workflow_dispatch:
    inputs:
      # Identifier of the model to trace and upload.
      model_id:
        description: "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)"
        required: true
        type: string
      # Version of the model artifact; kept as a string so values such as
      # "1.0.1" are passed through unchanged.
      model_version:
        description: "Model version number (e.g. 1.0.1)"
        required: true
        type: string
      # Which format(s) to trace. No default is declared here, so the first
      # option ("BOTH") is the one pre-selected in the form.
      tracing_format:
        description: "Model format for auto-tracing (torch_script/onnx)"
        required: true
        type: choice
        options:
          - "BOTH"
          - "TORCH_SCRIPT"
          - "ONNX"
      # Options are the strings "False"/"True" (not YAML booleans); the jobs
      # compare against these exact spellings.
      allow_overwrite:
        description: "Allow overwrites on model hub"
        required: true
        type: choice
        default: "False"
        options:
          - "False"
          - "True"
      # Optional. `number` is the documented input type for numeric values
      # (`int` is not one of boolean/choice/number/environment/string).
      embedding_dimension:
        description: "(Optional) Embedding Dimension (Specify here if it does not exist in original config.json file. Else, it will use 768)"
        required: false
        type: number
      # Optional. The empty first option lets the caller leave the pooling
      # mode unspecified.
      pooling_mode:
        description: "(Optional) Pooling Mode (Specify here if it does not exist in original config.json file)"
        required: false
        type: choice
        options:
          - ""
          - "CLS"
          - "MEAN"
          - "MAX"
          - "MEAN_SQRT_LEN"
# Pipeline: check the model hub -> trace the model -> wait for a manual
# approval -> upload the traced artifacts to S3.
jobs:
  # Job 1: look up the requested model file(s) in the S3 bucket. Fails when a
  # file already exists and allow_overwrite is "False"; otherwise reports
  # whether the upload is going to overwrite an existing file.
  checking-out-model-hub:
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: read
    # Inputs are handed to the shell as environment variables instead of being
    # interpolated into the script text, so their content is never parsed as
    # shell code.
    env:
      MODEL_ID: ${{ github.event.inputs.model_id }}
      MODEL_VERSION: ${{ github.event.inputs.model_version }}
      ALLOW_OVERWRITE: ${{ github.event.inputs.allow_overwrite }}
    outputs:
      # 'true' when at least one of the check steps found an existing file and
      # overwriting is allowed; 'false' otherwise (a skipped step has no
      # outputs, which compares unequal to 'true'). The output name keeps its
      # historical spelling.
      overwritting: ${{ steps.check_torch_script.outputs.overwriting == 'true' || steps.check_onnx.outputs.overwriting == 'true' }}
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
      - name: Set Up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
          role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
          role-session-name: GitHubActions
      - name: Check if TORCH_SCRIPT Model Exists
        id: check_torch_script
        # One expression, evaluated as a whole, on the tracing_format input.
        if: ${{ github.event.inputs.tracing_format == 'TORCH_SCRIPT' || github.event.inputs.tracing_format == 'BOTH' }}
        run: |
          TORCH_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py "$MODEL_ID" "$MODEL_VERSION" TORCH_SCRIPT)
          # NOTE(review): any head-object failure (not only "not found") is
          # treated as "model does not exist", as before.
          if aws s3api head-object --bucket opensearch-exp --key "$TORCH_FILE_PATH"; then
            if [[ "$ALLOW_OVERWRITE" == "True" ]]; then
              echo "overwriting=true" >> "$GITHUB_OUTPUT"
            else
              echo "TORCH_SCRIPT Model already exists on model hub. (allow_overwrite=false)"
              exit 1
            fi
          fi
      - name: Check if ONNX Model Exists
        id: check_onnx
        if: ${{ github.event.inputs.tracing_format == 'ONNX' || github.event.inputs.tracing_format == 'BOTH' }}
        run: |
          ONNX_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py "$MODEL_ID" "$MODEL_VERSION" ONNX)
          # NOTE(review): any head-object failure (not only "not found") is
          # treated as "model does not exist", as before.
          if aws s3api head-object --bucket opensearch-exp --key "$ONNX_FILE_PATH"; then
            if [[ "$ALLOW_OVERWRITE" == "True" ]]; then
              echo "overwriting=true" >> "$GITHUB_OUTPUT"
            else
              echo "ONNX Model already exists on model hub. (allow_overwrite=false)"
              exit 1
            fi
          fi

  # Job 2: run the tracing entry point of ./.ci/run-tests and keep the
  # contents of ./upload/ as a workflow artifact named "upload".
  model-auto-tracing:
    needs: checking-out-model-hub
    name: model-auto-tracing
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        cluster: ["opensearch"]
        secured: ["true"]
        entry:
          - { opensearch_version: "2.7.0" }
    # Workflow inputs exposed to every step of this job as environment
    # variables (same variable names the job exported before).
    env:
      MODEL_ID: ${{ github.event.inputs.model_id }}
      MODEL_VERSION: ${{ github.event.inputs.model_version }}
      TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
      ALLOW_OVERWRITE: ${{ github.event.inputs.allow_overwrite }}
      EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
      POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Autotracing ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
        run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} trace"
      - name: Upload Artifact
        uses: actions/upload-artifact@v3
        with:
          name: upload
          path: ./upload/
          retention-days: 5

  # Job 3: open an approval issue and block until an approver responds.
  manual-approval:
    needs: [checking-out-model-hub, model-auto-tracing]
    runs-on: ubuntu-latest
    permissions:
      issues: write
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
      - id: get_approvers
        # Builds a comma-separated approver list from CODEOWNERS: keep lines
        # containing "@", drop "*" and spaces, turn each "@" into ",", then
        # remove the leading comma.
        run: |
          echo "approvers=$(grep @ .github/CODEOWNERS | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> "$GITHUB_OUTPUT"
      - uses: trstringer/manual-approval@v1
        with:
          secret: ${{ github.TOKEN }}
          approvers: ${{ steps.get_approvers.outputs.approvers }}
          minimum-approvals: 1
          issue-title: "Upload opensearch-py-ml model to Amazon S3 Bucket (${{ github.event.inputs.model_id }})"
          # Literal block scalar so each "- item" stays on its own line and
          # renders as a Markdown list in the issue.
          issue-body: |
            Please approve or deny opensearch-py-ml model uploading

            - Model ID: ${{ github.event.inputs.model_id }}
            - Model Version: ${{ github.event.inputs.model_version }}
            - Tracing Format: ${{ github.event.inputs.tracing_format }}
            - Allow Overwrite: ${{ github.event.inputs.allow_overwrite }}
            - Overwriting: ${{ needs.checking-out-model-hub.outputs.overwritting }}
            - Embedding Dimension: ${{ github.event.inputs.embedding_dimension }}
            - Pooling Mode: ${{ github.event.inputs.pooling_mode }}
          exclude-workflow-initiator-as-approver: false

  # Job 4: after approval, fetch the "upload" artifact and sync it to S3.
  model-uploading:
    needs: manual-approval
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Download artifact
        # Same major version as the upload-artifact step that produced it.
        uses: actions/download-artifact@v3
        with:
          name: upload
          path: ./upload/
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
          role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
          role-session-name: GitHubActions
      - name: Copy files to the bucket
        run: |
          aws s3 sync ./upload/ s3://opensearch-exp/sentence-transformers/