# Model Upload Workflow: Tracing-Uploading-Releasing #99
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Manually dispatched workflow: checks the model hub, traces the model, asks
# for approval, uploads the traced files to S3 and updates the history files.
name: Model Auto-tracing & Uploading
on:
  # Step 1: Initiate the workflow
  workflow_dispatch:
    inputs:
      # Required: identifies the model to trace.
      model_id:
        description: "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)"
        required: true
        type: string
      # Required: kept as a string so values such as 1.0.1 are not re-typed.
      model_version:
        description: "Model version number (e.g. 1.0.1)"
        required: true
        type: string
      # Required: which format(s) to trace; later steps compare against these
      # exact option strings.
      tracing_format:
        description: "Model format for auto-tracing (torch_script/onnx)"
        required: true
        type: choice
        options:
          - "BOTH"
          - "TORCH_SCRIPT"
          - "ONNX"
      # Optional override. `int` is not a valid workflow_dispatch input type
      # (allowed: string, choice, boolean, number, environment), so `number`
      # is used instead.
      embedding_dimension:
        description: "(Optional) Embedding Dimension (Specify here if it does not exist in original config.json file, or you want to overwrite it.)"
        required: false
        type: number
      # Optional override. The empty first option means "not specified".
      pooling_mode:
        description: "(Optional) Pooling Mode (Specify here if it does not exist in original config.json file or you want to overwrite it.)"
        required: false
        type: choice
        options:
          - ""
          - "CLS"
          - "MEAN"
          - "MAX"
          - "MEAN_SQRT_LEN"
jobs:
  # Step 2: Check if the model already exists in the model hub
  # Fails the workflow early when an object for the requested model/version
  # is already present in the opensearch-exp bucket.
  checking-out-model-hub:
    runs-on: 'ubuntu-latest'
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
      - name: Set Up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
          role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
          role-session-name: checking-out-model-hub
      - name: Check if TORCH_SCRIPT Model Exists
        if: github.event.inputs.tracing_format == 'TORCH_SCRIPT' || github.event.inputs.tracing_format == 'BOTH'
        # Inputs are handed to the script through environment variables
        # instead of being pasted into the shell source, so special
        # characters in them cannot alter the command.
        env:
          DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
          DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
        run: |
          TORCH_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
            "$DISPATCH_MODEL_ID" "$DISPATCH_MODEL_VERSION" TORCH_SCRIPT)
          # NOTE(review): any head-object failure (not only "not found") sets
          # the NOT_EXIST flag and lets the workflow continue.
          aws s3api head-object --bucket opensearch-exp --key "$TORCH_FILE_PATH" > /dev/null 2>&1 || TORCH_MODEL_NOT_EXIST=true
          if [[ -z $TORCH_MODEL_NOT_EXIST ]];
          then
            echo "TORCH_SCRIPT Model already exists on model hub."
            exit 1
          fi
      - name: Check if ONNX Model Exists
        if: github.event.inputs.tracing_format == 'ONNX' || github.event.inputs.tracing_format == 'BOTH'
        env:
          DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
          DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
        run: |
          ONNX_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
            "$DISPATCH_MODEL_ID" "$DISPATCH_MODEL_VERSION" ONNX)
          # NOTE(review): any head-object failure (not only "not found") sets
          # the NOT_EXIST flag and lets the workflow continue.
          aws s3api head-object --bucket opensearch-exp --key "$ONNX_FILE_PATH" > /dev/null 2>&1 || ONNX_MODEL_NOT_EXIST=true
          if [[ -z $ONNX_MODEL_NOT_EXIST ]];
          then
            # Message previously said TORCH_SCRIPT here (copy-paste error).
            echo "ONNX Model already exists on model hub."
            exit 1
          fi
# Step 3: Trace the model, Verify the embeddings & Upload the model files as artifacts
  # Runs ./.ci/run-tests in "trace" mode, publishes ./upload/ as the "upload"
  # artifact and exposes an `aws s3 sync --dryrun` listing as a job output.
  model-auto-tracing:
    needs: checking-out-model-hub
    name: model-auto-tracing
    runs-on: ubuntu-latest
    permissions:
      id-token: write
      contents: read
    strategy:
      matrix:
        cluster: ["opensearch"]
        secured: ["true"]
        entry:
          - opensearch_version: "2.7.0"
    outputs:
      dryrun_output: ${{ steps.dryrun_model_uploading.outputs.dryrun_output }}
    steps:
      - name: Checkout
        # Aligned with the checkout version used by the other jobs.
        uses: actions/checkout@v3
      - name: Export Arguments
        # Inputs reach the shell as environment variables rather than being
        # pasted into the script text; they are then re-exported under the
        # same names as before for the following steps.
        env:
          DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
          DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
          DISPATCH_TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
          DISPATCH_EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
          DISPATCH_POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
        run: |
          {
            echo "MODEL_ID=$DISPATCH_MODEL_ID"
            echo "MODEL_VERSION=$DISPATCH_MODEL_VERSION"
            echo "TRACING_FORMAT=$DISPATCH_TRACING_FORMAT"
            echo "EMBEDDING_DIMENSION=$DISPATCH_EMBEDDING_DIMENSION"
            echo "POOLING_MODE=$DISPATCH_POOLING_MODE"
          } >> "$GITHUB_ENV"
      - name: Autotracing ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
        run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} trace"
      - name: Upload Artifact
        uses: actions/upload-artifact@v3
        with:
          name: upload
          path: ./upload/
          retention-days: 5
          # Fail the job if tracing produced nothing to upload.
          if-no-files-found: error
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
          role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
          role-session-name: model-auto-tracing
      - name: Dryrun model uploading
        id: dryrun_model_uploading
        run: |
          # The dry run is executed once; its captured output is both stored
          # as a multi-line step output and echoed to the log.
          dryrun_output=$(aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/ --dryrun)
          echo "dryrun_output<<EOF" >> "$GITHUB_OUTPUT"
          echo "${dryrun_output@E}" >> "$GITHUB_OUTPUT"
          echo "EOF" >> "$GITHUB_OUTPUT"
          echo "${dryrun_output@E}"
# Step 4: Ask for manual approval from the CODEOWNERS
  # Opens an approval issue (via trstringer/manual-approval) describing the
  # requested upload and waits for one approver listed in CODEOWNERS.
  manual-approval:
    needs: model-auto-tracing
    runs-on: 'ubuntu-latest'
    permissions:
      issues: write
      # Listing any permission sets the unlisted ones to none; checkout
      # needs read access to the repository contents.
      contents: read
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v3
      - name: Get Approvers
        id: get_approvers
        run: |
          # Takes CODEOWNERS lines containing '@', strips '*' and spaces,
          # turns each '@' into ',' and drops the first comma.
          echo "approvers=$(cat .github/CODEOWNERS | grep @ | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> "$GITHUB_OUTPUT"
      - name: Create Issue Body
        id: create_issue_body
        # User-supplied inputs and the dry-run listing are passed as
        # environment variables so quotes, '$' or backticks inside them
        # cannot break or alter the shell script.
        env:
          MODEL_ID: ${{ github.event.inputs.model_id }}
          MODEL_VERSION: ${{ github.event.inputs.model_version }}
          TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
          EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
          POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
          DRYRUN_OUTPUT: ${{ needs.model-auto-tracing.outputs.dryrun_output }}
        run: |
          issue_body="Please approve or deny opensearch-py-ml model uploading:
          ========= Workflow Details ==========
          - Workflow Name: ${{ github.workflow }}
          - Workflow Initiator: @${{ github.actor }}
          ========= Model Information =========
          - Model ID: ${MODEL_ID}
          - Model Version: ${MODEL_VERSION}
          - Tracing Format: ${TRACING_FORMAT}
          - Embedding Dimension: ${EMBEDDING_DIMENSION:-Default}
          - Pooling Mode: ${POOLING_MODE:-Default}
          ===== Dry Run of Model Uploading =====
          ${DRYRUN_OUTPUT}"
          echo "issue_body<<EOF" >> "$GITHUB_OUTPUT"
          echo "${issue_body@E}" >> "$GITHUB_OUTPUT"
          echo "EOF" >> "$GITHUB_OUTPUT"
          echo "${issue_body@E}"
      - uses: trstringer/manual-approval@v1
        with:
          secret: ${{ github.TOKEN }}
          approvers: ${{ steps.get_approvers.outputs.approvers }}
          minimum-approvals: 1
          issue-title: "Upload Model to OpenSearch Model Hub (${{ github.event.inputs.model_id }})"
          issue-body: ${{ steps.create_issue_body.outputs.issue_body }}
          exclude-workflow-initiator-as-approver: false
# Step 5: Download the artifacts & Upload it to the S3 bucket
  # Syncs the "upload" artifact into the S3 bucket and exposes the upload
  # timestamp as a job output.
  model-uploading:
    needs: manual-approval
    runs-on: 'ubuntu-latest'
    permissions:
      id-token: write
      contents: read
    outputs:
      upload_time: ${{ steps.copying_to_bucket.outputs.upload_time }}
    steps:
      - name: Download Artifact
        # Same major version as the actions/upload-artifact@v3 step that
        # produces the "upload" artifact.
        uses: actions/download-artifact@v3
        with:
          name: upload
          path: ./upload/
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
          role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
          role-session-name: model-uploading
      - name: Copy Files to the Bucket
        id: copying_to_bucket
        run: |
          aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/
          # Timestamp is recorded in the America/Los_Angeles time zone.
          echo "upload_time=$(TZ='America/Los_Angeles' date "+%Y-%m-%d %T")" >> "$GITHUB_OUTPUT"
# Step 6: Update MODEL_UPLOAD_HISTORY.md & supported_models.json
  # Runs the history-update script and commits the resulting files with
  # git-auto-commit-action.
  history-update:
    needs: model-uploading
    runs-on: 'ubuntu-latest'
    permissions:
      id-token: write
      # Write access is needed because this job commits to the repository.
      contents: write
    # One shared concurrency group per workflow name for this job.
    concurrency: ${{ github.workflow }}-concurrency
    steps:
      - name: Checkout
        # Aligned with the checkout version used by the other jobs.
        uses: actions/checkout@v3
      - name: Set Up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install Packages
        run: python -m pip install mdutils
      - name: Update MODEL_UPLOAD_HISTORY.md
        # Values are passed through environment variables instead of being
        # pasted into the shell source, so special characters in them cannot
        # alter the command line.
        env:
          DISPATCH_MODEL_ID: ${{ github.event.inputs.model_id }}
          DISPATCH_MODEL_VERSION: ${{ github.event.inputs.model_version }}
          DISPATCH_TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
          DISPATCH_EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
          DISPATCH_POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
          UPLOAD_ACTOR: ${{ github.actor }}
          UPLOAD_TIME: ${{ needs.model-uploading.outputs.upload_time }}
        run: |
          # ${VAR:+"$VAR"} expands to nothing when the optional input is
          # empty, so -ed / -pm are then passed without a value exactly as
          # before; a non-empty value is passed as one quoted argument.
          python utils/model_uploader/update_models_upload_history_md.py \
            "$DISPATCH_MODEL_ID" \
            "$DISPATCH_MODEL_VERSION" \
            "$DISPATCH_TRACING_FORMAT" \
            -ed ${DISPATCH_EMBEDDING_DIMENSION:+"$DISPATCH_EMBEDDING_DIMENSION"} \
            -pm ${DISPATCH_POOLING_MODE:+"$DISPATCH_POOLING_MODE"} \
            -u "$UPLOAD_ACTOR" -t "$UPLOAD_TIME"
      - name: Commit Updates
        uses: stefanzweifel/git-auto-commit-action@v4
        id: commit
        with:
          commit_message: 'GitHub Actions Workflow - Update MODEL_UPLOAD_HISTORY.md (${{ github.event.inputs.model_id }})'
          commit_options: '--signoff'
          repository: ./utils/model_uploader/upload_history
          file_pattern: MODEL_UPLOAD_HISTORY.md supported_models.json