Skip to content

Model Upload Workflow: Tracing-Uploading-Releasing #347

Model Upload Workflow: Tracing-Uploading-Releasing

Model Upload Workflow: Tracing-Uploading-Releasing #347

name: Model Auto-tracing & Uploading

# This workflow is started manually; every parameter of a run is supplied
# through the workflow_dispatch inputs declared below.
on:
  # Step 1: Initiate the workflow
  workflow_dispatch:
    inputs:
      model_source:
        description: "Model source (e.g. huggingface)"
        required: true
        type: string
        default: "huggingface"
      model_id:
        description: "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)"
        required: true
        type: string
      model_version:
        description: "Model version number (e.g. 1.0.1)"
        required: true
        type: string
      tracing_format:
        description: "Model format for auto-tracing (torch_script/onnx)"
        required: true
        type: choice
        options:
          - "BOTH"
          - "TORCH_SCRIPT"
          - "ONNX"
      embedding_dimension:
        description: "(Optional) Embedding Dimension (Specify here if it does not exist in original config.json file, or you want to overwrite it.)"
        required: false
        # `int` is not a documented workflow_dispatch input type; the
        # supported types are string, choice, boolean, number and environment.
        type: number
      pooling_mode:
        description: "(Optional) Pooling Mode (Specify here if it does not exist in original config.json file or you want to overwrite it.)"
        required: false
        type: choice
        # The leading empty option lets the caller leave the pooling mode unset.
        options:
          - ""
          - "CLS"
          - "MEAN"
          - "MAX"
          - "MEAN_SQRT_LEN"
      model_description:
        description: "(Optional) Description (Specify here if you want to overwrite the default model description)"
        required: false
        type: string
jobs:
  # Step 2: Initiate workflow variable
  # Derives the folder paths, the human-readable run summary and the two
  # license status lines, and exposes them as job outputs for later jobs.
  init-workflow-var:
    runs-on: 'ubuntu-latest'
    # User-supplied inputs are handed to the scripts as environment variables
    # instead of being expanded by `${{ }}` inside the script text. Expanding
    # them inline lets an input containing quotes, spaces or `$(...)` alter or
    # break the shell commands.
    env:
      MODEL_SOURCE: ${{ github.event.inputs.model_source }}
      MODEL_ID: ${{ github.event.inputs.model_id }}
      MODEL_VERSION: ${{ github.event.inputs.model_version }}
      TRACING_FORMAT: ${{ github.event.inputs.tracing_format }}
      EMBEDDING_DIMENSION: ${{ github.event.inputs.embedding_dimension }}
      POOLING_MODE: ${{ github.event.inputs.pooling_mode }}
      MODEL_DESCRIPTION: ${{ github.event.inputs.model_description }}
    steps:
      # NOTE(review): the disabled guard below compares with `==`, so it would
      # fail when the run IS on the default branch; it probably needs `!=`
      # before being re-enabled.
      # - name: Fail if branch is not main
      #   if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch)
      #   run: |
      #     echo "This workflow should only be triggered on a default branch"
      #     exit 1
      - name: Initiate folders
        id: init_folders
        run: |
          echo "model_folder=ml-models/${MODEL_SOURCE}/${MODEL_ID}" >> "$GITHUB_OUTPUT"
          # ${MODEL_ID%%/*} keeps only the part of the ID before the first "/".
          echo "sentence_transformer_folder=ml-models/${MODEL_SOURCE}/${MODEL_ID%%/*}/" >> "$GITHUB_OUTPUT"
      - name: Initiate workflow_info
        id: init_workflow_info
        run: |
          # Optional inputs fall back to the literal text "Default" when empty.
          workflow_info="
          ========= Workflow Details ==========
          - Workflow Name: ${{ github.workflow }}
          - Workflow Run ID: ${{ github.run_id }}
          - Workflow Initiator: @${{ github.actor }}
          ========= Model Information =========
          - Model ID: ${MODEL_ID}
          - Model Version: ${MODEL_VERSION}
          - Tracing Format: ${TRACING_FORMAT}
          - Embedding Dimension: ${EMBEDDING_DIMENSION:-Default}
          - Pooling Mode: ${POOLING_MODE:-Default}
          - Model Description: ${MODEL_DESCRIPTION:-Default}
          ========= Test Information ==========
          - Embedding Verification: Passed"
          # A randomised delimiter stops an input line that happens to equal
          # the delimiter from terminating the multi-line output early.
          delimiter="EOF_${RANDOM}${RANDOM}"
          {
            echo "workflow_info<<${delimiter}"
            echo "${workflow_info@E}"
            echo "${delimiter}"
          } >> "$GITHUB_OUTPUT"
          # Also print the summary to the step log.
          echo "${workflow_info@E}"
      - name: Initiate license_line
        id: init_license_line
        run: |
          echo "verified=:white_check_mark: — It is verified that this model is licensed under Apache 2.0" >> "$GITHUB_OUTPUT"
          echo "unverified=- [ ] :warning: The license cannot be verified. Please confirm by yourself that the model is licensed under Apache 2.0 :warning:" >> "$GITHUB_OUTPUT"
    outputs:
      model_folder: ${{ steps.init_folders.outputs.model_folder }}
      sentence_transformer_folder: ${{ steps.init_folders.outputs.sentence_transformer_folder }}
      workflow_info: ${{ steps.init_workflow_info.outputs.workflow_info }}
      verified_license_line: ${{ steps.init_license_line.outputs.verified }}
      unverified_license_line: ${{ steps.init_license_line.outputs.unverified }}
# # Step 3: Check if the model already exists in the model hub
# checking-out-model-hub:
# needs: init-workflow-var
# runs-on: 'ubuntu-latest'
# permissions:
# id-token: write
# contents: read
# environment: opensearch-py-ml-cicd-env
# steps:
# - name: Checkout Repository
# uses: actions/checkout@v3
# - name: Set Up Python
# uses: actions/setup-python@v2
# with:
# python-version: '3.x'
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.PERSONAL_MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.PERSONAL_MODEL_UPLOADER_ROLE }}
# role-session-name: checking-out-model-hub
# - name: Check if TORCH_SCRIPT Model Exists
# if: github.event.inputs.tracing_format == 'TORCH_SCRIPT' || github.event.inputs.tracing_format == 'BOTH'
# run: |
# TORCH_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
# ${{ needs.init-workflow-var.outputs.sentence_transformer_folder }} ${{ github.event.inputs.model_id }} \
# ${{ github.event.inputs.model_version }} TORCH_SCRIPT)
# aws s3api head-object --bucket ${{ secrets.PERSONAL_MODEL_BUCKET }} --key $TORCH_FILE_PATH > /dev/null 2>&1 || TORCH_MODEL_NOT_EXIST=true
# if [[ -z $TORCH_MODEL_NOT_EXIST ]]
# then
# echo "TORCH_SCRIPT Model already exists on model hub."
# exit 1
# fi
# - name: Check if ONNX Model Exists
# if: github.event.inputs.tracing_format == 'ONNX' || github.event.inputs.tracing_format == 'BOTH'
# run: |
# ONNX_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
# ${{ needs.init-workflow-var.outputs.sentence_transformer_folder }} ${{ github.event.inputs.model_id }} \
# ${{ github.event.inputs.model_version }} ONNX)
# aws s3api head-object --bucket ${{ secrets.PERSONAL_MODEL_BUCKET }} --key $ONNX_FILE_PATH > /dev/null 2>&1 || ONNX_MODEL_NOT_EXIST=true
# if [[ -z $ONNX_MODEL_NOT_EXIST ]]
# then
# echo "TORCH_SCRIPT Model already exists on model hub."
# exit 1
# fi
# # Step 4: Trace the model, Verify the embeddings & Upload the model files as artifacts
# model-auto-tracing:
# needs: [init-workflow-var, checking-out-model-hub]
# name: model-auto-tracing
# runs-on: ubuntu-latest
# permissions:
# id-token: write
# contents: read
# environment: opensearch-py-ml-cicd-env
# strategy:
# matrix:
# cluster: ["opensearch"]
# secured: ["true"]
# entry:
# - { opensearch_version: 2.7.0 }
# steps:
# - name: Checkout
# uses: actions/checkout@v3
# - name: Export Arguments
# run: |
# echo "MODEL_ID=${{ github.event.inputs.model_id }}" >> $GITHUB_ENV
# echo "MODEL_VERSION=${{ github.event.inputs.model_version }}" >> $GITHUB_ENV
# echo "TRACING_FORMAT=${{ github.event.inputs.tracing_format }}" >> $GITHUB_ENV
# echo "EMBEDDING_DIMENSION=${{ github.event.inputs.embedding_dimension }}" >> $GITHUB_ENV
# echo "POOLING_MODE=${{ github.event.inputs.pooling_mode }}" >> $GITHUB_ENV
# echo "MODEL_DESCRIPTION=${{ github.event.inputs.model_description }}" >> $GITHUB_ENV
# - name: Autotracing ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
# run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} trace"
# - name: License verification
# id: license_verification
# continue-on-error: true
# run: |
# apache_verified=$(<trace_output/apache_verified.txt)
# if [[ $apache_verified == "True" ]]
# then
# echo "license_line=${{ needs.init-workflow-var.outputs.verified_license_line }}" >> $GITHUB_OUTPUT
# else
# echo "license_line=${{ needs.init-workflow-var.outputs.unverified_license_line }}" >> $GITHUB_OUTPUT
# fi
# - name: Handle if license_verification fails
# if: steps.license_verification.outcome == 'failure'
# run: |
# echo "license_line=${{ needs.init-workflow-var.outputs.unverified_license_line }}" >> $GITHUB_OUTPUT
# - name: Upload Artifact
# uses: actions/upload-artifact@v3
# with:
# name: upload
# path: ./upload/
# retention-days: 5
# if-no-files-found: error
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.PERSONAL_MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.PERSONAL_MODEL_UPLOADER_ROLE }}
# role-session-name: model-auto-tracing
# - name: Dryrun model uploading
# id: dryrun_model_uploading
# run: |
# dryrun_output=$(aws s3 sync ./upload/ s3://${{ secrets.PERSONAL_MODEL_BUCKET }}/${{ needs.init-workflow-var.outputs.sentence_transformer_folder }} --dryrun \
# | sed 's|s3://${{ secrets.PERSONAL_MODEL_BUCKET }}/|s3://_MODEL_BUCKET_/|'
# )
# echo "dryrun_output<<EOF" >> $GITHUB_OUTPUT
# echo "${dryrun_output@E}" >> $GITHUB_OUTPUT
# echo "EOF" >> $GITHUB_OUTPUT
# echo "${dryrun_output@E}"
# outputs:
# license_line: ${{ steps.license_verification.outputs.license_line }}
# dryrun_output: ${{ steps.dryrun_model_uploading.outputs.dryrun_output }}
# # Step 5: Ask for manual approval from the CODEOWNERS
# manual-approval:
# needs: [init-workflow-var, model-auto-tracing]
# runs-on: 'ubuntu-latest'
# permissions:
# issues: write
# steps:
# - name: Checkout Repository
# uses: actions/checkout@v3
# - name: Get Approvers
# id: get_approvers
# run: |
# echo "approvers=$(cat .github/CODEOWNERS | grep @ | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> $GITHUB_OUTPUT
# - name: Create Issue Body
# id: create_issue_body
# run: |
# issue_body="Please approve or deny opensearch-py-ml model uploading:
# ${{ needs.model-auto-tracing.outputs.license_line }}
# ${{ needs.init-workflow-var.outputs.workflow_info }}
# ===== Dry Run of Model Uploading =====
# ${{ needs.model-auto-tracing.outputs.dryrun_output }}"
# echo "issue_body<<EOF" >> $GITHUB_OUTPUT
# echo "${issue_body@E}" >> $GITHUB_OUTPUT
# echo "EOF" >> $GITHUB_OUTPUT
# echo "${issue_body@E}"
# - uses: trstringer/manual-approval@v1
# with:
# secret: ${{ github.TOKEN }}
# approvers: ${{ steps.get_approvers.outputs.approvers }}
# minimum-approvals: 1
# issue-title: "Upload Model to OpenSearch Model Hub (${{ github.event.inputs.model_id }})"
# issue-body: ${{ steps.create_issue_body.outputs.issue_body }}
# exclude-workflow-initiator-as-approver: false
# # Step 6: Download the artifacts & Upload it to the S3 bucket
# model-uploading:
# needs: [init-workflow-var, manual-approval]
# runs-on: 'ubuntu-latest'
# permissions:
# id-token: write
# contents: read
# environment: opensearch-py-ml-cicd-env
# steps:
# - name: Download Artifact
# uses: actions/download-artifact@v2
# with:
# name: upload
# path: ./upload/
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.PERSONAL_MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.PERSONAL_MODEL_UPLOADER_ROLE }}
# role-session-name: model-uploading
# - name: Copy Files to the Bucket
# id: copying_to_bucket
# run: |
# aws s3 sync ./upload/ s3://${{ secrets.PERSONAL_MODEL_BUCKET }}/${{ needs.init-workflow-var.outputs.sentence_transformer_folder }}
# echo "upload_time=$(TZ='America/Los_Angeles' date "+%Y-%m-%d %T")" >> $GITHUB_OUTPUT
# outputs:
# upload_time: ${{ steps.copying_to_bucket.outputs.upload_time }}
# # Step 7: Update MODEL_UPLOAD_HISTORY.md & supported_models.json
# history-update:
# needs: [init-workflow-var, model-uploading]
# runs-on: 'ubuntu-latest'
# permissions:
# id-token: write
# contents: write
# pull-requests: write
# env:
# model_info: ${{ github.event.inputs.model_id }} (v.${{ github.event.inputs.model_version }})(${{ github.event.inputs.tracing_format }})
# steps:
# - name: Checkout Repository
# uses: actions/checkout@v3
# - name: Set Up Python
# uses: actions/setup-python@v2
# with:
# python-version: '3.x'
# - name: Install Packages
# run:
# python -m pip install mdutils
# - name: Update Model Upload History
# run: |
# model_description="${{ github.event.inputs.model_description }}"
# python utils/model_uploader/update_models_upload_history_md.py \
# ${{ github.event.inputs.model_id }} \
# ${{ github.event.inputs.model_version }} \
# ${{ github.event.inputs.tracing_format }} \
# -ed ${{ github.event.inputs.embedding_dimension }} \
# -pm ${{ github.event.inputs.pooling_mode }} \
# -md ${model_description:+"$model_description"} \
# -u ${{ github.actor }} -t "${{ needs.model-uploading.outputs.upload_time }}"
# - name: Create PR Body
# id: create_pr_body
# run: |
# pr_body="
# - [ ] This PR made commit to only these three files: MODEL_UPLOAD_HISTORY.md, supported_models.json, and CHANGELOG.md.
# - [ ] CHANGELOG.md has been updated by the workflow or by you if the workflow fails to do so.
# - [ ] Merge conflicts have been resolved.
# ${{ needs.init-workflow-var.outputs.workflow_info }}"
# echo "pr_body<<EOF" >> $GITHUB_OUTPUT
# echo "${pr_body@E}" >> $GITHUB_OUTPUT
# echo "EOF" >> $GITHUB_OUTPUT
# echo "${pr_body@E}"
# - name: Create a Branch & Raise a PR
# uses: peter-evans/create-pull-request@v5
# id: create_pr
# with:
# committer: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
# commit-message: 'GitHub Actions Workflow: Update Model Upload History - ${{ env.model_info }}'
# signoff: true
# title: 'Update Model Upload History - ${{ env.model_info }}'
# body: ${{ steps.create_pr_body.outputs.pr_body }}
# labels: ModelUploading
# branch: model-uploader/${{ github.run_id }}
# delete-branch: true
# add-paths: |
# ./utils/model_uploader/upload_history/MODEL_UPLOAD_HISTORY.md
# ./utils/model_uploader/upload_history/supported_models.json
# - name: Checkout Repository
# uses: actions/checkout@v3
# with:
# ref: model-uploader/${{ github.run_id }}
# - name: Create a line for updating CHANGELOG.md
# id: create_changelog_line
# continue-on-error: true
# run: |
# pr_ref="([#${{ steps.create_pr.outputs.pull-request-number }}](${{ steps.create_pr.outputs.pull-request-url }}))"
# changelog_line="Update model upload history - ${{ env.model_info }} by @${{ github.actor }} $pr_ref"
# echo "changelog_line=$changelog_line" >> $GITHUB_OUTPUT
# - name: Warning Comment on PR if create_changelog_line fails
# if: steps.create_changelog_line.outcome == 'failure'
# uses: thollander/actions-comment-pull-request@v2
# with:
# pr_number: ${{ steps.create_pr.outputs.pull-request-number }}
# message: |
# Warning:exclamation:: The workflow failed to update CHANGELOG.md. Please update CHANGELOG.md manually.
# - name: Update CHANGELOG.md
# if: steps.create_changelog_line.outcome == 'success'
# id: update_changelog
# continue-on-error: true
# run: |
# python utils/model_uploader/update_changelog_md.py "${{ steps.create_changelog_line.outputs.changelog_line }}"
# - name: Commit Updates
# if: steps.create_changelog_line.outcome == 'success' && steps.update_changelog.outcome == 'success'
# uses: stefanzweifel/git-auto-commit-action@v4
# id: commit
# with:
# branch: model-uploader/${{ github.run_id }}
# commit_user_email: "github-actions[bot]@users.noreply.github.com"
# commit_message: 'GitHub Actions Workflow: Update CHANGELOG.md - ${{ env.model_info }}'
# commit_options: '--signoff'
# file_pattern: CHANGELOG.md
# - name: Warning Comment on PR if update_changelog fails
# if: steps.create_changelog_line.outcome == 'success' && steps.update_changelog.outcome == 'failure'
# uses: thollander/actions-comment-pull-request@v2
# with:
# pr_number: ${{ steps.create_pr.outputs.pull-request-number }}
# message: |
# Warning:exclamation:: The workflow failed to update CHANGELOG.md. Please add the following line manually.
# ${{ steps.create_changelog_line.outputs.changelog_line }}
# Step 8: Trigger Jenkins ml-models workflow
  # POSTs the model folder, version and tracing format as a JSON body to the
  # Jenkins generic-webhook-trigger endpoint.
  trigger-model-release-workflow:
    needs: init-workflow-var  # [init-workflow-var, history-update]
    runs-on: 'ubuntu-latest'
    steps:
      - name: Trigger Jenkins Workflow with Generic Webhook
        # Secrets and inputs reach the script as environment variables so
        # their contents are never parsed as shell syntax.
        env:
          JENKINS_URL: ${{ secrets.JENKINS_URL }}  # e.g. "https://build.ci.opensearch.org"
          JENKINS_TRIGGER_TOKEN: ${{ secrets.JENKINS_ML_MODELS_RELEASE_GENERIC_WEBHOOK_TOKEN }}
          BASE_DOWNLOAD_PATH: ${{ needs.init-workflow-var.outputs.model_folder }}
          VERSION: ${{ github.event.inputs.model_version }}
          FORMAT: ${{ github.event.inputs.tracing_format }}
        run: |
          set -euo pipefail
          # Build the payload with jq so every value is JSON-escaped.
          # (jq is expected to be available on GitHub-hosted Ubuntu runners.)
          # Alternative payload shapes that were tried earlier:
          #   {"BASE_DOWNLOAD_PATH": ..., "VERSION": ...}
          #   {"parameter": [{"name": "BASE_DOWNLOAD_PATH", "value": ...},
          #                  {"name": "VERSION", "value": ...},
          #                  {"name": "FORMAT", "value": ...}]}
          #   (sent with --data-urlencode json=<payload> instead of --data)
          JENKINS_PARAMS=$(jq -cn \
            --arg base_download_path "$BASE_DOWNLOAD_PATH" \
            --arg version "$VERSION" \
            --arg format "$FORMAT" \
            '{BASE_DOWNLOAD_PATH: $base_download_path, VERSION: $version, FORMAT: $format}')
          # TODO: Set up JENKINS_TRIGGER_TOKEN
          # --fail-with-body makes curl exit non-zero on an HTTP error while
          # still returning the response body, so a rejected trigger fails
          # this step instead of passing silently.
          if ! JENKINS_REQ=$(curl -sS --fail-with-body -XPOST \
            -H "Authorization: Bearer $JENKINS_TRIGGER_TOKEN" \
            -H "Content-Type: application/json" \
            "$JENKINS_URL/generic-webhook-trigger/invoke" \
            --data "$JENKINS_PARAMS"); then
            echo "$JENKINS_REQ"
            echo "::error::Jenkins generic webhook trigger request failed"
            exit 1
          fi
          echo "$JENKINS_REQ"