[Testing GitHub Environment] Automating PR #196

Closed
38 changes: 31 additions & 7 deletions .ci/run-repository.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash

# Called by entry point `run-test` use this script to add your repository specific test commands
# Called by entry point `run-test` use this script to add your repository specific task commands
# Once called opensearch is up and running and the following parameters are available to this script

# OPENSEARCH_VERSION -- version e.g Major.Minor.Patch(-Prelease)
@@ -16,7 +16,7 @@ set -e
echo -e "\033[34;1mINFO:\033[0m URL ${opensearch_url}\033[0m"
echo -e "\033[34;1mINFO:\033[0m EXTERNAL OS URL ${external_opensearch_url}\033[0m"
echo -e "\033[34;1mINFO:\033[0m VERSION ${OPENSEARCH_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m IS_DOC: ${IS_DOC}\033[0m"
echo -e "\033[34;1mINFO:\033[0m TASK_TYPE: ${TASK_TYPE}\033[0m"
echo -e "\033[34;1mINFO:\033[0m TEST_SUITE ${TEST_SUITE}\033[0m"
echo -e "\033[34;1mINFO:\033[0m PYTHON_VERSION ${PYTHON_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m PYTHON_CONNECTION_CLASS ${PYTHON_CONNECTION_CLASS}\033[0m"
@@ -33,7 +33,8 @@ docker build \
echo -e "\033[1m>>>>> Run [opensearch-project/opensearch-py-ml container] >>>>>>>>>>>>>>>>>>>>>>>>>>>>>\033[0m"


if [[ "$IS_DOC" == "false" ]]; then
if [[ "$TASK_TYPE" == "test" ]]; then
# Set up OpenSearch cluster & Run test (Invoked by integration.yml workflow)
docker run \
--network=${network_name} \
--env "STACK_VERSION=${STACK_VERSION}" \
@@ -45,10 +46,11 @@ if [[ "$IS_DOC" == "false" ]]; then
--name opensearch-py-ml-test-runner \
opensearch-project/opensearch-py-ml \
nox -s "test-${PYTHON_VERSION}(pandas_version='${PANDAS_VERSION}')"

docker cp opensearch-py-ml-test-runner:/code/opensearch-py-ml/junit/ ./junit/

docker rm opensearch-py-ml-test-runner
else
elif [[ "$TASK_TYPE" == "doc" ]]; then
# Set up OpenSearch cluster & Run docs (Invoked by build_deploy_doc.yml workflow)
docker run \
--network=${network_name} \
--env "STACK_VERSION=${STACK_VERSION}" \
@@ -60,7 +62,29 @@ else
--name opensearch-py-ml-doc-runner \
opensearch-project/opensearch-py-ml \
nox -s docs

docker cp opensearch-py-ml-doc-runner:/code/opensearch-py-ml/docs/build/ ./docs/

docker rm opensearch-py-ml-doc-runner
fi
elif [[ "$TASK_TYPE" == "trace" ]]; then
# Set up OpenSearch cluster & Run model autotracing (Invoked by model_uploader.yml workflow)
echo -e "\033[34;1mINFO:\033[0m MODEL_ID: ${MODEL_ID}\033[0m"
echo -e "\033[34;1mINFO:\033[0m MODEL_VERSION: ${MODEL_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m TRACING_FORMAT: ${TRACING_FORMAT}\033[0m"
echo -e "\033[34;1mINFO:\033[0m EMBEDDING_DIMENSION: ${EMBEDDING_DIMENSION:-N/A}\033[0m"
echo -e "\033[34;1mINFO:\033[0m POOLING_MODE: ${POOLING_MODE:-N/A}\033[0m"

docker run \
--network=${network_name} \
--env "STACK_VERSION=${STACK_VERSION}" \
--env "OPENSEARCH_URL=${opensearch_url}" \
--env "OPENSEARCH_VERSION=${OPENSEARCH_VERSION}" \
--env "TEST_SUITE=${TEST_SUITE}" \
--env "PYTHON_CONNECTION_CLASS=${PYTHON_CONNECTION_CLASS}" \
--env "TEST_TYPE=server" \
--name opensearch-py-ml-trace-runner \
opensearch-project/opensearch-py-ml \
nox -s trace -- ${MODEL_ID} ${MODEL_VERSION} ${TRACING_FORMAT} -ed ${EMBEDDING_DIMENSION} -pm ${POOLING_MODE}

docker cp opensearch-py-ml-trace-runner:/code/opensearch-py-ml/upload/ ./upload/
docker rm opensearch-py-ml-trace-runner
fi
2 changes: 1 addition & 1 deletion .ci/run-tests
@@ -10,7 +10,7 @@ export PYTHON_CONNECTION_CLASS="${PYTHON_CONNECTION_CLASS:=Urllib3HttpConnection
export CLUSTER="${1:-opensearch}"
export SECURE_INTEGRATION="${2:-true}"
export OPENSEARCH_VERSION="${3:-latest}"
export IS_DOC="${4:-false}"
export TASK_TYPE="${4:-test}"
if [[ "$SECURE_INTEGRATION" == "true" ]]; then
export OPENSEARCH_URL_EXTENSION="https"
else
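For context, the fourth positional argument to .ci/run-tests now selects the task: it defaults to test and is exported as TASK_TYPE, which .ci/run-repository.sh uses to choose the container and nox session shown above. A minimal local sketch, assuming Docker is available and, for the trace case, that MODEL_ID, MODEL_VERSION, and TRACING_FORMAT are exported the same way the workflow's Export Arguments step does (values are illustrative):

# arguments: cluster, secured, version, task
./.ci/run-tests opensearch true latest          # TASK_TYPE=test, runs the test suite in opensearch-py-ml-test-runner
./.ci/run-tests opensearch true latest doc      # TASK_TYPE=doc, builds the docs in opensearch-py-ml-doc-runner
MODEL_ID=sentence-transformers/msmarco-distilbert-base-tas-b \
MODEL_VERSION=1.0.1 \
TRACING_FORMAT=TORCH_SCRIPT \
./.ci/run-tests opensearch true latest trace    # TASK_TYPE=trace, runs model autotracing in opensearch-py-ml-trace-runner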
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
@@ -1 +1 @@
* @dhrubo-os @greaa-aws @ylwu-amzn @b4sjoo @jngz-es @rbhavna
* @thanawan-atc
2 changes: 1 addition & 1 deletion .github/workflows/build_deploy_doc.yml
@@ -20,7 +20,7 @@ jobs:
- name: Checkout Repository
uses: actions/checkout@v2
- name: Integ ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} true"
run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} doc"
- name: Deploy
uses: peaceiris/actions-gh-pages@v3
with:
2 changes: 1 addition & 1 deletion .github/workflows/integration.yml
@@ -18,7 +18,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v2
- name: Integ ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }}"
run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} test"
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v2
with:
246 changes: 246 additions & 0 deletions .github/workflows/model_uploader.yml
@@ -0,0 +1,246 @@
name: Model Auto-tracing & Uploading
on:
# Step 1: Initiate the workflow
workflow_dispatch:
inputs:
model_id:
description: "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)"
required: true
type: string
model_version:
description: "Model version number (e.g. 1.0.1)"
required: true
type: string
tracing_format:
description: "Model format for auto-tracing (torch_script/onnx)"
required: true
type: choice
options:
- "BOTH"
- "TORCH_SCRIPT"
- "ONNX"
embedding_dimension:
description: "(Optional) Embedding Dimension (Specify here if it does not exist in original config.json file, or you want to overwrite it.)"
required: false
type: int
pooling_mode:
description: "(Optional) Pooling Mode (Specify here if it does not exist in original config.json file or you want to overwrite it.)"
required: false
type: choice
options:
- ""
- "CLS"
- "MEAN"
- "MAX"
- "MEAN_SQRT_LEN"

jobs:
# # Step 2: Check if the model already exists in the model hub
# checking-out-model-hub:
# runs-on: 'ubuntu-latest'
# permissions:
# id-token: write
# contents: read
# steps:
# - name: Checkout Repository
# uses: actions/checkout@v3
# - name: Set Up Python
# uses: actions/setup-python@v2
# with:
# python-version: '3.x'
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
# role-session-name: checking-out-model-hub
# - name: Check if TORCH_SCRIPT Model Exists
# if: github.event.inputs.tracing_format == 'TORCH_SCRIPT' || github.event.inputs.tracing_format == 'BOTH'
# run: |
# TORCH_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
# ${{ github.event.inputs.model_id }} ${{ github.event.inputs.model_version }} TORCH_SCRIPT)
# aws s3api head-object --bucket opensearch-exp --key $TORCH_FILE_PATH > /dev/null 2>&1 || TORCH_MODEL_NOT_EXIST=true
# if [[ -z $TORCH_MODEL_NOT_EXIST ]];
# then
# echo "TORCH_SCRIPT Model already exists on model hub."
# exit 1
# fi
# - name: Check if ONNX Model Exists
# if: github.event.inputs.tracing_format == 'ONNX' || github.event.inputs.tracing_format == 'BOTH'
# run: |
# ONNX_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
# ${{ github.event.inputs.model_id }} ${{ github.event.inputs.model_version }} ONNX)
# aws s3api head-object --bucket opensearch-exp --key $ONNX_FILE_PATH > /dev/null 2>&1 || ONNX_MODEL_NOT_EXIST=true
# if [[ -z $ONNX_MODEL_NOT_EXIST ]];
# then
# echo "TORCH_SCRIPT Model already exists on model hub."
# exit 1;
# fi

# # Step 3: Trace the model, Verify the embeddings & Upload the model files as artifacts
# model-auto-tracing:
# needs: checking-out-model-hub
# name: model-auto-tracing
# runs-on: ubuntu-latest
# permissions:
# id-token: write
# contents: read
# strategy:
# matrix:
# cluster: ["opensearch"]
# secured: ["true"]
# entry:
# - { opensearch_version: 2.7.0 }
# steps:
# - name: Checkout
# uses: actions/checkout@v2
# - name: Export Arguments
# run: |
# echo "MODEL_ID=${{ github.event.inputs.model_id }}" >> $GITHUB_ENV
# echo "MODEL_VERSION=${{ github.event.inputs.model_version }}" >> $GITHUB_ENV
# echo "TRACING_FORMAT=${{ github.event.inputs.tracing_format }}" >> $GITHUB_ENV
# echo "EMBEDDING_DIMENSION=${{ github.event.inputs.embedding_dimension }}" >> $GITHUB_ENV
# echo "POOLING_MODE=${{ github.event.inputs.pooling_mode }}" >> $GITHUB_ENV
# - name: Autotracing ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
# run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} trace"
# - name: Upload Artifact
# uses: actions/upload-artifact@v3
# with:
# name: upload
# path: ./upload/
# retention-days: 5
# if-no-files-found: error
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
# role-session-name: model-auto-tracing
# - name: Dryrun model uploading
# id: dryrun_model_uploading
# run: |
# aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/ --dryrun
# dryrun_output=$(aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/ --dryrun)
# echo "dryrun_output<<EOF" >> $GITHUB_OUTPUT
# echo "${dryrun_output@E}" >> $GITHUB_OUTPUT
# echo "EOF" >> $GITHUB_OUTPUT
# echo "${dryrun_output@E}"
# outputs:
# dryrun_output: ${{ steps.dryrun_model_uploading.outputs.dryrun_output }}

# # Step 4: Ask for manual approval from the CODEOWNERS
# manual-approval:
# needs: model-auto-tracing
# runs-on: 'ubuntu-latest'
# permissions:
# issues: write
# steps:
# - name: Checkout Repository
# uses: actions/checkout@v3
# - name: Get Approvers
# id: get_approvers
# run: |
# echo "approvers=$(cat .github/CODEOWNERS | grep @ | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> $GITHUB_OUTPUT
# - name: Create Issue Body
# id: create_issue_body
# run: |
# embedding_dimension=${{ github.event.inputs.embedding_dimension }}
# pooling_mode=${{ github.event.inputs.pooling_mode }}
# issue_body="Please approve or deny opensearch-py-ml model uploading:

# ========= Workflow Details ==========
# - Workflow Name: ${{ github.workflow }}
# - Workflow Initiator: @${{ github.actor }}

# ========= Model Information =========
# - Model ID: ${{ github.event.inputs.model_id }}
# - Model Version: ${{ github.event.inputs.model_version }}
# - Tracing Format: ${{ github.event.inputs.tracing_format }}
# - Embedding Dimension: ${embedding_dimension:-Default}
# - Pooling Mode: ${pooling_mode:-Default}

# ===== Dry Run of Model Uploading =====
# ${{ needs.model-auto-tracing.outputs.dryrun_output }}"

# echo "issue_body<<EOF" >> $GITHUB_OUTPUT
# echo "${issue_body@E}" >> $GITHUB_OUTPUT
# echo "EOF" >> $GITHUB_OUTPUT
# echo "${issue_body@E}"
# - uses: trstringer/manual-approval@v1
# with:
# secret: ${{ github.TOKEN }}
# approvers: ${{ steps.get_approvers.outputs.approvers }}
# minimum-approvals: 1
# issue-title: "Upload Model to OpenSearch Model Hub (${{ github.event.inputs.model_id }})"
# issue-body: ${{ steps.create_issue_body.outputs.issue_body }}
# exclude-workflow-initiator-as-approver: false

# # Step 5: Download the artifacts & Upload it to the S3 bucket
# model-uploading:
# needs: manual-approval
# runs-on: 'ubuntu-latest'
# permissions:
# id-token: write
# contents: read
# steps:
# - name: Download Artifact
# uses: actions/download-artifact@v2
# with:
# name: upload
# path: ./upload/
# - name: Configure AWS Credentials
# uses: aws-actions/configure-aws-credentials@v2
# with:
# aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
# role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
# role-session-name: model-uploading
# - name: Copy Files to the Bucket
# id: copying_to_bucket
# run: |
# aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/
# echo "upload_time=$(TZ='America/Los_Angeles' date "+%Y-%m-%d %T")" >> $GITHUB_OUTPUT
# outputs:
# upload_time: ${{ steps.copying_to_bucket.outputs.upload_time }}

# Step 6: Update MODEL_UPLOAD_HISTORY.md & supported_models.json
history-update:
#needs: model-uploading
runs-on: 'ubuntu-latest'
permissions:
id-token: write
contents: write
pull-requests: write
concurrency: ${{ github.workflow }}-concurrency
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set Up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install Packages
run:
python -m pip install mdutils
- name: Update MODEL_UPLOAD_HISTORY.md
run: |
python utils/model_uploader/update_models_upload_history_md.py \
${{ github.event.inputs.model_id }} \
${{ github.event.inputs.model_version }} \
${{ github.event.inputs.tracing_format }} \
-ed ${{ github.event.inputs.embedding_dimension }} \
-pm ${{ github.event.inputs.pooling_mode }} \
-u ${{ github.actor }} #-t "${{ needs.model-uploading.outputs.upload_time }}"
- name: Create PR
uses: peter-evans/create-pull-request@v5
id: commit
with:
committer: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com>
commit-message: 'GitHub Actions Workflow - Update MODEL_UPLOAD_HISTORY.md (${{ github.event.inputs.model_id }})'
signoff: true
title: 'Upload Model to OpenSearch Model Hub: ${{ github.event.inputs.model_id }} (v.${{ github.event.inputs.model_version }})'
labels: ModelUpload
base: workflow-nox-cleanup-pr
delete-branch: true
add-paths: |
./utils/model_uploader/upload_history/MODEL_UPLOAD_HISTORY.md
./utils/model_uploader/upload_history/supported_models.json
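Because the new workflow is driven by workflow_dispatch inputs, it can be started from the Actions tab or, as a sketch assuming an authenticated GitHub CLI, from the command line (the model ID, version, and optional parameters below are illustrative):

gh workflow run model_uploader.yml \
  -f model_id=sentence-transformers/msmarco-distilbert-base-tas-b \
  -f model_version=1.0.1 \
  -f tracing_format=TORCH_SCRIPT \
  -f embedding_dimension=768 \
  -f pooling_mode=CLS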
22 changes: 19 additions & 3 deletions noxfile.py
@@ -61,7 +61,7 @@
@nox.session(reuse_venv=True)
def format(session):
session.install("black", "isort", "flynt")
session.run("python", "utils/license-headers.py", "fix", *SOURCE_FILES)
session.run("python", "utils/lint/license-headers.py", "fix", *SOURCE_FILES)
session.run("flynt", *SOURCE_FILES)
session.run("black", "--target-version=py38", *SOURCE_FILES)
session.run("isort", "--profile=black", *SOURCE_FILES)
@@ -73,7 +73,7 @@ def lint(session):
# Install numpy to use its mypy plugin
# https://numpy.org/devdocs/reference/typing.html#mypy-plugin
session.install("black", "flake8", "mypy", "isort", "numpy")
session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES)
session.run("python", "utils/lint/license-headers.py", "check", *SOURCE_FILES)
session.run("black", "--check", "--target-version=py38", *SOURCE_FILES)
session.run("isort", "--check", "--profile=black", *SOURCE_FILES)
session.run("flake8", "--ignore=E501,W503,E402,E712,E203", *SOURCE_FILES)
@@ -142,10 +142,26 @@ def test(session, pandas_version: str):
@nox.session(reuse_venv=True)
def docs(session):
# Run this so users get an error if they don't have Pandoc installed.

session.install("-r", "docs/requirements-docs.txt")
session.install(".")

session.cd("docs")
session.run("make", "clean", external=True)
session.run("make", "html", external=True)


@nox.session
def trace(session):
session.install(
"-r",
"requirements-dev.txt",
"--timeout",
"1500",
)
session.install(".")

session.run(
"python",
"utils/model_uploader/model_autotracing.py",
*(session.posargs),
)
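The new trace session forwards its positional arguments to utils/model_uploader/model_autotracing.py, mirroring the invocation added in .ci/run-repository.sh. A hedged local example (model ID, dimension, and pooling mode are illustrative, and a reachable OpenSearch cluster is assumed, as in the CI setup):

nox -s trace -- sentence-transformers/msmarco-distilbert-base-tas-b 1.0.1 TORCH_SCRIPT -ed 768 -pm CLS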
File renamed without changes.