forked from opensearch-project/opensearch-py-ml
-
Notifications
You must be signed in to change notification settings - Fork 0
239 lines (230 loc) · 9.57 KB
/
model_uploader.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
name: Model Auto-tracing & Uploading
on:
# Step 1: Initiate the workflow
workflow_dispatch:
inputs:
model_id:
description: "Model ID for auto-tracing and uploading (e.g. sentence-transformers/msmarco-distilbert-base-tas-b)"
required: true
type: string
model_version:
description: "Model version number (e.g. 1.0.1)"
required: true
type: string
tracing_format:
description: "Model format for auto-tracing (torch_script/onnx)"
required: true
type: choice
options:
- "BOTH"
- "TORCH_SCRIPT"
- "ONNX"
embedding_dimension:
description: "(Optional) Embedding Dimension (Specify here if it does not exist in original config.json file, or you want to overwrite it.)"
required: false
type: int
pooling_mode:
description: "(Optional) Pooling Mode (Specify here if it does not exist in original config.json file or you want to overwrite it.)"
required: false
type: choice
options:
- ""
- "CLS"
- "MEAN"
- "MAX"
- "MEAN_SQRT_LEN"
jobs:
# Step 2: Check if the model already exists in the model hub
checking-out-model-hub:
runs-on: 'ubuntu-latest'
permissions:
id-token: write
contents: read
steps:
- name: Checkout Repository
uses: actions/checkout@v3
- name: Set Up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
role-session-name: checking-out-model-hub
- name: Check if TORCH_SCRIPT Model Exists
if: github.event.inputs.tracing_format == 'TORCH_SCRIPT' || github.event.inputs.tracing_format == 'BOTH'
run: |
TORCH_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
${{ github.event.inputs.model_id }} ${{ github.event.inputs.model_version }} TORCH_SCRIPT)
aws s3api head-object --bucket opensearch-exp --key $TORCH_FILE_PATH > /dev/null 2>&1 || TORCH_MODEL_NOT_EXIST=true
if [[ -z $TORCH_MODEL_NOT_EXIST ]];
then
echo "TORCH_SCRIPT Model already exists on model hub."
exit 1
fi
- name: Check if ONNX Model Exists
if: github.event.inputs.tracing_format == 'ONNX' || github.event.inputs.tracing_format == 'BOTH'
run: |
ONNX_FILE_PATH=$(python utils/model_uploader/save_model_file_path_to_env.py \
${{ github.event.inputs.model_id }} ${{ github.event.inputs.model_version }} ONNX)
aws s3api head-object --bucket opensearch-exp --key $ONNX_FILE_PATH > /dev/null 2>&1 || ONNX_MODEL_NOT_EXIST=true
if [[ -z $ONNX_MODEL_NOT_EXIST ]];
then
echo "TORCH_SCRIPT Model already exists on model hub."
exit 1;
fi
# Step 3: Trace the model, Verify the embeddings & Upload the model files as artifacts
model-auto-tracing:
needs: checking-out-model-hub
name: model-auto-tracing
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
strategy:
matrix:
cluster: ["opensearch"]
secured: ["true"]
entry:
- { opensearch_version: 2.7.0 }
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Export Arguments
run: |
echo "MODEL_ID=${{ github.event.inputs.model_id }}" >> $GITHUB_ENV
echo "MODEL_VERSION=${{ github.event.inputs.model_version }}" >> $GITHUB_ENV
echo "TRACING_FORMAT=${{ github.event.inputs.tracing_format }}" >> $GITHUB_ENV
echo "EMBEDDING_DIMENSION=${{ github.event.inputs.embedding_dimension }}" >> $GITHUB_ENV
echo "POOLING_MODE=${{ github.event.inputs.pooling_mode }}" >> $GITHUB_ENV
- name: Autotracing ${{ matrix.cluster }} secured=${{ matrix.secured }} version=${{matrix.entry.opensearch_version}}
run: "./.ci/run-tests ${{ matrix.cluster }} ${{ matrix.secured }} ${{ matrix.entry.opensearch_version }} trace"
- name: Upload Artifact
uses: actions/upload-artifact@v3
with:
name: upload
path: ./upload/
retention-days: 5
if-no-files-found: error
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
role-session-name: model-auto-tracing
- name: Dryrun model uploading
id: dryrun_model_uploading
run: |
aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/ --dryrun
dryrun_output=$(aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/ --dryrun)
echo "dryrun_output<<EOF" >> $GITHUB_OUTPUT
echo "${dryrun_output@E}" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "${dryrun_output@E}"
outputs:
dryrun_output: ${{ steps.dryrun_model_uploading.outputs.dryrun_output }}
# Step 4: Ask for manual approval from the CODEOWNERS
manual-approval:
needs: model-auto-tracing
runs-on: 'ubuntu-latest'
permissions:
issues: write
steps:
- name: Checkout Repository
uses: actions/checkout@v3
- name: Get Approvers
id: get_approvers
run: |
echo "approvers=$(cat .github/CODEOWNERS | grep @ | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> $GITHUB_OUTPUT
- name: Create Issue Body
id: create_issue_body
run: |
embedding_dimension=${{ github.event.inputs.embedding_dimension }}
pooling_mode=${{ github.event.inputs.pooling_mode }}
issue_body="Please approve or deny opensearch-py-ml model uploading:
========= Workflow Details ==========
- Workflow Name: ${{ github.workflow }}
- Workflow Initiator: @${{ github.actor }}
========= Model Information =========
- Model ID: ${{ github.event.inputs.model_id }}
- Model Version: ${{ github.event.inputs.model_version }}
- Tracing Format: ${{ github.event.inputs.tracing_format }}
- Embedding Dimension: ${embedding_dimension:-Default}
- Pooling Mode: ${pooling_mode:-Default}
===== Dry Run of Model Uploading =====
${{ needs.model-auto-tracing.outputs.dryrun_output }}"
echo "issue_body<<EOF" >> $GITHUB_OUTPUT
echo "${issue_body@E}" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "${issue_body@E}"
- uses: trstringer/manual-approval@v1
with:
secret: ${{ github.TOKEN }}
approvers: ${{ steps.get_approvers.outputs.approvers }}
minimum-approvals: 1
issue-title: "Upload Model to OpenSearch Model Hub (${{ github.event.inputs.model_id }})"
issue-body: ${{ steps.create_issue_body.outputs.issue_body }}
exclude-workflow-initiator-as-approver: false
# Step 5: Download the artifacts & Upload it to the S3 bucket
model-uploading:
needs: manual-approval
runs-on: 'ubuntu-latest'
permissions:
id-token: write
contents: read
steps:
- name: Download Artifact
uses: actions/download-artifact@v2
with:
name: upload
path: ./upload/
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
aws-region: ${{ secrets.MODEL_UPLOADER_AWS_REGION }}
role-to-assume: ${{ secrets.MODEL_UPLOADER_ROLE }}
role-session-name: model-uploading
- name: Copy Files to the Bucket
id: copying_to_bucket
run: |
aws s3 sync ./upload/ s3://opensearch-exp/ml-models/huggingface/sentence-transformers/
echo "upload_time=$(TZ='America/Los_Angeles' date "+%Y-%m-%d %T")" >> $GITHUB_OUTPUT
outputs:
upload_time: ${{ steps.copying_to_bucket.outputs.upload_time }}
# Step 6: Update MODEL_UPLOAD_HISTORY.md & supported_models.json
history-update:
needs: model-uploading
runs-on: 'ubuntu-latest'
permissions:
id-token: write
contents: write
concurrency: ${{ github.workflow }}-concurrency
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Set Up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install Packages
run:
python -m pip install mdutils
- name: Update MODEL_UPLOAD_HISTORY.md
run: |
python utils/model_uploader/update_models_upload_history_md.py \
${{ github.event.inputs.model_id }} \
${{ github.event.inputs.model_version }} \
${{ github.event.inputs.tracing_format }} \
-ed ${{ github.event.inputs.embedding_dimension }} \
-pm ${{ github.event.inputs.pooling_mode }} \
-u ${{ github.actor }} -t "${{ needs.model-uploading.outputs.upload_time }}"
- name: Commit Updates
uses: stefanzweifel/git-auto-commit-action@v4
id: commit
with:
commit_message: 'GitHub Actions Workflow - Update MODEL_UPLOAD_HISTORY.md (${{ github.event.inputs.model_id }})'
commit_options: '--signoff'
repository: ./utils/model_uploader/upload_history
file_pattern: MODEL_UPLOAD_HISTORY.md supported_models.json