From d6ec5155138fc3cfd0a4b8bb6114348d126bc09a Mon Sep 17 00:00:00 2001 From: conggguan Date: Tue, 6 Aug 2024 17:11:11 +0800 Subject: [PATCH 1/3] [Feature] Add a workflow parameter that lets the model uploader specify a custom prefix. Signed-off-by: conggguan --- .github/workflows/model_uploader.yml | 11 ++++++++++- CHANGELOG.md | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/model_uploader.yml b/.github/workflows/model_uploader.yml index 1c7362b2..cdfc3c28 100644 --- a/.github/workflows/model_uploader.yml +++ b/.github/workflows/model_uploader.yml @@ -24,6 +24,10 @@ on: - "BOTH" - "TORCH_SCRIPT" - "ONNX" + upload_prefix: + description: "Specifies the model prefix for uploading. For example, transforming the default path from '.../sentence-transformers/msmarco-distilbert-base-tas-b' to '.../{prefix}/msmarco-distilbert-base-tas-b'." + required: false + type: string model_type: description: "Model type for auto-tracing (SentenceTransformer/Sparse)" required: true @@ -74,7 +78,12 @@ jobs: run: | model_id=${{ github.event.inputs.model_id }} echo "model_folder=ml-models/${{github.event.inputs.model_source}}/${model_id}" >> $GITHUB_OUTPUT - echo "model_prefix_folder=ml-models/${{github.event.inputs.model_source}}/${model_id%%/*}/" >> $GITHUB_OUTPUT + if [[ -n "${{ github.event.inputs.upload_prefix }}" ]]; then + model_prefix="ml-models/${{ github.event.inputs.model_source }}/${{ github.event.inputs.upload_prefix }}" + else + model_prefix="ml-models/${{ github.event.inputs.model_source }}/${model_id%%/*}" + fi + echo "model_prefix_folder=$model_prefix" >> $GITHUB_OUTPUT - name: Initiate workflow_info id: init_workflow_info run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index f8168762..49a52003 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Add workflows and scripts for sparse encoding model tracing and uploading process by @conggguan in 
([#394](https://github.com/opensearch-project/opensearch-py-ml/pull/394)) ### Changed +- Add a parameter to customize the upload folder prefix ([#398](https://github.com/opensearch-project/opensearch-py-ml/pull/398)) - Modify ml-models.JenkinsFile so that it takes model format into account and can be triggered with generic webhook by @thanawan-atc in ([#211](https://github.com/opensearch-project/opensearch-py-ml/pull/211)) - Update demo_tracing_model_torchscript_onnx.ipynb to use make_model_config_json by @thanawan-atc in ([#220](https://github.com/opensearch-project/opensearch-py-ml/pull/220)) - Bump torch from 1.13.1 to 2.0.1 and add onnx dependency by @thanawan-atc ([#237](https://github.com/opensearch-project/opensearch-py-ml/pull/237)) From d134be6c08e189ab6b98510416a7f70e5abebac9 Mon Sep 17 00:00:00 2001 From: conggguan Date: Wed, 7 Aug 2024 12:15:07 +0800 Subject: [PATCH 2/3] [Fix] Fix the wrong folder parameter passed to the Jenkins trigger. Signed-off-by: conggguan --- .github/workflows/model_uploader.yml | 4 ++-- CHANGELOG.md | 2 +- .../upload_history/MODEL_UPLOAD_HISTORY.md | 3 +-- .../upload_history/supported_models.json | 10 ---------- 4 files changed, 4 insertions(+), 15 deletions(-) diff --git a/.github/workflows/model_uploader.yml b/.github/workflows/model_uploader.yml index cdfc3c28..d8b04e58 100644 --- a/.github/workflows/model_uploader.yml +++ b/.github/workflows/model_uploader.yml @@ -77,12 +77,12 @@ jobs: id: init_folders run: | model_id=${{ github.event.inputs.model_id }} - echo "model_folder=ml-models/${{github.event.inputs.model_source}}/${model_id}" >> $GITHUB_OUTPUT if [[ -n "${{ github.event.inputs.upload_prefix }}" ]]; then model_prefix="ml-models/${{ github.event.inputs.model_source }}/${{ github.event.inputs.upload_prefix }}" else model_prefix="ml-models/${{ github.event.inputs.model_source }}/${model_id%%/*}" fi + echo "model_folder=$model_prefix/${model_id##*/}" >> $GITHUB_OUTPUT echo "model_prefix_folder=$model_prefix" >> $GITHUB_OUTPUT 
- name: Initiate workflow_info id: init_workflow_info @@ -446,4 +446,4 @@ jobs: version=${{ github.event.inputs.model_version }} format=${{ github.event.inputs.tracing_format }} jenkins_params="{\"BASE_DOWNLOAD_PATH\":\"$base_download_path\", \"VERSION\":\"$version\", \"FORMAT\":\"$format\"}" - sh utils/model_uploader/trigger_ml_models_release.sh $jenkins_trigger_token "$jenkins_params" + sh utils/model_uploader/trigger_ml_models_release.sh $jenkins_trigger_token "$jenkins_params" \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index a31da865..5b730378 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,9 +40,9 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Removed pandas version pin in nox tests by @rawwar ([#368](https://github.com/opensearch-project/opensearch-py-ml/pull/368)) - Switch AL2 to AL2023 agent and DockerHub to ECR images in ml-models.JenkinsFile ([#377](https://github.com/opensearch-project/opensearch-py-ml/pull/377)) - Refactored validators in ML Commons' client([#385](https://github.com/opensearch-project/opensearch-py-ml/pull/385)) -- Update model upload history - opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill (v.1.0.0)(TORCH_SCRIPT) by @dhrubo-os ([#400](https://github.com/opensearch-project/opensearch-py-ml/pull/400)) ### Fixed +- Fix the wrong input parameter for model_uploader's base_download_path in the Jenkins trigger ([#402](https://github.com/opensearch-project/opensearch-py-ml/pull/402)) - Enable make_model_config_json to add model description to model config file by @thanawan-atc in ([#203](https://github.com/opensearch-project/opensearch-py-ml/pull/203)) - Correct demo_ml_commons_integration.ipynb by @thanawan-atc in ([#208](https://github.com/opensearch-project/opensearch-py-ml/pull/208)) - Handle the case when the model max length is undefined in tokenizer by @thanawan-atc in ([#219](https://github.com/opensearch-project/opensearch-py-ml/pull/219)) diff --git 
a/utils/model_uploader/upload_history/MODEL_UPLOAD_HISTORY.md b/utils/model_uploader/upload_history/MODEL_UPLOAD_HISTORY.md index 4ff25eef..e1bd23e1 100644 --- a/utils/model_uploader/upload_history/MODEL_UPLOAD_HISTORY.md +++ b/utils/model_uploader/upload_history/MODEL_UPLOAD_HISTORY.md @@ -20,5 +20,4 @@ The following table shows sentence transformer model upload history. |2023-08-31 15:22:19|@dhrubo-os|`sentence-transformers/msmarco-distilbert-base-tas-b`|1.0.2|TORCH_SCRIPT|N/A|N/A|6042401385| |2023-09-13 18:03:32|@dhrubo-os|`sentence-transformers/distiluse-base-multilingual-cased-v1`|1.0.1|TORCH_SCRIPT|N/A|N/A|6178024517| |2023-10-18 18:06:15|@dhrubo-os|`sentence-transformers/paraphrase-mpnet-base-v2`|1.0.0|ONNX|N/A|N/A|6568285400| -|2023-10-18 18:06:15|@dhrubo-os|`sentence-transformers/paraphrase-mpnet-base-v2`|1.0.0|TORCH_SCRIPT|N/A|N/A|6568285400| -|2024-08-06 12:42:00|@dhrubo-os|`opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill`|1.0.0|TORCH_SCRIPT|N/A|N/A|10271804648| +|2023-10-18 18:06:15|@dhrubo-os|`sentence-transformers/paraphrase-mpnet-base-v2`|1.0.0|TORCH_SCRIPT|N/A|N/A|6568285400| \ No newline at end of file diff --git a/utils/model_uploader/upload_history/supported_models.json b/utils/model_uploader/upload_history/supported_models.json index b0c33980..ce09ec4c 100644 --- a/utils/model_uploader/upload_history/supported_models.json +++ b/utils/model_uploader/upload_history/supported_models.json @@ -48,15 +48,5 @@ "Embedding Dimension": "N/A", "Pooling Mode": "N/A", "Workflow Run ID": "6568285400" - }, - { - "Model Uploader": "@dhrubo-os", - "Upload Time": "2024-08-06 12:42:00", - "Model ID": "opensearch-project/opensearch-neural-sparse-encoding-doc-v2-distill", - "Model Version": "1.0.0", - "Model Format": "TORCH_SCRIPT", - "Embedding Dimension": "N/A", - "Pooling Mode": "N/A", - "Workflow Run ID": "10271804648" } ] \ No newline at end of file From 208d343d31491895e4c82a97443d8b5f77ab03b7 Mon Sep 17 00:00:00 2001 From: conggguan Date: 
Thu, 8 Aug 2024 01:49:42 +0800 Subject: [PATCH 3/3] [Comments] Add comments for the model uploader workflow's [Initiate folders] step. Signed-off-by: conggguan --- .github/workflows/model_uploader.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/model_uploader.yml b/.github/workflows/model_uploader.yml index d8b04e58..dae40f31 100644 --- a/.github/workflows/model_uploader.yml +++ b/.github/workflows/model_uploader.yml @@ -74,6 +74,16 @@ jobs: echo "This workflow should only be triggered on 'main' branch" exit 1 - name: Initiate folders + # This script initializes the folder path variables. + # 1. Retrieves the input model_id. + # 2. If upload_prefix is provided, constructs model_prefix using upload_prefix and model_source. + # - model_prefix: "ml-models/{model_source}/{upload_prefix}" + # 3. If upload_prefix is not provided, constructs model_prefix using model_source and the prefix part of model_id. + # - The prefix part is the substring before the first '/' in model_id. + # Example: + # - Given model_id: "opensearch-project/opensearch-neural-sparse-encoding-v1" + # - model_prefix: "ml-models/{model_source}/opensearch-project" + # 4. Constructs model_folder ("{model_prefix}/{part of model_id after the last '/'}") and model_prefix_folder (equal to model_prefix). id: init_folders run: | model_id=${{ github.event.inputs.model_id }}