Skip to content

Commit

Permalink
Merge final-pr-1.0-model
Browse files Browse the repository at this point in the history
Signed-off-by: Thanawan Atchariyachanvanit <[email protected]>
  • Loading branch information
thanawan-atc committed Aug 9, 2023
2 parents 7662b0e + 95d266a commit 0f42710
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 43 deletions.
7 changes: 4 additions & 3 deletions .ci/run-repository.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,10 @@ elif [[ "$TASK_TYPE" == "trace" ]]; then
echo -e "\033[34;1mINFO:\033[0m MODEL_ID: ${MODEL_ID}\033[0m"
echo -e "\033[34;1mINFO:\033[0m MODEL_VERSION: ${MODEL_VERSION}\033[0m"
echo -e "\033[34;1mINFO:\033[0m TRACING_FORMAT: ${TRACING_FORMAT}\033[0m"
echo -e "\033[34;1mINFO:\033[0m EMBEDDING_DIMENSION: ${EMBEDDING_DIMENSION:-Default}\033[0m"
echo -e "\033[34;1mINFO:\033[0m POOLING_MODE: ${POOLING_MODE:-Default}\033[0m"
echo -e "\033[34;1mINFO:\033[0m MODEL_DESCRIPTION: ${MODEL_DESCRIPTION:-Default}\033[0m"
echo -e "\033[34;1mINFO:\033[0m EMBEDDING_DIMENSION: ${EMBEDDING_DIMENSION:-N/A}\033[0m"
echo -e "\033[34;1mINFO:\033[0m POOLING_MODE: ${POOLING_MODE:-N/A}\033[0m"
echo -e "\033[34;1mINFO:\033[0m MODEL_DESCRIPTION: ${MODEL_DESCRIPTION:-N/A}\033[0m"


docker run \
--network=${network_name} \
Expand Down
10 changes: 8 additions & 2 deletions .github/workflows/model_uploader.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,15 @@ jobs:
- Model ID: ${{ github.event.inputs.model_id }}
- Model Version: ${{ github.event.inputs.model_version }}
- Tracing Format: ${{ github.event.inputs.tracing_format }}
- Embedding Dimension: ${embedding_dimension:-N/A}
- Pooling Mode: ${pooling_mode:-N/A}
- Model Description: ${model_description:-N/A}

======== Workflow Output Information =========
- Embedding Verification: Passed"
Expand Down Expand Up @@ -316,8 +322,8 @@ jobs:
${{ github.event.inputs.tracing_format }} \
-ed ${{ github.event.inputs.embedding_dimension }} \
-pm ${{ github.event.inputs.pooling_mode }} \
-md ${model_description:+"$model_description"} \
-u ${{ github.actor }} -t "${{ needs.model-uploading.outputs.upload_time }}"
-id ${{ github.run_id }} -u ${{ github.actor }} \
-t "${{ needs.model-uploading.outputs.upload_time }}"
- name: Create PR Body
id: create_pr_body
run: |
Expand Down
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
# CHANGELOG
Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)

## [1.2.0]

### Added

### Changed

### Fixed
- Enable make_model_config_json to add model description to model config file by @thanawan-atc in ([#203](https://github.com/opensearch-project/opensearch-py-ml/pull/203))

## [1.1.0]

### Added
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def trace(session):
"1500",
)
session.install(".")

session.run(
"python",
"utils/model_uploader/model_autotracing.py",
Expand Down
4 changes: 2 additions & 2 deletions opensearch_py_ml/ml_models/sentencetransformermodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1032,11 +1032,11 @@ def _get_model_description_from_readme_file(self, readme_file_path) -> str:
readme_data = MarkDownFile.read_file(readme_file_path)

# Find the description section
start_str = f"# {self.model_id}"
start_str = f"\n# {self.model_id}"
start = readme_data.find(start_str)
if start == -1:
model_name = self.model_id.split("/")[1]
start_str = f"# {model_name}"
start_str = f"\n# {model_name}"
start = readme_data.find(start_str)
end = readme_data.find("\n#", start + len(start_str))

Expand Down
17 changes: 9 additions & 8 deletions utils/model_uploader/model_autotracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,20 @@

import argparse
import json
import numpy as np
import os
import shutil
import sys
import warnings
from typing import List, Optional, Tuple
from zipfile import ZipFile

import numpy as np
from mdutils.fileutils import MarkDownFile
from numpy.typing import DTypeLike
from sentence_transformers import SentenceTransformer
from typing import List, Optional, Tuple
from zipfile import ZipFile

# We need to append ROOT_DIR path so that we can import
# OPENSEARCH_TEST_CLIENT and opensearch_py_ml since this
# We need to append ROOT_DIR path so that we can import
# OPENSEARCH_TEST_CLIENT and opensearch_py_ml since this
# python script is not in the root directory.
THIS_DIR = os.path.dirname(__file__)
ROOT_DIR = os.path.join(THIS_DIR, "../..")
Expand Down Expand Up @@ -429,12 +430,12 @@ def main(
print("Tracing Format: ", tracing_format)
print(
"Embedding Dimension: ",
embedding_dimension if embedding_dimension is not None else "Default",
embedding_dimension if embedding_dimension is not None else "N/A",
)
print("Pooling Mode: ", pooling_mode if pooling_mode is not None else "Default")
print("Pooling Mode: ", pooling_mode if pooling_mode is not None else "N/A")
print(
"Model Description: ",
model_description if model_description is not None else "Default",
model_description if model_description is not None else "N/A",
)
print("==========================================")

Expand Down
75 changes: 62 additions & 13 deletions utils/model_uploader/update_changelog_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@
# Any modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

# This program is run by "Model Auto-tracing & Uploading" workflow
# (See model_uploader.yml) to update CHANGELOG.md after uploading the model
# to our model hub.
# This program is run by "Model Auto-tracing & Uploading"
# & "Model Listing Uploading" workflow (See model_uploader.yml
# & model_listing_uploader.yml) to update CHANGELOG.md after
# uploading the model to our model hub.

import argparse
from mdutils.fileutils import MarkDownFile

CHANGELOG_DIRNAME = "."
CHANGELOG_FILENAME = "CHANGELOG.md"
SECTION_NAME = "Changed"
SUBSECTION_NAME = "Changed"
PREV_SUBSECTION_NAME = "Added"


def update_changelog_file(
Expand All @@ -30,14 +32,24 @@ def update_changelog_file(
"""
changelog_data = MarkDownFile.read_file(f"{CHANGELOG_DIRNAME}/{CHANGELOG_FILENAME}")

this_version_ptr = changelog_data.find("## [")
assert this_version_ptr != -1, "Cannot find a version section in the CHANGELOG.md"
next_version_ptr = changelog_data.find("## [", this_version_ptr + 1)
# Find the most recent version section and pull it out
this_version_ptr = changelog_data.find("\n## ") + 1
assert this_version_ptr != 0, "Cannot find a version section in the CHANGELOG.md"
next_version_ptr = changelog_data.find("\n## ", this_version_ptr + 1) + 1
if next_version_ptr == 0:
next_version_ptr = -1
this_version_section = changelog_data[this_version_ptr:next_version_ptr]

this_subsection_ptr = this_version_section.find(f"### {SECTION_NAME}")
if this_subsection_ptr != -1:
next_subsection_ptr = this_version_section.find("### ", this_subsection_ptr + 1)
# Find the sub-section SUBSECTION_NAME
this_subsection_ptr = this_version_section.find(f"\n### {SUBSECTION_NAME}") + 1
if this_subsection_ptr != 0:
# Case 1: Section SUBSECTION_NAME exists
# Append a change_log line to the end of that subsection if it exists
next_subsection_ptr = (
this_version_section.find("\n### ", this_subsection_ptr + 1) + 1
)
if next_subsection_ptr == 0:
next_subsection_ptr = -1
this_subsection = this_version_section[
this_subsection_ptr:next_subsection_ptr
].strip()
Expand All @@ -48,10 +60,47 @@ def update_changelog_file(
+ this_version_section[next_subsection_ptr:]
)
else:
this_subsection = this_version_section.strip()
this_subsection += "\n\n" + f"### {SECTION_NAME}\n- " + changelog_line + "\n\n"
new_version_section = this_subsection
# Case 2: Sub-section SUBSECTION_NAME does not exist
# Create sub-section SUBSECTION_NAME and add a change_log line
this_subsection = f"### {SUBSECTION_NAME}\n- {changelog_line}\n\n"
prev_subsection_ptr = (
this_version_section.find(f"\n### {PREV_SUBSECTION_NAME}") + 1
)
if prev_subsection_ptr != 0:
# Case 2.1: Sub-section PREV_SUBSECTION_NAME exist
# Add a sub-section SUBSECTION_NAME after PREV_SUBSECTION_NAME if PREV_SUBSECTION_NAME exists
next_subsection_ptr = (
this_version_section.find("\n### ", prev_subsection_ptr + 1) + 1
)
prev_subsection = this_version_section[
prev_subsection_ptr:next_subsection_ptr
].strip()
new_version_section = (
this_version_section[:prev_subsection_ptr]
+ prev_subsection
+ "\n\n"
+ this_subsection
+ this_version_section[next_subsection_ptr:]
)
else:
# Case 2.2: Sub-section PREV_SUBSECTION_NAME does not exist
next_subsection_ptr = this_version_section.find("\n### ") + 1
if next_subsection_ptr != 0:
# Case 2.2.1: There exists other sub-section in this version section
# Add a sub-section SUBSECTION_NAME before other sub-sections
new_version_section = (
this_version_section[:next_subsection_ptr]
+ this_subsection
+ this_version_section[next_subsection_ptr:]
)
else:
# Case 2.2.2: There isn't any other sub-section in this version section
# Add a sub-section SUBSECTION_NAME after version headline
new_version_section = (
this_version_section.strip() + "\n\n" + this_subsection
)

# Insert new_version_section back to the document
new_changelog_data = (
changelog_data[:this_version_ptr]
+ new_version_section
Expand Down
37 changes: 23 additions & 14 deletions utils/model_uploader/update_models_upload_history_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import json
import os
from typing import Dict, List, Optional

from mdutils.fileutils import MarkDownFile
from mdutils.tools.Table import Table

Expand All @@ -32,7 +33,7 @@
"Model Format",
"Embedding Dimension",
"Pooling Mode",
"Model Description",
"Workflow Run ID",
]
MD_HEADER = "# Pretrained Model Upload History\n\nThe model-serving framework supports a variety of open-source pretrained models that can assist with a range of machine learning (ML) search and analytics use cases. \n\n\n## Uploaded Pretrained Models\n\n\n### Sentence transformers\n\nSentence transformer models map sentences and paragraphs across a dimensional dense vector space. The number of vectors depends on the model. Use these models for use cases such as clustering and semantic search. \n\nThe following table shows sentence transformer model upload history.\n\n[//]: # (This may be the most platform independent comment)\n"

Expand All @@ -43,7 +44,7 @@ def create_model_json_obj(
model_format: str,
embedding_dimension: Optional[int] = None,
pooling_mode: Optional[str] = None,
model_description: Optional[str] = None,
workflow_id: Optional[str] = None,
model_uploader: Optional[str] = None,
upload_time: Optional[str] = None,
) -> Dict:
Expand All @@ -60,8 +61,13 @@ def create_model_json_obj(
:type embedding_dimension: int
:param pooling_mode: Pooling mode input ("CLS", "MEAN", "MAX", "MEAN_SQRT_LEN" or None)
:type pooling_mode: string
:param workflow_id: Workflow run id
:type workflow_id: string
:param model_uploader: Model uploader input
:type model_uploader: string
:param upload_time: Upload time input
Expand All @@ -77,11 +83,9 @@ def create_model_json_obj(
"Model Format": model_format,
"Embedding Dimension": str(embedding_dimension)
if embedding_dimension is not None
else "Default",
"Pooling Mode": pooling_mode if pooling_mode is not None else "Default",
"Model Description": model_description
if model_description is not None
else "Default",
else "N/A",
"Pooling Mode": pooling_mode if pooling_mode is not None else "N/A",
"Workflow Run ID": workflow_id if workflow_id is not None else "-"
}
return model_obj

Expand Down Expand Up @@ -113,7 +117,7 @@ def update_model_json_file(
tracing_format: str,
embedding_dimension: Optional[int] = None,
pooling_mode: Optional[str] = None,
model_description: Optional[str] = None,
workflow_id: Optional[str] = None,
model_uploader: Optional[str] = None,
upload_time: Optional[str] = None,
) -> None:
Expand All @@ -130,8 +134,13 @@ def update_model_json_file(
:type embedding_dimension: int
:param pooling_mode: Pooling mode input ("CLS", "MEAN", "MAX", "MEAN_SQRT_LEN" or None)
:type pooling_mode: string
:param workflow_id: Workflow run id
:type workflow_id: string
:param model_uploader: Model uploader input
:type model_uploader: string
:param upload_time: Upload time input
Expand All @@ -153,7 +162,7 @@ def update_model_json_file(
TORCH_SCRIPT_FORMAT,
embedding_dimension,
pooling_mode,
model_description,
workflow_id,
model_uploader,
upload_time,
)
Expand All @@ -166,7 +175,7 @@ def update_model_json_file(
ONNX_FORMAT,
embedding_dimension,
pooling_mode,
model_description,
workflow_id,
model_uploader,
upload_time,
)
Expand Down Expand Up @@ -242,13 +251,13 @@ def update_md_file():
help="Pooling mode if it does not exist in original config.json",
)
parser.add_argument(
"-md",
"--model_description",
"-id",
"--workflow_id",
type=str,
nargs="?",
default=None,
const=None,
help="Model description if you want to overwrite the default description",
help="Workflow Run ID",
)
parser.add_argument(
"-u",
Expand Down Expand Up @@ -276,7 +285,7 @@ def update_md_file():
args.tracing_format,
args.embedding_dimension,
args.pooling_mode,
args.model_description,
args.workflow_id,
args.model_uploader,
args.upload_time,
)
Expand Down

0 comments on commit 0f42710

Please sign in to comment.