From bc4cc78b0f2e401577ea8f6f8bec5373b7f61701 Mon Sep 17 00:00:00 2001
From: Thanawan Atchariyachanvanit
Date: Thu, 17 Aug 2023 00:23:57 -0700
Subject: [PATCH] Add tests for PR2

Signed-off-by: Thanawan Atchariyachanvanit
---
 .../e5-small-v2/1.0.1/onnx/config.json        |   1 +
 .../1.0.1/torch_script/config.json            |   1 +
 .../1.0.1/torch_script/config.json            |   1 +
 .../1.0.1/onnx/config.json                    |   1 +
 .../1.0.1/torch_script/config.json            |   1 +
 .../2.0.0/torch_script/config.json            |   1 +
 .../ml_model_listing/samples/config_paths.txt |   1 +
 .../samples/pretrained_model_listing.json     |  53 ++++++++
 .../test_update_pretrained_model_listing.py   | 126 ++++++++++++++++++
 .../update_pretrained_model_listing.py        |  58 ++++----
 10 files changed, 217 insertions(+), 27 deletions(-)
 create mode 100644 tests/ml_model_listing/samples/config_folder/intfloat/e5-small-v2/1.0.1/onnx/config.json
 create mode 100644 tests/ml_model_listing/samples/config_folder/jhgan/ko-sroberta-multitask/1.0.1/torch_script/config.json
 create mode 100644 tests/ml_model_listing/samples/config_folder/sentence-transformers/clip-ViT-B-32-multilingual-v1/1.0.1/torch_script/config.json
 create mode 100644 tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/onnx/config.json
 create mode 100644 tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/torch_script/config.json
 create mode 100644 tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/2.0.0/torch_script/config.json
 create mode 100644 tests/ml_model_listing/samples/config_paths.txt
 create mode 100644 tests/ml_model_listing/samples/pretrained_model_listing.json
 create mode 100644 tests/ml_model_listing/test_update_pretrained_model_listing.py

diff --git a/tests/ml_model_listing/samples/config_folder/intfloat/e5-small-v2/1.0.1/onnx/config.json b/tests/ml_model_listing/samples/config_folder/intfloat/e5-small-v2/1.0.1/onnx/config.json
new file mode 100644
index 00000000..0784b70f
--- /dev/null
+++ b/tests/ml_model_listing/samples/config_folder/intfloat/e5-small-v2/1.0.1/onnx/config.json
@@ -0,0 +1 @@
+{"name": "intfloat/e5-small-v2", "version": "1.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space.", "model_format": "ONNX", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "bert", "embedding_dimension": 384, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": true, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/intfloat_e5-small-v2/\", \"architectures\": [\"BertModel\"], \"attention_probs_dropout_prob\": 0.1, \"classifier_dropout\": null, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 384, \"initializer_range\": 0.02, \"intermediate_size\": 1536, \"layer_norm_eps\": 1e-12, \"max_position_embeddings\": 512, \"model_type\": \"bert\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 0, \"position_embedding_type\": \"absolute\", \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"type_vocab_size\": 2, \"use_cache\": true, \"vocab_size\": 30522}"}}
diff --git a/tests/ml_model_listing/samples/config_folder/jhgan/ko-sroberta-multitask/1.0.1/torch_script/config.json b/tests/ml_model_listing/samples/config_folder/jhgan/ko-sroberta-multitask/1.0.1/torch_script/config.json
new file mode 100644
index 00000000..bc543aec
--- /dev/null
+++ b/tests/ml_model_listing/samples/config_folder/jhgan/ko-sroberta-multitask/1.0.1/torch_script/config.json
@@ -0,0 +1 @@
+{"name": "jhgan/ko-sroberta-multitask", "version": "1.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search.", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "roberta", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/jhgan_ko-sroberta-multitask/\", \"architectures\": [\"RobertaModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"classifier_dropout\": null, \"eos_token_id\": 2, \"gradient_checkpointing\": false, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"roberta\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"position_embedding_type\": \"absolute\", \"tokenizer_class\": \"BertTokenizer\", \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"type_vocab_size\": 1, \"use_cache\": true, \"vocab_size\": 32000}"}}
diff --git a/tests/ml_model_listing/samples/config_folder/sentence-transformers/clip-ViT-B-32-multilingual-v1/1.0.1/torch_script/config.json b/tests/ml_model_listing/samples/config_folder/sentence-transformers/clip-ViT-B-32-multilingual-v1/1.0.1/torch_script/config.json
new file mode 100644
index 00000000..d4bfbddc
--- /dev/null
+++ b/tests/ml_model_listing/samples/config_folder/sentence-transformers/clip-ViT-B-32-multilingual-v1/1.0.1/torch_script/config.json
@@ -0,0 +1 @@
+{"name": "sentence-transformers/clip-ViT-B-32-multilingual-v1", "version": "1.0.1", "description": "This is a multi-lingual version of the OpenAI CLIP-ViT-B32 model. You can map text and images to a common dense vector space such that images and the matching texts are close. This model can be used for image search and for multi-lingual zero-shot image classification .", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "distilbert", "embedding_dimension": 512, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_clip-ViT-B-32-multilingual-v1/\", \"activation\": \"gelu\", \"architectures\": [\"DistilBertModel\"], \"attention_dropout\": 0.1, \"dim\": 768, \"dropout\": 0.1, \"hidden_dim\": 3072, \"initializer_range\": 0.02, \"max_position_embeddings\": 512, \"model_type\": \"distilbert\", \"n_heads\": 12, \"n_layers\": 6, \"output_past\": true, \"pad_token_id\": 0, \"qa_dropout\": 0.1, \"seq_classif_dropout\": 0.2, \"sinusoidal_pos_embds\": false, \"tie_weights_\": true, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"vocab_size\": 119547}"}}
diff --git a/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/onnx/config.json b/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/onnx/config.json
new file mode 100644
index 00000000..bfa5fa37
--- /dev/null
+++ b/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/onnx/config.json
@@ -0,0 +1 @@
+{"name": "sentence-transformers/multi-qa-mpnet-base-cos-v1", "version": "1.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "ONNX", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": true, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-cos-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"vocab_size\": 30527}"}}
diff --git a/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/torch_script/config.json b/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/torch_script/config.json
new file mode 100644
index 00000000..cbbb2145
--- /dev/null
+++ b/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/torch_script/config.json
@@ -0,0 +1 @@
+{"name": "sentence-transformers/multi-qa-mpnet-base-cos-v1", "version": "1.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": true, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-cos-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"vocab_size\": 30527}"}}
diff --git a/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/2.0.0/torch_script/config.json b/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/2.0.0/torch_script/config.json
new file mode 100644
index 00000000..118dd2a2
--- /dev/null
+++ b/tests/ml_model_listing/samples/config_folder/sentence-transformers/multi-qa-mpnet-base-cos-v1/2.0.0/torch_script/config.json
@@ -0,0 +1 @@
+{"name": "sentence-transformers/multi-qa-mpnet-base-cos-v1", "version": "2.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (New Version)", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": true, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-cos-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"vocab_size\": 30527}"}}
diff --git a/tests/ml_model_listing/samples/config_paths.txt b/tests/ml_model_listing/samples/config_paths.txt
new file mode 100644
index 00000000..62f308bb
--- /dev/null
+++ b/tests/ml_model_listing/samples/config_paths.txt
@@ -0,0 +1 @@
+ml-models/huggingface/intfloat/e5-small-v2/1.0.1/onnx/config.json ml-models/huggingface/jhgan/ko-sroberta-multitask/1.0.1/torch_script/config.json ml-models/huggingface/sentence-transformers/clip-ViT-B-32-multilingual-v1/1.0.1/torch_script/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/onnx/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/torch_script/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1/2.0.0/torch_script/config.json
\ No newline at end of file
diff --git a/tests/ml_model_listing/samples/pretrained_model_listing.json b/tests/ml_model_listing/samples/pretrained_model_listing.json
new file mode 100644
index 00000000..bf5992c5
--- /dev/null
+++ b/tests/ml_model_listing/samples/pretrained_model_listing.json
@@ -0,0 +1,53 @@
+[
+  {
+    "name": "huggingface/intfloat/e5-small-v2",
+    "versions": {
+      "1.0.1": {
+        "format": [
+          "onnx"
+        ],
+        "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 384 dimensional dense vector space."
+      }
+    }
+  },
+  {
+    "name": "huggingface/jhgan/ko-sroberta-multitask",
+    "versions": {
+      "1.0.1": {
+        "format": [
+          "torch_script"
+        ],
+        "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search."
+      }
+    }
+  },
+  {
+    "name": "huggingface/sentence-transformers/clip-ViT-B-32-multilingual-v1",
+    "versions": {
+      "1.0.1": {
+        "format": [
+          "torch_script"
+        ],
+        "description": "This is a multi-lingual version of the OpenAI CLIP-ViT-B32 model. You can map text and images to a common dense vector space such that images and the matching texts are close. This model can be used for image search and for multi-lingual zero-shot image classification ."
+      }
+    }
+  },
+  {
+    "name": "huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1",
+    "versions": {
+      "1.0.1": {
+        "format": [
+          "onnx",
+          "torch_script"
+        ],
+        "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources."
+      },
+      "2.0.0": {
+        "format": [
+          "torch_script"
+        ],
+        "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (New Version)"
+      }
+    }
+  }
+]
\ No newline at end of file
diff --git a/tests/ml_model_listing/test_update_pretrained_model_listing.py b/tests/ml_model_listing/test_update_pretrained_model_listing.py
new file mode 100644
index 00000000..1a20000d
--- /dev/null
+++ b/tests/ml_model_listing/test_update_pretrained_model_listing.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: Apache-2.0
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+# Any modifications Copyright OpenSearch Contributors. See
+# GitHub history for details.
+
+# We need to append UTILS_MODEL_UPLOADER_DIR path so that we can import
+# functions from update_pretrained_model_listing.py
+# since this python script is not in the root directory.
+
+import json
+import os
+import shutil
+import sys
+
+import pytest
+
+THIS_DIR = os.path.dirname(__file__)
+UTILS_MODEL_UPLOADER_DIR = os.path.join(THIS_DIR, "../../utils/model_uploader")
+sys.path.append(UTILS_MODEL_UPLOADER_DIR)
+
+SAMPLE_FOLDER = os.path.join(THIS_DIR, "samples")
+CONFIG_PATHS_TXT_FILENAME = "config_paths.txt"
+CONFIG_FOLDERNAME = "config_folder"
+SAMPLE_PRETRAINED_MODEL_LISTING = os.path.join(
+    SAMPLE_FOLDER, "pretrained_model_listing.json"
+)
+SAMPLE_FOLDER_COPY = os.path.join(THIS_DIR, "samples_copy")
+SAMPLE_MISSING_CONFIG_SUBFOLDERNAME = "sentence-transformers"
+TEST_FILE = os.path.join(THIS_DIR, "test_pretrained_model_listing.json")
+
+from update_pretrained_model_listing import create_new_pretrained_model_listing
+
+
+def clean_test_file():
+    if os.path.isfile(TEST_FILE):
+        os.remove(TEST_FILE)
+
+
+def copy_samples_folder():
+    shutil.copytree(SAMPLE_FOLDER, SAMPLE_FOLDER_COPY)
+
+
+def clean_samples_folder_copy():
+    if os.path.exists(SAMPLE_FOLDER_COPY):
+        for files in os.listdir(SAMPLE_FOLDER_COPY):
+            sub_path = os.path.join(SAMPLE_FOLDER_COPY, files)
+            if os.path.isfile(sub_path):
+                os.remove(sub_path)
+            else:
+                try:
+                    shutil.rmtree(sub_path)
+                except OSError as err:
+                    print(
+                        "Failed to delete files; please delete all files in "
+                        + str(SAMPLE_FOLDER_COPY)
+                        + " "
+                        + str(err)
+                    )
+
+        shutil.rmtree(SAMPLE_FOLDER_COPY)
+
+
+clean_samples_folder_copy()
+clean_test_file()
+
+
+def test_create_new_pretrained_model_listing():
+    clean_test_file()
+    try:
+        create_new_pretrained_model_listing(
+            os.path.join(SAMPLE_FOLDER, CONFIG_PATHS_TXT_FILENAME),
+            os.path.join(SAMPLE_FOLDER, CONFIG_FOLDERNAME),
+            pretrained_model_listing_json_filepath=TEST_FILE,
+        )
+    except Exception as e:
+        assert False, f"Failed while creating new pretrained model listing: {e}"
+
+    try:
+        with open(SAMPLE_PRETRAINED_MODEL_LISTING, "r") as f:
+            sample_pretrained_model_listing = json.load(f)
+    except Exception as e:
+        assert False, (
+            f"Cannot open {SAMPLE_PRETRAINED_MODEL_LISTING} to use it for verification: {e}"
+        )
+
+    try:
+        with open(TEST_FILE, "r") as f:
+            test_pretrained_model_listing = json.load(f)
+    except Exception as e:
+        assert False, f"Cannot open {TEST_FILE} to verify its content: {e}"
+
+    assert test_pretrained_model_listing == sample_pretrained_model_listing, (
+        "Incorrect pretrained model listing"
+    )
+
+    clean_test_file()
+
+
+def test_missing_config_file():
+    clean_test_file()
+    clean_samples_folder_copy()
+
+    copy_samples_folder()
+    shutil.rmtree(
+        os.path.join(
+            SAMPLE_FOLDER_COPY, CONFIG_FOLDERNAME, SAMPLE_MISSING_CONFIG_SUBFOLDERNAME
+        )
+    )
+
+    with pytest.raises(Exception) as exc_info:
+        create_new_pretrained_model_listing(
+            os.path.join(SAMPLE_FOLDER_COPY, CONFIG_PATHS_TXT_FILENAME),
+            os.path.join(SAMPLE_FOLDER_COPY, CONFIG_FOLDERNAME),
+            pretrained_model_listing_json_filepath=TEST_FILE,
+        )
+    assert exc_info.type is Exception
+    assert "Cannot open" in str(exc_info.value)
+
+    clean_test_file()
+    clean_samples_folder_copy()
+
+
+clean_samples_folder_copy()
+clean_test_file()
diff --git a/utils/model_uploader/update_pretrained_model_listing.py b/utils/model_uploader/update_pretrained_model_listing.py
index f1b8be4c..a4bffac8 100644
--- a/utils/model_uploader/update_pretrained_model_listing.py
+++ b/utils/model_uploader/update_pretrained_model_listing.py
@@ -27,26 +27,25 @@
 def get_sentence_transformer_model_description(
-    config_folder_name: str, config_filepath: str
+    config_folderpath: str, config_filepath: str
 ) -> Optional[str]:
     """
     Get description of the pretrained sentence transformer model from config file
 
-    :param config_folder_name: Name of the local folder that stores config files (e.g. 'config_folder')
-    :type config_folder_name: string
+    :param config_folderpath: Path to the folder that stores copies of config files from S3 (e.g. 'config_folder')
+    :type config_folderpath: string
     :param config_filepath: Path to local config file (e.g. 'sentence-transformers/all-MiniLM-L12-v2/2.0.0/onnx/config.json')
     :type config_filepath: string
     :return: Description of the model
     :rtype: string or None
     """
-    filepath = os.path.join(config_folder_name, config_filepath)
+    filepath = os.path.join(config_folderpath, config_filepath)
     try:
         with open(filepath, "r") as f:
             model_config = json.load(f)
     except Exception as e:
-        print(f"Cannot open {filepath} to get model description: {e}")
-        return None
+        raise Exception(f"Cannot open {filepath} to get model description: {e}")
     if "description" in model_config:
         return model_config["description"]
     else:
@@ -54,26 +53,29 @@ def get_sentence_transformer_model_description(
 def create_new_pretrained_model_listing(
-    config_paths_txt_filename: str, config_foldername: str
+    config_paths_txt_filepath: str,
+    config_folderpath: str,
+    pretrained_model_listing_json_filepath: str = PRETRAINED_MODEL_LISTING_JSON_FILEPATH,
 ):
     """
-    Create a new pretrained model listing and store it at PRETRAINED_MODEL_LISTING_JSON_FILEPATH
-    based on current models in config_paths_txt_filename and their config files in config_foldername
+    Create a new pretrained model listing and store it at pretrained_model_listing_json_filepath
+    based on current models in config_paths_txt_filepath and their config files in config_folderpath
 
-    :param config_paths_txt_filename: Name of the txt file that stores paths to config file
+    :param config_paths_txt_filepath: Path to the txt file that stores a list of config paths from S3
     in the ml-models/huggingface/ folder of the S3 bucket
-    :type config_paths_txt_filename: string
-    :param config_foldername: Name of the local folder that stores config files
-    :type config_foldername: string
+    :type config_paths_txt_filepath: string
+    :param config_folderpath: Path to the folder that stores copies of config files from S3
+    :type config_folderpath: string
+    :param pretrained_model_listing_json_filepath: Path to the json file that stores the new model listing
     :return: No return value expected
     :rtype: None
     """
     print("\n=== Begin running update_pretrained_model_listing.py ===")
-    print(f"--- Reading {config_paths_txt_filename} ---")
-    with open(config_paths_txt_filename, "r") as f:
+    print(f"--- Reading {config_paths_txt_filepath} ---")
+    with open(config_paths_txt_filepath, "r") as f:
         config_paths_lst = f.read().split()
-    print("--- Creating New Model Listing --- ")
+    print("\n--- Creating New Model Listing --- ")
     new_model_listing_dict = {}
     for config_filepath in config_paths_lst:
         if config_filepath.startswith(PREFIX_HUGGINGFACE_MODEL_FILEPATH):
@@ -96,7 +98,7 @@ def create_new_pretrained_model_listing(
             versions_content[model_version]["format"].append(model_format)
             if "description" not in versions_content[model_version]:
                 description = get_sentence_transformer_model_description(
-                    config_foldername, local_config_filepath
+                    config_folderpath, local_config_filepath
                 )
                 if description is not None:
                     versions_content[model_version]["description"] = description
@@ -107,11 +109,11 @@ def create_new_pretrained_model_listing(
         model_dict["versions"] = dict(sorted(model_dict["versions"].items()))
 
     print(
-        f"--- Dumping New Model Listing in {PRETRAINED_MODEL_LISTING_JSON_FILEPATH} --- "
+        f"\n--- Dumping New Model Listing in {pretrained_model_listing_json_filepath} --- "
     )
     if not os.path.isdir(JSON_DIRNAME):
         os.makedirs(JSON_DIRNAME)
-    with open(PRETRAINED_MODEL_LISTING_JSON_FILEPATH, "w") as f:
+    with open(pretrained_model_listing_json_filepath, "w") as f:
         json.dump(new_model_listing_lst, f, indent=2)
 
     print("\n=== Finished running update_pretrained_model_listing.py ===")
@@ -119,22 +121,24 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
-        "config_paths_txt_filename",
+        "config_paths_txt_filepath",
         type=str,
-        help="Name of the file that stores config paths in S3",
+        help="Path to the txt file that stores a list of config paths from S3",
     )
     parser.add_argument(
-        "config_foldername",
+        "config_folderpath",
         type=str,
-        help="Name of the local folder that stores copies of config files from S3",
+        help="Path to the folder that stores copies of config files from S3",
    )
     args = parser.parse_args()
-    if not args.config_paths_txt_filename.endswith(".txt"):
-        assert False, "Invalid arguments"
+    if not args.config_paths_txt_filepath.endswith(".txt"):
+        raise Exception(
+            "Invalid argument: config_paths_txt_filepath should be a .txt file"
+        )
     create_new_pretrained_model_listing(
-        args.config_paths_txt_filename,
-        args.config_foldername,
+        args.config_paths_txt_filepath,
+        args.config_folderpath,
     )
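
Not part of the patch above: the new tests can be run from the repository root with
"pytest tests/ml_model_listing/test_update_pretrained_model_listing.py". For readers who
want to exercise the refactored entry point by hand, the following is a minimal sketch that
mirrors what test_create_new_pretrained_model_listing does. It assumes the repository root
as the working directory; the SAMPLES and OUTPUT names are arbitrary local variables, and
OUTPUT is a hypothetical scratch file, not anything defined by this patch.

    # Sketch only: build a listing from the sample fixtures added by this patch.
    import json
    import os
    import sys

    # Make the uploader utilities importable, as the test module does.
    sys.path.append("utils/model_uploader")
    from update_pretrained_model_listing import create_new_pretrained_model_listing

    SAMPLES = "tests/ml_model_listing/samples"        # fixtures added above
    OUTPUT = "scratch_pretrained_model_listing.json"  # hypothetical scratch path

    create_new_pretrained_model_listing(
        os.path.join(SAMPLES, "config_paths.txt"),
        os.path.join(SAMPLES, "config_folder"),
        pretrained_model_listing_json_filepath=OUTPUT,
    )

    # The result should match tests/ml_model_listing/samples/pretrained_model_listing.json.
    with open(OUTPUT, "r") as f:
        print(json.dumps(json.load(f), indent=2))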