Skip to content

Commit

Permalink
Migrate excluded_models to py file
Browse files Browse the repository at this point in the history
Signed-off-by: Thanawan Atchariyachanvanit <[email protected]>
  • Loading branch information
thanawan-atc committed Sep 6, 2023
1 parent 19cf4b1 commit 1d51cbf
Show file tree
Hide file tree
Showing 15 changed files with 46 additions and 59 deletions.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"name": "sentence-transformers/multi-qa-mpnet-base-cos-v1", "version": "1.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "ONNX", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": true, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-cos-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"vocab_size\": 30527}"}}
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "1.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "ONNX", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"name": "sentence-transformers/multi-qa-mpnet-base-cos-v1", "version": "1.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "MEAN", "normalize_result": true, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-cos-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.31.0\", \"vocab_size\": 30527}"}}
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "1.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources.", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "2.0.0", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.2.0.0)", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "3.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.3.0.1)", "model_format": "ONNX", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"name": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "version": "3.0.1", "description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.3.0.1)", "model_format": "TORCH_SCRIPT", "model_task_type": "TEXT_EMBEDDING", "model_config": {"model_type": "mpnet", "embedding_dimension": 768, "framework_type": "sentence_transformers", "pooling_mode": "CLS", "normalize_result": false, "all_config": "{\"_name_or_path\": \"/root/.cache/torch/sentence_transformers/sentence-transformers_multi-qa-mpnet-base-dot-v1/\", \"architectures\": [\"MPNetModel\"], \"attention_probs_dropout_prob\": 0.1, \"bos_token_id\": 0, \"eos_token_id\": 2, \"hidden_act\": \"gelu\", \"hidden_dropout_prob\": 0.1, \"hidden_size\": 768, \"initializer_range\": 0.02, \"intermediate_size\": 3072, \"layer_norm_eps\": 1e-05, \"max_position_embeddings\": 514, \"model_type\": \"mpnet\", \"num_attention_heads\": 12, \"num_hidden_layers\": 12, \"pad_token_id\": 1, \"relative_attention_num_buckets\": 32, \"torch_dtype\": \"float32\", \"transformers_version\": \"4.30.2\", \"vocab_size\": 30527}"}}

2 changes: 1 addition & 1 deletion tests/ml_model_listing/samples/config_paths.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ml-models/huggingface/intfloat/e5-small-v2/1.0.1/onnx/config.json ml-models/other_source/jhgan/ko-sroberta-multitask/1.0.1/torch_script/config.json ml-models/huggingface/sentence-transformers/clip-ViT-B-32-multilingual-v1/1.0.1/torch_script/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/onnx/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1/1.0.1/torch_script/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1/2.0.0/torch_script/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1/3.0.1/onnx/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1/3.0.1/torch_script/config.json
ml-models/huggingface/intfloat/e5-small-v2/1.0.1/onnx/config.json ml-models/other_source/jhgan/ko-sroberta-multitask/1.0.1/torch_script/config.json ml-models/huggingface/sentence-transformers/clip-ViT-B-32-multilingual-v1/1.0.1/torch_script/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1/1.0.0/onnx/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1/1.0.0/torch_script/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1/2.0.0/torch_script/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1/3.0.1/onnx/config.json ml-models/huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1/3.0.1/torch_script/config.json
2 changes: 0 additions & 2 deletions tests/ml_model_listing/samples/excluded_models.txt

This file was deleted.

6 changes: 3 additions & 3 deletions tests/ml_model_listing/samples/pretrained_model_listing.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,20 @@
}
},
{
"name": "huggingface/sentence-transformers/multi-qa-mpnet-base-cos-v1",
"name": "huggingface/sentence-transformers/multi-qa-mpnet-base-dot-v1",
"versions": {
"2.0.0": {
"format": [
"torch_script"
],
"description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (New Version)"
"description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.2.0.0)"
},
"3.0.1": {
"format": [
"onnx",
"torch_script"
],
"description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources."
"description": "This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and was designed for semantic search. It has been trained on 215M pairs from diverse sources. (v.3.0.1)"
}
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
SAMPLE_FOLDER = os.path.join(THIS_DIR, "samples")
CONFIG_PATHS_TXT_FILENAME = "config_paths.txt"
CONFIG_FOLDERNAME = "config_folder"
EXCLUDED_MODELS_TXT_FILENAME = "excluded_models.txt"
SAMPLE_PRETRAINED_MODEL_LISTING = os.path.join(
SAMPLE_FOLDER, "pretrained_model_listing.json"
)
Expand Down Expand Up @@ -76,8 +75,6 @@ def test_create_new_pretrained_model_listing():
os.path.join(SAMPLE_FOLDER, CONFIG_FOLDERNAME),
"--pretrained_model_listing_json_filepath",
TEST_FILE,
"--excluded_models_txt_filepath",
os.path.join(SAMPLE_FOLDER, EXCLUDED_MODELS_TXT_FILENAME),
]
)
except Exception as e:
Expand All @@ -97,6 +94,7 @@ def test_create_new_pretrained_model_listing():
except Exception as e:
assert False, print(f"Cannot open {TEST_FILE} to verify its content: {e}")

print(test_pretrained_model_listing)
assert test_pretrained_model_listing == sample_pretrained_model_listing, print(
"Incorrect pretrained model listing"
)
Expand All @@ -122,8 +120,6 @@ def test_missing_config_file():
os.path.join(SAMPLE_FOLDER_COPY, CONFIG_FOLDERNAME),
"--pretrained_model_listing_json_filepath",
TEST_FILE,
"--excluded_models_txt_filepath",
os.path.join(SAMPLE_FOLDER, EXCLUDED_MODELS_TXT_FILENAME),
]
)
assert exc_info.type is Exception
Expand Down
29 changes: 29 additions & 0 deletions utils/model_uploader/excluded_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# SPDX-License-Identifier: Apache-2.0
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Any modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

# Models whose paths appear in EXCLUDED_MODELS are left out of the model listing
# that update_pretrained_model_listing.py generates in the
# "Model Listing Workflow I: Updating" workflow.
#
# Every excluded path has the shape
#   <prefix>/<model name>/<version>/<model format>
# so the list is assembled from those parts instead of being spelled out.
_EXCLUDED_PATH_PREFIX = "ml-models/huggingface/sentence-transformers"
_EXCLUDED_VERSION = "1.0.0"
_EXCLUDED_FORMATS = ("onnx", "torch_script")
_EXCLUDED_MODEL_NAMES = (
    "all-MiniLM-L12-v2",
    "all-MiniLM-L6-v2",
    "all-distilroberta-v1",
    "all-mpnet-base-v2",
    "msmarco-distilbert-base-tas-b",
    "multi-qa-MiniLM-L6-cos-v1",
    "multi-qa-mpnet-base-dot-v1",
    "paraphrase-MiniLM-L3-v2",
    "paraphrase-multilingual-MiniLM-L12-v2",
)

# One entry per (model name, format) pair: entries are grouped by model name,
# and within a model the formats follow the order of _EXCLUDED_FORMATS.
EXCLUDED_MODELS = [
    "/".join((_EXCLUDED_PATH_PREFIX, model_name, _EXCLUDED_VERSION, model_format))
    for model_name in _EXCLUDED_MODEL_NAMES
    for model_format in _EXCLUDED_FORMATS
]
Loading

0 comments on commit 1d51cbf

Please sign in to comment.