From 26165f69f680f282b45a91a9540bf92dd15be762 Mon Sep 17 00:00:00 2001 From: Thanawan Atchariyachanvanit Date: Wed, 26 Jul 2023 13:55:26 -0700 Subject: [PATCH 1/6] Improve make_model_config Signed-off-by: Thanawan Atchariyachanvanit --- .../ml_models/sentencetransformermodel.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/opensearch_py_ml/ml_models/sentencetransformermodel.py b/opensearch_py_ml/ml_models/sentencetransformermodel.py index fb166f28..285a1414 100644 --- a/opensearch_py_ml/ml_models/sentencetransformermodel.py +++ b/opensearch_py_ml/ml_models/sentencetransformermodel.py @@ -24,7 +24,7 @@ import yaml from accelerate import Accelerator, notebook_launcher from sentence_transformers import SentenceTransformer -from sentence_transformers.models import Normalize, Pooling, Transformer +from sentence_transformers.models import Normalize, Pooling, Transformer # , Dense from torch.utils.data import DataLoader from tqdm import tqdm from transformers import TrainingArguments, get_linear_schedule_with_warmup @@ -1068,28 +1068,25 @@ def make_model_config_json( or normalize_result is None ): try: - if ( - model_type is None - and len(model._modules) >= 1 - and isinstance(model._modules["0"], Transformer) - ): - model_type = model._modules["0"].auto_model.__class__.__name__ - model_type = model_type.lower().rstrip("model") if embedding_dimension is None: embedding_dimension = model.get_sentence_embedding_dimension() - if ( - pooling_mode is None - and len(model._modules) >= 2 - and isinstance(model._modules["1"], Pooling) - ): - pooling_mode = model._modules["1"].get_pooling_mode_str().upper() - if normalize_result is None: - if len(model._modules) >= 3 and isinstance( - model._modules["2"], Normalize - ): + + for str_idx, module in model._modules.items(): + if model_type is None and isinstance(module, Transformer): + model_type = module.auto_model.__class__.__name__ + model_type = model_type.lower().rstrip("model") + elif pooling_mode is None and isinstance(module, Pooling): + pooling_mode = module.get_pooling_mode_str().upper() + elif normalize_result is None and isinstance(module, Normalize): normalize_result = True - else: - normalize_result = False + # Currently, we don't support Dense + # elif ( + # ... is not None + # isinstance(module, Dense) + # ): + # ... + if normalize_result is None: + normalize_result = False except Exception as e: raise Exception( f"Raised exception while getting model data from pre-trained hugging-face model object: {e}" From f372ecb69324f5eb965be43002c42b97a9b468dc Mon Sep 17 00:00:00 2001 From: Thanawan Atchariyachanvanit Date: Wed, 26 Jul 2023 14:55:06 -0700 Subject: [PATCH 2/6] Update sentencetransformermodel.py Signed-off-by: Thanawan Atchariyachanvanit --- opensearch_py_ml/ml_models/sentencetransformermodel.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/opensearch_py_ml/ml_models/sentencetransformermodel.py b/opensearch_py_ml/ml_models/sentencetransformermodel.py index 285a1414..8fbcb1a0 100644 --- a/opensearch_py_ml/ml_models/sentencetransformermodel.py +++ b/opensearch_py_ml/ml_models/sentencetransformermodel.py @@ -24,7 +24,7 @@ import yaml from accelerate import Accelerator, notebook_launcher from sentence_transformers import SentenceTransformer -from sentence_transformers.models import Normalize, Pooling, Transformer # , Dense +from sentence_transformers.models import Normalize, Pooling, Transformer from torch.utils.data import DataLoader from tqdm import tqdm from transformers import TrainingArguments, get_linear_schedule_with_warmup @@ -1079,12 +1079,7 @@ def make_model_config_json( pooling_mode = module.get_pooling_mode_str().upper() elif normalize_result is None and isinstance(module, Normalize): normalize_result = True - # Currently, we don't support Dense - # elif ( - # ... is not None - # isinstance(module, Dense) - # ): - # ... + # TODO: Support 'Dense' module if normalize_result is None: normalize_result = False except Exception as e: From 4c5bea3fc5e0ca862c7754b4d3424a9c382ef97c Mon Sep 17 00:00:00 2001 From: Thanawan Atchariyachanvanit Date: Wed, 26 Jul 2023 14:58:36 -0700 Subject: [PATCH 3/6] Update CHANGELOG.md Signed-off-by: Thanawan Atchariyachanvanit --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a13eca9..5c1db446 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Fix ModelUploader bug & Update model tracing demo notebook by @thanawan-atc in ([#185](https://github.com/opensearch-project/opensearch-py-ml/pull/185)) - Fix make_model_config_json function by @thanawan-atc in ([#188](https://github.com/opensearch-project/opensearch-py-ml/pull/188)) - Make make_model_config_json function more concise by @thanawan-atc in ([#191](https://github.com/opensearch-project/opensearch-py-ml/pull/191)) -- Enabled auto-truncation for any pretrained models ([#192]https://github.com/opensearch-project/opensearch-py-ml/pull/192) +- Enabled auto-truncation for any pretrained models ([#192](https://github.com/opensearch-project/opensearch-py-ml/pull/192)) +- Generalize make_model_config_json function by @thanawan-atc in ([#200](https://github.com/opensearch-project/opensearch-py-ml/pull/200)) ## [1.0.0] From a9694b0c873d7d970aad0fcf3e19da3c9c1d4889 Mon Sep 17 00:00:00 2001 From: Thanawan Atchariyachanvanit Date: Wed, 26 Jul 2023 14:59:44 -0700 Subject: [PATCH 4/6] Update CHANGELOG.md Signed-off-by: Thanawan Atchariyachanvanit --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c1db446..a9eb6339 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Fix ModelUploader bug & Update model tracing demo notebook by @thanawan-atc in ([#185](https://github.com/opensearch-project/opensearch-py-ml/pull/185)) - Fix make_model_config_json function by @thanawan-atc in ([#188](https://github.com/opensearch-project/opensearch-py-ml/pull/188)) - Make make_model_config_json function more concise by @thanawan-atc in ([#191](https://github.com/opensearch-project/opensearch-py-ml/pull/191)) -- Enabled auto-truncation for any pretrained models ([#192](https://github.com/opensearch-project/opensearch-py-ml/pull/192)) +- Enabled auto-truncation for any pretrained models by @@Yerzhaisang ([#192](https://github.com/opensearch-project/opensearch-py-ml/pull/192)) - Generalize make_model_config_json function by @thanawan-atc in ([#200](https://github.com/opensearch-project/opensearch-py-ml/pull/200)) ## [1.0.0] From 12217334bcf705ee584421eeb9b132ccaa928f8d Mon Sep 17 00:00:00 2001 From: Thanawan Atchariyachanvanit Date: Wed, 26 Jul 2023 15:00:13 -0700 Subject: [PATCH 5/6] Update CHANGELOG.md Signed-off-by: Thanawan Atchariyachanvanit --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9eb6339..e8ef4686 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Fix ModelUploader bug & Update model tracing demo notebook by @thanawan-atc in ([#185](https://github.com/opensearch-project/opensearch-py-ml/pull/185)) - Fix make_model_config_json function by @thanawan-atc in ([#188](https://github.com/opensearch-project/opensearch-py-ml/pull/188)) - Make make_model_config_json function more concise by @thanawan-atc in ([#191](https://github.com/opensearch-project/opensearch-py-ml/pull/191)) -- Enabled auto-truncation for any pretrained models by @@Yerzhaisang ([#192](https://github.com/opensearch-project/opensearch-py-ml/pull/192)) +- Enabled auto-truncation for any pretrained models by @Yerzhaisang ([#192](https://github.com/opensearch-project/opensearch-py-ml/pull/192)) - Generalize make_model_config_json function by @thanawan-atc in ([#200](https://github.com/opensearch-project/opensearch-py-ml/pull/200)) ## [1.0.0] From 9b9adfb6ade5a5aed6c27bc9f28219b39ed18493 Mon Sep 17 00:00:00 2001 From: Thanawan Atchariyachanvanit Date: Wed, 26 Jul 2023 15:00:43 -0700 Subject: [PATCH 6/6] Update CHANGELOG.md Signed-off-by: Thanawan Atchariyachanvanit --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8ef4686..fd7dd796 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Fix ModelUploader bug & Update model tracing demo notebook by @thanawan-atc in ([#185](https://github.com/opensearch-project/opensearch-py-ml/pull/185)) - Fix make_model_config_json function by @thanawan-atc in ([#188](https://github.com/opensearch-project/opensearch-py-ml/pull/188)) - Make make_model_config_json function more concise by @thanawan-atc in ([#191](https://github.com/opensearch-project/opensearch-py-ml/pull/191)) -- Enabled auto-truncation for any pretrained models by @Yerzhaisang ([#192](https://github.com/opensearch-project/opensearch-py-ml/pull/192)) +- Enabled auto-truncation for any pretrained models by @Yerzhaisang in ([#192](https://github.com/opensearch-project/opensearch-py-ml/pull/192)) - Generalize make_model_config_json function by @thanawan-atc in ([#200](https://github.com/opensearch-project/opensearch-py-ml/pull/200)) ## [1.0.0]