Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add description field with make_model_config_json function #203

Merged
Merged
6 changes: 1 addition & 5 deletions opensearch_py_ml/ml_models/sentencetransformermodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,7 +1035,7 @@ def _get_model_description_from_md_file(self, readme_file_path) -> str:
model_name = self.model_id.split("/")[1]
start_str = f"# {model_name}"
start = readme_data.find(start_str)
end = readme_data.find("## ", start)
end = readme_data.find("# ", start + len(start_str))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This approach has a potential issue: if there are other occurrences of # within the description section that are not intended to be section headers, it might lead to incorrect results. For example, if the description contains a sentence like "This model has an accuracy of 90% # which is impressive", the code might incorrectly identify that # as the start of a new section.

# Find the index of the next line that starts with a '#'
end = readme_data.find('\n# ', start + len(start_str))

What do you think about this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Talked to Dhrubo offline and decided to use end = readme_data.find("\n#", start + len(start_str)) with no spacebar after #. Note that clients can always overwrite description and we can do similar thing with the GitHub Actions workflow.


# If we cannot find the scope of description section, raise error.
if start == -1 or end == -1:
Expand All @@ -1044,10 +1044,6 @@ def _get_model_description_from_md_file(self, readme_file_path) -> str:
# Parse out the description section
description = readme_data[start + len(start_str) + 1 : end].strip()
description = description.split("\n")[0]
if not description.startswith("This is"):
assert (
False
), "Cannot find description that starts with 'This is' in README.md file"

# Remove hyperlink and reformat text
description = re.sub(r"\(.*?\)", "", description)
Expand Down
19 changes: 0 additions & 19 deletions tests/ml_models/test_sentencetransformermodel_pytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,25 +444,6 @@ def test_missing_expected_description_in_readme_file():
"description" not in model_config_data_torch
), "Should not have description in model config file"

with open(temp_path, "w") as f:
f.write(
"# sentence-transformers/msmarco-distilbert-base-tas-b\n\nNothing\n\n## Usage (Sentence-Transformers)"
)
model_config_path_torch = test_model10.make_model_config_json(
model_format="TORCH_SCRIPT"
)
try:
with open(model_config_path_torch) as json_file:
model_config_data_torch = json.load(json_file)
except Exception as exec:
assert (
False
), f"Creating model config file for tracing in torch_script raised an exception {exec}"

assert (
"description" not in model_config_data_torch
), "Should not have description in model config file"

clean_test_folder(TEST_FOLDER)


Expand Down
Loading