Enabled auto-truncation also for the ONNX format
Signed-off-by: yerzhaisang <[email protected]>
Yerzhaisang committed Jul 18, 2023
1 parent 50b1413 commit 27375e2
1 changed file: opensearch_py_ml/ml_models/sentencetransformermodel.py (17 additions, 5 deletions)
```diff
@@ -760,17 +760,17 @@ def save_as_pt(
 
         # save tokenizer.json in save_json_folder_name
         model.save(save_json_folder_path)
-        with open(save_json_folder_path + "/tokenizer.json") as user_file:
-            file_contents = user_file.read()
-        parsed_json = json.loads(file_contents)
-        if not parsed_json["truncation"]:
+        tokenizer_file_path = os.path.join(save_json_folder_path, "tokenizer.json")
+        with open(tokenizer_file_path) as user_file:
+            parsed_json = json.load(user_file)
+        if "truncation" not in parsed_json or parsed_json["truncation"] is None:
             parsed_json["truncation"] = {
                 "direction": "Right",
                 "max_length": model.tokenizer.model_max_length,
                 "strategy": "LongestFirst",
                 "stride": 0,
             }
-            with open(save_json_folder_path + "/tokenizer.json", "w") as file:
+            with open(tokenizer_file_path, "w") as file:
                 json.dump(parsed_json, file, indent=2)
 
         # convert to pt format will need to be in cpu,
```
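The `save_as_pt` path above is refactored to build the tokenizer path with `os.path.join`, parse with `json.load`, and guard against the `truncation` key being absent as well as null; the hunk below adds the same default-truncation fix to `save_as_onnx`. As a rough standalone sketch of that shared pattern (the `fill_null_truncation` helper name and its parameters are illustrative, not part of this commit):

```python
import json
import os


def fill_null_truncation(save_json_folder_path: str, max_length: int) -> None:
    """Add a default truncation policy to tokenizer.json when it is missing or null."""
    tokenizer_file_path = os.path.join(save_json_folder_path, "tokenizer.json")
    with open(tokenizer_file_path) as user_file:
        parsed_json = json.load(user_file)
    # Hugging Face fast tokenizers typically serialize "truncation": null when no
    # policy was set, so check for both an absent key and an explicit null.
    if "truncation" not in parsed_json or parsed_json["truncation"] is None:
        parsed_json["truncation"] = {
            "direction": "Right",
            "max_length": max_length,
            "strategy": "LongestFirst",
            "stride": 0,
        }
        with open(tokenizer_file_path, "w") as file:
            json.dump(parsed_json, file, indent=2)
```

In the commit itself this logic is inlined in both methods, with `max_length` taken from `model.tokenizer.model_max_length`.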
```diff
@@ -863,6 +863,18 @@ def save_as_onnx(
 
         # save tokenizer.json in output_path
         model.save(save_json_folder_path)
+        tokenizer_file_path = os.path.join(save_json_folder_path, "tokenizer.json")
+        with open(tokenizer_file_path) as user_file:
+            parsed_json = json.load(user_file)
+        if "truncation" not in parsed_json or parsed_json["truncation"] is None:
+            parsed_json["truncation"] = {
+                "direction": "Right",
+                "max_length": model.tokenizer.model_max_length,
+                "strategy": "LongestFirst",
+                "stride": 0,
+            }
+            with open(tokenizer_file_path, "w") as file:
+                json.dump(parsed_json, file, indent=2)
 
         convert(
             framework="pt",
```
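A quick way to exercise the new ONNX path is to export a model and check that the written tokenizer.json now carries a populated truncation section. The constructor arguments, `save_as_onnx` signature, model id, and output location below are assumptions about typical opensearch-py-ml usage, not taken from this commit:

```python
import json
import os

from opensearch_py_ml.ml_models import SentenceTransformerModel

# Assumed model id and export folder; adjust for your environment.
folder_path = "/tmp/sentence-transformer-onnx"
model = SentenceTransformerModel(
    model_id="sentence-transformers/msmarco-distilbert-base-tas-b",
    folder_path=folder_path,
    overwrite=True,
)
model.save_as_onnx(model_id="sentence-transformers/msmarco-distilbert-base-tas-b")

# With the fix, "truncation" should be an object rather than null
# (path assumes the default save_json_folder_path under folder_path).
with open(os.path.join(folder_path, "tokenizer.json")) as f:
    print(json.load(f)["truncation"])
```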
