Skip to content

Commit 8b002b8

Browse files
authored
Fix the case that bos_token is null (microsoft#781)
1 parent b4ebfc9 commit 8b002b8

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

operators/tokenizer/bpe_json.hpp

+11 -11
Original file line number | Diff line number | Diff line change
@@ -40,14 +40,16 @@ class TokenJsonConfig final {
40 40
tokenizer_class_ = json_config.value("tokenizer_class", "");
41 41

42 42
auto tok_iter = json_config.find("bos_token");
43-
if (tok_iter != json_config.end() && tok_iter->is_object()) {
44-
bos_token_ = tok_iter->value("content", "");
45-
eos_token_ = json_config.value("/eos_token/content"_json_pointer, "");
46-
unk_token_ = json_config.value("/unk_token/content"_json_pointer, "");
47-
} else {
48-
bos_token_ = json_config.value("bos_token", "");
49-
eos_token_ = json_config.value("eos_token", "");
50-
unk_token_ = json_config.value("unk_token", "");
43+
if (tok_iter != json_config.end() && !tok_iter->is_null()) {
44+
if (tok_iter->is_object()) {
45+
bos_token_ = tok_iter->value("content", "");
46+
eos_token_ = json_config.value("/eos_token/content"_json_pointer, "");
47+
unk_token_ = json_config.value("/unk_token/content"_json_pointer, "");
48+
} else {
49+
bos_token_ = json_config.value("bos_token", "");
50+
eos_token_ = json_config.value("eos_token", "");
51+
unk_token_ = json_config.value("unk_token", "");
52+
}
51 53
}
52 54

53 55
auto pad_iter = json_config.find("pad_token");
@@ -62,9 +64,7 @@ class TokenJsonConfig final {
62 64
return {};
63 65
}
64 66

65-
const std::string& GetVocabDataFile() const {
66-
return vocab_path_;
67-
}
67+
const std::string& GetVocabDataFile() const { return vocab_path_; }
68 68

69 69
public:
70 70
bool add_bos_token_{};

0 commit comments

Comments
 (0)