
Commit d380a3b

adding special tokens
1 parent edd4653 commit d380a3b

2 files changed, +2 −2 lines


data/template/tokenizer_options.py (+1, −1)

@@ -351,4 +351,4 @@ def detokenize(self, ids):
         Returns:
             str: Decoded string.
         """
-        return self.tokenizer.decode(ids, skip_special_tokens=True)
+        return self.tokenizer.decode(ids)
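
This change stops stripping special tokens on decode, so ids such as the end-of-text marker now round-trip back into the output string. A minimal sketch of the difference, assuming a Hugging Face tokenizer that defines eos_token_id; the Qwen2 model name below is only an example, not taken from the repo:

# Sketch only (not part of the commit): shows what dropping
# skip_special_tokens=True changes on decode.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B", trust_remote_code=True)
ids = tokenizer.encode("hello") + [tokenizer.eos_token_id]

# Old behaviour: the end-of-text marker is silently removed.
print(tokenizer.decode(ids, skip_special_tokens=True))  # "hello"

# New behaviour: special tokens survive the round trip.
print(tokenizer.decode(ids))                            # e.g. "hello<|endoftext|>"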

train.py (+1, −1)

@@ -781,7 +781,7 @@ def load_tokenizer(self):
             self.decode = lambda l: ''.join([self.itos[i] for i in l])
         elif 'tokenizer' in meta and meta['tokenizer'] == 'qwen2':
             tokenizer = AutoTokenizer.from_pretrained(meta["qwen2_model"], trust_remote_code=True)
-            self.encode = lambda s: tokenizer.encode(s, add_special_tokens=False)
+            self.encode = lambda s: tokenizer.encode(s, add_special_tokens=True)
             self.decode = lambda l: tokenizer.decode(l)
             print(f"Using Qwen2 tokenizer: {meta['qwen2_model']}")
         elif 'tokenizer' in meta and meta['tokenizer'] == 'custom_char_with_byte_fallback':
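
In train.py, the Qwen2 encode path now requests special tokens from the tokenizer. A hedged sketch of the updated pair of lambdas pulled out of load_tokenizer() so it runs standalone; the meta dict and model name are placeholders, and whether encode actually inserts marker tokens depends on the tokenizer's own configuration:

# Sketch only: the two lambdas as assigned after this commit.
from transformers import AutoTokenizer

meta = {"tokenizer": "qwen2", "qwen2_model": "Qwen/Qwen2-0.5B"}  # placeholder values
tokenizer = AutoTokenizer.from_pretrained(meta["qwen2_model"], trust_remote_code=True)

encode = lambda s: tokenizer.encode(s, add_special_tokens=True)  # was False before this commit
decode = lambda l: tokenizer.decode(l)

ids = encode("hello world")
print(ids)
print(decode(ids))  # with the tokenizer_options.py change, any added specials are kept here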
