Skip to content

Commit

Permalink
Bump to tokenizers==0.20.0
Browse files Browse the repository at this point in the history
  • Loading branch information
gabrielmbmb committed Sep 4, 2024
1 parent f9b6ed4 commit 3a7e077
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ hf-hub = "0.3.2"
lazy_static = "1.4.0"
serde = { version = "1.0.192", features = ["derive"] }
serde_json = "1.0.108"
tokenizers = { version = "0.19.1", features = ["http"] }
tokenizers = { version = "0.20.0", features = ["http"] }
thiserror = "1.0.61"
accelerate-src = { version = "0.3.2" }
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] }
Expand Down
10 changes: 5 additions & 5 deletions candle-holder-tokenizers/src/tokenizers/bert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,13 @@ impl TokenizerBuilder<BertTokenizer> for BertTokenizerBuilder {
.ok_or_else(|| Error::TokenizerMissingConfig)?;

tokenizer
.with_normalizer(self.build_normalizer(tokenizer_config))
.with_pre_tokenizer(self.build_pre_tokenizer())
.with_post_processor(self.build_post_processor(
.with_normalizer(Some(self.build_normalizer(tokenizer_config)))
.with_pre_tokenizer(Some(self.build_pre_tokenizer()))
.with_post_processor(Some(self.build_post_processor(
(sep_token.clone(), sep_token_id),
(cls_token.clone(), cls_token_id),
)?)
.with_decoder(self.build_decoder());
)?))
.with_decoder(Some(self.build_decoder()));

Ok(CoreTokenizer::from(tokenizer))
}
Expand Down
8 changes: 4 additions & 4 deletions candle-holder-tokenizers/src/tokenizers/roberta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,11 @@ impl TokenizerBuilder<RobertaTokenizer> for RobertaTokenizerBuilder {
> = TokenizerImpl::new(self.build_model(vocab, merges)?);

tokenizer
.with_pre_tokenizer(self.build_pre_tokenizer())
.with_post_processor(
.with_pre_tokenizer(Some(self.build_pre_tokenizer()))
.with_post_processor(Some(
self.build_post_processor((sep_token, sep_token_id), (cls_token, cls_token_id)),
)
.with_decoder(self.build_decoder());
))
.with_decoder(Some(self.build_decoder()));

Ok(CoreTokenizer::from(tokenizer))
}
Expand Down

0 comments on commit 3a7e077

Please sign in to comment.