From 3a7e077600404061d337880110579589266e790e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20Mart=C3=ADn=20Bl=C3=A1zquez?= Date: Wed, 4 Sep 2024 23:03:50 +0200 Subject: [PATCH] Bump to `tokenizers==0.20.0` --- Cargo.toml | 2 +- candle-holder-tokenizers/src/tokenizers/bert.rs | 10 +++++----- candle-holder-tokenizers/src/tokenizers/roberta.rs | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c633261..911beb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ hf-hub = "0.3.2" lazy_static = "1.4.0" serde = { version = "1.0.192", features = ["derive"] } serde_json = "1.0.108" -tokenizers = { version = "0.19.1", features = ["http"] } +tokenizers = { version = "0.20.0", features = ["http"] } thiserror = "1.0.61" accelerate-src = { version = "0.3.2" } intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] } diff --git a/candle-holder-tokenizers/src/tokenizers/bert.rs b/candle-holder-tokenizers/src/tokenizers/bert.rs index 9e44478..413aea4 100644 --- a/candle-holder-tokenizers/src/tokenizers/bert.rs +++ b/candle-holder-tokenizers/src/tokenizers/bert.rs @@ -138,13 +138,13 @@ impl TokenizerBuilder for BertTokenizerBuilder { .ok_or_else(|| Error::TokenizerMissingConfig)?; tokenizer - .with_normalizer(self.build_normalizer(tokenizer_config)) - .with_pre_tokenizer(self.build_pre_tokenizer()) - .with_post_processor(self.build_post_processor( + .with_normalizer(Some(self.build_normalizer(tokenizer_config))) + .with_pre_tokenizer(Some(self.build_pre_tokenizer())) + .with_post_processor(Some(self.build_post_processor( (sep_token.clone(), sep_token_id), (cls_token.clone(), cls_token_id), - )?) 
- .with_decoder(self.build_decoder()); + )?)) + .with_decoder(Some(self.build_decoder())); Ok(CoreTokenizer::from(tokenizer)) } diff --git a/candle-holder-tokenizers/src/tokenizers/roberta.rs b/candle-holder-tokenizers/src/tokenizers/roberta.rs index ec80a33..406371c 100644 --- a/candle-holder-tokenizers/src/tokenizers/roberta.rs +++ b/candle-holder-tokenizers/src/tokenizers/roberta.rs @@ -110,11 +110,11 @@ impl TokenizerBuilder for RobertaTokenizerBuilder { > = TokenizerImpl::new(self.build_model(vocab, merges)?); tokenizer - .with_pre_tokenizer(self.build_pre_tokenizer()) - .with_post_processor( + .with_pre_tokenizer(Some(self.build_pre_tokenizer())) + .with_post_processor(Some( self.build_post_processor((sep_token, sep_token_id), (cls_token, cls_token_id)), - ) - .with_decoder(self.build_decoder()); + )) + .with_decoder(Some(self.build_decoder())); Ok(CoreTokenizer::from(tokenizer)) }