diff --git a/src/bpe/algorithm.rs b/src/bpe/algorithm.rs index fa230e9..e17200f 100644 --- a/src/bpe/algorithm.rs +++ b/src/bpe/algorithm.rs @@ -195,7 +195,7 @@ impl MergeState<'_, '_> { } #[inline] - pub fn iter(&self) -> Iter { + pub fn iter(&self) -> Iter<'_> { Iter { bpe: self.bpe, marks: &self.marks, diff --git a/src/bpe/mod.rs b/src/bpe/mod.rs index 8f04cb4..ce922a8 100644 --- a/src/bpe/mod.rs +++ b/src/bpe/mod.rs @@ -246,7 +246,7 @@ impl Bpe { "gpt2" => { let pre_type = gguf.get_str("tokenizer.ggml.pre").unwrap(); let regex_str = match pre_type { - "qwen2" | "deepseek-r1-qwen" => TOKENIZER_PRE_QWEN, + "qwen2" | "deepseek-r1-qwen" | "olmo" => TOKENIZER_PRE_QWEN, _ => unimplemented!("not supported pre_type {}", pre_type), }; match scores { @@ -337,7 +337,7 @@ impl Method for Bpe { vocab.into_iter() } - fn decode(&self, token: utok, buf: &mut TextBuf) -> Cow<[u8]> { + fn decode(&self, token: utok, buf: &mut TextBuf) -> Cow<'_, [u8]> { match &self.modeltype { Model::GPT2(_) => { if self.special.contains(&token) { diff --git a/src/lib.rs b/src/lib.rs index 3b4e438..f55c358 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ pub trait Method { fn vocab_size(&self) -> usize; fn internal_special(&self) -> impl IntoIterator; fn encode(&self, text: &str) -> impl IntoIterator + '_; - fn decode(&self, token: utok, buf: &mut TextBuf) -> Cow<[u8]>; + fn decode(&self, token: utok, buf: &mut TextBuf) -> Cow<'_, [u8]>; fn pre_encode<'s>(&self, text: &'s str) -> Cow<'s, str> { text.into() } diff --git a/src/lpe/mod.rs b/src/lpe/mod.rs index 6f858fa..b3eacef 100644 --- a/src/lpe/mod.rs +++ b/src/lpe/mod.rs @@ -165,7 +165,7 @@ impl Method for Lpe { tokens } #[inline] - fn decode(&self, token: utok, _buf: &mut TextBuf) -> Cow<[u8]> { + fn decode(&self, token: utok, _buf: &mut TextBuf) -> Cow<'_, [u8]> { self.token(token).into() } }