From 89856ef00d377d0b63ce91fb3c5d184dcbfa9124 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Sat, 15 Apr 2023 17:32:53 -0400
Subject: [PATCH] Bugfix: only eval new tokens

---
 llama_cpp/llama.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index b92801c92..edd2eef8e 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -280,6 +280,7 @@ def generate(
             if self.verbose:
                 print("generate cache hit", file=sys.stderr)
             reset = False
+            tokens = tokens[len(self.tokens) :]
         ###
         if reset:
             self.reset()
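
Note: the sketch below is a self-contained toy reconstruction of the caching
logic around this hunk, not the library's verbatim code. Only self.cache,
self.tokens, self.verbose, reset, and the slice itself come from the diff
context; the PrefixCacheDemo class and its eval()/reset() stand-ins are
hypothetical (the real generate() lives on Llama and ultimately calls
llama_eval). It illustrates the bug being fixed: on a cache hit the code
skipped reset() but still evaluated the full prompt, so the already-cached
prefix was fed to eval() a second time. Slicing off len(self.tokens) leaves
only the new tokens.

    import sys
    from typing import List, Sequence

    class PrefixCacheDemo:
        """Toy model of generate(): self.tokens holds tokens whose state
        has already been evaluated and can be reused on a cache hit."""

        def __init__(self, verbose: bool = True) -> None:
            self.cache = True
            self.verbose = verbose
            self.tokens: List[int] = []

        def reset(self) -> None:
            self.tokens = []

        def eval(self, tokens: Sequence[int]) -> None:
            # Stand-in for the real eval call: records tokens as evaluated.
            self.tokens.extend(tokens)

        def generate(self, tokens: Sequence[int]) -> None:
            reset = True
            # Cache hit: the previously evaluated tokens are a prefix of
            # the new prompt, so the cached state can be reused.
            if (
                self.cache
                and len(self.tokens) > 0
                and self.tokens == list(tokens[: len(self.tokens)])
            ):
                if self.verbose:
                    print("generate cache hit", file=sys.stderr)
                reset = False
                # The fix: keep only the tokens not yet evaluated.
                tokens = tokens[len(self.tokens) :]
            if reset:
                self.reset()
            self.eval(tokens)

    demo = PrefixCacheDemo()
    demo.generate([1, 2, 3])        # cold start: evaluates all three tokens
    demo.generate([1, 2, 3, 4, 5])  # cache hit: evaluates only [4, 5]
    assert demo.tokens == [1, 2, 3, 4, 5]

Without the added slice, the second call would re-evaluate [1, 2, 3, 4, 5]
on top of the cached prefix, leaving self.tokens as
[1, 2, 3, 1, 2, 3, 4, 5] in this toy and corrupting the evaluated state.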