guidance-ai · mmoskal · Nov 22, 2024 · Nov 22, 2024 · Nov 24, 2024
diff --git a/guidance/_parser.py b/guidance/_parser.py
@@ -91,8 +91,8 @@ def _process_prompt(self, prompt: bytes, ensure_bos_token: bool) -> list[int]:
 
         return self.tokenizer.recode(prompt_tokens)
 
-    def mid_process(self) -> tuple[Optional[bytes], LLInterpreterResponse]:
-        mask, ll_response_string = self.ll_interpreter.mid_process()
+    def compute_mask(self) -> tuple[Optional[bytes], LLInterpreterResponse]:
+        mask, ll_response_string = self.ll_interpreter.compute_mask()
         ll_response = LLInterpreterResponse.model_validate_json(ll_response_string)
         return mask, ll_response
 
@@ -111,14 +111,14 @@ def _parse(
         tokens = self._process_prompt(prompt=prompt, ensure_bos_token=ensure_bos_token)
 
         while True:
-            # Note: need to call/set has_pending_stop before spinning up the mid_process future
-            # as the two methods cannot be called concurrently
+            # Note: need to call/set has_pending_stop before spinning up the compute_mask
+            # future, as the two methods cannot be called concurrently
             self._has_pending_stop = self.ll_interpreter.has_pending_stop()
-            mid_process_future = self._threadpool.submit(self.mid_process)
-            token = yield (tokens, mid_process_future)
+            mask_future = self._threadpool.submit(self.compute_mask)
+            token = yield (tokens, mask_future)
 
             # Upstairs should have already waited on this future
-            mask, ll_response = mid_process_future.result()
+            mask, ll_response = mask_future.result()
 
             if ll_response.stop:
                 # This is the only case in which the mask is None
@@ -141,7 +141,7 @@ def _parse(
                     prompt_tokens=tokens
                 )
 
-            backtrack, ff_tokens = self.ll_interpreter.post_process(token)
+            backtrack, ff_tokens = self.ll_interpreter.commit_token(token)
             if backtrack:
                 tokens = tokens[:-backtrack]
             tokens = tokens + ff_tokens
@@ -212,8 +212,8 @@ def next_byte_mask(self) -> NDArray[np.uint8]:
         return mask
 
     def _advance(self, token: Optional[int]) -> None:
-        tokens, mid_process_fut = self.token_parser.advance(token)
-        mask, ll_response = mid_process_fut.result()
+        tokens, mask_fut = self.token_parser.advance(token)
+        mask, ll_response = mask_fut.result()
         if ll_response.stop:
             assert mask is None
             self.token_parser.cleanup()

diff --git a/guidance/models/_model.py b/guidance/models/_model.py
@@ -144,7 +144,7 @@ def __call__(
         has_get_logits = True
         token = None
         while True:
-            tokens, mid_process_fut = parser.advance(token)
+            tokens, mask_fut = parser.advance(token)
 
             # Note that has_pending_stop implies that the response is a stop response,
             # but the converse is not true. We can therefore avoid some (but not all)
@@ -163,7 +163,7 @@ def __call__(
 
             # Important: don't wait on this future until after getting the logits;
             # this allows the mask to be built concurrently with model inference
-            mask, ll_response = mid_process_fut.result()
+            mask, ll_response = mask_fut.result()
 
             engine_response = ll_response.progress.to_engine_call_response()
             yield engine_response

diff --git a/setup.py b/setup.py
@@ -29,7 +29,7 @@
     "referencing",
     "requests",
     "tiktoken>=0.3",
-    "llguidance>=0.3.0,<0.4.0",
+    "llguidance>=0.4.1,<0.5.0",
 ]
 
 # Our basic list of 'extras'

diff --git a/tests/unit/test_ll.py b/tests/unit/test_ll.py
@@ -115,7 +115,7 @@ def check_grammar(grm: GrammarFunction, output: List[str]):
     idx = 1
     gen_tokens = tokenize_trace(output[idx])
     for _ in range(200):
-        mask, cmd = interp.mid_process()
+        mask, cmd = interp.compute_mask()
         cmd = json.loads(cmd)
         if log_level >= 1:
             print(mask is not None, cmd)
@@ -129,7 +129,7 @@ def check_grammar(grm: GrammarFunction, output: List[str]):
             tok = gen_tokens[0]
             del gen_tokens[0:1]
             assert mask[tok] > 0, f"Token {tok} not allowed"
-            bt, toks = interp.post_process(tok)
+            bt, toks = interp.commit_token(tok)
             if not toks or toks[0] != tok:
                 if output[idx + 1].startswith("1↶"):
                     # fast-forward with fake backtrack
@@ -149,7 +149,7 @@ def check_grammar(grm: GrammarFunction, output: List[str]):
                 assert len(toks) == 1
                 continue  # normal path
         else:
-            bt, toks = interp.post_process(None)
+            bt, toks = interp.commit_token(None)
 
         # forced byte checking
         assert not gen_tokens, "Expected more tokens to generate"