 import pytest
-import torch

 import vllm_gaudi.extension.environment as environment

@@ -44,17 +43,19 @@ def get_vllm_config():
     )
     return vllm_config

+
 @pytest.fixture
 def model_runner():
     vllm_config = get_vllm_config()
     model_config = vllm_config.model_config
     num_heads = model_config.get_num_kv_heads(vllm_config.parallel_config)
     head_size = model_config.get_head_size()
-    environment.set_vllm_config(vllm_config)
+    environment.set_vllm_config(vllm_config)
     vllm_config.compilation_config.static_forward_context = {"layer.0": Attention(num_heads, head_size, 0.1)}
     runner = HPUModelRunner(vllm_config, DEVICE)
     return runner

+
 def make_new_request(req_id, prompt_token_ids, num_computed_tokens=0):
     return NewRequestData(
         req_id=req_id,
@@ -67,11 +68,13 @@ def make_new_request(req_id, prompt_token_ids, num_computed_tokens=0):
         lora_request=None,
     )

-@pytest.mark.parametrize("prompt1, prompt2, num_common_prefix, expected_tokens", [
-    ([1, 2, 3, 4], [1, 2, 3, 4], 4, 0),  # full prefix cache hit
-    ([1, 2, 3], [1, 2, 3, 6, 7], 3, 2)  # partial prefix cache hit (3 cached, 2 new)
-])

+@pytest.mark.parametrize(
+    "prompt1, prompt2, num_common_prefix, expected_tokens",
+    [
+        ([1, 2, 3, 4], [1, 2, 3, 4], 4, 0),  # full prefix cache hit
+        ([1, 2, 3], [1, 2, 3, 6, 7], 3, 2)  # partial prefix cache hit (3 cached, 2 new)
+    ])
 def test_prefix_cache_hits(model_runner, prompt1, prompt2, num_common_prefix, expected_tokens, dist_init):
     req_id1 = "req1"
     req_id2 = "req2"
@@ -97,8 +100,7 @@ def test_prefix_cache_hits(model_runner, prompt1, prompt2, num_common_prefix, ex
     assert cached_state.prompt_token_ids == prompt1
     assert cached_state.num_computed_tokens == 0
     assert req_id1 in model_runner.requests
-    assert sched_out1.num_scheduled_tokens[req_id1] == len(prompt1)
-
+    assert sched_out1.num_scheduled_tokens[req_id1] == len(prompt1)

     # Second request: full prefix cache hit or partial prefix cache hit
     new_req2 = make_new_request(req_id2, prompt2, num_computed_tokens=num_common_prefix)
@@ -119,13 +121,16 @@ def test_prefix_cache_hits(model_runner, prompt1, prompt2, num_common_prefix, ex
     cached_state = model_runner.requests[req_id2]

     assert cached_state.prompt_token_ids == prompt2
-    assert cached_state.num_computed_tokens == num_common_prefix
+    assert cached_state.num_computed_tokens == num_common_prefix
     assert req_id2 in model_runner.requests
     assert sched_out2.num_scheduled_tokens[req_id2] == expected_tokens

-@pytest.mark.parametrize("prompt, cache_first, cache_second", [
-    ([10, 11, 12], 3, 0),  # first: all tokens cached, second: cache reset, all tokens need compute
-])
+
+@pytest.mark.parametrize(
+    "prompt, cache_first, cache_second",
+    [
+        ([10, 11, 12], 3, 0),  # first: all tokens cached, second: cache reset, all tokens need compute
+    ])
 def test_prefix_cache_reset(model_runner, prompt, cache_first, cache_second, dist_init):
     req_id = "req_reset"
     new_req_1 = make_new_request(req_id, prompt, num_computed_tokens=cache_first)