vllm-project · horheynm · Mar 26, 2025
diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py b/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py
@@ -108,6 +108,7 @@ def create_mock_file(self, extension, content, path, filename):
 
     def tearDown(self):
+        self.monkeypatch.undo()  # restore env first, even if rmtree raises
         shutil.rmtree(self.output)
 
 
 @pytest.mark.integration
@@ -120,8 +121,11 @@ class TestOneshotCustomDatasetSmall(TestFinetuneNoRecipeCustomDataset):
     def setUp(self):
         import torch
 
+        self.monkeypatch = pytest.MonkeyPatch()  # undone in tearDown
+
         if torch.cuda.is_available():
             self.device = "cuda:0"
+            self.monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0")  # GPU 0 only
         else:
             self.device = "cpu"
 
@@ -143,8 +147,10 @@ def setUp(self):
         import torch
         from transformers import AutoModelForCausalLM
 
+        self.monkeypatch = pytest.MonkeyPatch()  # per-test env patcher
         self.device = "cuda:0"
         self.output = "./oneshot_output"
+        self.monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0")  # GPU 0 only
 
         self.model = AutoModelForCausalLM.from_pretrained(
             self.model, device_map=self.device, torch_dtype=torch.bfloat16

diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py b/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py
@@ -18,12 +18,14 @@ class TestFinetuneWithoutRecipe(unittest.TestCase):
 
     def setUp(self):
         self.output = "./finetune_output"
+        self.monkeypatch = pytest.MonkeyPatch()  # undone in tearDown
 
     def test_finetune_without_recipe(self):
         from llmcompressor import train
 
         recipe_str = None
         device = "cuda:0"
+        self.monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0")  # GPU 0 only
 
         concatenate_data = False
         max_steps = 50
@@ -42,3 +44,4 @@ def test_finetune_without_recipe(self):
 
     def tearDown(self):
+        self.monkeypatch.undo()  # restore env first, even if rmtree raises
         shutil.rmtree(self.output)
diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py
@@ -55,6 +55,7 @@ def tearDown(self):
         # TODO: we get really nice stats from finetune that we should log
         # stored in results.json
+        self.monkeypatch.undo()  # restore env first, even if rmtree raises
         shutil.rmtree(self.output)
 
 
 @pytest.mark.integration
@@ -66,11 +67,14 @@ class TestOneshotAndFinetuneSmall(TestOneshotAndFinetune):
     dataset_config_name = None
     num_train_epochs = None
     concat_txt = None
 
     def setUp(self):
         import torch
 
+        self.monkeypatch = pytest.MonkeyPatch()  # per-test; undone in tearDown
         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        if self.device == "cuda:0":
+            self.monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0")  # GPU 0 only
         self.output = "./finetune_output"
 
     def test_oneshot_then_finetune_small(self):