
Commit 4b86dab

Reuse cos.device in reset_parameters (Lightning-AI#993)

1 parent 1e045fe

File tree: 2 files changed (+13, -1 lines)

lit_gpt/model.py (+1, -1)

@@ -59,7 +59,7 @@ def max_seq_length(self, value: int) -> None:
 
     def reset_parameters(self) -> None:
         # Trigger resetting the rope-cache
-        self.cos, self.sin = self.rope_cache()
+        self.cos, self.sin = self.rope_cache(device=self.cos.device)
 
     def _init_weights(self, module: nn.Module) -> None:
         """Meant to be used with `gpt.apply(gpt._init_weights)`."""

tests/test_model.py (+12)

@@ -10,6 +10,7 @@
 from conftest import RunIf
 from lightning import Fabric
 from lightning.fabric.utilities.imports import _IS_WINDOWS
+from lightning.fabric.utilities.init import _materialize_meta_tensors
 
 # support running without installing as a package
 wd = Path(__file__).parent.parent.resolve()
@@ -832,3 +833,14 @@ def test_rope_init_under_fsdp():
     cos, sin = model.rope_cache(device=fabric.device)
     torch.testing.assert_close(model.cos, cos)
     torch.testing.assert_close(model.sin, sin)
+
+
+@RunIf(min_cuda_gpus=1)
+def test_reset_parameters_device():
+    from lit_gpt import GPT
+
+    with torch.device("meta"):
+        model = GPT.from_name("pythia-14m", n_layer=1)
+    _materialize_meta_tensors(model, torch.device("cuda"))
+    model.reset_parameters()
+    assert model.cos.device.type == "cuda"
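The new test exercises exactly that path on a GPU: the model is instantiated on the meta device, materialized on CUDA with Lightning's _materialize_meta_tensors helper, and reset_parameters() must leave model.cos on CUDA. A rough equivalent using only public PyTorch APIs and the hypothetical RopeCacheDemo sketch above could look like this; it assumes a CUDA device is available, matching the @RunIf(min_cuda_gpus=1) guard in the real test.

import torch

# Create the module on the meta device: buffers exist but hold no real storage.
with torch.device("meta"):
    demo = RopeCacheDemo()

# Module.to_empty allocates uninitialized storage on the target device for every
# tensor that currently lives on meta (a public stand-in for _materialize_meta_tensors).
demo = demo.to_empty(device="cuda")

# The storage is uninitialized, so the cache must be recomputed, and it must stay on CUDA.
demo.reset_parameters()
assert demo.cos.device.type == "cuda"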
