diff --git a/cellfinder/core/classify/classify.py b/cellfinder/core/classify/classify.py
index 37fc06cc..b9ca1417 100644
--- a/cellfinder/core/classify/classify.py
+++ b/cellfinder/core/classify/classify.py
@@ -29,6 +29,7 @@ def main(
     model_weights: Optional[os.PathLike],
     network_depth: depth_type,
     max_workers: int = 3,
+    pin_memory: bool = False,
     *,
     callback: Optional[Callable[[int], None]] = None,
 ) -> List[Cell]:
@@ -74,6 +75,12 @@ def main(
     max_workers: int
         The number of sub-processes to use for data loading / processing.
         Defaults to 8.
+    pin_memory: bool
+        Pins (page-locks) the CPU memory holding data to be sent to the GPU.
+        This speeds up CPU-to-GPU transfers, but should only be used if that
+        data can stay in CPU RAM while the GPU uses it, i.e. there is enough
+        RAM. If there is a risk of the RAM being paged, it should not be
+        used. Defaults to False.
     callback : Callable[int], optional
         A callback function that is called during classification. Called with
         the batch number once that batch has been classified.
diff --git a/cellfinder/core/detect/detect.py b/cellfinder/core/detect/detect.py
index 076b3ae5..e6c0d398 100644
--- a/cellfinder/core/detect/detect.py
+++ b/cellfinder/core/detect/detect.py
@@ -49,6 +49,7 @@ def main(
     plane_directory: Optional[str] = None,
     batch_size: Optional[int] = None,
     torch_device: Optional[str] = None,
+    pin_memory: bool = False,
     split_ball_xy_size: float = 6,
     split_ball_z_size: float = 15,
     split_ball_overlap_fraction: float = 0.8,
@@ -116,6 +117,12 @@ def main(
         The device on which to run the computation. If not specified (None),
         "cuda" will be used if a GPU is available, otherwise "cpu".
         You can also manually specify "cuda" or "cpu".
+    pin_memory: bool
+        Pins (page-locks) the CPU memory holding data to be sent to the GPU.
+        This speeds up CPU-to-GPU transfers, but should only be used if that
+        data can stay in CPU RAM while the GPU uses it, i.e. there is enough
+        RAM. If there is a risk of the RAM being paged, it should not be
+        used. Defaults to False.
     split_ball_xy_size: float
         Similar to `ball_xy_size`, except the value to use for the 3d
         filter during cluster splitting.
@@ -192,6 +199,7 @@ def main(
         plane_directory=plane_directory,
         batch_size=batch_size,
         torch_device=torch_device,
+        pin_memory=pin_memory,
         n_splitting_iter=n_splitting_iter,
     )
 
diff --git a/cellfinder/core/detect/filters/setup_filters.py b/cellfinder/core/detect/filters/setup_filters.py
index 1cf13ee8..ef2d0bf1 100644
--- a/cellfinder/core/detect/filters/setup_filters.py
+++ b/cellfinder/core/detect/filters/setup_filters.py
@@ -189,6 +189,14 @@ class DetectionSettings:
     to run on the first GPU.
     """
 
+    pin_memory: bool = False
+    """
+    Pins (page-locks) the CPU memory holding data to be sent to the GPU. This
+    speeds up CPU-to-GPU transfers, but should only be used if that data can
+    stay in CPU RAM while the GPU uses it, i.e. there is enough RAM. If there
+    is a risk of the RAM being paged, it should not be used.
+    """
+
     n_free_cpus: int = 2
     """
     Number of free CPU cores to keep available and not use during parallel
diff --git a/cellfinder/core/detect/filters/volume/volume_filter.py b/cellfinder/core/detect/filters/volume/volume_filter.py
index 1cf432bf..0ba8e816 100644
--- a/cellfinder/core/detect/filters/volume/volume_filter.py
+++ b/cellfinder/core/detect/filters/volume/volume_filter.py
@@ -140,7 +140,7 @@ def _get_filter_buffers(
             tensor = torch.empty(
                 (batch_size, *self.settings.plane_shape),
                 dtype=torch_dtype,
-                pin_memory=not cpu,
+                pin_memory=not cpu and self.settings.pin_memory,
                 device="cpu",
             )
 
diff --git a/cellfinder/core/main.py b/cellfinder/core/main.py
index 7ff3418b..e1167b3d 100644
--- a/cellfinder/core/main.py
+++ b/cellfinder/core/main.py
@@ -37,6 +37,7 @@ def main(
     detected_cells: List[Cell] = None,
     detection_batch_size: Optional[int] = None,
     torch_device: Optional[str] = None,
+    pin_memory: bool = False,
     split_ball_xy_size: float = 6,
     split_ball_z_size: float = 15,
     split_ball_overlap_fraction: float = 0.8,
@@ -135,6 +136,12 @@ def main(
         The device on which to run the computation. If not specified (None),
         "cuda" will be used if a GPU is available, otherwise "cpu".
         You can also manually specify "cuda" or "cpu".
+    pin_memory: bool
+        Pins (page-locks) the CPU memory holding data to be sent to the GPU.
+        This speeds up CPU-to-GPU transfers, but should only be used if that
+        data can stay in CPU RAM while the GPU uses it, i.e. there is enough
+        RAM. If there is a risk of the RAM being paged, it should not be
+        used. Defaults to False.
     split_ball_xy_size: float
         Similar to `ball_xy_size`, except the value to use for the 3d
         filter during cluster splitting.
@@ -180,6 +187,7 @@ def main(
             n_sds_above_mean_thresh,
             batch_size=detection_batch_size,
             torch_device=torch_device,
+            pin_memory=pin_memory,
             callback=detect_callback,
             split_ball_z_size=split_ball_z_size,
             split_ball_xy_size=split_ball_xy_size,
diff --git a/cellfinder/napari/detect/detect.py b/cellfinder/napari/detect/detect.py
index 2503d412..87cbf5c0 100644
--- a/cellfinder/napari/detect/detect.py
+++ b/cellfinder/napari/detect/detect.py
@@ -263,6 +263,7 @@ def widget(
         n_free_cpus: int,
         analyse_local: bool,
         use_gpu: bool,
+        pin_memory: bool,
         debug: bool,
         reset_button,
     ) -> None:
@@ -336,6 +337,12 @@ def widget(
             Only analyse planes around the current position
         use_gpu : bool
             If True, use GPU for processing (if available); otherwise, use CPU.
+        pin_memory: bool
+            Pins (page-locks) the CPU memory holding data to be sent to the
+            GPU. This speeds up CPU-to-GPU transfers, but should only be used
+            if that data can stay in CPU RAM while the GPU uses it, i.e. there
+            is enough RAM. If there is a risk of the RAM being paged, it should
+            not be used. Defaults to False.
         debug : bool
             Increase logging
         reset_button :
@@ -411,7 +418,13 @@ def widget(
             end_plane = len(signal_image.data)
 
         misc_inputs = MiscInputs(
-            start_plane, end_plane, n_free_cpus, analyse_local, use_gpu, debug
+            start_plane,
+            end_plane,
+            n_free_cpus,
+            analyse_local,
+            use_gpu,
+            pin_memory,
+            debug,
         )
 
         worker = Worker(
diff --git a/cellfinder/napari/detect/detect_containers.py b/cellfinder/napari/detect/detect_containers.py
index 5a130853..2281311e 100644
--- a/cellfinder/napari/detect/detect_containers.py
+++ b/cellfinder/napari/detect/detect_containers.py
@@ -153,6 +153,7 @@ class MiscInputs(InputContainer):
     n_free_cpus: int = 2
     analyse_local: bool = False
     use_gpu: bool = field(default_factory=lambda: torch.cuda.is_available())
+    pin_memory: bool = False
     debug: bool = False
 
     def as_core_arguments(self) -> dict:
@@ -179,5 +180,10 @@ def widget_representation(cls) -> dict:
                 value=cls.defaults()["use_gpu"],
                 enabled=torch.cuda.is_available(),
             ),
+            pin_memory=dict(
+                widget_type="CheckBox",
+                label="Pin data to memory",
+                value=cls.defaults()["pin_memory"],
+            ),
             debug=dict(value=cls.defaults()["debug"]),
         )