InfiniTensor · voltjia · May 30, 2025 · May 7, 2025 · May 30, 2025
diff --git a/src/ntops/kernels/clamp.py b/src/ntops/kernels/clamp.py
@@ -0,0 +1,28 @@
+import functools
+
+import ninetoothed
+import ninetoothed.language as ntl
+from ninetoothed import Tensor
+
+from ntops.kernels.element_wise import arrangement
+
+
+# Element-wise clamp kernel body: applies `ntl.clamp` to `input` with the
+# bounds `min_val` and `max_val` and assigns the result to `output`.
+# NOTE(review): documented with comments instead of a docstring so the
+# function body stays a single statement; presumably ninetoothed reads this
+# function's source when building the kernel — confirm before adding one.
+def application(input, min_val, max_val, output):
+    output = ntl.clamp(input, min_val, max_val)  # noqa: F841
+
+
+@functools.cache
+def make(ndim):
+    """Build the clamp kernel for operands with ``ndim`` dimensions.
+
+    Results are memoized per ``ndim`` by ``functools.cache``.
+    """
+    # One ``Tensor(ndim)`` per ``application`` parameter (four in total).
+    operands = tuple(Tensor(ndim) for _ in range(4))
+
+    return ninetoothed.make(arrangement, application, operands)
diff --git a/src/ntops/torch.py b/src/ntops/torch.py
@@ -7,6 +7,7 @@
 import ntops.kernels.bitwise_not
 import ntops.kernels.bitwise_or
 import ntops.kernels.bmm
+import ntops.kernels.clamp
 import ntops.kernels.cos
 import ntops.kernels.div
 import ntops.kernels.eq
@@ -113,6 +114,59 @@ def bmm(input, mat2, *, out=None):
     return out
 
 
+def clamp(input, min=None, max=None, *, out=None):
+    """Clamp ``input`` element-wise into the range ``[min, max]``.
+
+    Args:
+        input: Tensor to clamp.
+        min: Lower bound as a tensor or a Python number; ``None`` means no
+            lower bound.
+        max: Upper bound as a tensor or a Python number; ``None`` means no
+            upper bound.
+        out: Optional tensor that receives the result; allocated with
+            ``torch.empty_like(input)`` when omitted.
+
+    Returns:
+        The tensor holding the result (``out`` when it was supplied).
+
+    Raises:
+        ValueError: If both ``min`` and ``max`` are ``None``.
+    """
+    if min is None and max is None:
+        raise ValueError("clamp: at least one of 'min' or 'max' must not be None")
+
+    if min is None or max is None:
+        # The kernel always takes both bounds, so an omitted one is replaced
+        # by the extreme value of the dtype, which never alters any element.
+        if input.dtype.is_floating_point:
+            lowest, highest = float("-inf"), float("inf")
+        else:
+            limits = torch.iinfo(input.dtype)
+            lowest, highest = limits.min, limits.max
+
+        min = lowest if min is None else min
+        max = highest if max is None else max
+
+    # The kernel is built for tensor operands of ``input.ndim`` dimensions,
+    # so number bounds are materialized as tensors shaped like ``input``.
+    # NOTE(review): tensor bounds are forwarded unchanged and are presumably
+    # expected to match ``input``'s shape — confirm whether broadcasting is needed.
+    if not isinstance(min, torch.Tensor):
+        min = torch.full_like(input, min)
+
+    if not isinstance(max, torch.Tensor):
+        max = torch.full_like(input, max)
+
+    if out is None:
+        out = torch.empty_like(input)
+
+    kernel = ntops.kernels.clamp.make(input.ndim)
+
+    kernel(input, min, max, out)
+
+    return out
+
+
 def cos(input, *, out=None):
     if out is None:
         out = torch.empty_like(input)

diff --git a/tests/test_clamp.py b/tests/test_clamp.py
@@ -0,0 +1,24 @@
+import pytest
+import torch
+
+import ntops.torch
+from tests.skippers import skip_if_cuda_not_available
+from tests.utils import generate_arguments
+
+
+@skip_if_cuda_not_available
+@pytest.mark.parametrize(*generate_arguments())
+def test_cuda(shape, dtype, atol, rtol):
+    """Check ``ntops.torch.clamp`` against ``torch.clamp`` on CUDA tensors."""
+    tensor_options = {"dtype": dtype, "device": "cuda"}
+
+    # Drawn in the order input, lower bound, upper bound. The bounds are
+    # independent random tensors, so the lower bound may exceed the upper one.
+    sample = torch.randn(shape, **tensor_options)
+    lower = torch.randn(shape, **tensor_options)
+    upper = torch.randn(shape, **tensor_options)
+
+    actual = ntops.torch.clamp(sample, lower, upper)
+    expected = torch.clamp(sample, lower, upper)
+
+    assert torch.allclose(actual, expected, atol=atol, rtol=rtol)