
Commit 40094eb

Split torch.py into torch package with per-op modules
1 parent 39100bd commit 40094eb

40 files changed

Lines changed: 823 additions & 592 deletions
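The split is structural only: every op that used to live in src/ntops/torch.py now has its own module under src/ntops/torch/, and the package __init__ re-exports each public name. A minimal sketch of how callers reach the ops after this commit — the input tensor and the CUDA device are illustrative assumptions, not part of the diff:

import torch

import ntops.torch

# Assumes a CUDA device, since the wrappers dispatch to ntops kernels.
x = torch.randn(4, 4, device="cuda")

# The package-level name and the per-op module resolve to the same function.
y1 = ntops.torch.abs(x)

from ntops.torch.abs import abs as ntops_abs

y2 = ntops_abs(x)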

src/ntops/torch.py

Lines changed: 0 additions & 592 deletions
This file was deleted.

src/ntops/torch/__init__.py

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
from ntops.torch.abs import abs
from ntops.torch.add import add
from ntops.torch.addmm import addmm
from ntops.torch.bitwise_and import bitwise_and
from ntops.torch.bitwise_not import bitwise_not
from ntops.torch.bitwise_or import bitwise_or
from ntops.torch.bmm import bmm
from ntops.torch.clamp import clamp
from ntops.torch.cos import cos
from ntops.torch.div import div
from ntops.torch.dropout import dropout
from ntops.torch.eq import eq
from ntops.torch.exp import exp
from ntops.torch.ge import ge
from ntops.torch.gelu import gelu
from ntops.torch.gt import gt
from ntops.torch.isinf import isinf
from ntops.torch.isnan import isnan
from ntops.torch.layer_norm import layer_norm
from ntops.torch.le import le
from ntops.torch.lt import lt
from ntops.torch.mm import mm
from ntops.torch.mul import mul
from ntops.torch.ne import ne
from ntops.torch.neg import neg
from ntops.torch.pow import pow
from ntops.torch.relu import relu
from ntops.torch.rms_norm import rms_norm
from ntops.torch.rotary_position_embedding import rotary_position_embedding
from ntops.torch.rsqrt import rsqrt
from ntops.torch.scaled_dot_product_attention import scaled_dot_product_attention
from ntops.torch.sigmoid import sigmoid
from ntops.torch.silu import silu
from ntops.torch.sin import sin
from ntops.torch.softmax import softmax
from ntops.torch.sub import sub
from ntops.torch.tanh import tanh

__all__ = [
    "abs",
    "add",
    "addmm",
    "bitwise_and",
    "bitwise_not",
    "bitwise_or",
    "bmm",
    "clamp",
    "cos",
    "div",
    "dropout",
    "eq",
    "exp",
    "ge",
    "gelu",
    "gt",
    "isinf",
    "isnan",
    "layer_norm",
    "le",
    "lt",
    "mm",
    "mul",
    "ne",
    "neg",
    "pow",
    "relu",
    "rms_norm",
    "rotary_position_embedding",
    "rsqrt",
    "scaled_dot_product_attention",
    "sigmoid",
    "silu",
    "sin",
    "softmax",
    "sub",
    "tanh",
]
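Since __init__ imports every op and lists it in __all__, a star import exposes exactly these 37 names; note that several of them (abs, pow) shadow Python builtins in the importing namespace. Illustrative only:

from ntops.torch import *  # brings in the 37 names listed in __all__, nothing else

# abs and pow now refer to the ntops wrappers rather than the builtins.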

src/ntops/torch/abs.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
import torch

import ntops
from ntops.torch.utils import _cached_make


def abs(input, *, out=None):
    if out is None:
        out = torch.empty_like(input)

    kernel = _cached_make(ntops.kernels.abs.premake, input.ndim)

    kernel(input, out)

    return out
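The unary wrappers in this commit all follow the pattern visible here: allocate out with torch.empty_like when none is passed, fetch a kernel specialized on input.ndim via _cached_make (caching behaviour inferred from the name; the helper itself is not shown in this diff), launch it, and return out. A hedged usage sketch, again assuming a CUDA tensor:

import torch

import ntops.torch

x = torch.tensor([-1.5, 2.0, -3.0], device="cuda")

# Without out=, the wrapper allocates the result itself.
y = ntops.torch.abs(x)

# With out=, it writes into the provided tensor and returns that same object.
buf = torch.empty_like(x)
z = ntops.torch.abs(x, out=buf)
assert z is buf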

src/ntops/torch/add.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
import torch

import ntops
from ntops.torch.utils import _cached_make


def add(input, other, *, alpha=1, out=None):
    if out is None:
        out = torch.empty_like(input)

    kernel = _cached_make(ntops.kernels.add.premake, input.ndim)

    kernel(input, other, alpha, out)

    return out
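add forwards alpha straight to the kernel; presumably it computes input + alpha * other, mirroring torch.add, though the arithmetic itself lives in ntops.kernels.add and is not part of this diff. Sketch (tensors and device are assumptions):

import torch

import ntops.torch

a = torch.ones(3, device="cuda")
b = torch.full((3,), 2.0, device="cuda")

# Presumably a + alpha * b, i.e. [2.0, 2.0, 2.0] here.
c = ntops.torch.add(a, b, alpha=0.5)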

src/ntops/torch/addmm.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
import torch

import ntops
from ntops.torch.utils import _cached_make, _get_matmul_input_precision


def addmm(input, mat1, mat2, *, beta=1, alpha=1, out=None):
    m, _ = mat1.shape
    _, n = mat2.shape

    if out is None:
        out = torch.empty((m, n), dtype=input.dtype, device=input.device)

    kernel = _cached_make(ntops.kernels.addmm.premake)

    kernel(input, mat1, mat2, beta, alpha, out, _get_matmul_input_precision())

    return out
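addmm sizes its output from mat1's row count and mat2's column count and forwards beta, alpha, and the matmul input precision to the kernel; presumably it computes beta * input + alpha * (mat1 @ mat2), as torch.addmm does. Shape sketch (values and device are illustrative assumptions):

import torch

import ntops.torch

inp = torch.randn(2, 4, device="cuda")
mat1 = torch.randn(2, 3, device="cuda")
mat2 = torch.randn(3, 4, device="cuda")

# out has shape (m, n) = (2, 4), taken from mat1's rows and mat2's columns.
out = ntops.torch.addmm(inp, mat1, mat2, beta=1.0, alpha=1.0)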

src/ntops/torch/bitwise_and.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
import torch

import ntops
from ntops.torch.utils import _cached_make


def bitwise_and(input, other, *, out=None):
    if out is None:
        out = torch.empty_like(input)

    kernel = _cached_make(ntops.kernels.bitwise_and.premake, input.ndim)

    kernel(input, other, out)

    return out

src/ntops/torch/bitwise_not.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
import torch

import ntops
from ntops.torch.utils import _cached_make


def bitwise_not(input, *, out=None):
    if out is None:
        out = torch.empty_like(input)

    kernel = _cached_make(
        ntops.kernels.bitwise_not.premake, input.ndim, input.dtype == torch.bool
    )

    kernel(input, out)

    return out
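bitwise_not is the one unary wrapper here that specializes on dtype as well as rank: it passes input.dtype == torch.bool into premake, presumably so the kernel can emit logical negation for bool tensors and integer inversion (~x) for everything else. Sketch (device and values assumed):

import torch

import ntops.torch

flags = torch.tensor([True, False], device="cuda")
ints = torch.tensor([0, 1, 2], dtype=torch.int32, device="cuda")

# Separate kernel specializations for the bool and integer cases.
not_flags = ntops.torch.bitwise_not(flags)
not_ints = ntops.torch.bitwise_not(ints)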

src/ntops/torch/bitwise_or.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
import torch

import ntops
from ntops.torch.utils import _cached_make


def bitwise_or(input, other, *, out=None):
    if out is None:
        out = torch.empty_like(input)

    kernel = _cached_make(ntops.kernels.bitwise_or.premake, input.ndim)

    kernel(input, other, out)

    return out

src/ntops/torch/bmm.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
import torch

import ntops
from ntops.torch.utils import _cached_make, _get_matmul_input_precision


def bmm(input, mat2, *, out=None):
    b, m, _ = input.shape
    _, _, n = mat2.shape

    if out is None:
        out = torch.empty((b, m, n), dtype=input.dtype, device=input.device)

    kernel = _cached_make(ntops.kernels.bmm.premake)

    kernel(input, mat2, out, _get_matmul_input_precision())

    return out
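bmm reads the batch size and row count from input and the column count from mat2, so it expects strict (b, m, k) x (b, k, n) operands with matching batch and inner dimensions; unlike torch.matmul, the wrapper itself does no broadcasting. Shape sketch (tensors and device assumed):

import torch

import ntops.torch

a = torch.randn(8, 2, 3, device="cuda")
b = torch.randn(8, 3, 4, device="cuda")

# (8, 2, 3) @ (8, 3, 4) -> out of shape (8, 2, 4).
c = ntops.torch.bmm(a, b)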

src/ntops/torch/clamp.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
import torch

import ntops
from ntops.torch.utils import _cached_make


def clamp(input, min=None, max=None, *, out=None):
    if out is None:
        out = torch.empty_like(input)

    kernel = _cached_make(ntops.kernels.clamp.premake, input.ndim)

    kernel(input, min, max, out)

    return out
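clamp forwards min and max as-is, so a None bound presumably means "unbounded on that side", matching torch.clamp's semantics. Sketch (values and device assumed):

import torch

import ntops.torch

x = torch.tensor([-2.0, 0.5, 3.0], device="cuda")

# Both bounds: values are clipped into [-1.0, 1.0].
clipped = ntops.torch.clamp(x, min=-1.0, max=1.0)

# Only a lower bound; max=None leaves the top unbounded (assumed).
floored = ntops.torch.clamp(x, min=0.0)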
