From 55f021146d220adea625aef03e8d8c0801a27cd6 Mon Sep 17 00:00:00 2001
From: Akihiro Nitta
Date: Sun, 28 Jul 2024 19:12:31 +0000
Subject: [PATCH] update

---
 .gitattributes          |   1 +
 .pre-commit-config.yaml |   2 +
 test/ops/test_matmul.py | 170 ++++++++++++++++++++--------
 3 files changed, 88 insertions(+), 85 deletions(-)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 000000000..fcadb2cf9
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+* text eol=lf
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bf519a967..662ea5d40 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,6 +4,8 @@ repos:
     hooks:
       - id: end-of-file-fixer
       - id: trailing-whitespace
+      - id: mixed-line-ending
+        args: [--fix, lf]
      - id: check-yaml
        exclude: |
          (?x)^(
diff --git a/test/ops/test_matmul.py b/test/ops/test_matmul.py
index f630f4eda..d8cc2fb8f 100644
--- a/test/ops/test_matmul.py
+++ b/test/ops/test_matmul.py
@@ -1,85 +1,85 @@
-import os
-
-import pytest
-import torch
-
-import pyg_lib
-from pyg_lib.testing import withCUDA
-
-os.environ['NVIDIA_TF32_OVERRIDE'] = '0'
-torch.backends.cuda.matmul.allow_tf32 = False
-torch.set_float32_matmul_precision('highest')  # Enforce FP32
-
-
-@withCUDA
-@pytest.mark.parametrize('dtype', [torch.float, torch.bfloat16])
-def test_segment_matmul_autograd(dtype, device):
-    if device.type == 'cuda' and dtype == torch.bfloat16:
-        pytest.skip('CUDA does not support bfloat16')
-
-    inputs = torch.randn((8, 16), requires_grad=True, device=device,
-                         dtype=dtype)
-    ptr = torch.tensor([0, 5, 8]).to(torch.device(device))
-    other = torch.randn((2, 16, 32), requires_grad=True, device=device,
-                        dtype=dtype)
-    bias = torch.randn((2, 32), requires_grad=True, device=device, dtype=dtype)
-    out = pyg_lib.ops.segment_matmul(inputs, ptr, other, bias)
-    assert out.size() == (8, 32)
-
-    out1 = inputs[ptr[0]:ptr[1]] @ other[0] + bias[0]
-    assert torch.allclose(out[ptr[0]:ptr[1]], out1, atol=1e-6)
-
-    out2 = inputs[ptr[1]:ptr[2]] @ other[1] + bias[1]
-    assert torch.allclose(out[ptr[1]:ptr[2]], out2, atol=1e-6)
-
-    out.mean().backward()
-    assert other.grad.size() == other.size()
-    assert inputs.grad.size() == inputs.size()
-
-
-@withCUDA
-@pytest.mark.parametrize('dtype', [torch.float, torch.bfloat16])
-@pytest.mark.parametrize('transposed', [True, False])
-def test_grouped_matmul_autograd(dtype, transposed, device):
-    if device.type == 'cuda' and dtype == torch.bfloat16:
-        pytest.skip('CUDA does not support bfloat16')
-
-    inputs = [
-        torch.randn(5, 16, device=device, requires_grad=True),
-        torch.randn(6, 9, device=device, requires_grad=True),
-        torch.randn(3, 32, device=device, requires_grad=True),
-    ]
-    if transposed:
-        others_origin = [
-            torch.randn(48, 16, device=device, requires_grad=True),
-            torch.randn(42, 9, device=device, requires_grad=True),
-            torch.randn(64, 32, device=device, requires_grad=True),
-        ]
-        others = [other.t() for other in others_origin]
-    else:
-        others = [
-            torch.randn(16, 48, device=device, requires_grad=True),
-            torch.randn(9, 42, device=device, requires_grad=True),
-            torch.randn(32, 64, device=device, requires_grad=True),
-        ]
-
-    biases = [
-        torch.randn(48, device=device, requires_grad=True),
-        torch.randn(42, device=device, requires_grad=True),
-        torch.randn(64, device=device, requires_grad=True),
-    ]
-
-    outs = pyg_lib.ops.grouped_matmul(inputs, others, biases)
-    assert len(outs) == len(inputs)
-
-    for i in range(len(outs)):
-        assert outs[i].size() == (inputs[i].size(0), others[i].size(-1))
-        expected = inputs[i] @ others[i] + biases[i]
-        assert torch.allclose(outs[i], expected, atol=1e-4)
-
-    sum([out.sum() for out in outs]).backward()
-    for i in range(len(outs)):
-        if transposed:
-            assert others_origin[i].grad.size() == others_origin[i].size()
-        else:
-            assert others[i].grad.size() == others[i].size()
+import os
+
+import pytest
+import torch
+
+import pyg_lib
+from pyg_lib.testing import withCUDA
+
+os.environ['NVIDIA_TF32_OVERRIDE'] = '0'
+torch.backends.cuda.matmul.allow_tf32 = False
+torch.set_float32_matmul_precision('highest')  # Enforce FP32
+
+
+@withCUDA
+@pytest.mark.parametrize('dtype', [torch.float, torch.bfloat16])
+def test_segment_matmul_autograd(dtype, device):
+    if device.type == 'cuda' and dtype == torch.bfloat16:
+        pytest.skip('CUDA does not support bfloat16')
+
+    inputs = torch.randn((8, 16), requires_grad=True, device=device,
+                         dtype=dtype)
+    ptr = torch.tensor([0, 5, 8]).to(torch.device(device))
+    other = torch.randn((2, 16, 32), requires_grad=True, device=device,
+                        dtype=dtype)
+    bias = torch.randn((2, 32), requires_grad=True, device=device, dtype=dtype)
+    out = pyg_lib.ops.segment_matmul(inputs, ptr, other, bias)
+    assert out.size() == (8, 32)
+
+    out1 = inputs[ptr[0]:ptr[1]] @ other[0] + bias[0]
+    assert torch.allclose(out[ptr[0]:ptr[1]], out1, atol=1e-6)
+
+    out2 = inputs[ptr[1]:ptr[2]] @ other[1] + bias[1]
+    assert torch.allclose(out[ptr[1]:ptr[2]], out2, atol=1e-6)
+
+    out.mean().backward()
+    assert other.grad.size() == other.size()
+    assert inputs.grad.size() == inputs.size()
+
+
+@withCUDA
+@pytest.mark.parametrize('dtype', [torch.float, torch.bfloat16])
+@pytest.mark.parametrize('transposed', [True, False])
+def test_grouped_matmul_autograd(dtype, transposed, device):
+    if device.type == 'cuda' and dtype == torch.bfloat16:
+        pytest.skip('CUDA does not support bfloat16')
+
+    inputs = [
+        torch.randn(5, 16, device=device, requires_grad=True),
+        torch.randn(6, 9, device=device, requires_grad=True),
+        torch.randn(3, 32, device=device, requires_grad=True),
+    ]
+    if transposed:
+        others_origin = [
+            torch.randn(48, 16, device=device, requires_grad=True),
+            torch.randn(42, 9, device=device, requires_grad=True),
+            torch.randn(64, 32, device=device, requires_grad=True),
+        ]
+        others = [other.t() for other in others_origin]
+    else:
+        others = [
+            torch.randn(16, 48, device=device, requires_grad=True),
+            torch.randn(9, 42, device=device, requires_grad=True),
+            torch.randn(32, 64, device=device, requires_grad=True),
+        ]
+
+    biases = [
+        torch.randn(48, device=device, requires_grad=True),
+        torch.randn(42, device=device, requires_grad=True),
+        torch.randn(64, device=device, requires_grad=True),
+    ]
+
+    outs = pyg_lib.ops.grouped_matmul(inputs, others, biases)
+    assert len(outs) == len(inputs)
+
+    for i in range(len(outs)):
+        assert outs[i].size() == (inputs[i].size(0), others[i].size(-1))
+        expected = inputs[i] @ others[i] + biases[i]
+        assert torch.allclose(outs[i], expected, atol=1e-4)
+
+    sum([out.sum() for out in outs]).backward()
+    for i in range(len(outs)):
+        if transposed:
+            assert others_origin[i].grad.size() == others_origin[i].size()
+        else:
+            assert others[i].grad.size() == others[i].size()