
Commit 799f56f

Don't compile for Pytorch 2.1 on CUDA 12.1 due to nvcc segfaults
1 parent: c984208

4 files changed: +10 −6 lines changed

.github/workflows/publish.yml

Lines changed: 5 additions & 0 deletions

@@ -80,6 +80,11 @@ jobs:
             cuda-version: '11.7.1'
           - torch-version: '2.1.0.dev20230731'
             cuda-version: '11.8.0'
+          # Pytorch >= 2.1 with nvcc 12.1.0 segfaults during compilation, so
+          # we only use CUDA 12.2. setup.py has a special case that will
+          # download the wheel for CUDA 12.2 instead.
+          - torch-version: '2.1.0.dev20230731'
+            cuda-version: '12.1.0'
 
     steps:
       - name: Checkout
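The added pair presumably sits under the workflow matrix's exclude: list (the surrounding YAML is not shown in this hunk), so CI simply never attempts the Pytorch 2.1 / CUDA 12.1 build. A minimal Python sketch of the effect on the build matrix; the version lists here are illustrative assumptions, not the workflow's actual values:

    from itertools import product

    torch_versions = ["2.0.1", "2.1.0.dev20230731"]
    cuda_versions = ["11.8.0", "12.1.0", "12.2.0"]

    # Pairs dropped from the build matrix; this mirrors the entry added
    # above, since nvcc 12.1.0 segfaults when compiling for Pytorch >= 2.1.
    excluded = {
        ("2.1.0.dev20230731", "12.1.0"),
    }

    matrix = [
        pair for pair in product(torch_versions, cuda_versions)
        if pair not in excluded
    ]
    print(matrix)  # no ('2.1.0.dev20230731', '12.1.0') build is attempted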

flash_attn/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-__version__ = "2.2.3.post1"
+__version__ = "2.2.3.post2"
 
 from flash_attn.flash_attn_interface import (
     flash_attn_func,

setup.py

Lines changed: 2 additions & 0 deletions

@@ -223,6 +223,8 @@ def get_wheel_url():
     # _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
     torch_cuda_version = parse(torch.version.cuda)
     torch_version_raw = parse(torch.__version__)
+    if torch_version_raw.major == 2 and torch_version_raw.minor == 1:
+        torch_cuda_version = parse("12.2")
     python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
     platform_name = get_platform()
     flash_version = get_package_version()
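This special case keeps pip installs working for users on torch 2.1 with CUDA 12.1: since no 12.1 wheel is published, get_wheel_url() pretends the local CUDA is 12.2 and fetches that wheel instead (CUDA 12.x minor versions are generally compatible enough for this to work). A self-contained sketch of the remapping, using packaging's parse() as setup.py does; the cuXY tag format below is illustrative, not flash-attn's exact URL scheme:

    from packaging.version import parse

    def effective_cuda_tag(torch_version: str, local_cuda: str) -> str:
        """Mirror the special case above: torch 2.1 wheels exist only for CUDA 12.2."""
        torch_version_raw = parse(torch_version)
        torch_cuda_version = parse(local_cuda)
        # nvcc 12.1.0 segfaults on Pytorch >= 2.1, so no 12.1 wheels are built;
        # point those installs at the CUDA 12.2 wheel instead.
        if torch_version_raw.major == 2 and torch_version_raw.minor == 1:
            torch_cuda_version = parse("12.2")
        return f"cu{torch_cuda_version.major}{torch_cuda_version.minor}"

    print(effective_cuda_tag("2.1.0.dev20230731", "12.1"))  # cu122
    print(effective_cuda_tag("2.0.1", "11.8"))              # cu118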

training/Dockerfile

Lines changed: 2 additions & 5 deletions

@@ -85,14 +85,11 @@ RUN pip install transformers==4.25.1 datasets==2.8.0 pytorch-lightning==1.8.6 tr
 RUN pip install git+https://github.com/mlcommons/[email protected]
 
 # Install FlashAttention
-RUN pip install flash-attn==2.2.3.post1
+RUN pip install flash-attn==2.2.3.post2
 
 # Install CUDA extensions for cross-entropy, fused dense, layer norm
 RUN git clone https://github.com/HazyResearch/flash-attention \
-    && cd flash-attention && git checkout v2.2.3.post1 \
-    && cd csrc/fused_softmax && pip install . && cd ../../ \
-    && cd csrc/rotary && pip install . && cd ../../ \
+    && cd flash-attention && git checkout v2.2.3.post2 \
     && cd csrc/layer_norm && pip install . && cd ../../ \
     && cd csrc/fused_dense_lib && pip install . && cd ../../ \
-    && cd csrc/ft_attention && pip install . && cd ../../ \
     && cd .. && rm -rf flash-attention
