File tree Expand file tree Collapse file tree 5 files changed +8
-4
lines changed Expand file tree Collapse file tree 5 files changed +8
-4
lines changed Original file line number Diff line number Diff line change 8
8
#include <c10/cuda/CUDAGuard.h>
9
9
#include <c10/cuda/CUDAStream.h>
10
10
#include <ATen/cuda/CUDAGeneratorImpl.h> // For at::Generator and at::PhiloxCudaState
11
- #include <ATen/cuda/CUDAGraphsUtils.cuh> // For at::cuda::philox::unpack
11
+ #include "philox_unpack.cuh" // For at::cuda::philox::unpack
12
12
13
13
#include <cutlass/numeric_types.h>
14
14
Original file line number Diff line number Diff line change 4
4
5
5
#pragma once
6
6
7
- #include <ATen/cuda/CUDAGraphsUtils.cuh> // For at::cuda::philox::unpack
7
+ #include "philox_unpack.cuh" // For at::cuda::philox::unpack
8
8
9
9
#include <cute/tensor.hpp>
10
10
Original file line number Diff line number Diff line change
1
+ // This is purely so that it works with torch 2.1. For torch 2.2+ we can include ATen/cuda/PhiloxUtils.cuh
2
+
3
+ #pragma once
4
+ #include <ATen/cuda/detail/UnpackRaw.cuh>
Original file line number Diff line number Diff line change 1
- __version__ = "2.7.1.post3"
1
+ __version__ = "2.7.1.post4"
2
2
3
3
from flash_attn.flash_attn_interface import (
4
4
flash_attn_func,
Original file line number Diff line number Diff line change @@ -114,7 +114,7 @@ def check_if_rocm_home_none(global_option: str) -> None:
114
114
115
115
116
116
def append_nvcc_threads(nvcc_extra_args):
117
- nvcc_threads = os.getenv("NVCC_THREADS") or "4"
117
+ nvcc_threads = os.getenv("NVCC_THREADS") or "2"
118
118
return nvcc_extra_args + ["--threads", nvcc_threads]
119
119
120
120
You can’t perform that action at this time.
0 commit comments