3 files changed, +4 −4 lines changed

@@ -151,7 +151,7 @@ jobs:
  export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
  export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
  # Currently for this setting the runner goes OOM if we pass --threads 4 to nvcc
- if [[ ${MATRIX_CUDA_VERSION} =~ "12." && ${MATRIX_TORCH_VERSION} == "2.1" ]]; then
+ if [[ ( ${MATRIX_CUDA_VERSION} == "121" || ${MATRIX_CUDA_VERSION} == "122" ) && ${MATRIX_TORCH_VERSION} == "2.1" ]]; then
  export FLASH_ATTENTION_FORCE_SINGLE_THREAD="TRUE"
  fi
  # Limit MAX_JOBS otherwise the github runner goes OOM
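Note on the changed condition: the right-hand side of =~ in the old check is quoted, and bash matches a quoted pattern literally rather than as a regex. Since MATRIX_CUDA_VERSION evidently holds dot-stripped values such as 121 and 122 (that is what the new comparisons test), the literal substring "12." could never match, so the single-thread workaround was being skipped. A minimal sketch of the two checks, assuming dot-stripped version values; the variable assignments are for illustration only and are not part of the workflow:

    # Illustration only, not part of the workflow.
    MATRIX_CUDA_VERSION="121"     # dot-stripped, as implied by the new equality checks
    MATRIX_TORCH_VERSION="2.1"

    # Old check: a quoted pattern after =~ is matched literally, and the literal
    # substring "12." never occurs in "121" or "122", so this branch never runs.
    if [[ ${MATRIX_CUDA_VERSION} =~ "12." && ${MATRIX_TORCH_VERSION} == "2.1" ]]; then
        echo "old condition fires"
    fi

    # New check: explicit equality against the two CUDA 12 builds, so the
    # FLASH_ATTENTION_FORCE_SINGLE_THREAD workaround applies for 121 and 122.
    if [[ ( ${MATRIX_CUDA_VERSION} == "121" || ${MATRIX_CUDA_VERSION} == "122" ) && ${MATRIX_TORCH_VERSION} == "2.1" ]]; then
        echo "new condition fires"
    fi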
@@ -1,4 +1,4 @@
- __version__ = "2.1.2.post2"
+ __version__ = "2.1.2.post3"

  from flash_attn.flash_attn_interface import (
      flash_attn_func,
@@ -85,11 +85,11 @@ RUN pip install transformers==4.25.1 datasets==2.8.0 pytorch-lightning==1.8.6 tr
  RUN pip install git+https://github.com/mlcommons/[email protected]

  # Install FlashAttention
- RUN pip install flash-attn==2.1.2.post2
+ RUN pip install flash-attn==2.1.2.post3

  # Install CUDA extensions for cross-entropy, fused dense, layer norm
  RUN git clone https://github.com/HazyResearch/flash-attention \
-     && cd flash-attention && git checkout v2.1.2.post2 \
+     && cd flash-attention && git checkout v2.1.2.post3 \
      && cd csrc/fused_softmax && pip install . && cd ../../ \
      && cd csrc/rotary && pip install . && cd ../../ \
      && cd csrc/xentropy && pip install . && cd ../../ \
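The pip wheel pin and the git checkout tag are bumped in lockstep here. A quick way to confirm the installed wheel actually matches the new release (a suggested check, not part of the Dockerfile) is to read the version back from the package, since flash_attn exposes __version__ as seen in the second file above:

    # Suggested sanity check after the image builds (not in the Dockerfile itself).
    python -c "import flash_attn; print(flash_attn.__version__)"   # expect 2.1.2.post3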