From 6500544cd10644d6cf8ab0de3f00df7148fbd9a3 Mon Sep 17 00:00:00 2001
From: Seventeen17 <17aloha@gmail.com>
Date: Fri, 15 Nov 2024 14:30:32 +0800
Subject: [PATCH] remove redundant code

---
 bazel/flash_attn.BUILD                                | 2 +-
 torch_xla/csrc/ops/flash_attention_varlen_forward.cpp | 7 -------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/bazel/flash_attn.BUILD b/bazel/flash_attn.BUILD
index 6d0197eb058..0f1370dbcd6 100644
--- a/bazel/flash_attn.BUILD
+++ b/bazel/flash_attn.BUILD
@@ -35,4 +35,4 @@ genrule(
         "popd",
         "cp external/flash_attn/build/*/*.so $(location flash_attn_cuda.so)"]),
     visibility = ["//visibility:public"],
-)
\ No newline at end of file
+)
diff --git a/torch_xla/csrc/ops/flash_attention_varlen_forward.cpp b/torch_xla/csrc/ops/flash_attention_varlen_forward.cpp
index 37111c976a5..96c042f6869 100644
--- a/torch_xla/csrc/ops/flash_attention_varlen_forward.cpp
+++ b/torch_xla/csrc/ops/flash_attention_varlen_forward.cpp
@@ -92,11 +92,6 @@ void custom_call_flash_attention_varlen_forward(cudaStream_t stream,
       torch::from_blob(buffers[8 + buf_offset], {params.b + 1}, opts);
   at::Tensor rng_state =
       torch::from_blob(buffers[6 + buf_offset], {2}, opts.dtype(torch::kInt64));
-  // Fill zeros for outputs.
-  // cudaMemsetAsync(buffers[4 + buf_offset], 0, params.b * params.h *
-  // params.seqlen_q * sizeof(torch::kFloat), cuda_stream);
-  // cudaMemsetAsync(buffers[5 + buf_offset], 0, params.b * params.seqlen_q *
-  // params.h * params.d * sizeof(scalar_type), cuda_stream);
   cudaMemsetAsync(rng_state.data_ptr(), 0, 2 * sizeof(int64_t), cuda_stream);
   softmax_lse.fill_(0);
   o_output.fill_(0);
@@ -155,8 +150,6 @@ void custom_call_flash_attention_varlen_forward(cudaStream_t stream,
   // Cast to char to avoid compiler warning about narrowing
   at::cuda::CUDAGuard device_guard{(char)q.get_device()};
 
-  auto dprops = at::cuda::getCurrentDeviceProperties();
-
   Flash_fwd_params launch_params;
 
   // Reset the parameters
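
For context only, a minimal sketch (not part of the patch) of the zero-initialization pattern the surviving code relies on: wrapping the raw device output buffers in ATen views with torch::from_blob and calling fill_(0), instead of the removed cudaMemsetAsync calls with hand-computed byte counts. The function name, buffer arguments, shapes, and dtypes below are illustrative placeholders, not the actual kernel parameters.

    // Sketch only: zero-fill device output buffers through ATen views rather
    // than raw cudaMemsetAsync calls. Names, shapes, and dtypes are placeholders.
    #include <torch/torch.h>

    void zero_outputs(void* softmax_lse_buf, void* o_buf, int64_t b, int64_t h,
                      int64_t seqlen_q, int64_t d) {
      auto opts = torch::TensorOptions().device(torch::kCUDA).dtype(torch::kFloat);
      // from_blob wraps the existing device memory without copying it.
      at::Tensor softmax_lse =
          torch::from_blob(softmax_lse_buf, {b, h, seqlen_q}, opts);
      at::Tensor o_output = torch::from_blob(o_buf, {b, seqlen_q, h, d}, opts);
      // fill_ dispatches a CUDA fill kernel on the current stream; ATen derives
      // the element count and dtype, so no manual sizeof arithmetic is needed.
      softmax_lse.fill_(0);
      o_output.fill_(0);
    }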