-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Hipify during build instead of before cmake config (#13333)
### Description Currently, hipify happens before cmake is configured, and then cmake globs the directories. This gets rid of that customized Python threading logic and opts for the build system itself to generate the files. This also supersedes the half-baked branch [sukha/hipify-with-cmake](https://github.com/microsoft/onnxruntime/tree/sukha/hipify-with-cmake)
- Loading branch information
Showing
5 changed files
with
253 additions
and
321 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
# Copyright (c) Microsoft Corporation. All rights reserved. | ||
# Licensed under the MIT License. | ||
|
||
find_package(Python3 COMPONENTS Interpreter REQUIRED) | ||
|
||
# GLOB patterns of the files to be excluded from hipify.
# When a list like this one is handed to hipify(), every pattern is resolved
# relative to "${REPO_ROOT}/<cuda_dir>/cuda/". Entries are grouped by
# subdirectory; the resulting list order is the order written below.
set(contrib_ops_excluded_files "")

# bert/
list(APPEND contrib_ops_excluded_files
  "bert/attention.cc"
  "bert/attention.h"
  "bert/attention_impl.cu"
  "bert/attention_softmax.h"
  "bert/embed_layer_norm.cc"
  "bert/embed_layer_norm.h"
  "bert/embed_layer_norm_impl.cu"
  "bert/embed_layer_norm_impl.h"
  "bert/fast_gelu_impl.cu"
  "bert/fast_gelu_impl.h"
  "bert/fast_gelu.cc"
  "bert/fast_gelu.h"
  "bert/skip_layer_norm.cc"
  "bert/skip_layer_norm.h"
  "bert/skip_layer_norm_impl.cu"
  "bert/skip_layer_norm_impl.h"
  "bert/tensorrt_fused_multihead_attention/*"
  "bert/transformer_common.h"
  "bert/transformer_common.cc"
)

# math/
list(APPEND contrib_ops_excluded_files
  "math/complex_mul.cc"
  "math/complex_mul.h"
  "math/complex_mul_impl.cu"
  "math/complex_mul_impl.h"
  "math/cufft_plan_cache.h"
  "math/fft_ops.cc"
  "math/fft_ops.h"
  "math/fft_ops_impl.cu"
  "math/fft_ops_impl.h"
)

# quantization/
list(APPEND contrib_ops_excluded_files
  "quantization/attention_quantization.cc"
  "quantization/attention_quantization.h"
  "quantization/attention_quantization_impl.cu"
  "quantization/attention_quantization_impl.cuh"
  "quantization/quantize_dequantize_linear.cc"
)

# quantization/qordered_ops/
list(APPEND contrib_ops_excluded_files
  "quantization/qordered_ops/qordered_attention_impl.cu"
  "quantization/qordered_ops/qordered_attention_impl.h"
  "quantization/qordered_ops/qordered_attention_input_enum.h"
  "quantization/qordered_ops/qordered_attention.cc"
  "quantization/qordered_ops/qordered_attention.h"
  "quantization/qordered_ops/qordered_common.cuh"
  "quantization/qordered_ops/qordered_layer_norm.h"
  "quantization/qordered_ops/qordered_layer_norm.cc"
  "quantization/qordered_ops/qordered_layer_norm_impl.h"
  "quantization/qordered_ops/qordered_layer_norm_impl.cu"
  "quantization/qordered_ops/qordered_longformer_attention.cc"
  "quantization/qordered_ops/qordered_longformer_attention.h"
  "quantization/qordered_ops/qordered_matmul.h"
  "quantization/qordered_ops/qordered_matmul.cc"
  "quantization/qordered_ops/qordered_matmul_utils.h"
  "quantization/qordered_ops/qordered_matmul_utils.cc"
  "quantization/qordered_ops/qordered_qdq_impl.cu"
  "quantization/qordered_ops/qordered_qdq_impl.h"
  "quantization/qordered_ops/qordered_qdq.cc"
  "quantization/qordered_ops/qordered_qdq.h"
  "quantization/qordered_ops/qordered_unary_ops.h"
  "quantization/qordered_ops/qordered_unary_ops.cc"
  "quantization/qordered_ops/qordered_unary_ops_impl.h"
  "quantization/qordered_ops/qordered_unary_ops_impl.cu"
)

# tensor/
list(APPEND contrib_ops_excluded_files
  "tensor/crop.cc"
  "tensor/crop.h"
  "tensor/crop_impl.cu"
  "tensor/crop_impl.h"
  "tensor/dynamicslice.cc"
  "tensor/image_scaler.cc"
  "tensor/image_scaler.h"
  "tensor/image_scaler_impl.cu"
  "tensor/image_scaler_impl.h"
)

# transformers/
list(APPEND contrib_ops_excluded_files
  "transformers/beam_search.cc"
  "transformers/beam_search.h"
  "transformers/generation_device_helper.cc"
  "transformers/generation_device_helper.h"
  "transformers/beam_search_impl.cu"
  "transformers/beam_search_impl.h"
  "transformers/greedy_search.cc"
  "transformers/greedy_search.h"
  "transformers/dump_cuda_tensor.cc"
  "transformers/dump_cuda_tensor.h"
)

# files directly inside the cuda directory
list(APPEND contrib_ops_excluded_files
  "conv_transpose_with_dynamic_pads.cc"
  "conv_transpose_with_dynamic_pads.h"
  "cuda_contrib_kernels.cc"
  "cuda_contrib_kernels.h"
  "inverse.cc"
  "fused_conv.cc"
)
|
||
# GLOB patterns of the files to be excluded from hipify.
# Entries are grouped by subdirectory; the resulting list order is the order
# written below.
set(provider_excluded_files "")

# atomic/
list(APPEND provider_excluded_files
  "atomic/common.cuh"
)

# controlflow/
list(APPEND provider_excluded_files
  "controlflow/if.cc"
  "controlflow/if.h"
  "controlflow/loop.cc"
  "controlflow/loop.h"
  "controlflow/scan.cc"
  "controlflow/scan.h"
)

# cu_inc/
list(APPEND provider_excluded_files
  "cu_inc/common.cuh"
)

# math/
list(APPEND provider_excluded_files
  "math/einsum_utils/einsum_auxiliary_ops.cc"
  "math/einsum_utils/einsum_auxiliary_ops.h"
  "math/einsum_utils/einsum_auxiliary_ops_diagonal.cu"
  "math/einsum_utils/einsum_auxiliary_ops_diagonal.h"
  "math/einsum.cc"
  "math/einsum.h"
  "math/gemm.cc"
  "math/matmul.cc"
  "math/softmax_impl.cu"
  "math/softmax_warpwise_impl.cuh"
  "math/softmax_common.cc"
  "math/softmax.cc"
)

# nn/
list(APPEND provider_excluded_files
  "nn/conv.cc"
  "nn/conv.h"
  "nn/conv_transpose.cc"
  "nn/conv_transpose.h"
)

# reduction/
list(APPEND provider_excluded_files
  "reduction/reduction_ops.cc"
)

# rnn/
list(APPEND provider_excluded_files
  "rnn/cudnn_rnn_base.cc"
  "rnn/cudnn_rnn_base.h"
  "rnn/gru.cc"
  "rnn/gru.h"
  "rnn/lstm.cc"
  "rnn/lstm.h"
  "rnn/rnn.cc"
  "rnn/rnn.h"
  "rnn/rnn_impl.cu"
  "rnn/rnn_impl.h"
)

# shared_inc/
list(APPEND provider_excluded_files
  "shared_inc/cuda_call.h"
  "shared_inc/fpgeneric.h"
)

# files directly inside the cuda directory
list(APPEND provider_excluded_files
  "cuda_allocator.cc"
  "cuda_allocator.h"
  "cuda_call.cc"
  "cuda_common.cc"
  "cuda_common.h"
  "cuda_execution_provider_info.cc"
  "cuda_execution_provider_info.h"
  "cuda_execution_provider.cc"
  "cuda_execution_provider.h"
  "cuda_memory_check.cc"
  "cuda_memory_check.h"
  "cuda_fence.cc"
  "cuda_fence.h"
  "cuda_fwd.h"
  "cuda_kernel.h"
  "cuda_pch.cc"
  "cuda_pch.h"
  "cuda_profiler.cc"
  "cuda_profiler.h"
  "cuda_provider_factory.cc"
  "cuda_provider_factory.h"
  "cuda_utils.cu"
  "cudnn_common.cc"
  "cudnn_common.h"
  "fpgeneric.cu"
  "gpu_data_transfer.cc"
  "gpu_data_transfer.h"
  "integer_gemm.cc"
)
|
||
# GLOB patterns of the files to be excluded from hipify.
# Where the reason for an exclusion is known it is noted next to the entry.
set(training_ops_excluded_files "")

# activation/
list(APPEND training_ops_excluded_files
  "activation/gelu_grad_impl_common.cuh"  # uses custom tanh
)

# collective/
list(APPEND training_ops_excluded_files
  "collective/adasum_kernels.cc"
  "collective/adasum_kernels.h"
)

# math/
list(APPEND training_ops_excluded_files
  "math/div_grad.cc"  # miopen API differs from cudnn, no double type support
)

# nn/
list(APPEND training_ops_excluded_files
  "nn/batch_norm_grad.cc"      # no double type support
  "nn/batch_norm_grad.h"       # miopen API differs from cudnn
  "nn/batch_norm_internal.cc"  # miopen API differs from cudnn, no double type support
  "nn/batch_norm_internal.h"   # miopen API differs from cudnn, no double type support
  "nn/conv_grad.cc"
  "nn/conv_grad.h"
)

# reduction/
list(APPEND training_ops_excluded_files
  "reduction/reduction_all.cc"  # deterministic = true, ignore ctx setting
  "reduction/reduction_ops.cc"  # no double type support
)

# files directly inside the cuda directory
list(APPEND training_ops_excluded_files
  "cuda_training_kernels.cc"
  "cuda_training_kernels.h"
)
|
||
|
||
# Registers one build-time rule per CUDA source file that runs the hipify tool
# on it, so the translated files are produced by the build system itself.
#
# Arguments:
#   cuda_dir                  - directory, relative to REPO_ROOT, whose "cuda"
#                               subdirectory is searched recursively for
#                               *.h, *.cc, *.cuh and *.cu files.
#   in_excluded_file_patterns - NAME of a list variable holding GLOB patterns,
#                               relative to ${REPO_ROOT}/${cuda_dir}/cuda/, of
#                               files that must not be hipified.
#   out_generated_cc_files    - NAME of the caller variable that receives the
#                               generated files that are not *.cu / *.cuh.
#   out_generated_cu_files    - NAME of the caller variable that receives the
#                               generated *.cu / *.cuh files.
#
# Every output is placed at ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/<path>, where
# <path> is the REPO_ROOT-relative input path with each "cuda" replaced by
# "rocm" (this affects directory names and file names alike).
#
# Reads REPO_ROOT and onnxruntime_HIPIFY_PERL from the calling scope and
# requires the Python3::Interpreter imported target.
function(hipify cuda_dir in_excluded_file_patterns out_generated_cc_files out_generated_cu_files)
  set(hipify_tool "${REPO_ROOT}/tools/ci_build/amd_hipify.py")
  set(cuda_src_root "${REPO_ROOT}/${cuda_dir}/cuda")

  # A function scope starts out with a copy of the caller's variables. Reset
  # the accumulators so same-named caller variables cannot leak into the result.
  set(generated_cc_files "")
  set(generated_cu_files "")

  file(GLOB_RECURSE srcs CONFIGURE_DEPENDS
    "${cuda_src_root}/*.h"
    "${cuda_src_root}/*.cc"
    "${cuda_src_root}/*.cuh"
    "${cuda_src_root}/*.cu"
  )

  # do exclusion
  set(excluded_file_patterns ${${in_excluded_file_patterns}})
  list(TRANSFORM excluded_file_patterns PREPEND "${cuda_src_root}/")
  set(excluded_srcs "")
  if(NOT "${excluded_file_patterns}" STREQUAL "")
    file(GLOB_RECURSE excluded_srcs CONFIGURE_DEPENDS ${excluded_file_patterns})
  endif()
  foreach(f IN LISTS excluded_srcs)
    message(STATUS "Excluded from hipify: ${f}")
  endforeach()
  # list(REMOVE_ITEM) is an error when given no items to remove, which happens
  # whenever none of the patterns matches a file.
  if(NOT "${excluded_srcs}" STREQUAL "")
    list(REMOVE_ITEM srcs ${excluded_srcs})
  endif()

  foreach(f IN LISTS srcs)
    file(RELATIVE_PATH cuda_f_rel "${REPO_ROOT}" "${f}")
    string(REPLACE "cuda" "rocm" rocm_f_rel "${cuda_f_rel}")
    set(f_out "${CMAKE_CURRENT_BINARY_DIR}/amdgpu/${rocm_f_rel}")
    add_custom_command(
      OUTPUT "${f_out}"
      COMMAND Python3::Interpreter "${hipify_tool}"
              --hipify_perl ${onnxruntime_HIPIFY_PERL}
              "${f}" -o "${f_out}"
      DEPENDS "${hipify_tool}" "${f}"
      COMMENT "Hipify: ${cuda_f_rel} -> amdgpu/${rocm_f_rel}"
      VERBATIM
    )
    # Classify by the real file extension only. The regex is anchored at the
    # end so that a "." elsewhere in the path followed by "cu" (for instance
    # the "cuda" directory itself) cannot turn a .cc/.h file into a CU source.
    if(f MATCHES "\\.cuh?$")
      list(APPEND generated_cu_files "${f_out}")
    else()
      list(APPEND generated_cc_files "${f_out}")
    endif()
  endforeach()

  # Mark the actual generated files (the expanded lists, not the variable
  # names) as GENERATED. Skipped when nothing was generated.
  if(NOT "${generated_cc_files}" STREQUAL "")
    set_source_files_properties(${generated_cc_files} PROPERTIES GENERATED TRUE)
  endif()
  if(NOT "${generated_cu_files}" STREQUAL "")
    set_source_files_properties(${generated_cu_files} PROPERTIES GENERATED TRUE)
  endif()

  set(${out_generated_cc_files} ${generated_cc_files} PARENT_SCOPE)
  set(${out_generated_cu_files} ${generated_cu_files} PARENT_SCOPE)
endfunction()
Oops, something went wrong.