[CUDNN] Cleanup
Yudi Sun committed Jun 27, 2024
1 parent 8236c5e commit 5d5d8a6
Showing 1 changed file with 5 additions and 20 deletions.
25 changes: 5 additions & 20 deletions src/hidet/runtime/cuda/cudnn.cpp
@@ -633,7 +633,6 @@ DLL void hidet_cudnn_conv2d_gemm(
int tx, int tw, int ty, int compute_type,
int pad_dim1, int pad_dim2, int str_dim1, int str_dim2, int dil_dim1, int dil_dim2)
{
auto begin1 = std::chrono::steady_clock::now();
lazy_load_cudnn();

cudnnHandle_t cur_handle = CudnnContext::current_handle();
@@ -662,39 +661,27 @@ DLL void hidet_cudnn_conv2d_gemm(
CHECK_CUDNN(cudnnSetTensor4dDescriptor(output_descriptor, CUDNN_TENSOR_NCHW, cudnnDataType_t(ty),
out_n, out_c, out_h, out_w));

// size_t workspaceSize{0};
// CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(cur_handle, input_descriptor, kernel_descriptor,
// convolution_descriptor, output_descriptor, CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
// &workspaceSize));

size_t workspaceSize{2000000};
// std::cout << workspaceSize << std::endl;
// void *workspace = request_cuda_workspace(workspaceSize, false);
size_t workspaceSize{0};
CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(cur_handle, input_descriptor, kernel_descriptor,
convolution_descriptor, output_descriptor, CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
&workspaceSize));
void *workspace = hidet_cuda_malloc_async(workspaceSize, cur_stream);

void *p_alpha = nullptr;
void *p_beta = nullptr;
cudnnDataType_t compType = cudnnDataType_t(compute_type);
set_alpha_beta(&p_alpha, &p_beta, compType);
auto end1 = std::chrono::steady_clock::now();
std::cout << "Time difference 1 = " << std::chrono::duration_cast<std::chrono::microseconds>(end1 - begin1).count() << "[µs]" << std::endl;

auto begin2 = std::chrono::steady_clock::now();
CHECK_CUDNN(cudnnConvolutionForward(cur_handle, p_alpha, input_descriptor, ptr_x, kernel_descriptor, ptr_w,
convolution_descriptor, CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
workspace, workspaceSize,
p_beta, output_descriptor, ptr_y));
auto end2 = std::chrono::steady_clock::now();
std::cout << "Time difference 2 = " << std::chrono::duration_cast<std::chrono::microseconds>(end2 - begin2).count() << "[µs]" << std::endl;

auto begin3 = std::chrono::steady_clock::now();
CHECK_CUDNN(cudnnDestroyTensorDescriptor(input_descriptor));
CHECK_CUDNN(cudnnDestroyTensorDescriptor(output_descriptor));
CHECK_CUDNN(cudnnDestroyFilterDescriptor(kernel_descriptor));
CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(convolution_descriptor));
hidet_cuda_free_async(workspace, cur_stream);
auto end3 = std::chrono::steady_clock::now();
std::cout << "Time difference 3 = " << std::chrono::duration_cast<std::chrono::microseconds>(end3 - begin3).count() << "[µs]" << std::endl;
}

DLL void hidet_cudnn_conv2d(
@@ -830,9 +817,7 @@ DLL void hidet_cudnn_conv2d(
CHECK_CUDNN(cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINE_DESCRIPTOR, &engine));
CHECK_CUDNN(cudnnBackendSetAttribute(engine, CUDNN_ATTR_ENGINE_OPERATION_GRAPH,
CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &op_graph));
// TODO: Is it okay to hardcode the engine to be CUDNN_ATTR_ENGINE_GLOBAL_INDEX 0?
// As mentioned here: https://docs.nvidia.com/deeplearning/cudnn/developer/graph-api.html,
// Engine selection should be determined based on some heuristics.

int64_t gidx = 0;
CHECK_CUDNN(cudnnBackendSetAttribute(engine, CUDNN_ATTR_ENGINE_GLOBAL_INDEX,
CUDNN_TYPE_INT64, 1, &gidx));
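The substance of the cleanup in hidet_cudnn_conv2d_gemm is twofold: the std::chrono timing instrumentation and the commented-out dead code are removed, and the hardcoded 2,000,000-byte workspace is replaced by a size queried from cuDNN for the chosen algorithm. Below is a minimal standalone sketch of that query-then-allocate pattern, not the hidet runtime code itself: descriptor setup and error checking are elided, and cudaMallocAsync stands in for the hidet wrapper hidet_cuda_malloc_async.

// Sketch only: query the workspace requirement for the selected forward
// convolution algorithm instead of hardcoding a buffer size, then allocate
// it on the current stream.
#include <cudnn.h>
#include <cuda_runtime.h>

void *alloc_conv_workspace(cudnnHandle_t handle,
                           cudnnTensorDescriptor_t x_desc,
                           cudnnFilterDescriptor_t w_desc,
                           cudnnConvolutionDescriptor_t conv_desc,
                           cudnnTensorDescriptor_t y_desc,
                           cudaStream_t stream,
                           size_t *size_out) {
    size_t workspace_size = 0;
    // Ask cuDNN how much scratch memory this algorithm needs for these shapes.
    cudnnGetConvolutionForwardWorkspaceSize(handle, x_desc, w_desc, conv_desc, y_desc,
                                            CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
                                            &workspace_size);
    void *workspace = nullptr;
    // Stream-ordered allocation; pair with cudaFreeAsync on the same stream after the conv.
    cudaMallocAsync(&workspace, workspace_size, stream);
    *size_out = workspace_size;
    return workspace;
}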

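The last hunk drops a TODO that asked whether hardcoding CUDNN_ATTR_ENGINE_GLOBAL_INDEX to 0 is acceptable, noting that the cuDNN graph API documentation suggests choosing an engine via heuristics. For reference, a hedged sketch of that heuristics path using the backend API; op_graph is assumed to be an already finalized operation-graph descriptor, and CHECK_CUDNN-style error handling is omitted.

// Sketch only: ask the cuDNN backend heuristics for a ranked engine config
// instead of hardcoding engine global index 0.
cudnnBackendDescriptor_t pick_engine_config(cudnnBackendDescriptor_t op_graph) {
    // Build a heuristics descriptor over the operation graph.
    cudnnBackendDescriptor_t heur;
    cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR, &heur);
    cudnnBackendSetAttribute(heur, CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH,
                             CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &op_graph);
    cudnnBackendHeurMode_t mode = CUDNN_HEUR_MODE_INSTANT;
    cudnnBackendSetAttribute(heur, CUDNN_ATTR_ENGINEHEUR_MODE,
                             CUDNN_TYPE_HEUR_MODE, 1, &mode);
    cudnnBackendFinalize(heur);

    // Results are engine-config descriptors, pre-created by the caller and
    // filled in by cudnnBackendGetAttribute; here only the top-ranked one is taken.
    cudnnBackendDescriptor_t engine_cfg;
    cudnnBackendCreateDescriptor(CUDNN_BACKEND_ENGINECFG_DESCRIPTOR, &engine_cfg);
    int64_t returned = 0;
    cudnnBackendGetAttribute(heur, CUDNN_ATTR_ENGINEHEUR_RESULTS,
                             CUDNN_TYPE_BACKEND_DESCRIPTOR, 1, &returned, &engine_cfg);
    cudnnBackendDestroyDescriptor(heur);
    return engine_cfg;  // the caller finalizes an execution plan from this config
}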