From 82d74afed1c24ec0a1933d138adc70a2b55971b7 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Tue, 25 Jun 2024 13:34:44 -0400 Subject: [PATCH] Sanitizer fixes (#1916) * Add [skip-rapids] option. * Remove dead skip-tests logic. * Fix memory leak in CUB's radix sort custom test. * Fix memory leak in CUB's test_util_device * Fix memory leak in catch2_test_vsmem. * Fix #1895; Memory leak in device_select_api test. * Fix #1897; memory leak in test_block_radix_rank.cu * Fix #1896; memory leak in CUB's NVRTC test --- .github/actions/workflow-build/action.yml | 6 ------ .../workflows/ci-workflow-pull-request.yml | 2 +- .../catch2_test_device_radix_sort_custom.cu | 3 --- cub/test/catch2_test_device_select_api.cu | 21 ++++++++++++------- cub/test/catch2_test_nvrtc.cu | 1 + cub/test/catch2_test_util_device.cu | 15 ++++++++----- cub/test/catch2_test_vsmem.cu | 2 ++ cub/test/test_block_radix_rank.cu | 2 ++ 8 files changed, 29 insertions(+), 23 deletions(-) diff --git a/.github/actions/workflow-build/action.yml b/.github/actions/workflow-build/action.yml index 96a04a7c6c..57a9498952 100644 --- a/.github/actions/workflow-build/action.yml +++ b/.github/actions/workflow-build/action.yml @@ -9,10 +9,6 @@ inputs: description: "If true, the requested `workflows` will be ignored when a non-empty 'override' workflow exists in the matrix file." default: "false" required: false - skip_tests: - description: "Skip running tests" - default: "false" - required: false inspect_changes_script: description: "If defined, run this script to determine which projects/deps need to be tested." default: "" @@ -57,7 +53,6 @@ runs: shell: bash --noprofile --norc -euo pipefail {0} env: allow_override: ${{ inputs.allow_override == 'true' && '--allow-override' || ''}} - skip_tests: ${{ inputs.skip_tests == 'true' && '--skip-tests' || ''}} dirty_projects_flag: ${{ inputs.inspect_changes_script != '' && '--dirty-projects' || ''}} dirty_projects: ${{ inputs.inspect_changes_script != '' && steps.inspect-changes.outputs.dirty_projects || ''}} matrix_parser: ${{ inputs.matrix_parser && inputs.matrix_parser || '${GITHUB_ACTION_PATH}/build-workflow.py' }} @@ -67,7 +62,6 @@ runs: ${{ env.matrix_parser }} ${{ inputs.matrix_file }} \ --workflows ${{ inputs.workflows }} \ ${{ env.allow_override }} \ - ${{ env.skip_tests }} \ ${{ env.dirty_projects_flag }} ${{ env.dirty_projects }} echo "::group::Workflow" diff --git a/.github/workflows/ci-workflow-pull-request.yml b/.github/workflows/ci-workflow-pull-request.yml index e329d60ee4..06cd639164 100644 --- a/.github/workflows/ci-workflow-pull-request.yml +++ b/.github/workflows/ci-workflow-pull-request.yml @@ -62,7 +62,6 @@ jobs: nightly_workflow: ${{ contains(github.event.head_commit.message, '[workflow:nightly]') && 'nightly' || '' }} with: allow_override: "true" - skip_tests: ${{ toJSON(contains(github.event.head_commit.message, '[skip-tests]')) }} inspect_changes_script: ${{ toJSON(!contains(github.event.head_commit.message, '[all-projects]') && 'ci/inspect_changes.sh' || '') }} inspect_changes_base_sha: ${{ steps.export-pr-info.outputs.base_sha }} workflows: >- @@ -193,6 +192,7 @@ jobs: build-rapids: name: Build RAPIDS + if: ${{ !contains(github.event.head_commit.message, '[skip-rapids]') }} secrets: inherit permissions: actions: read diff --git a/cub/test/catch2_test_device_radix_sort_custom.cu b/cub/test/catch2_test_device_radix_sort_custom.cu index 4ffc5c008f..9f4a190c78 100644 --- a/cub/test/catch2_test_device_radix_sort_custom.cu +++ b/cub/test/catch2_test_device_radix_sort_custom.cu @@ -452,9 +452,6 @@ CUB_TEST("Device radix sort works with bits of custom i128_t keys (db)", "[radix constexpr int max_items = 1 << 18; const int num_items = GENERATE_COPY(take(4, random(max_items / 2, max_items))); - int* selector = nullptr; - cudaMallocHost(&selector, sizeof(int)); - c2h::device_vector keys_1(num_items); c2h::device_vector keys_2(num_items); c2h::gen(CUB_SEED(2), keys_1); diff --git a/cub/test/catch2_test_device_select_api.cu b/cub/test/catch2_test_device_select_api.cu index 6c230566f5..18b5ce887c 100644 --- a/cub/test/catch2_test_device_select_api.cu +++ b/cub/test/catch2_test_device_select_api.cu @@ -32,6 +32,7 @@ #include #include +#include #include @@ -58,10 +59,9 @@ CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][ is_even_t is_even{}; // Determine temporary device storage requirements - void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; cub::DeviceSelect::FlaggedIf( - d_temp_storage, + nullptr, temp_storage_bytes, d_in.begin(), d_flags.begin(), @@ -71,11 +71,11 @@ CUB_TEST("cub::DeviceSelect::FlaggedIf works with int data elements", "[select][ is_even); // Allocate temporary storage - cudaMalloc(&d_temp_storage, temp_storage_bytes); + c2h::device_vector temp_storage(temp_storage_bytes); // Run selection cub::DeviceSelect::FlaggedIf( - d_temp_storage, + thrust::raw_pointer_cast(temp_storage.data()), temp_storage_bytes, d_in.begin(), d_flags.begin(), @@ -102,17 +102,22 @@ CUB_TEST("cub::DeviceSelect::FlaggedIf in-place works with int data elements", " is_even_t is_even{}; // Determine temporary device storage requirements - void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; cub::DeviceSelect::FlaggedIf( - d_temp_storage, temp_storage_bytes, d_data.begin(), d_flags.begin(), d_num_selected_out.data(), num_items, is_even); + nullptr, temp_storage_bytes, d_data.begin(), d_flags.begin(), d_num_selected_out.data(), num_items, is_even); // Allocate temporary storage - cudaMalloc(&d_temp_storage, temp_storage_bytes); + c2h::device_vector temp_storage(temp_storage_bytes); // Run selection cub::DeviceSelect::FlaggedIf( - d_temp_storage, temp_storage_bytes, d_data.begin(), d_flags.begin(), d_num_selected_out.data(), num_items, is_even); + thrust::raw_pointer_cast(temp_storage.data()), + temp_storage_bytes, + d_data.begin(), + d_flags.begin(), + d_num_selected_out.data(), + num_items, + is_even); thrust::device_vector expected{0, 1, 5}; // example-end segmented-select-flaggedif-inplace diff --git a/cub/test/catch2_test_nvrtc.cu b/cub/test/catch2_test_nvrtc.cu index b842f0a2e3..0e1b232ff6 100644 --- a/cub/test/catch2_test_nvrtc.cu +++ b/cub/test/catch2_test_nvrtc.cu @@ -294,6 +294,7 @@ TEST_CASE("Test nvrtc", "[test][nvrtc]") } REQUIRE(CUDA_SUCCESS == cuMemFree(d_ptr)); + REQUIRE(CUDA_SUCCESS == cuMemFree(d_err)); REQUIRE(CUDA_SUCCESS == cuModuleUnload(module)); REQUIRE(CUDA_SUCCESS == cuCtxDestroy(context)); } diff --git a/cub/test/catch2_test_util_device.cu b/cub/test/catch2_test_util_device.cu index 888bcf1d44..c59c076ec5 100644 --- a/cub/test/catch2_test_util_device.cu +++ b/cub/test/catch2_test_util_device.cu @@ -64,11 +64,16 @@ CUB_TEST("CUB correctly identifies the ptx version the kernel was compiled for", constexpr std::size_t single_item = 1; c2h::device_vector cuda_arch(single_item); - int* ptx_version{}; - cudaMallocHost(&ptx_version, sizeof(*ptx_version)); - // Query the arch the kernel was actually compiled for - get_cuda_arch_from_kernel(thrust::raw_pointer_cast(cuda_arch.data()), ptx_version); + int ptx_version = [&]() -> int { + int* buffer{}; + cudaMallocHost(&buffer, sizeof(*buffer)); + get_cuda_arch_from_kernel(thrust::raw_pointer_cast(cuda_arch.data()), buffer); + int result = *buffer; + cudaFreeHost(buffer); + return result; + }(); + int kernel_cuda_arch = cuda_arch[0]; // Host cub::PtxVersion @@ -79,6 +84,6 @@ CUB_TEST("CUB correctly identifies the ptx version the kernel was compiled for", REQUIRE(0 != kernel_cuda_arch); // Ensure that the ptx version corresponds to the arch the kernel was compiled for - REQUIRE(*ptx_version == kernel_cuda_arch); + REQUIRE(ptx_version == kernel_cuda_arch); REQUIRE(host_ptx_version == kernel_cuda_arch); } diff --git a/cub/test/catch2_test_vsmem.cu b/cub/test/catch2_test_vsmem.cu index a4ae61e08c..91069fcab1 100644 --- a/cub/test/catch2_test_vsmem.cu +++ b/cub/test/catch2_test_vsmem.cu @@ -474,4 +474,6 @@ CUB_TEST("Virtual shared memory works within algorithms", "[util][vsmem]", type_ // The virtual shared memory helper pads vsmem to a multiple of a line size, hence the range check REQUIRE(launch_config_info->config_vsmem_per_block >= expected_vsmem_per_block); } + + cudaFreeHost(launch_config_info); } diff --git a/cub/test/test_block_radix_rank.cu b/cub/test/test_block_radix_rank.cu index f642696c1f..6d36378882 100644 --- a/cub/test/test_block_radix_rank.cu +++ b/cub/test/test_block_radix_rank.cu @@ -337,5 +337,7 @@ int main(int argc, char** argv) Test<128>(); Test<130>(); + g_allocator.FreeAllCached(); + return 0; }