From fa1486e1d1d09116d2b5f57dfef7d9307ebc76c6 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 3 Sep 2024 16:31:30 -0400 Subject: [PATCH] Remove ERROR_TEST gtest from libcudf (#16722) Removes the `ERROR_TEST` gtest from libcudf. This test only verified some macros against mostly CUDA behavior, not libcudf-specific functions. The tests have become troublesome to support in CI, especially in conjunction with other tools like `compute-sanitizer`. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) - Jayjeet Chakraborty (https://github.com/JayjeetAtGithub) URL: https://github.com/rapidsai/cudf/pull/16722 --- cpp/tests/CMakeLists.txt | 4 - cpp/tests/error/error_handling_test.cu | 136 ------------------------- 2 files changed, 140 deletions(-) delete mode 100644 cpp/tests/error/error_handling_test.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f86acbcc51b..1bedb344a01 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -110,10 +110,6 @@ ConfigureTest(SCALAR_TEST scalar/scalar_test.cpp scalar/scalar_device_view_test. 
# * timestamps tests ------------------------------------------------------------------------------ ConfigureTest(TIMESTAMPS_TEST wrappers/timestamps_test.cu) -# ################################################################################################## -# * cudf tests ------------------------------------------------------------------------------------ -ConfigureTest(ERROR_TEST error/error_handling_test.cu) - # ################################################################################################## # * groupby tests --------------------------------------------------------------------------------- ConfigureTest( diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu deleted file mode 100644 index 9c7459fa69d..00000000000 --- a/cpp/tests/error/error_handling_test.cu +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -TEST(ExpectsTest, FalseCondition) -{ - EXPECT_THROW(CUDF_EXPECTS(false, "condition is false"), cudf::logic_error); -} - -TEST(ExpectsTest, TrueCondition) { EXPECT_NO_THROW(CUDF_EXPECTS(true, "condition is true")); } - -TEST(CudaTryTest, Error) { EXPECT_THROW(CUDF_CUDA_TRY(cudaErrorLaunchFailure), cudf::cuda_error); } - -TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); } - -TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); } - -namespace { -// Some silly kernel that will cause an error -CUDF_KERNEL void test_kernel(int* data) { data[threadIdx.x] = threadIdx.x; } -} // namespace - -// In a release build and without explicit synchronization, CUDF_CHECK_CUDA may -// or may not fail on erroneous asynchronous CUDA calls. Invoke -// cudaStreamSynchronize to guarantee failure on error. In a non-release build, -// CUDF_CHECK_CUDA deterministically fails on erroneous asynchronous CUDA -// calls. 
-TEST(StreamCheck, FailedKernel) -{ - rmm::cuda_stream stream; - int a; - test_kernel<<<0, 0, 0, stream.value()>>>(&a); -#ifdef NDEBUG - stream.synchronize(); -#endif - EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); -} - -TEST(StreamCheck, CatchFailedKernel) -{ - rmm::cuda_stream stream; - int a; - test_kernel<<<0, 0, 0, stream.value()>>>(&a); -#ifndef NDEBUG - stream.synchronize(); -#endif - EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); -} - -CUDF_KERNEL void kernel() { asm("trap;"); } - -TEST(DeathTest, CudaFatalError) -{ - testing::FLAGS_gtest_death_test_style = "threadsafe"; - auto call_kernel = []() { - kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(); - try { - CUDF_CUDA_TRY(cudaDeviceSynchronize()); - } catch (const cudf::fatal_cuda_error& fe) { - std::abort(); - } - }; - ASSERT_DEATH(call_kernel(), ""); -} - -#ifndef NDEBUG - -CUDF_KERNEL void assert_false_kernel() { cudf_assert(false && "this kernel should die"); } - -CUDF_KERNEL void assert_true_kernel() { cudf_assert(true && "this kernel should live"); } - -TEST(DebugAssertDeathTest, cudf_assert_false) -{ - testing::FLAGS_gtest_death_test_style = "threadsafe"; - - auto call_kernel = []() { - auto const stream = cudf::get_default_stream().value(); - assert_false_kernel<<<1, 1, 0, stream>>>(); - - // Kernel should fail with `cudaErrorAssert` - // This error invalidates the current device context, so we need to kill - // the current process. Running with EXPECT_DEATH spawns a new process for - // each attempted kernel launch - if (cudaErrorAssert == cudaDeviceSynchronize()) { std::abort(); } - - // If we reach this point, the cudf_assert didn't work so we exit normally, which will cause - // EXPECT_DEATH to fail. 
- }; - - EXPECT_DEATH(call_kernel(), "this kernel should die"); -} - -TEST(DebugAssert, cudf_assert_true) -{ - auto const stream = cudf::get_default_stream().value(); - assert_true_kernel<<<1, 1, 0, stream>>>(); - ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize()); -} - -#endif - -// These tests don't use CUDF_TEST_PROGRAM_MAIN because : -// 1.) They don't need the RMM Pool -// 2.) The RMM Pool interferes with the death test -int main(int argc, char** argv) -{ - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return 0; } - - ::testing::InitGoogleTest(&argc, argv); - auto const cmd_opts = parse_cudf_test_opts(argc, argv); - auto adaptor = make_stream_mode_adaptor(cmd_opts); - return RUN_ALL_TESTS(); -}