diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json
index 2a195c6c81d..a0e193ff0bf 100644
--- a/.devcontainer/cuda12.5-conda/devcontainer.json
+++ b/.devcontainer/cuda12.5-conda/devcontainer.json
@@ -15,9 +15,31 @@
   ],
   "hostRequirements": {"gpu": "optional"},
   "features": {
+    "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": {
+        "version": "12.5",
+        "installCompilers": false,
+        "installProfilers": true,
+        "installDevPackages": false,
+        "installcuDNN": false,
+        "installcuTensor": false,
+        "installNCCL": false,
+        "installCUDARuntime": false,
+        "installNVRTC": false,
+        "installOpenCL": false,
+        "installcuBLAS": false,
+        "installcuSPARSE": false,
+        "installcuFFT": false,
+        "installcuFile": false,
+        "installcuRAND": false,
+        "installcuSOLVER": false,
+        "installNPP": false,
+        "installnvJPEG": false,
+        "pruneStaticLibs": true
+      },
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {}
   },
   "overrideFeatureInstallOrder": [
+    "ghcr.io/rapidsai/devcontainers/features/cuda",
     "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils"
   ],
   "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"],
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index a22d3c5b9cc..1275aad757c 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -47,11 +47,23 @@ jobs:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
     with:
-      build_type: pull-request
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
       # Use the wheel container so we can skip conda solves and since our
       # primary static consumers (Spark) are not in conda anyway.
       container_image: "rapidsai/ci-wheel:latest"
       run_script: "ci/configure_cpp_static.sh"
+  clang-tidy:
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
+    with:
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      run_script: "ci/clang_tidy.sh"
   conda-python-cudf-tests:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12
diff --git a/ci/clang_tidy.sh b/ci/clang_tidy.sh
new file mode 100755
index 00000000000..4d5d3fc3136
--- /dev/null
+++ b/ci/clang_tidy.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+rapids-logger "Create clang-tidy conda environment"
+. /opt/conda/etc/profile.d/conda.sh
+
+ENV_YAML_DIR="$(mktemp -d)"
+
+rapids-dependency-file-generator \
+  --output conda \
+  --file-key clang_tidy \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"
+
+rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n clang_tidy
+
+# Temporarily allow unbound variables for conda activation.
+set +u
+conda activate clang_tidy
+set -u
+
+RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"
+
+source rapids-configure-sccache
+
+# Run the build via CMake, which will run clang-tidy when CUDF_CLANG_TIDY is enabled.
+cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON -GNinja
+cmake --build cpp/build
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 870901d223b..95f36653c2c 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -93,6 +93,7 @@ sed_runner "s/cudf-.*-SNAPSHOT/cudf-${NEXT_FULL_JAVA_TAG}/g" java/ci/README.md
 # .devcontainer files
 find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
     sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
+    sed_runner "s@rapidsai/devcontainers/features/cuda:[0-9.]*@rapidsai/devcontainers/features/cuda:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
     sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}"
     sed_runner "s@rapids-\${localWorkspaceFolderBasename}-[0-9.]*@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}"
 done
diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy
index 2d4f8c0d80e..12120a5c6d1 100644
--- a/cpp/.clang-tidy
+++ b/cpp/.clang-tidy
@@ -39,7 +39,7 @@ Checks:
        -clang-analyzer-optin.core.EnumCastOutOfRange,
        -clang-analyzer-optin.cplusplus.UninitializedObject'
 
-WarningsAsErrors: ''
+WarningsAsErrors: '*'
 HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
 ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
 FormatStyle:     none
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index a0ea9579475..32a753c9f40 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -88,6 +88,7 @@ option(
   ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL}
 )
 mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
+option(CUDF_CLANG_TIDY "Enable clang-tidy checking" OFF)
 
 message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
 message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
@@ -144,6 +145,58 @@ if(NOT CUDF_GENERATED_INCLUDE_DIR)
   set(CUDF_GENERATED_INCLUDE_DIR ${CUDF_BINARY_DIR})
 endif()
 
+# ##################################################################################################
+# * clang-tidy configuration ----------------------------------------------------------------------
+if(CUDF_CLANG_TIDY)
+  find_program(
+    CLANG_TIDY_EXE
+    NAMES "clang-tidy"
+    DOC "Path to clang-tidy executable" REQUIRED
+  )
+
+  execute_process(
+    COMMAND ${CLANG_TIDY_EXE} --version
+    OUTPUT_VARIABLE CLANG_TIDY_OUTPUT
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+  string(REGEX MATCH "LLVM version ([0-9]+\\.[0-9]+)\\.[0-9]+" LLVM_VERSION_MATCH
+               "${CLANG_TIDY_OUTPUT}"
+  )
+  # Discard the patch version and allow it to float. Empirically, results between patch versions are
+  # mostly stable, and looking at available packages on some package managers sometimes patch
+  # versions are skipped so we don't want to constrain to a patch version that the user can't
+  # install.
+  set(LLVM_VERSION "${CMAKE_MATCH_1}")
+  set(expected_clang_tidy_version 19.1)
+  if(NOT expected_clang_tidy_version VERSION_EQUAL LLVM_VERSION)
+    message(
+      FATAL_ERROR
+        "clang-tidy version ${expected_clang_tidy_version} is required, but found ${LLVM_VERSION}"
+    )
+  endif()
+endif()
+
+# Turn on the clang-tidy property for a target excluding the files specified in SKIPPED_FILES.
+function(enable_clang_tidy target)
+  set(_tidy_options)
+  set(_tidy_one_value)
+  set(_tidy_multi_value SKIPPED_FILES)
+  cmake_parse_arguments(
+    _TIDY "${_tidy_options}" "${_tidy_one_value}" "${_tidy_multi_value}" ${ARGN}
+  )
+
+  if(CUDF_CLANG_TIDY)
+    # clang will complain about unused link libraries on the compile line unless we specify
+    # -Qunused-arguments.
+    set_target_properties(
+      ${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
+    )
+    foreach(file IN LISTS _TIDY_SKIPPED_FILES)
+      set_source_files_properties(${file} PROPERTIES SKIP_LINTING ON)
+    endforeach()
+  endif()
+endfunction()
+
 # ##################################################################################################
 # * conda environment -----------------------------------------------------------------------------
 rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH)
@@ -714,6 +767,7 @@ target_compile_options(
   cudf PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
                "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
 )
+enable_clang_tidy(cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp)
 
 if(CUDF_BUILD_STACKTRACE_DEBUG)
   # Remove any optimization level to avoid nvcc warning "incompatible redefinition for option
@@ -863,15 +917,7 @@ if(CUDF_BUILD_TESTUTIL)
 
   add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream)
 
-  add_library(
-    cudftestutil SHARED
-    tests/io/metadata_utilities.cpp
-    tests/utilities/column_utilities.cu
-    tests/utilities/debug_utilities.cu
-    tests/utilities/random_seed.cpp
-    tests/utilities/table_utilities.cu
-    tests/utilities/tdigest_utilities.cu
-  )
+  add_library(cudftestutil INTERFACE)
 
   set_target_properties(
     cudftestutil
@@ -880,32 +926,56 @@ if(CUDF_BUILD_TESTUTIL)
                # set target compile options
                CXX_STANDARD 17
                CXX_STANDARD_REQUIRED ON
-               CXX_VISIBILITY_PRESET hidden
                CUDA_STANDARD 17
                CUDA_STANDARD_REQUIRED ON
-               CUDA_VISIBILITY_PRESET hidden
-               POSITION_INDEPENDENT_CODE ON
-               INTERFACE_POSITION_INDEPENDENT_CODE ON
   )
 
   target_compile_options(
-    cudftestutil PUBLIC "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
-                        "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>>"
+    cudftestutil INTERFACE "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
+                           "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>>"
   )
 
   target_link_libraries(
-    cudftestutil
-    PUBLIC Threads::Threads cudf cudftest_default_stream
-    PRIVATE GTest::gmock GTest::gtest $<TARGET_NAME_IF_EXISTS:conda_env>
+    cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream
+                           $<TARGET_NAME_IF_EXISTS:conda_env>
   )
 
   target_include_directories(
-    cudftestutil PUBLIC "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}>"
-                        "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
+    cudftestutil INTERFACE "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}>"
+                           "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
   )
   rapids_cuda_set_runtime(cudftestutil USE_STATIC ${CUDA_STATIC_RUNTIME})
   add_library(cudf::cudftestutil ALIAS cudftestutil)
 
+  add_library(cudftestutil_impl INTERFACE)
+  add_library(cudf::cudftestutil_impl ALIAS cudftestutil_impl)
+  target_sources(
+    cudftestutil_impl
+    INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tests/io/metadata_utilities.cpp>
+              $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tests/utilities/column_utilities.cu>
+              $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tests/utilities/debug_utilities.cu>
+              $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tests/utilities/random_seed.cpp>
+              $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tests/utilities/table_utilities.cu>
+              $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/tests/utilities/tdigest_utilities.cu>
+              $<INSTALL_INTERFACE:src/cudftestutil/io/metadata_utilities.cpp>
+              $<INSTALL_INTERFACE:src/cudftestutil/utilities/column_utilities.cu>
+              $<INSTALL_INTERFACE:src/cudftestutil/utilities/debug_utilities.cu>
+              $<INSTALL_INTERFACE:src/cudftestutil/utilities/random_seed.cpp>
+              $<INSTALL_INTERFACE:src/cudftestutil/utilities/table_utilities.cu>
+              $<INSTALL_INTERFACE:src/cudftestutil/utilities/tdigest_utilities.cu>
+  )
+  target_link_libraries(cudftestutil_impl INTERFACE cudf::cudftestutil)
+
+  install(FILES tests/io/metadata_utilities.cpp DESTINATION src/cudftestutil/io)
+  install(
+    FILES tests/utilities/column_utilities.cu
+          tests/utilities/debug_utilities.cu
+          tests/utilities/random_seed.cpp
+          tests/utilities/table_utilities.cu
+          tests/utilities/tdigest_utilities.cu
+    DESTINATION src/cudftestutil/utilities
+  )
+
 endif()
 
 # * build cudf_identify_stream_usage --------------------------------------------------------------
@@ -1006,7 +1076,7 @@ install(
 set(_components_export_string)
 if(TARGET cudftestutil)
   install(
-    TARGETS cudftest_default_stream cudftestutil
+    TARGETS cudftest_default_stream cudftestutil cudftestutil_impl
     DESTINATION ${lib_dir}
     EXPORT cudf-testing-exports
   )
@@ -1046,14 +1116,15 @@ targets:
 This module offers an optional testing component which defines the
 following IMPORTED GLOBAL  targets:
 
- cudf::cudftestutil     - The main cudf testing library
+ cudf::cudftestutil          - The main cudf testing library
+ cudf::cudftestutil_impl     - C++ and CUDA sources to compile for definitions in cudf::cudftestutil
     ]=]
 )
 
 rapids_export(
   INSTALL cudf
   EXPORT_SET cudf-exports ${_components_export_string}
-  GLOBAL_TARGETS cudf cudftestutil
+  GLOBAL_TARGETS cudf cudftestutil cudftestutil_impl
   NAMESPACE cudf::
   DOCUMENTATION doc_string
 )
@@ -1074,7 +1145,7 @@ endif()
 rapids_export(
   BUILD cudf
   EXPORT_SET cudf-exports ${_components_export_string}
-  GLOBAL_TARGETS cudf cudftestutil
+  GLOBAL_TARGETS cudf cudftestutil cudftestutil_impl
   NAMESPACE cudf::
   DOCUMENTATION doc_string
   FINAL_CODE_BLOCK build_code_string
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index b0f75b25975..d6fc5dc6039 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -25,7 +25,7 @@ target_compile_options(
 target_link_libraries(
   cudf_datagen
   PUBLIC GTest::gmock GTest::gtest benchmark::benchmark nvbench::nvbench Threads::Threads cudf
-         cudftestutil nvtx3::nvtx3-cpp
+         cudf::cudftestutil nvtx3::nvtx3-cpp
   PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
 )
 
@@ -49,7 +49,7 @@ target_compile_options(
 
 target_link_libraries(
   ndsh_data_generator
-  PUBLIC cudf cudftestutil nvtx3::nvtx3-cpp
+  PUBLIC cudf GTest::gmock GTest::gtest cudf::cudftestutil nvtx3::nvtx3-cpp
   PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
 )
 
@@ -65,14 +65,14 @@ target_include_directories(
 # Use an OBJECT library so we only compile these helper source files only once
 add_library(
   cudf_benchmark_common OBJECT
-  "${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp"
-  synchronization/synchronization.cpp
-  io/cuio_common.cpp
-  common/table_utilities.cpp
-  common/benchmark_utilities.cpp
-  common/nvbench_utilities.cpp
+  synchronization/synchronization.cpp io/cuio_common.cpp common/table_utilities.cpp
+  common/benchmark_utilities.cpp common/nvbench_utilities.cpp
 )
-target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $<TARGET_NAME_IF_EXISTS:conda_env>)
+target_link_libraries(
+  cudf_benchmark_common PRIVATE cudf_datagen $<TARGET_NAME_IF_EXISTS:conda_env> GTest::gmock
+                                GTest::gtest
+)
+
 add_custom_command(
   OUTPUT CUDF_BENCHMARKS
   COMMAND echo Running benchmarks
@@ -99,7 +99,7 @@ function(ConfigureBench CMAKE_BENCH_NAME)
   )
   target_link_libraries(
     ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main
-                                $<TARGET_NAME_IF_EXISTS:conda_env>
+                                cudf::cudftestutil_impl $<TARGET_NAME_IF_EXISTS:conda_env>
   )
   add_custom_command(
     OUTPUT CUDF_BENCHMARKS
@@ -127,8 +127,9 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
                INSTALL_RPATH "\$ORIGIN/../../../lib"
   )
   target_link_libraries(
-    ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen
-                                nvbench::nvbench $<TARGET_NAME_IF_EXISTS:conda_env>
+    ${CMAKE_BENCH_NAME}
+    PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen nvbench::nvbench
+            $<TARGET_NAME_IF_EXISTS:conda_env> cudf::cudftestutil_impl
   )
   install(
     TARGETS ${CMAKE_BENCH_NAME}
diff --git a/cpp/include/cudf_test/testing_main.hpp b/cpp/include/cudf_test/testing_main.hpp
index 272c91133f8..2bd08f410e0 100644
--- a/cpp/include/cudf_test/testing_main.hpp
+++ b/cpp/include/cudf_test/testing_main.hpp
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <cudf_test/cudf_gtest.hpp>
 #include <cudf_test/cxxopts.hpp>
 #include <cudf_test/stream_checking_resource_adaptor.hpp>
 
@@ -36,6 +37,12 @@
 namespace CUDF_EXPORT cudf {
 namespace test {
 
+struct config {
+  std::string rmm_mode;
+  std::string stream_mode;
+  std::string stream_error_mode;
+};
+
 /// MR factory functions
 inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
 
@@ -157,10 +164,9 @@ inline auto parse_cudf_test_opts(int argc, char** argv)
  * @param cmd_opts Command line options returned by parse_cudf_test_opts
  * @return Memory resource adaptor
  */
-inline auto make_memory_resource_adaptor(cxxopts::ParseResult const& cmd_opts)
+inline auto make_memory_resource_adaptor(cudf::test::config const& config)
 {
-  auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>();
-  auto resource       = cudf::test::create_memory_resource(rmm_mode);
+  auto resource = cudf::test::create_memory_resource(config.rmm_mode);
   cudf::set_current_device_resource(resource.get());
   return resource;
 }
@@ -176,37 +182,54 @@ inline auto make_memory_resource_adaptor(cxxopts::ParseResult const& cmd_opts)
  * @param cmd_opts Command line options returned by parse_cudf_test_opts
  * @return Memory resource adaptor
  */
-inline auto make_stream_mode_adaptor(cxxopts::ParseResult const& cmd_opts)
+inline auto make_stream_mode_adaptor(cudf::test::config const& config)
 {
   auto resource                      = cudf::get_current_device_resource_ref();
-  auto const stream_mode             = cmd_opts["stream_mode"].as<std::string>();
-  auto const stream_error_mode       = cmd_opts["stream_error_mode"].as<std::string>();
-  auto const error_on_invalid_stream = (stream_error_mode == "error");
-  auto const check_default_stream    = (stream_mode == "new_cudf_default");
+  auto const error_on_invalid_stream = (config.stream_error_mode == "error");
+  auto const check_default_stream    = (config.stream_mode == "new_cudf_default");
   auto adaptor                       = cudf::test::stream_checking_resource_adaptor(
     resource, error_on_invalid_stream, check_default_stream);
-  if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) {
+  if ((config.stream_mode == "new_cudf_default") || (config.stream_mode == "new_testing_default")) {
     cudf::set_current_device_resource(&adaptor);
   }
   return adaptor;
 }
 
+/**
+ * @brief Should be called in every test program that uses rmm allocators since it maintains the
+ * lifespan of the rmm default memory resource. this function parses the command line to customize
+ * test behavior, like the allocation mode used for creating the default memory resource.
+ *
+ */
+inline void init_cudf_test(int argc, char** argv, cudf::test::config const& config_override = {})
+{
+  // static lifetime to keep rmm resource alive till tests end
+  auto const cmd_opts       = parse_cudf_test_opts(argc, argv);
+  cudf::test::config config = config_override;
+  if (config.rmm_mode.empty()) { config.rmm_mode = cmd_opts["rmm_mode"].as<std::string>(); }
+
+  if (config.stream_mode.empty()) {
+    config.stream_mode = cmd_opts["stream_mode"].as<std::string>();
+  }
+
+  if (config.stream_error_mode.empty()) {
+    config.stream_error_mode = cmd_opts["stream_error_mode"].as<std::string>();
+  }
+
+  [[maybe_unused]] static auto mr      = make_memory_resource_adaptor(config);
+  [[maybe_unused]] static auto adaptor = make_stream_mode_adaptor(config);
+}
+
 /**
  * @brief Macro that defines main function for gtest programs that use rmm
  *
- * Should be included in every test program that uses rmm allocators since
- * it maintains the lifespan of the rmm default memory resource.
  * This `main` function is a wrapper around the google test generated `main`,
- * maintaining the original functionality. In addition, this custom `main`
- * function parses the command line to customize test behavior, like the
- * allocation mode used for creating the default memory resource.
+ * maintaining the original functionality.
  */
-#define CUDF_TEST_PROGRAM_MAIN()                                            \
-  int main(int argc, char** argv)                                           \
-  {                                                                         \
-    ::testing::InitGoogleTest(&argc, argv);                                 \
-    auto const cmd_opts           = parse_cudf_test_opts(argc, argv);       \
-    [[maybe_unused]] auto mr      = make_memory_resource_adaptor(cmd_opts); \
-    [[maybe_unused]] auto adaptor = make_stream_mode_adaptor(cmd_opts);     \
-    return RUN_ALL_TESTS();                                                 \
+#define CUDF_TEST_PROGRAM_MAIN()            \
+  int main(int argc, char** argv)           \
+  {                                         \
+    ::testing::InitGoogleTest(&argc, argv); \
+    init_cudf_test(argc, argv);             \
+    return RUN_ALL_TESTS();                 \
   }
diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py
deleted file mode 100644
index e5e57dbf562..00000000000
--- a/cpp/scripts/run-clang-tidy.py
+++ /dev/null
@@ -1,253 +0,0 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import re
-import os
-import subprocess
-import argparse
-import json
-import multiprocessing as mp
-import shutil
-
-
-EXPECTED_VERSION = "16.0.6"
-VERSION_REGEX = re.compile(r"  LLVM version ([0-9.]+)")
-GPU_ARCH_REGEX = re.compile(r"sm_(\d+)")
-SPACES = re.compile(r"\s+")
-SEPARATOR = "-" * 16
-
-
-def parse_args():
-    argparser = argparse.ArgumentParser("Runs clang-tidy on a project")
-    argparser.add_argument("-cdb", type=str,
-                           # TODO This is a hack, needs to be fixed
-                           default="cpp/build/cuda-11.5.0/clang-tidy/release/compile_commands.clangd.json",
-                           help="Path to cmake-generated compilation database"
-                           " file. It is always found inside the root of the "
-                           "cmake build folder. So make sure that `cmake` has "
-                           "been run once before running this script!")
-    argparser.add_argument("-exe", type=str, default="clang-tidy",
-                           help="Path to clang-tidy exe")
-    argparser.add_argument("-ignore", type=str, default="[.]cu$|examples/kmeans/",
-                           help="Regex used to ignore files from checking")
-    argparser.add_argument("-select", type=str, default=None,
-                           help="Regex used to select files for checking")
-    argparser.add_argument("-j", type=int, default=-1,
-                           help="Number of parallel jobs to launch.")
-    args = argparser.parse_args()
-    if args.j <= 0:
-        args.j = mp.cpu_count()
-    args.ignore_compiled = re.compile(args.ignore) if args.ignore else None
-    args.select_compiled = re.compile(args.select) if args.select else None
-    ret = subprocess.check_output("%s --version" % args.exe, shell=True)
-    ret = ret.decode("utf-8")
-    version = VERSION_REGEX.search(ret)
-    if version is None:
-        raise Exception("Failed to figure out clang-tidy version!")
-    version = version.group(1)
-    if version != EXPECTED_VERSION:
-        raise Exception("clang-tidy exe must be v%s found '%s'" % \
-                        (EXPECTED_VERSION, version))
-    if not os.path.exists(args.cdb):
-        raise Exception("Compilation database '%s' missing" % args.cdb)
-    return args
-
-
-def get_all_commands(cdb):
-    with open(cdb) as fp:
-        return json.load(fp)
-
-
-def get_gpu_archs(command):
-    archs = []
-    for loc in range(len(command)):
-        if command[loc] != "-gencode":
-            continue
-        arch_flag = command[loc + 1]
-        match = GPU_ARCH_REGEX.search(arch_flag)
-        if match is not None:
-            archs.append("--cuda-gpu-arch=sm_%s" % match.group(1))
-    return archs
-
-
-def get_index(arr, item):
-    try:
-        return arr.index(item)
-    except:
-        return -1
-
-
-def remove_item(arr, item):
-    loc = get_index(arr, item)
-    if loc >= 0:
-        del arr[loc]
-    return loc
-
-
-def remove_item_plus_one(arr, item):
-    loc = get_index(arr, item)
-    if loc >= 0:
-        del arr[loc + 1]
-        del arr[loc]
-    return loc
-
-
-def get_clang_includes(exe):
-    dir = os.getenv("CONDA_PREFIX")
-    if dir is None:
-        ret = subprocess.check_output("which %s 2>&1" % exe, shell=True)
-        ret = ret.decode("utf-8")
-        dir = os.path.dirname(os.path.dirname(ret))
-    header = os.path.join(dir, "include", "ClangHeaders")
-    return ["-I", header]
-
-
-def get_tidy_args(cmd, exe):
-    command, file = cmd["command"], cmd["file"]
-    is_cuda = file.endswith(".cu")
-    command = re.split(SPACES, command)
-    # compiler is always clang++!
-    command[0] = "clang++"
-    # remove compilation and output targets from the original command
-    remove_item_plus_one(command, "-c")
-    remove_item_plus_one(command, "-o")
-    if is_cuda:
-        # replace nvcc's "-gencode ..." with clang's "--cuda-gpu-arch ..."
-        archs = get_gpu_archs(command)
-        command.extend(archs)
-        while True:
-            loc = remove_item_plus_one(command, "-gencode")
-            if loc < 0:
-                break
-        # "-x cuda" is the right usage in clang
-        loc = get_index(command, "-x")
-        if loc >= 0:
-            command[loc + 1] = "cuda"
-        remove_item_plus_one(command, "-ccbin")
-        remove_item(command, "--expt-extended-lambda")
-        remove_item(command, "--diag_suppress=unrecognized_gcc_pragma")
-    command.extend(get_clang_includes(exe))
-    return command, is_cuda
-
-
-def run_clang_tidy_command(tidy_cmd):
-    cmd = " ".join(tidy_cmd)
-    result = subprocess.run(cmd, check=False, shell=True,
-                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-    status = result.returncode == 0
-    if status:
-        out = ""
-    else:
-        out = "CMD: " + cmd
-    out += result.stdout.decode("utf-8").rstrip()
-    return status, out
-
-
-def run_clang_tidy(cmd, args):
-    command, is_cuda = get_tidy_args(cmd, args.exe)
-    tidy_cmd = [args.exe,
-                "-header-filter='.*cudf/cpp/(src|include|bench|comms).*'",
-                cmd["file"], "--", ]
-    tidy_cmd.extend(command)
-    status = True
-    out = ""
-    if is_cuda:
-        tidy_cmd.append("--cuda-device-only")
-        tidy_cmd.append(cmd["file"])
-        ret, out1 = run_clang_tidy_command(tidy_cmd)
-        out += out1
-        out += "%s" % SEPARATOR
-        if not ret:
-            status = ret
-        tidy_cmd[-2] = "--cuda-host-only"
-        ret, out1 = run_clang_tidy_command(tidy_cmd)
-        if not ret:
-            status = ret
-        out += out1
-    else:
-        tidy_cmd.append(cmd["file"])
-        ret, out1 = run_clang_tidy_command(tidy_cmd)
-        if not ret:
-            status = ret
-        out += out1
-    return status, out, cmd["file"]
-
-
-# yikes! global var :(
-results = []
-def collect_result(result):
-    global results
-    results.append(result)
-
-
-def print_result(passed, stdout, file):
-    status_str = "PASSED" if passed else "FAILED"
-    print(f"{SEPARATOR} File:{file} {status_str} {SEPARATOR}")
-    if stdout:
-        print(stdout)
-        print(f"{SEPARATOR} File:{file} ENDS {SEPARATOR}")
-
-
-def print_results():
-    global results
-    status = True
-    for passed, stdout, file in results:
-        print_result(passed, stdout, file)
-        if not passed:
-            status = False
-    return status
-
-
-def run_tidy_for_all_files(args, all_files):
-    pool = None if args.j == 1 else mp.Pool(args.j)
-    # actual tidy checker
-    for cmd in all_files:
-        # skip files that we don't want to look at
-        if args.ignore_compiled is not None and \
-           re.search(args.ignore_compiled, cmd["file"]) is not None:
-            continue
-        if args.select_compiled is not None and \
-           re.search(args.select_compiled, cmd["file"]) is None:
-            continue
-        if pool is not None:
-            pool.apply_async(run_clang_tidy, args=(cmd, args),
-                             callback=collect_result)
-        else:
-            passed, stdout, file = run_clang_tidy(cmd, args)
-            collect_result((passed, stdout, file))
-    if pool is not None:
-        pool.close()
-        pool.join()
-    return print_results()
-
-
-def main():
-    args = parse_args()
-    # Attempt to making sure that we run this script from root of repo always
-    if not os.path.exists(".git"):
-        raise Exception("This needs to always be run from the root of repo")
-    # Check whether clang-tidy exists
-    # print(args)
-    if "exe" not in args and shutil.which("clang-tidy") is not None:
-        print("clang-tidy not found. Exiting...")
-        return
-    all_files = get_all_commands(args.cdb)
-    status = run_tidy_for_all_files(args, all_files)
-    if not status:
-        raise Exception("clang-tidy failed! Refer to the errors above.")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 75767786272..0432b9d120a 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -16,7 +16,8 @@
 
 #include "flatten_single_pass_aggs.hpp"
 #include "groupby/common/utils.hpp"
-#include "groupby/hash/groupby_kernels.cuh"
+#include "groupby_kernels.cuh"
+#include "var_hash_functor.cuh"
 
 #include <cudf/aggregation.hpp>
 #include <cudf/column/column.hpp>
@@ -261,7 +262,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final
       rmm::exec_policy(stream),
       thrust::make_counting_iterator(0),
       col.size(),
-      ::cudf::detail::var_hash_functor{
+      var_hash_functor{
         set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof});
     sparse_results->add_result(col, agg, std::move(var_result));
     dense_results->add_result(col, agg, to_dense_agg_result(agg));
diff --git a/cpp/src/groupby/hash/groupby_kernels.cuh b/cpp/src/groupby/hash/groupby_kernels.cuh
index 188d0cff3f1..86f4d76487f 100644
--- a/cpp/src/groupby/hash/groupby_kernels.cuh
+++ b/cpp/src/groupby/hash/groupby_kernels.cuh
@@ -16,8 +16,6 @@
 
 #pragma once
 
-#include "multi_pass_kernels.cuh"
-
 #include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/detail/aggregation/device_aggregators.cuh>
 #include <cudf/groupby.hpp>
diff --git a/cpp/src/groupby/hash/multi_pass_kernels.cuh b/cpp/src/groupby/hash/var_hash_functor.cuh
similarity index 69%
rename from cpp/src/groupby/hash/multi_pass_kernels.cuh
rename to cpp/src/groupby/hash/var_hash_functor.cuh
index 7043eafdc10..bb55cc9188c 100644
--- a/cpp/src/groupby/hash/multi_pass_kernels.cuh
+++ b/cpp/src/groupby/hash/var_hash_functor.cuh
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #pragma once
 
 #include <cudf/aggregation.hpp>
@@ -21,17 +20,14 @@
 #include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/detail/utilities/assert.cuh>
 #include <cudf/dictionary/dictionary_column_view.hpp>
-#include <cudf/table/table_device_view.cuh>
 #include <cudf/utilities/type_dispatcher.hpp>
 
+#include <cuco/static_set_ref.cuh>
 #include <cuda/atomic>
+#include <cuda/std/type_traits>
 
-#include <cmath>
-
-namespace cudf {
-namespace detail {
-
-template <typename SetType, bool target_has_nulls = true, bool source_has_nulls = true>
+namespace cudf::groupby::detail::hash {
+template <typename SetType>
 struct var_hash_functor {
   SetType set;
   bitmask_type const* __restrict__ row_bitmask;
@@ -47,13 +43,13 @@ struct var_hash_functor {
                    column_device_view sum,
                    column_device_view count,
                    size_type ddof)
-    : set(set),
-      row_bitmask(row_bitmask),
-      target(target),
-      source(source),
-      sum(sum),
-      count(count),
-      ddof(ddof)
+    : set{set},
+      row_bitmask{row_bitmask},
+      target{target},
+      source{source},
+      sum{sum},
+      count{count},
+      ddof{ddof}
   {
   }
 
@@ -64,23 +60,21 @@ struct var_hash_functor {
   }
 
   template <typename Source>
-  __device__ std::enable_if_t<!is_supported<Source>()> operator()(column_device_view const& source,
-                                                                  size_type source_index,
-                                                                  size_type target_index) noexcept
+  __device__ cuda::std::enable_if_t<!is_supported<Source>()> operator()(
+    column_device_view const& source, size_type source_index, size_type target_index) noexcept
   {
     CUDF_UNREACHABLE("Invalid source type for std, var aggregation combination.");
   }
 
   template <typename Source>
-  __device__ std::enable_if_t<is_supported<Source>()> operator()(column_device_view const& source,
-                                                                 size_type source_index,
-                                                                 size_type target_index) noexcept
+  __device__ cuda::std::enable_if_t<is_supported<Source>()> operator()(
+    column_device_view const& source, size_type source_index, size_type target_index) noexcept
   {
-    using Target    = target_type_t<Source, aggregation::VARIANCE>;
-    using SumType   = target_type_t<Source, aggregation::SUM>;
-    using CountType = target_type_t<Source, aggregation::COUNT_VALID>;
+    using Target    = cudf::detail::target_type_t<Source, aggregation::VARIANCE>;
+    using SumType   = cudf::detail::target_type_t<Source, aggregation::SUM>;
+    using CountType = cudf::detail::target_type_t<Source, aggregation::COUNT_VALID>;
 
-    if (source_has_nulls and source.is_null(source_index)) return;
+    if (source.is_null(source_index)) return;
     CountType group_size = count.element<CountType>(target_index);
     if (group_size == 0 or group_size - ddof <= 0) return;
 
@@ -91,8 +85,9 @@ struct var_hash_functor {
     ref.fetch_add(result, cuda::std::memory_order_relaxed);
     // STD sqrt is applied in finalize()
 
-    if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); }
+    if (target.is_null(target_index)) { target.set_valid(target_index); }
   }
+
   __device__ inline void operator()(size_type source_index)
   {
     if (row_bitmask == nullptr or cudf::bit_is_set(row_bitmask, source_index)) {
@@ -110,6 +105,4 @@ struct var_hash_functor {
     }
   }
 };
-
-}  // namespace detail
-}  // namespace cudf
+}  // namespace cudf::groupby::detail::hash
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 76816071d8c..69a51fab5dc 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -83,8 +83,7 @@ struct tree_node {
 void check_input_size(std::size_t input_size)
 {
   // Transduce() writes symbol offsets that may be as large input_size-1
-  CUDF_EXPECTS(input_size == 0 ||
-                 (input_size - 1) <= std::numeric_limits<cudf::io::json::SymbolOffsetT>::max(),
+  CUDF_EXPECTS(input_size == 0 || (input_size - 1) <= std::numeric_limits<int32_t>::max(),
                "Given JSON input is too large");
 }
 }  // namespace
diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu
index 99a5b17bce8..c424d2b3b62 100644
--- a/cpp/src/io/json/read_json.cu
+++ b/cpp/src/io/json/read_json.cu
@@ -351,10 +351,16 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
    * JSON inputs.
    */
   std::size_t const total_source_size = sources_size(sources, 0, 0);
-  std::size_t chunk_offset            = reader_opts.get_byte_range_offset();
-  std::size_t chunk_size              = reader_opts.get_byte_range_size();
-  chunk_size                          = !chunk_size ? total_source_size - chunk_offset
-                                                    : std::min(chunk_size, total_source_size - chunk_offset);
+
+  // Batching is enabled only for JSONL inputs, not regular JSON files
+  CUDF_EXPECTS(
+    reader_opts.is_enabled_lines() || total_source_size < std::numeric_limits<int32_t>::max(),
+    "Parsing Regular JSON inputs of size greater than INT_MAX bytes is not supported");
+
+  std::size_t chunk_offset = reader_opts.get_byte_range_offset();
+  std::size_t chunk_size   = reader_opts.get_byte_range_size();
+  chunk_size               = !chunk_size ? total_source_size - chunk_offset
+                                         : std::min(chunk_size, total_source_size - chunk_offset);
 
   std::size_t const size_per_subchunk      = estimate_size_per_subchunk(chunk_size);
   std::size_t const batch_size_upper_bound = get_batch_size_upper_bound();
diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu
index 01ee5ad177d..ecf319e75ab 100644
--- a/cpp/src/io/orc/reader_impl_chunking.cu
+++ b/cpp/src/io/orc/reader_impl_chunking.cu
@@ -500,6 +500,8 @@ void reader_impl::load_next_stripe_data(read_mode mode)
   auto const [read_begin, read_end] =
     merge_selected_ranges(_file_itm_data.stripe_data_read_ranges, load_stripe_range);
 
+  bool stream_synchronized{false};
+
   for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) {
     auto const& read_info = _file_itm_data.data_read_info[read_idx];
     auto const source_ptr = _metadata.per_file_metadata[read_info.source_idx].source;
@@ -507,6 +509,13 @@ void reader_impl::load_next_stripe_data(read_mode mode)
       lvl_stripe_data[read_info.level][read_info.stripe_idx - stripe_start].data());
 
     if (source_ptr->is_device_read_preferred(read_info.length)) {
+      // `device_read_async` may not use _stream at all.
+      // Instead, it may use some other stream(s) to sync the H->D memcpy.
+      // As such, we need to make sure the device buffers in `lvl_stripe_data` are ready first.
+      if (!stream_synchronized) {
+        _stream.synchronize();
+        stream_synchronized = true;
+      }
       device_read_tasks.push_back(
         std::pair(source_ptr->device_read_async(
                     read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream),
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 4596ec65ce7..799a84cbc37 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -56,8 +56,15 @@ function(ConfigureTest CMAKE_TEST_NAME)
 
   target_link_libraries(
     ${CMAKE_TEST_NAME}
-    PRIVATE cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main
-            nvtx3::nvtx3-cpp $<TARGET_NAME_IF_EXISTS:conda_env> "${_CUDF_TEST_EXTRA_LIBS}"
+    PRIVATE cudf::cudftestutil
+            cudf::cudftestutil_impl
+            GTest::gmock
+            GTest::gmock_main
+            GTest::gtest
+            GTest::gtest_main
+            nvtx3::nvtx3-cpp
+            $<TARGET_NAME_IF_EXISTS:conda_env>
+            "${_CUDF_TEST_EXTRA_LIBS}"
   )
   rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME})
   rapids_test_add(
@@ -76,6 +83,7 @@ function(ConfigureTest CMAKE_TEST_NAME)
         "GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage_mode_${_CUDF_TEST_STREAM_MODE}>"
     )
   endif()
+  enable_clang_tidy(${CMAKE_TEST_NAME})
 endfunction()
 
 # ##################################################################################################
diff --git a/cpp/tests/interop/nanoarrow_utils.hpp b/cpp/tests/interop/nanoarrow_utils.hpp
index a961f73d955..8be7e087b6d 100644
--- a/cpp/tests/interop/nanoarrow_utils.hpp
+++ b/cpp/tests/interop/nanoarrow_utils.hpp
@@ -256,7 +256,8 @@ std::enable_if_t<std::is_same_v<T, bool>, nanoarrow::UniqueArray> get_nanoarrow_
     ArrowBitmap out;
     ArrowBitmapInit(&out);
     NANOARROW_THROW_NOT_OK(ArrowBitmapResize(&out, b.size(), 1));
-    std::memset(out.buffer.data, 0, out.buffer.size_bytes);
+    // TODO: Investigate clang-tidy issue further after nanoarrow is made compliant
+    std::memset(out.buffer.data, 0, out.buffer.size_bytes);  // NOLINT
 
     for (size_t i = 0; i < b.size(); ++i) {
       ArrowBitSetTo(out.buffer.data, i, static_cast<uint8_t>(b[i]));
diff --git a/cpp/tests/io/metadata_utilities.cpp b/cpp/tests/io/metadata_utilities.cpp
index 84f04f67038..380d66c53f9 100644
--- a/cpp/tests/io/metadata_utilities.cpp
+++ b/cpp/tests/io/metadata_utilities.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,10 +14,9 @@
  * limitations under the License.
  */
 
+#include <cudf_test/cudf_gtest.hpp>
 #include <cudf_test/io_metadata_utilities.hpp>
 
-#include <gmock/gmock.h>
-
 namespace cudf::test {
 
 void expect_metadata_equal(cudf::io::table_input_metadata in_meta,
diff --git a/cpp/tests/large_strings/large_strings_fixture.cpp b/cpp/tests/large_strings/large_strings_fixture.cpp
index 249319da7f7..7b61be113f9 100644
--- a/cpp/tests/large_strings/large_strings_fixture.cpp
+++ b/cpp/tests/large_strings/large_strings_fixture.cpp
@@ -123,12 +123,9 @@ LargeStringsData* StringsLargeTest::g_ls_data = nullptr;
 int main(int argc, char** argv)
 {
   ::testing::InitGoogleTest(&argc, argv);
-  auto const cmd_opts = parse_cudf_test_opts(argc, argv);
-  // hardcoding the CUDA memory resource to keep from exceeding the pool
-  auto mr = cudf::test::make_cuda();
-  cudf::set_current_device_resource(mr.get());
-  auto adaptor = make_stream_mode_adaptor(cmd_opts);
-
+  cudf::test::config config;
+  config.rmm_mode = "cuda";
+  init_cudf_test(argc, argv, config);
   // create object to automatically be destroyed at the end of main()
   auto lsd = cudf::test::StringsLargeTest::get_ls_data();
 
diff --git a/cpp/tests/utilities/table_utilities.cu b/cpp/tests/utilities/table_utilities.cu
index 354c0b1b57e..8e4906408de 100644
--- a/cpp/tests/utilities/table_utilities.cu
+++ b/cpp/tests/utilities/table_utilities.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,10 +15,9 @@
  */
 
 #include <cudf_test/column_utilities.hpp>
+#include <cudf_test/cudf_gtest.hpp>
 #include <cudf_test/table_utilities.hpp>
 
-#include <gmock/gmock.h>
-
 namespace cudf::test::detail {
 void expect_table_properties_equal(cudf::table_view lhs, cudf::table_view rhs)
 {
diff --git a/dependencies.yaml b/dependencies.yaml
index ca17917c905..ff97b67f0ce 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -6,10 +6,18 @@ files:
       cuda: ["11.8", "12.5"]
       arch: [x86_64]
     includes:
+      # Note that clang-tidy is not included here because cudf's preferred
+      # version conflicts with the rest of RAPIDS as well as its own
+      # clang-format version. Until we update our clang-format version we will
+      # not be able to install both into the same environment. Moreover, using
+      # this version will break compatibility with other RAPIDS libraries that
+      # are still using 16.0.6, and as such will and that would break any
+      # unified environment like that used in unified devcontainers.
       - build_base
       - build_all
       - build_cpp
       - build_python_common
+      - clang_format
       - cuda
       - cuda_version
       - depends_on_cupy
@@ -86,6 +94,16 @@ files:
     includes:
       - develop
       - py_version
+  clang_tidy:
+    output: none
+    includes:
+      - build_all
+      - build_base
+      - clang_tidy
+      - cuda
+      - cuda_version
+      - develop
+      - py_version
   docs:
     output: none
     includes:
@@ -553,11 +571,21 @@ dependencies:
           # pre-commit requires identify minimum version 1.0, but clang-format requires textproto support and that was
           # added in 2.5.20, so we need to call out the minimum version needed for our plugins
           - identify>=2.5.20
+      - output_types: conda
+        packages:
+          - &doxygen doxygen=1.9.1 # pre-commit hook needs a specific version.
+  clang_format:
+    common:
       - output_types: conda
         packages:
           - clang==16.0.6
           - clang-tools=16.0.6
-          - &doxygen doxygen=1.9.1 # pre-commit hook needs a specific version.
+  clang_tidy:
+    common:
+      - output_types: conda
+        packages:
+          - clang==19.1.0
+          - clang-tools==19.1.0
   docs:
     common:
       - output_types: [conda]