diff --git a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index 2a195c6c81d..a0e193ff0bf 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -15,9 +15,31 @@ ], "hostRequirements": {"gpu": "optional"}, "features": { + "ghcr.io/rapidsai/devcontainers/features/cuda:24.12": { + "version": "12.5", + "installCompilers": false, + "installProfilers": true, + "installDevPackages": false, + "installcuDNN": false, + "installcuTensor": false, + "installNCCL": false, + "installCUDARuntime": false, + "installNVRTC": false, + "installOpenCL": false, + "installcuBLAS": false, + "installcuSPARSE": false, + "installcuFFT": false, + "installcuFile": false, + "installcuRAND": false, + "installcuSOLVER": false, + "installNPP": false, + "installnvJPEG": false, + "pruneStaticLibs": true + }, "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.12": {} }, "overrideFeatureInstallOrder": [ + "ghcr.io/rapidsai/devcontainers/features/cuda", "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" ], "initializeCommand": ["/bin/bash", "-c", "mkdir -m 0755 -p ${localWorkspaceFolder}/../.{aws,cache,config,conda/pkgs,conda/${localWorkspaceFolderBasename}-cuda12.5-envs}"], diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a22d3c5b9cc..1275aad757c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -47,11 +47,23 @@ jobs: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 with: - build_type: pull-request + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} # Use the wheel container so we can skip conda solves and since our # primary static consumers (Spark) are not in conda anyway. container_image: "rapidsai/ci-wheel:latest" run_script: "ci/configure_cpp_static.sh" + clang-tidy: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + run_script: "ci/clang_tidy.sh" conda-python-cudf-tests: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12 diff --git a/ci/clang_tidy.sh b/ci/clang_tidy.sh new file mode 100755 index 00000000000..4d5d3fc3136 --- /dev/null +++ b/ci/clang_tidy.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +rapids-logger "Create clang-tidy conda environment" +. /opt/conda/etc/profile.d/conda.sh + +ENV_YAML_DIR="$(mktemp -d)" + +rapids-dependency-file-generator \ + --output conda \ + --file-key clang_tidy \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" + +rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n clang_tidy + +# Temporarily allow unbound variables for conda activation. +set +u +conda activate clang_tidy +set -u + +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + +source rapids-configure-sccache + +# Run the build via CMake, which will run clang-tidy when CUDF_CLANG_TIDY is enabled. +cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_CLANG_TIDY=ON -GNinja +cmake --build cpp/build diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 870901d223b..95f36653c2c 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -93,6 +93,7 @@ sed_runner "s/cudf-.*-SNAPSHOT/cudf-${NEXT_FULL_JAVA_TAG}/g" java/ci/README.md # .devcontainer files find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}" + sed_runner "s@rapidsai/devcontainers/features/cuda:[0-9.]*@rapidsai/devcontainers/features/cuda:${NEXT_SHORT_TAG_PEP440}@" "${filename}" sed_runner "s@rapidsai/devcontainers/features/rapids-build-utils:[0-9.]*@rapidsai/devcontainers/features/rapids-build-utils:${NEXT_SHORT_TAG_PEP440}@" "${filename}" sed_runner "s@rapids-\${localWorkspaceFolderBasename}-[0-9.]*@rapids-\${localWorkspaceFolderBasename}-${NEXT_SHORT_TAG}@g" "${filename}" done diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy index 2d4f8c0d80e..12120a5c6d1 100644 --- a/cpp/.clang-tidy +++ b/cpp/.clang-tidy @@ -39,7 +39,7 @@ Checks: -clang-analyzer-optin.core.EnumCastOutOfRange, -clang-analyzer-optin.cplusplus.UninitializedObject' -WarningsAsErrors: '' +WarningsAsErrors: '*' HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*' ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*' FormatStyle: none diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a0ea9579475..32a753c9f40 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -88,6 +88,7 @@ option( ${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL} ) mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL) +option(CUDF_CLANG_TIDY "Enable clang-tidy checking" OFF) message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}") message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") @@ -144,6 +145,58 @@ if(NOT CUDF_GENERATED_INCLUDE_DIR) set(CUDF_GENERATED_INCLUDE_DIR ${CUDF_BINARY_DIR}) endif() +# ################################################################################################## +# * clang-tidy configuration ---------------------------------------------------------------------- +if(CUDF_CLANG_TIDY) + find_program( + CLANG_TIDY_EXE + NAMES "clang-tidy" + DOC "Path to clang-tidy executable" REQUIRED + ) + + execute_process( + COMMAND ${CLANG_TIDY_EXE} --version + OUTPUT_VARIABLE CLANG_TIDY_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + string(REGEX MATCH "LLVM version ([0-9]+\\.[0-9]+)\\.[0-9]+" LLVM_VERSION_MATCH + "${CLANG_TIDY_OUTPUT}" + ) + # Discard the patch version and allow it to float. Empirically, results between patch versions are + # mostly stable, and looking at available packages on some package managers sometimes patch + # versions are skipped so we don't want to constrain to a patch version that the user can't + # install. + set(LLVM_VERSION "${CMAKE_MATCH_1}") + set(expected_clang_tidy_version 19.1) + if(NOT expected_clang_tidy_version VERSION_EQUAL LLVM_VERSION) + message( + FATAL_ERROR + "clang-tidy version ${expected_clang_tidy_version} is required, but found ${LLVM_VERSION}" + ) + endif() +endif() + +# Turn on the clang-tidy property for a target excluding the files specified in SKIPPED_FILES. +function(enable_clang_tidy target) + set(_tidy_options) + set(_tidy_one_value) + set(_tidy_multi_value SKIPPED_FILES) + cmake_parse_arguments( + _TIDY "${_tidy_options}" "${_tidy_one_value}" "${_tidy_multi_value}" ${ARGN} + ) + + if(CUDF_CLANG_TIDY) + # clang will complain about unused link libraries on the compile line unless we specify + # -Qunused-arguments. + set_target_properties( + ${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments" + ) + foreach(file IN LISTS _TIDY_SKIPPED_FILES) + set_source_files_properties(${file} PROPERTIES SKIP_LINTING ON) + endforeach() + endif() +endfunction() + # ################################################################################################## # * conda environment ----------------------------------------------------------------------------- rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) @@ -714,6 +767,7 @@ target_compile_options( cudf PRIVATE "$<$:${CUDF_CXX_FLAGS}>" "$<$:${CUDF_CUDA_FLAGS}>" ) +enable_clang_tidy(cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp) if(CUDF_BUILD_STACKTRACE_DEBUG) # Remove any optimization level to avoid nvcc warning "incompatible redefinition for option @@ -863,15 +917,7 @@ if(CUDF_BUILD_TESTUTIL) add_library(cudf::cudftest_default_stream ALIAS cudftest_default_stream) - add_library( - cudftestutil SHARED - tests/io/metadata_utilities.cpp - tests/utilities/column_utilities.cu - tests/utilities/debug_utilities.cu - tests/utilities/random_seed.cpp - tests/utilities/table_utilities.cu - tests/utilities/tdigest_utilities.cu - ) + add_library(cudftestutil INTERFACE) set_target_properties( cudftestutil @@ -880,32 +926,56 @@ if(CUDF_BUILD_TESTUTIL) # set target compile options CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON - CXX_VISIBILITY_PRESET hidden CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON - CUDA_VISIBILITY_PRESET hidden - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON ) target_compile_options( - cudftestutil PUBLIC "$:${CUDF_CXX_FLAGS}>>" - "$:${CUDF_CUDA_FLAGS}>>" + cudftestutil INTERFACE "$:${CUDF_CXX_FLAGS}>>" + "$:${CUDF_CUDA_FLAGS}>>" ) target_link_libraries( - cudftestutil - PUBLIC Threads::Threads cudf cudftest_default_stream - PRIVATE GTest::gmock GTest::gtest $ + cudftestutil INTERFACE Threads::Threads cudf cudftest_default_stream + $ ) target_include_directories( - cudftestutil PUBLIC "$" - "$" + cudftestutil INTERFACE "$" + "$" ) rapids_cuda_set_runtime(cudftestutil USE_STATIC ${CUDA_STATIC_RUNTIME}) add_library(cudf::cudftestutil ALIAS cudftestutil) + add_library(cudftestutil_impl INTERFACE) + add_library(cudf::cudftestutil_impl ALIAS cudftestutil_impl) + target_sources( + cudftestutil_impl + INTERFACE $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + ) + target_link_libraries(cudftestutil_impl INTERFACE cudf::cudftestutil) + + install(FILES tests/io/metadata_utilities.cpp DESTINATION src/cudftestutil/io) + install( + FILES tests/utilities/column_utilities.cu + tests/utilities/debug_utilities.cu + tests/utilities/random_seed.cpp + tests/utilities/table_utilities.cu + tests/utilities/tdigest_utilities.cu + DESTINATION src/cudftestutil/utilities + ) + endif() # * build cudf_identify_stream_usage -------------------------------------------------------------- @@ -1006,7 +1076,7 @@ install( set(_components_export_string) if(TARGET cudftestutil) install( - TARGETS cudftest_default_stream cudftestutil + TARGETS cudftest_default_stream cudftestutil cudftestutil_impl DESTINATION ${lib_dir} EXPORT cudf-testing-exports ) @@ -1046,14 +1116,15 @@ targets: This module offers an optional testing component which defines the following IMPORTED GLOBAL targets: - cudf::cudftestutil - The main cudf testing library + cudf::cudftestutil - The main cudf testing library + cudf::cudftestutil_impl - C++ and CUDA sources to compile for definitions in cudf::cudftestutil ]=] ) rapids_export( INSTALL cudf EXPORT_SET cudf-exports ${_components_export_string} - GLOBAL_TARGETS cudf cudftestutil + GLOBAL_TARGETS cudf cudftestutil cudftestutil_impl NAMESPACE cudf:: DOCUMENTATION doc_string ) @@ -1074,7 +1145,7 @@ endif() rapids_export( BUILD cudf EXPORT_SET cudf-exports ${_components_export_string} - GLOBAL_TARGETS cudf cudftestutil + GLOBAL_TARGETS cudf cudftestutil cudftestutil_impl NAMESPACE cudf:: DOCUMENTATION doc_string FINAL_CODE_BLOCK build_code_string diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index b0f75b25975..d6fc5dc6039 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -25,7 +25,7 @@ target_compile_options( target_link_libraries( cudf_datagen PUBLIC GTest::gmock GTest::gtest benchmark::benchmark nvbench::nvbench Threads::Threads cudf - cudftestutil nvtx3::nvtx3-cpp + cudf::cudftestutil nvtx3::nvtx3-cpp PRIVATE $ ) @@ -49,7 +49,7 @@ target_compile_options( target_link_libraries( ndsh_data_generator - PUBLIC cudf cudftestutil nvtx3::nvtx3-cpp + PUBLIC cudf GTest::gmock GTest::gtest cudf::cudftestutil nvtx3::nvtx3-cpp PRIVATE $ ) @@ -65,14 +65,14 @@ target_include_directories( # Use an OBJECT library so we only compile these helper source files only once add_library( cudf_benchmark_common OBJECT - "${CUDF_SOURCE_DIR}/tests/utilities/random_seed.cpp" - synchronization/synchronization.cpp - io/cuio_common.cpp - common/table_utilities.cpp - common/benchmark_utilities.cpp - common/nvbench_utilities.cpp + synchronization/synchronization.cpp io/cuio_common.cpp common/table_utilities.cpp + common/benchmark_utilities.cpp common/nvbench_utilities.cpp ) -target_link_libraries(cudf_benchmark_common PRIVATE cudf_datagen $) +target_link_libraries( + cudf_benchmark_common PRIVATE cudf_datagen $ GTest::gmock + GTest::gtest +) + add_custom_command( OUTPUT CUDF_BENCHMARKS COMMAND echo Running benchmarks @@ -99,7 +99,7 @@ function(ConfigureBench CMAKE_BENCH_NAME) ) target_link_libraries( ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main - $ + cudf::cudftestutil_impl $ ) add_custom_command( OUTPUT CUDF_BENCHMARKS @@ -127,8 +127,9 @@ function(ConfigureNVBench CMAKE_BENCH_NAME) INSTALL_RPATH "\$ORIGIN/../../../lib" ) target_link_libraries( - ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen - nvbench::nvbench $ + ${CMAKE_BENCH_NAME} + PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen nvbench::nvbench + $ cudf::cudftestutil_impl ) install( TARGETS ${CMAKE_BENCH_NAME} diff --git a/cpp/include/cudf_test/testing_main.hpp b/cpp/include/cudf_test/testing_main.hpp index 272c91133f8..2bd08f410e0 100644 --- a/cpp/include/cudf_test/testing_main.hpp +++ b/cpp/include/cudf_test/testing_main.hpp @@ -16,6 +16,7 @@ #pragma once +#include #include #include @@ -36,6 +37,12 @@ namespace CUDF_EXPORT cudf { namespace test { +struct config { + std::string rmm_mode; + std::string stream_mode; + std::string stream_error_mode; +}; + /// MR factory functions inline auto make_cuda() { return std::make_shared(); } @@ -157,10 +164,9 @@ inline auto parse_cudf_test_opts(int argc, char** argv) * @param cmd_opts Command line options returned by parse_cudf_test_opts * @return Memory resource adaptor */ -inline auto make_memory_resource_adaptor(cxxopts::ParseResult const& cmd_opts) +inline auto make_memory_resource_adaptor(cudf::test::config const& config) { - auto const rmm_mode = cmd_opts["rmm_mode"].as(); - auto resource = cudf::test::create_memory_resource(rmm_mode); + auto resource = cudf::test::create_memory_resource(config.rmm_mode); cudf::set_current_device_resource(resource.get()); return resource; } @@ -176,37 +182,54 @@ inline auto make_memory_resource_adaptor(cxxopts::ParseResult const& cmd_opts) * @param cmd_opts Command line options returned by parse_cudf_test_opts * @return Memory resource adaptor */ -inline auto make_stream_mode_adaptor(cxxopts::ParseResult const& cmd_opts) +inline auto make_stream_mode_adaptor(cudf::test::config const& config) { auto resource = cudf::get_current_device_resource_ref(); - auto const stream_mode = cmd_opts["stream_mode"].as(); - auto const stream_error_mode = cmd_opts["stream_error_mode"].as(); - auto const error_on_invalid_stream = (stream_error_mode == "error"); - auto const check_default_stream = (stream_mode == "new_cudf_default"); + auto const error_on_invalid_stream = (config.stream_error_mode == "error"); + auto const check_default_stream = (config.stream_mode == "new_cudf_default"); auto adaptor = cudf::test::stream_checking_resource_adaptor( resource, error_on_invalid_stream, check_default_stream); - if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { + if ((config.stream_mode == "new_cudf_default") || (config.stream_mode == "new_testing_default")) { cudf::set_current_device_resource(&adaptor); } return adaptor; } +/** + * @brief Should be called in every test program that uses rmm allocators since it maintains the + * lifespan of the rmm default memory resource. this function parses the command line to customize + * test behavior, like the allocation mode used for creating the default memory resource. + * + */ +inline void init_cudf_test(int argc, char** argv, cudf::test::config const& config_override = {}) +{ + // static lifetime to keep rmm resource alive till tests end + auto const cmd_opts = parse_cudf_test_opts(argc, argv); + cudf::test::config config = config_override; + if (config.rmm_mode.empty()) { config.rmm_mode = cmd_opts["rmm_mode"].as(); } + + if (config.stream_mode.empty()) { + config.stream_mode = cmd_opts["stream_mode"].as(); + } + + if (config.stream_error_mode.empty()) { + config.stream_error_mode = cmd_opts["stream_error_mode"].as(); + } + + [[maybe_unused]] static auto mr = make_memory_resource_adaptor(config); + [[maybe_unused]] static auto adaptor = make_stream_mode_adaptor(config); +} + /** * @brief Macro that defines main function for gtest programs that use rmm * - * Should be included in every test program that uses rmm allocators since - * it maintains the lifespan of the rmm default memory resource. * This `main` function is a wrapper around the google test generated `main`, - * maintaining the original functionality. In addition, this custom `main` - * function parses the command line to customize test behavior, like the - * allocation mode used for creating the default memory resource. + * maintaining the original functionality. */ -#define CUDF_TEST_PROGRAM_MAIN() \ - int main(int argc, char** argv) \ - { \ - ::testing::InitGoogleTest(&argc, argv); \ - auto const cmd_opts = parse_cudf_test_opts(argc, argv); \ - [[maybe_unused]] auto mr = make_memory_resource_adaptor(cmd_opts); \ - [[maybe_unused]] auto adaptor = make_stream_mode_adaptor(cmd_opts); \ - return RUN_ALL_TESTS(); \ +#define CUDF_TEST_PROGRAM_MAIN() \ + int main(int argc, char** argv) \ + { \ + ::testing::InitGoogleTest(&argc, argv); \ + init_cudf_test(argc, argv); \ + return RUN_ALL_TESTS(); \ } diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py deleted file mode 100644 index e5e57dbf562..00000000000 --- a/cpp/scripts/run-clang-tidy.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import re -import os -import subprocess -import argparse -import json -import multiprocessing as mp -import shutil - - -EXPECTED_VERSION = "16.0.6" -VERSION_REGEX = re.compile(r" LLVM version ([0-9.]+)") -GPU_ARCH_REGEX = re.compile(r"sm_(\d+)") -SPACES = re.compile(r"\s+") -SEPARATOR = "-" * 16 - - -def parse_args(): - argparser = argparse.ArgumentParser("Runs clang-tidy on a project") - argparser.add_argument("-cdb", type=str, - # TODO This is a hack, needs to be fixed - default="cpp/build/cuda-11.5.0/clang-tidy/release/compile_commands.clangd.json", - help="Path to cmake-generated compilation database" - " file. It is always found inside the root of the " - "cmake build folder. So make sure that `cmake` has " - "been run once before running this script!") - argparser.add_argument("-exe", type=str, default="clang-tidy", - help="Path to clang-tidy exe") - argparser.add_argument("-ignore", type=str, default="[.]cu$|examples/kmeans/", - help="Regex used to ignore files from checking") - argparser.add_argument("-select", type=str, default=None, - help="Regex used to select files for checking") - argparser.add_argument("-j", type=int, default=-1, - help="Number of parallel jobs to launch.") - args = argparser.parse_args() - if args.j <= 0: - args.j = mp.cpu_count() - args.ignore_compiled = re.compile(args.ignore) if args.ignore else None - args.select_compiled = re.compile(args.select) if args.select else None - ret = subprocess.check_output("%s --version" % args.exe, shell=True) - ret = ret.decode("utf-8") - version = VERSION_REGEX.search(ret) - if version is None: - raise Exception("Failed to figure out clang-tidy version!") - version = version.group(1) - if version != EXPECTED_VERSION: - raise Exception("clang-tidy exe must be v%s found '%s'" % \ - (EXPECTED_VERSION, version)) - if not os.path.exists(args.cdb): - raise Exception("Compilation database '%s' missing" % args.cdb) - return args - - -def get_all_commands(cdb): - with open(cdb) as fp: - return json.load(fp) - - -def get_gpu_archs(command): - archs = [] - for loc in range(len(command)): - if command[loc] != "-gencode": - continue - arch_flag = command[loc + 1] - match = GPU_ARCH_REGEX.search(arch_flag) - if match is not None: - archs.append("--cuda-gpu-arch=sm_%s" % match.group(1)) - return archs - - -def get_index(arr, item): - try: - return arr.index(item) - except: - return -1 - - -def remove_item(arr, item): - loc = get_index(arr, item) - if loc >= 0: - del arr[loc] - return loc - - -def remove_item_plus_one(arr, item): - loc = get_index(arr, item) - if loc >= 0: - del arr[loc + 1] - del arr[loc] - return loc - - -def get_clang_includes(exe): - dir = os.getenv("CONDA_PREFIX") - if dir is None: - ret = subprocess.check_output("which %s 2>&1" % exe, shell=True) - ret = ret.decode("utf-8") - dir = os.path.dirname(os.path.dirname(ret)) - header = os.path.join(dir, "include", "ClangHeaders") - return ["-I", header] - - -def get_tidy_args(cmd, exe): - command, file = cmd["command"], cmd["file"] - is_cuda = file.endswith(".cu") - command = re.split(SPACES, command) - # compiler is always clang++! - command[0] = "clang++" - # remove compilation and output targets from the original command - remove_item_plus_one(command, "-c") - remove_item_plus_one(command, "-o") - if is_cuda: - # replace nvcc's "-gencode ..." with clang's "--cuda-gpu-arch ..." - archs = get_gpu_archs(command) - command.extend(archs) - while True: - loc = remove_item_plus_one(command, "-gencode") - if loc < 0: - break - # "-x cuda" is the right usage in clang - loc = get_index(command, "-x") - if loc >= 0: - command[loc + 1] = "cuda" - remove_item_plus_one(command, "-ccbin") - remove_item(command, "--expt-extended-lambda") - remove_item(command, "--diag_suppress=unrecognized_gcc_pragma") - command.extend(get_clang_includes(exe)) - return command, is_cuda - - -def run_clang_tidy_command(tidy_cmd): - cmd = " ".join(tidy_cmd) - result = subprocess.run(cmd, check=False, shell=True, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - status = result.returncode == 0 - if status: - out = "" - else: - out = "CMD: " + cmd - out += result.stdout.decode("utf-8").rstrip() - return status, out - - -def run_clang_tidy(cmd, args): - command, is_cuda = get_tidy_args(cmd, args.exe) - tidy_cmd = [args.exe, - "-header-filter='.*cudf/cpp/(src|include|bench|comms).*'", - cmd["file"], "--", ] - tidy_cmd.extend(command) - status = True - out = "" - if is_cuda: - tidy_cmd.append("--cuda-device-only") - tidy_cmd.append(cmd["file"]) - ret, out1 = run_clang_tidy_command(tidy_cmd) - out += out1 - out += "%s" % SEPARATOR - if not ret: - status = ret - tidy_cmd[-2] = "--cuda-host-only" - ret, out1 = run_clang_tidy_command(tidy_cmd) - if not ret: - status = ret - out += out1 - else: - tidy_cmd.append(cmd["file"]) - ret, out1 = run_clang_tidy_command(tidy_cmd) - if not ret: - status = ret - out += out1 - return status, out, cmd["file"] - - -# yikes! global var :( -results = [] -def collect_result(result): - global results - results.append(result) - - -def print_result(passed, stdout, file): - status_str = "PASSED" if passed else "FAILED" - print(f"{SEPARATOR} File:{file} {status_str} {SEPARATOR}") - if stdout: - print(stdout) - print(f"{SEPARATOR} File:{file} ENDS {SEPARATOR}") - - -def print_results(): - global results - status = True - for passed, stdout, file in results: - print_result(passed, stdout, file) - if not passed: - status = False - return status - - -def run_tidy_for_all_files(args, all_files): - pool = None if args.j == 1 else mp.Pool(args.j) - # actual tidy checker - for cmd in all_files: - # skip files that we don't want to look at - if args.ignore_compiled is not None and \ - re.search(args.ignore_compiled, cmd["file"]) is not None: - continue - if args.select_compiled is not None and \ - re.search(args.select_compiled, cmd["file"]) is None: - continue - if pool is not None: - pool.apply_async(run_clang_tidy, args=(cmd, args), - callback=collect_result) - else: - passed, stdout, file = run_clang_tidy(cmd, args) - collect_result((passed, stdout, file)) - if pool is not None: - pool.close() - pool.join() - return print_results() - - -def main(): - args = parse_args() - # Attempt to making sure that we run this script from root of repo always - if not os.path.exists(".git"): - raise Exception("This needs to always be run from the root of repo") - # Check whether clang-tidy exists - # print(args) - if "exe" not in args and shutil.which("clang-tidy") is not None: - print("clang-tidy not found. Exiting...") - return - all_files = get_all_commands(args.cdb) - status = run_tidy_for_all_files(args, all_files) - if not status: - raise Exception("clang-tidy failed! Refer to the errors above.") - - -if __name__ == "__main__": - main() diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 75767786272..0432b9d120a 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -16,7 +16,8 @@ #include "flatten_single_pass_aggs.hpp" #include "groupby/common/utils.hpp" -#include "groupby/hash/groupby_kernels.cuh" +#include "groupby_kernels.cuh" +#include "var_hash_functor.cuh" #include #include @@ -261,7 +262,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final rmm::exec_policy(stream), thrust::make_counting_iterator(0), col.size(), - ::cudf::detail::var_hash_functor{ + var_hash_functor{ set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof}); sparse_results->add_result(col, agg, std::move(var_result)); dense_results->add_result(col, agg, to_dense_agg_result(agg)); diff --git a/cpp/src/groupby/hash/groupby_kernels.cuh b/cpp/src/groupby/hash/groupby_kernels.cuh index 188d0cff3f1..86f4d76487f 100644 --- a/cpp/src/groupby/hash/groupby_kernels.cuh +++ b/cpp/src/groupby/hash/groupby_kernels.cuh @@ -16,8 +16,6 @@ #pragma once -#include "multi_pass_kernels.cuh" - #include #include #include diff --git a/cpp/src/groupby/hash/multi_pass_kernels.cuh b/cpp/src/groupby/hash/var_hash_functor.cuh similarity index 69% rename from cpp/src/groupby/hash/multi_pass_kernels.cuh rename to cpp/src/groupby/hash/var_hash_functor.cuh index 7043eafdc10..bb55cc9188c 100644 --- a/cpp/src/groupby/hash/multi_pass_kernels.cuh +++ b/cpp/src/groupby/hash/var_hash_functor.cuh @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #pragma once #include @@ -21,17 +20,14 @@ #include #include #include -#include #include +#include #include +#include -#include - -namespace cudf { -namespace detail { - -template +namespace cudf::groupby::detail::hash { +template struct var_hash_functor { SetType set; bitmask_type const* __restrict__ row_bitmask; @@ -47,13 +43,13 @@ struct var_hash_functor { column_device_view sum, column_device_view count, size_type ddof) - : set(set), - row_bitmask(row_bitmask), - target(target), - source(source), - sum(sum), - count(count), - ddof(ddof) + : set{set}, + row_bitmask{row_bitmask}, + target{target}, + source{source}, + sum{sum}, + count{count}, + ddof{ddof} { } @@ -64,23 +60,21 @@ struct var_hash_functor { } template - __device__ std::enable_if_t()> operator()(column_device_view const& source, - size_type source_index, - size_type target_index) noexcept + __device__ cuda::std::enable_if_t()> operator()( + column_device_view const& source, size_type source_index, size_type target_index) noexcept { CUDF_UNREACHABLE("Invalid source type for std, var aggregation combination."); } template - __device__ std::enable_if_t()> operator()(column_device_view const& source, - size_type source_index, - size_type target_index) noexcept + __device__ cuda::std::enable_if_t()> operator()( + column_device_view const& source, size_type source_index, size_type target_index) noexcept { - using Target = target_type_t; - using SumType = target_type_t; - using CountType = target_type_t; + using Target = cudf::detail::target_type_t; + using SumType = cudf::detail::target_type_t; + using CountType = cudf::detail::target_type_t; - if (source_has_nulls and source.is_null(source_index)) return; + if (source.is_null(source_index)) return; CountType group_size = count.element(target_index); if (group_size == 0 or group_size - ddof <= 0) return; @@ -91,8 +85,9 @@ struct var_hash_functor { ref.fetch_add(result, cuda::std::memory_order_relaxed); // STD sqrt is applied in finalize() - if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); } + if (target.is_null(target_index)) { target.set_valid(target_index); } } + __device__ inline void operator()(size_type source_index) { if (row_bitmask == nullptr or cudf::bit_is_set(row_bitmask, source_index)) { @@ -110,6 +105,4 @@ struct var_hash_functor { } } }; - -} // namespace detail -} // namespace cudf +} // namespace cudf::groupby::detail::hash diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 76816071d8c..69a51fab5dc 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -83,8 +83,7 @@ struct tree_node { void check_input_size(std::size_t input_size) { // Transduce() writes symbol offsets that may be as large input_size-1 - CUDF_EXPECTS(input_size == 0 || - (input_size - 1) <= std::numeric_limits::max(), + CUDF_EXPECTS(input_size == 0 || (input_size - 1) <= std::numeric_limits::max(), "Given JSON input is too large"); } } // namespace diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index 99a5b17bce8..c424d2b3b62 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -351,10 +351,16 @@ table_with_metadata read_json(host_span> sources, * JSON inputs. */ std::size_t const total_source_size = sources_size(sources, 0, 0); - std::size_t chunk_offset = reader_opts.get_byte_range_offset(); - std::size_t chunk_size = reader_opts.get_byte_range_size(); - chunk_size = !chunk_size ? total_source_size - chunk_offset - : std::min(chunk_size, total_source_size - chunk_offset); + + // Batching is enabled only for JSONL inputs, not regular JSON files + CUDF_EXPECTS( + reader_opts.is_enabled_lines() || total_source_size < std::numeric_limits::max(), + "Parsing Regular JSON inputs of size greater than INT_MAX bytes is not supported"); + + std::size_t chunk_offset = reader_opts.get_byte_range_offset(); + std::size_t chunk_size = reader_opts.get_byte_range_size(); + chunk_size = !chunk_size ? total_source_size - chunk_offset + : std::min(chunk_size, total_source_size - chunk_offset); std::size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); std::size_t const batch_size_upper_bound = get_batch_size_upper_bound(); diff --git a/cpp/src/io/orc/reader_impl_chunking.cu b/cpp/src/io/orc/reader_impl_chunking.cu index 01ee5ad177d..ecf319e75ab 100644 --- a/cpp/src/io/orc/reader_impl_chunking.cu +++ b/cpp/src/io/orc/reader_impl_chunking.cu @@ -500,6 +500,8 @@ void reader_impl::load_next_stripe_data(read_mode mode) auto const [read_begin, read_end] = merge_selected_ranges(_file_itm_data.stripe_data_read_ranges, load_stripe_range); + bool stream_synchronized{false}; + for (auto read_idx = read_begin; read_idx < read_end; ++read_idx) { auto const& read_info = _file_itm_data.data_read_info[read_idx]; auto const source_ptr = _metadata.per_file_metadata[read_info.source_idx].source; @@ -507,6 +509,13 @@ void reader_impl::load_next_stripe_data(read_mode mode) lvl_stripe_data[read_info.level][read_info.stripe_idx - stripe_start].data()); if (source_ptr->is_device_read_preferred(read_info.length)) { + // `device_read_async` may not use _stream at all. + // Instead, it may use some other stream(s) to sync the H->D memcpy. + // As such, we need to make sure the device buffers in `lvl_stripe_data` are ready first. + if (!stream_synchronized) { + _stream.synchronize(); + stream_synchronized = true; + } device_read_tasks.push_back( std::pair(source_ptr->device_read_async( read_info.offset, read_info.length, dst_base + read_info.dst_pos, _stream), diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4596ec65ce7..799a84cbc37 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -56,8 +56,15 @@ function(ConfigureTest CMAKE_TEST_NAME) target_link_libraries( ${CMAKE_TEST_NAME} - PRIVATE cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main - nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIBS}" + PRIVATE cudf::cudftestutil + cudf::cudftestutil_impl + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main + nvtx3::nvtx3-cpp + $ + "${_CUDF_TEST_EXTRA_LIBS}" ) rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME}) rapids_test_add( @@ -76,6 +83,7 @@ function(ConfigureTest CMAKE_TEST_NAME) "GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$" ) endif() + enable_clang_tidy(${CMAKE_TEST_NAME}) endfunction() # ################################################################################################## diff --git a/cpp/tests/interop/nanoarrow_utils.hpp b/cpp/tests/interop/nanoarrow_utils.hpp index a961f73d955..8be7e087b6d 100644 --- a/cpp/tests/interop/nanoarrow_utils.hpp +++ b/cpp/tests/interop/nanoarrow_utils.hpp @@ -256,7 +256,8 @@ std::enable_if_t, nanoarrow::UniqueArray> get_nanoarrow_ ArrowBitmap out; ArrowBitmapInit(&out); NANOARROW_THROW_NOT_OK(ArrowBitmapResize(&out, b.size(), 1)); - std::memset(out.buffer.data, 0, out.buffer.size_bytes); + // TODO: Investigate clang-tidy issue further after nanoarrow is made compliant + std::memset(out.buffer.data, 0, out.buffer.size_bytes); // NOLINT for (size_t i = 0; i < b.size(); ++i) { ArrowBitSetTo(out.buffer.data, i, static_cast(b[i])); diff --git a/cpp/tests/io/metadata_utilities.cpp b/cpp/tests/io/metadata_utilities.cpp index 84f04f67038..380d66c53f9 100644 --- a/cpp/tests/io/metadata_utilities.cpp +++ b/cpp/tests/io/metadata_utilities.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,9 @@ * limitations under the License. */ +#include #include -#include - namespace cudf::test { void expect_metadata_equal(cudf::io::table_input_metadata in_meta, diff --git a/cpp/tests/large_strings/large_strings_fixture.cpp b/cpp/tests/large_strings/large_strings_fixture.cpp index 249319da7f7..7b61be113f9 100644 --- a/cpp/tests/large_strings/large_strings_fixture.cpp +++ b/cpp/tests/large_strings/large_strings_fixture.cpp @@ -123,12 +123,9 @@ LargeStringsData* StringsLargeTest::g_ls_data = nullptr; int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); - auto const cmd_opts = parse_cudf_test_opts(argc, argv); - // hardcoding the CUDA memory resource to keep from exceeding the pool - auto mr = cudf::test::make_cuda(); - cudf::set_current_device_resource(mr.get()); - auto adaptor = make_stream_mode_adaptor(cmd_opts); - + cudf::test::config config; + config.rmm_mode = "cuda"; + init_cudf_test(argc, argv, config); // create object to automatically be destroyed at the end of main() auto lsd = cudf::test::StringsLargeTest::get_ls_data(); diff --git a/cpp/tests/utilities/table_utilities.cu b/cpp/tests/utilities/table_utilities.cu index 354c0b1b57e..8e4906408de 100644 --- a/cpp/tests/utilities/table_utilities.cu +++ b/cpp/tests/utilities/table_utilities.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,10 +15,9 @@ */ #include +#include #include -#include - namespace cudf::test::detail { void expect_table_properties_equal(cudf::table_view lhs, cudf::table_view rhs) { diff --git a/dependencies.yaml b/dependencies.yaml index ca17917c905..ff97b67f0ce 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -6,10 +6,18 @@ files: cuda: ["11.8", "12.5"] arch: [x86_64] includes: + # Note that clang-tidy is not included here because cudf's preferred + # version conflicts with the rest of RAPIDS as well as its own + # clang-format version. Until we update our clang-format version we will + # not be able to install both into the same environment. Moreover, using + # this version will break compatibility with other RAPIDS libraries that + # are still using 16.0.6, and as such will and that would break any + # unified environment like that used in unified devcontainers. - build_base - build_all - build_cpp - build_python_common + - clang_format - cuda - cuda_version - depends_on_cupy @@ -86,6 +94,16 @@ files: includes: - develop - py_version + clang_tidy: + output: none + includes: + - build_all + - build_base + - clang_tidy + - cuda + - cuda_version + - develop + - py_version docs: output: none includes: @@ -553,11 +571,21 @@ dependencies: # pre-commit requires identify minimum version 1.0, but clang-format requires textproto support and that was # added in 2.5.20, so we need to call out the minimum version needed for our plugins - identify>=2.5.20 + - output_types: conda + packages: + - &doxygen doxygen=1.9.1 # pre-commit hook needs a specific version. + clang_format: + common: - output_types: conda packages: - clang==16.0.6 - clang-tools=16.0.6 - - &doxygen doxygen=1.9.1 # pre-commit hook needs a specific version. + clang_tidy: + common: + - output_types: conda + packages: + - clang==19.1.0 + - clang-tools==19.1.0 docs: common: - output_types: [conda]