diff --git a/build/apply-patches b/build/apply-patches new file mode 100755 index 0000000000..991613e6dc --- /dev/null +++ b/build/apply-patches @@ -0,0 +1,39 @@ +#!/bin/bash + +# +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Apply every patch file in PATCH_DIR (in version-sorted order) to the CUDF checkout in CUDF_DIR; aborts if that checkout has uncommitted changes + +set -e + +BASE_DIR=$( git rev-parse --show-toplevel ) + +PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")} + +CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")} + +pushd "$CUDF_DIR" +if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then + echo "Error: CUDF repository has uncommitted changes. No patches will be applied..." >&2 + exit 1 +fi + +find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV | while IFS= read -r -d '' file; do + echo "patching with: $file" + patch --no-backup-if-mismatch -f -t --reject-file=- -p1 -i "$file" +done +popd diff --git a/build/unapply-patches b/build/unapply-patches new file mode 100755 index 0000000000..186a781ade --- /dev/null +++ b/build/unapply-patches @@ -0,0 +1,44 @@ +#!/bin/bash + +# +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Reverse every patch file in PATCH_DIR (in reverse version-sorted order) in the CUDF checkout in CUDF_DIR; fails if the checkout is still modified afterwards + +set -e + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +PATCH_DIR=${PATCH_DIR:-$(realpath "$SCRIPT_DIR/../patches/")} + +CUDF_DIR=${CUDF_DIR:-$(realpath "$SCRIPT_DIR/../thirdparty/cudf/")} + + +pushd "$CUDF_DIR" +if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then + #only try to remove patches if it looks like something was changed + find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV -r | while IFS= read -r -d '' file; do + echo "reverting patch: $file" + patch -R --no-backup-if-mismatch --reject-file=- -f -t -p1 -i "$file" + done +fi + +# Check for modifications +if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then + echo "Error: CUDF repository has uncommitted changes. You might want to clean it manually if you know that is expected" >&2 + exit 1 +fi +popd diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index af5c2183cd..a07d8d4ac4 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -70,24 +70,18 @@ echo "Test against ${cudf_sha}..." MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B" set +e -${MVN} verify ${MVN_MIRROR} \ +# Don't do a full build. Just try to update/build CUDF with no patches on top of it. +${MVN} validate ${MVN_MIRROR} \ -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ -Dlibcudf.build.configure=true \ -Dlibcudf.dependency.mode=latest \ + -Dsubmodule.patch.skip \ -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ -DBUILD_TESTS=ON \ -DUSE_SANITIZER=ON -verify_status=$?
+validate_status=$? set -e -test_pass="False" -if [[ "${verify_status}" == "0" ]]; then - echo "Test passed, will try merge the change" - test_pass="True" -else - echo "Test failed, will update the result" -fi - build_name=$(${MVN} help:evaluate -Dexpression=project.build.finalName -q -DforceStdout) cuda_version=$(${MVN} help:evaluate -Dexpression=cuda.version -q -DforceStdout) . ci/check-cuda-dependencies.sh "target/${build_name}-${cuda_version}.jar" @@ -100,8 +94,11 @@ echo "${rapids_cmake_sha}" > thirdparty/cudf-pins/rapids-cmake.sha # Bash the wrong nvcomp version to the correct version until # nvcomp version mismatch is fixed. https://github.com/rapidsai/cudf/issues/16772. -echo "Workaround nvcomp 4.0.1 / 4.0.1.0 versioning issues" -sed -i 's/4\.0\.1\.0/4.0.1/' thirdparty/cudf-pins/versions.json +echo "Revert nvcomp to 3.0.6" +sed -i -e 's/4\.0\.1\.0/3.0.6/' \ + -e 's|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz|' \ + -e 's|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz|' \ + thirdparty/cudf-pins/versions.json # Do the git add after the build so that we get # the updated versions.json generated by the build @@ -110,6 +107,25 @@ git add . 
git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${cudf_sha}" sha=$(git rev-parse HEAD) +set +e +# now build and test everything with the patches in place +${MVN} clean verify ${MVN_MIRROR} \ + -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ + -Dlibcudf.build.configure=true \ + -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ + -DBUILD_TESTS=ON \ + -DUSE_SANITIZER=ON +verify_status=$? +set -e + +test_pass="False" +if [[ ( "${verify_status}" == "0" ) && ( "${validate_status}" == "0" ) ]]; then + echo "Test passed, will try merge the change" + test_pass="True" +else + echo "Test failed, will update the result" +fi + # push the intermediate branch and create PR against REF # if test passed, it will try auto-merge the PR # if test failed, it will only comment the test result in the PR diff --git a/patches/revert_nvcomp4.patch b/patches/revert_nvcomp4.patch new file mode 100644 index 0000000000..88b58b14dc --- /dev/null +++ b/patches/revert_nvcomp4.patch @@ -0,0 +1,907 @@ +diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh +index fb93b06dbe..e5565c4b53 100755 +--- a/ci/build_wheel_cudf.sh ++++ b/ci/build_wheel_cudf.sh +@@ -23,6 +23,8 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" + python -m auditwheel repair \ + --exclude libcudf.so \ + --exclude libnvcomp.so \ ++ --exclude libnvcomp_bitcomp.so \ ++ --exclude libnvcomp_gdeflate.so \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* + +diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh +index 5e9f7f8a0c..0e4745bda2 100755 +--- a/ci/build_wheel_pylibcudf.sh ++++ b/ci/build_wheel_pylibcudf.sh +@@ -21,6 +21,8 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" + python -m auditwheel repair \ + --exclude libcudf.so \ + --exclude libnvcomp.so \ ++ --exclude libnvcomp_bitcomp.so \ ++ --exclude libnvcomp_gdeflate.so \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* + +diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml 
b/conda/environments/all_cuda-118_arch-x86_64.yaml +index 5a05dfd053..e7363645d6 100644 +--- a/conda/environments/all_cuda-118_arch-x86_64.yaml ++++ b/conda/environments/all_cuda-118_arch-x86_64.yaml +@@ -58,7 +58,7 @@ dependencies: + - numpy>=1.23,<3.0a0 + - numpydoc + - nvcc_linux-64=11.8 +-- nvcomp==4.0.1 ++- nvcomp==3.0.6 + - nvtx>=0.2.1 + - openpyxl + - packaging +diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml +index 8490296233..3559a1a341 100644 +--- a/conda/environments/all_cuda-125_arch-x86_64.yaml ++++ b/conda/environments/all_cuda-125_arch-x86_64.yaml +@@ -56,7 +56,7 @@ dependencies: + - numba>=0.57 + - numpy>=1.23,<3.0a0 + - numpydoc +-- nvcomp==4.0.1 ++- nvcomp==3.0.6 + - nvtx>=0.2.1 + - openpyxl + - packaging +diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml +index dc75eb4b25..67d501d746 100644 +--- a/conda/recipes/libcudf/conda_build_config.yaml ++++ b/conda/recipes/libcudf/conda_build_config.yaml +@@ -35,7 +35,7 @@ spdlog_version: + - ">=1.14.1,<1.15" + + nvcomp_version: +- - "=4.0.1" ++ - "=3.0.6" + + zlib_version: + - ">=1.2.13" +diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp +index 0d74a4158a..f3260d0cb5 100644 +--- a/cpp/include/cudf/io/nvcomp_adapter.hpp ++++ b/cpp/include/cudf/io/nvcomp_adapter.hpp +@@ -24,7 +24,7 @@ + namespace CUDF_EXPORT cudf { + namespace io::nvcomp { + +-enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP }; ++enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; + + /** + * @brief Set of parameters that impact whether nvCOMP features are enabled. 
+@@ -36,20 +36,33 @@ struct feature_status_parameters { + int lib_patch_version; ///< patch version + bool are_all_integrations_enabled; ///< all integrations + bool are_stable_integrations_enabled; ///< stable integrations ++ int compute_capability_major; ///< cuda compute major version + + /** +- * @brief Default constructor using the current version of nvcomp and current environment +- * variables ++ * @brief Default Constructor + */ + feature_status_parameters(); + + /** +- * @brief Constructor using the current version of nvcomp ++ * @brief feature_status_parameters Constructor + * ++ * @param major positive integer representing major value of nvcomp ++ * @param minor positive integer representing minor value of nvcomp ++ * @param patch positive integer representing patch value of nvcomp + * @param all_enabled if all integrations are enabled + * @param stable_enabled if stable integrations are enabled ++ * @param cc_major CUDA compute capability + */ +- feature_status_parameters(bool all_enabled, bool stable_enabled); ++ feature_status_parameters( ++ int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major) ++ : lib_major_version{major}, ++ lib_minor_version{minor}, ++ lib_patch_version{patch}, ++ are_all_integrations_enabled{all_enabled}, ++ are_stable_integrations_enabled{stable_enabled}, ++ compute_capability_major{cc_major} ++ { ++ } + }; + + /** +@@ -61,7 +74,8 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para + lhs.lib_minor_version == rhs.lib_minor_version and + lhs.lib_patch_version == rhs.lib_patch_version and + lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and +- lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled; ++ lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and ++ lhs.compute_capability_major == rhs.compute_capability_major; + } + + /** +diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp 
b/cpp/src/io/comp/nvcomp_adapter.cpp +index c3187f73a9..3191e8f015 100644 +--- a/cpp/src/io/comp/nvcomp_adapter.cpp ++++ b/cpp/src/io/comp/nvcomp_adapter.cpp +@@ -22,46 +22,94 @@ + #include + #include + +-#include +-#include + #include + #include +-#include + + #include + ++#define NVCOMP_DEFLATE_HEADER ++#if __has_include(NVCOMP_DEFLATE_HEADER) ++#include NVCOMP_DEFLATE_HEADER ++#endif ++ ++#define NVCOMP_ZSTD_HEADER ++#if __has_include(NVCOMP_ZSTD_HEADER) ++#include NVCOMP_ZSTD_HEADER ++#endif ++ ++// When building with nvcomp 4.0 or newer, map the new version macros to the old ones ++#ifndef NVCOMP_MAJOR_VERSION ++#define NVCOMP_MAJOR_VERSION NVCOMP_VER_MAJOR ++#define NVCOMP_MINOR_VERSION NVCOMP_VER_MINOR ++#define NVCOMP_PATCH_VERSION NVCOMP_VER_PATCH ++#endif ++ ++#define NVCOMP_HAS_ZSTD_DECOMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 3)) ++ ++#define NVCOMP_HAS_ZSTD_COMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 4)) ++ ++#define NVCOMP_HAS_DEFLATE(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 5)) ++ ++#define NVCOMP_HAS_DECOMP_TEMPSIZE_EX(MAJOR, MINOR, PATCH) \ ++ (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 1)) ++ ++#define NVCOMP_HAS_COMP_TEMPSIZE_EX(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 6)) ++ ++// ZSTD is stable for nvcomp 2.3.2 or newer ++#define NVCOMP_ZSTD_DECOMP_IS_STABLE(MAJOR, MINOR, PATCH) \ ++ (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 2)) ++ + namespace cudf::io::nvcomp { + + // Dispatcher for nvcompBatchedDecompressGetTempSizeEx + template +-auto batched_decompress_get_temp_size_ex(compression_type compression, Args&&... args) ++std::optional batched_decompress_get_temp_size_ex(compression_type compression, ++ Args&&... 
args) + { ++#if NVCOMP_HAS_DECOMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) + switch (compression) { + case compression_type::SNAPPY: + return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward(args)...); + case compression_type::ZSTD: ++#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) + return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward(args)...); ++#else ++ return std::nullopt; ++#endif + case compression_type::LZ4: + return nvcompBatchedLZ4DecompressGetTempSizeEx(std::forward(args)...); +- case compression_type::DEFLATE: +- return nvcompBatchedDeflateDecompressGetTempSizeEx(std::forward(args)...); +- case compression_type::GZIP: +- return nvcompBatchedGzipDecompressGetTempSizeEx(std::forward(args)...); +- default: CUDF_FAIL("Unsupported compression type"); ++ case compression_type::DEFLATE: [[fallthrough]]; ++ default: return std::nullopt; + } ++#endif ++ return std::nullopt; + } +-size_t batched_decompress_temp_size(compression_type compression, +- size_t num_chunks, +- size_t max_uncomp_chunk_size, +- size_t max_total_uncomp_size) +-{ +- size_t temp_size = 0; +- nvcompStatus_t nvcomp_status = batched_decompress_get_temp_size_ex( +- compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size); + +- CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, +- "Unable to get scratch size for decompression"); +- return temp_size; ++// Dispatcher for nvcompBatchedDecompressGetTempSize ++template ++auto batched_decompress_get_temp_size(compression_type compression, Args&&... 
args) ++{ ++ switch (compression) { ++ case compression_type::SNAPPY: ++ return nvcompBatchedSnappyDecompressGetTempSize(std::forward(args)...); ++ case compression_type::ZSTD: ++#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) ++ return nvcompBatchedZstdDecompressGetTempSize(std::forward(args)...); ++#else ++ CUDF_FAIL("Decompression error: " + ++ nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD).value()); ++#endif ++ case compression_type::DEFLATE: ++#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) ++ return nvcompBatchedDeflateDecompressGetTempSize(std::forward(args)...); ++#else ++ CUDF_FAIL("Decompression error: " + ++ nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE).value()); ++#endif ++ case compression_type::LZ4: ++ return nvcompBatchedLZ4DecompressGetTempSize(std::forward(args)...); ++ default: CUDF_FAIL("Unsupported compression type"); ++ } + } + + // Dispatcher for nvcompBatchedDecompressAsync +@@ -72,12 +120,20 @@ auto batched_decompress_async(compression_type compression, Args&&... 
args) + case compression_type::SNAPPY: + return nvcompBatchedSnappyDecompressAsync(std::forward(args)...); + case compression_type::ZSTD: ++#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) + return nvcompBatchedZstdDecompressAsync(std::forward(args)...); ++#else ++ CUDF_FAIL("Decompression error: " + ++ nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD).value()); ++#endif + case compression_type::DEFLATE: ++#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) + return nvcompBatchedDeflateDecompressAsync(std::forward(args)...); ++#else ++ CUDF_FAIL("Decompression error: " + ++ nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE).value()); ++#endif + case compression_type::LZ4: return nvcompBatchedLZ4DecompressAsync(std::forward(args)...); +- case compression_type::GZIP: +- return nvcompBatchedGzipDecompressAsync(std::forward(args)...); + default: CUDF_FAIL("Unsupported compression type"); + } + } +@@ -89,11 +145,31 @@ std::string compression_type_name(compression_type compression) + case compression_type::ZSTD: return "Zstandard"; + case compression_type::DEFLATE: return "Deflate"; + case compression_type::LZ4: return "LZ4"; +- case compression_type::GZIP: return "GZIP"; + } + return "compression_type(" + std::to_string(static_cast(compression)) + ")"; + } + ++size_t batched_decompress_temp_size(compression_type compression, ++ size_t num_chunks, ++ size_t max_uncomp_chunk_size, ++ size_t max_total_uncomp_size) ++{ ++ size_t temp_size = 0; ++ auto nvcomp_status = batched_decompress_get_temp_size_ex( ++ compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size); ++ ++ if (nvcomp_status.value_or(nvcompStatus_t::nvcompErrorInternal) != ++ nvcompStatus_t::nvcompSuccess) { ++ nvcomp_status = ++ batched_decompress_get_temp_size(compression, num_chunks, max_uncomp_chunk_size, &temp_size); ++ } ++ ++ CUDF_EXPECTS(nvcomp_status == 
nvcompStatus_t::nvcompSuccess, ++ "Unable to get scratch size for decompression"); ++ ++ return temp_size; ++} ++ + void batched_decompress(compression_type compression, + device_span const> inputs, + device_span const> outputs, +@@ -128,10 +204,54 @@ void batched_decompress(compression_type compression, + update_compression_results(nvcomp_statuses, actual_uncompressed_data_sizes, results, stream); + } + +-size_t batched_compress_temp_size(compression_type compression, +- size_t batch_size, +- size_t max_uncompressed_chunk_bytes, +- size_t max_total_uncompressed_bytes) ++// Wrapper for nvcompBatchedCompressGetTempSize ++auto batched_compress_get_temp_size(compression_type compression, ++ size_t batch_size, ++ size_t max_uncompressed_chunk_bytes) ++{ ++ size_t temp_size = 0; ++ nvcompStatus_t nvcomp_status = nvcompStatus_t::nvcompSuccess; ++ switch (compression) { ++ case compression_type::SNAPPY: ++ nvcomp_status = nvcompBatchedSnappyCompressGetTempSize( ++ batch_size, max_uncompressed_chunk_bytes, nvcompBatchedSnappyDefaultOpts, &temp_size); ++ break; ++ case compression_type::DEFLATE: ++#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) ++ nvcomp_status = nvcompBatchedDeflateCompressGetTempSize( ++ batch_size, max_uncompressed_chunk_bytes, nvcompBatchedDeflateDefaultOpts, &temp_size); ++ break; ++#else ++ CUDF_FAIL("Compression error: " + ++ nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); ++#endif ++ case compression_type::ZSTD: ++#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) ++ nvcomp_status = nvcompBatchedZstdCompressGetTempSize( ++ batch_size, max_uncompressed_chunk_bytes, nvcompBatchedZstdDefaultOpts, &temp_size); ++ break; ++#else ++ CUDF_FAIL("Compression error: " + ++ nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); ++#endif ++ case compression_type::LZ4: ++ nvcomp_status = nvcompBatchedLZ4CompressGetTempSize( ++ 
batch_size, max_uncompressed_chunk_bytes, nvcompBatchedLZ4DefaultOpts, &temp_size); ++ break; ++ default: CUDF_FAIL("Unsupported compression type"); ++ } ++ ++ CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, ++ "Unable to get scratch size for compression"); ++ return temp_size; ++} ++ ++#if NVCOMP_HAS_COMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) ++// Wrapper for nvcompBatchedCompressGetTempSizeEx ++auto batched_compress_get_temp_size_ex(compression_type compression, ++ size_t batch_size, ++ size_t max_uncompressed_chunk_bytes, ++ size_t max_total_uncompressed_bytes) + { + size_t temp_size = 0; + nvcompStatus_t nvcomp_status = nvcompStatus_t::nvcompSuccess; +@@ -171,8 +291,28 @@ size_t batched_compress_temp_size(compression_type compression, + "Unable to get scratch size for compression"); + return temp_size; + } ++#endif ++ ++size_t batched_compress_temp_size(compression_type compression, ++ size_t num_chunks, ++ size_t max_uncomp_chunk_size, ++ size_t max_total_uncomp_size) ++{ ++#if NVCOMP_HAS_COMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) ++ try { ++ return batched_compress_get_temp_size_ex( ++ compression, num_chunks, max_uncomp_chunk_size, max_total_uncomp_size); ++ } catch (...) 
{ ++ // Ignore errors in the expanded version; fall back to the old API in case of failure ++ CUDF_LOG_WARN( ++ "CompressGetTempSizeEx call failed, falling back to CompressGetTempSize; this may increase " ++ "the memory usage"); ++ } ++#endif ++ ++ return batched_compress_get_temp_size(compression, num_chunks, max_uncomp_chunk_size); ++} + +-// Wrapper for nvcompBatchedCompressGetMaxOutputChunkSize + size_t compress_max_output_chunk_size(compression_type compression, + uint32_t max_uncompressed_chunk_bytes) + { +@@ -188,13 +328,23 @@ size_t compress_max_output_chunk_size(compression_type compression, + capped_uncomp_bytes, nvcompBatchedSnappyDefaultOpts, &max_comp_chunk_size); + break; + case compression_type::DEFLATE: ++#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) + status = nvcompBatchedDeflateCompressGetMaxOutputChunkSize( + capped_uncomp_bytes, nvcompBatchedDeflateDefaultOpts, &max_comp_chunk_size); + break; ++#else ++ CUDF_FAIL("Compression error: " + ++ nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); ++#endif + case compression_type::ZSTD: ++#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) + status = nvcompBatchedZstdCompressGetMaxOutputChunkSize( + capped_uncomp_bytes, nvcompBatchedZstdDefaultOpts, &max_comp_chunk_size); + break; ++#else ++ CUDF_FAIL("Compression error: " + ++ nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); ++#endif + case compression_type::LZ4: + status = nvcompBatchedLZ4CompressGetMaxOutputChunkSize( + capped_uncomp_bytes, nvcompBatchedLZ4DefaultOpts, &max_comp_chunk_size); +@@ -234,6 +384,7 @@ static void batched_compress_async(compression_type compression, + stream.value()); + break; + case compression_type::DEFLATE: ++#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) + nvcomp_status = nvcompBatchedDeflateCompressAsync(device_uncompressed_ptrs, + 
device_uncompressed_bytes, + max_uncompressed_chunk_bytes, +@@ -245,7 +396,12 @@ static void batched_compress_async(compression_type compression, + nvcompBatchedDeflateDefaultOpts, + stream.value()); + break; ++#else ++ CUDF_FAIL("Compression error: " + ++ nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); ++#endif + case compression_type::ZSTD: ++#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) + nvcomp_status = nvcompBatchedZstdCompressAsync(device_uncompressed_ptrs, + device_uncompressed_bytes, + max_uncompressed_chunk_bytes, +@@ -257,6 +413,10 @@ static void batched_compress_async(compression_type compression, + nvcompBatchedZstdDefaultOpts, + stream.value()); + break; ++#else ++ CUDF_FAIL("Compression error: " + ++ nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); ++#endif + case compression_type::LZ4: + nvcomp_status = nvcompBatchedLZ4CompressAsync(device_uncompressed_ptrs, + device_uncompressed_bytes, +@@ -318,18 +478,16 @@ void batched_compress(compression_type compression, + } + + feature_status_parameters::feature_status_parameters() +- : feature_status_parameters(nvcomp_integration::is_all_enabled(), +- nvcomp_integration::is_stable_enabled()) +-{ +-} +- +-feature_status_parameters::feature_status_parameters(bool all_enabled, bool stable_enabled) +- : lib_major_version{NVCOMP_VER_MAJOR}, +- lib_minor_version{NVCOMP_VER_MINOR}, +- lib_patch_version{NVCOMP_VER_PATCH}, +- are_all_integrations_enabled{all_enabled}, +- are_stable_integrations_enabled{stable_enabled} ++ : lib_major_version{NVCOMP_MAJOR_VERSION}, ++ lib_minor_version{NVCOMP_MINOR_VERSION}, ++ lib_patch_version{NVCOMP_PATCH_VERSION}, ++ are_all_integrations_enabled{nvcomp_integration::is_all_enabled()}, ++ are_stable_integrations_enabled{nvcomp_integration::is_stable_enabled()} + { ++ int device; ++ CUDF_CUDA_TRY(cudaGetDevice(&device)); ++ CUDF_CUDA_TRY( ++ 
cudaDeviceGetAttribute(&compute_capability_major, cudaDevAttrComputeCapabilityMajor, device)); + } + + // Represents all parameters required to determine status of a compression/decompression feature +@@ -352,21 +510,42 @@ std::optional is_compression_disabled_impl(compression_type compres + { + switch (compression) { + case compression_type::DEFLATE: { ++ if (not NVCOMP_HAS_DEFLATE( ++ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { ++ return "nvCOMP 2.5 or newer is required for Deflate compression"; ++ } + if (not params.are_all_integrations_enabled) { + return "DEFLATE compression is experimental, you can enable it through " + "`LIBCUDF_NVCOMP_POLICY` environment variable."; + } + return std::nullopt; + } ++ case compression_type::SNAPPY: { ++ if (not params.are_stable_integrations_enabled) { ++ return "Snappy compression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " ++ "environment variable."; ++ } ++ return std::nullopt; ++ } ++ case compression_type::ZSTD: { ++ if (not NVCOMP_HAS_ZSTD_COMP( ++ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { ++ return "nvCOMP 2.4 or newer is required for Zstandard compression"; ++ } ++ if (not params.are_stable_integrations_enabled) { ++ return "Zstandard compression is experimental, you can enable it through " ++ "`LIBCUDF_NVCOMP_POLICY` environment variable."; ++ } ++ return std::nullopt; ++ } + case compression_type::LZ4: +- case compression_type::SNAPPY: +- case compression_type::ZSTD: + if (not params.are_stable_integrations_enabled) { +- return "nvCOMP use is disabled through the `LIBCUDF_NVCOMP_POLICY` environment variable."; ++ return "LZ4 compression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " ++ "environment variable."; + } + return std::nullopt; +- default: return "Unsupported compression type"; + } ++ return "Unsupported compression type"; + } + + std::optional is_compression_disabled(compression_type compression, +@@ -398,26 
+577,58 @@ std::optional is_compression_disabled(compression_type compression, + return reason; + } + ++std::optional is_zstd_decomp_disabled(feature_status_parameters const& params) ++{ ++ if (not NVCOMP_HAS_ZSTD_DECOMP( ++ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { ++ return "nvCOMP 2.3 or newer is required for Zstandard decompression"; ++ } ++ ++ if (NVCOMP_ZSTD_DECOMP_IS_STABLE( ++ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { ++ if (not params.are_stable_integrations_enabled) { ++ return "Zstandard decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " ++ "environment variable."; ++ } ++ } else if (not params.are_all_integrations_enabled) { ++ return "Zstandard decompression is experimental, you can enable it through " ++ "`LIBCUDF_NVCOMP_POLICY` environment variable."; ++ } ++ ++ return std::nullopt; ++} ++ + std::optional is_decompression_disabled_impl(compression_type compression, + feature_status_parameters params) + { + switch (compression) { +- case compression_type::DEFLATE: +- case compression_type::GZIP: { ++ case compression_type::DEFLATE: { ++ if (not NVCOMP_HAS_DEFLATE( ++ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { ++ return "nvCOMP 2.5 or newer is required for Deflate decompression"; ++ } + if (not params.are_all_integrations_enabled) { + return "DEFLATE decompression is experimental, you can enable it through " + "`LIBCUDF_NVCOMP_POLICY` environment variable."; + } + return std::nullopt; + } +- case compression_type::LZ4: +- case compression_type::SNAPPY: +- case compression_type::ZSTD: { ++ case compression_type::SNAPPY: { + if (not params.are_stable_integrations_enabled) { +- return "nvCOMP use is disabled through the `LIBCUDF_NVCOMP_POLICY` environment variable."; ++ return "Snappy decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " ++ "environment variable."; + } + return std::nullopt; + } ++ case 
compression_type::ZSTD: return is_zstd_decomp_disabled(params); ++ case compression_type::LZ4: { ++ if (not params.are_stable_integrations_enabled) { ++ return "LZ4 decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " ++ "environment variable."; ++ } ++ return std::nullopt; ++ } ++ default: return "Unsupported compression type"; + } + return "Unsupported compression type"; + } +@@ -451,27 +662,43 @@ std::optional is_decompression_disabled(compression_type compressio + return reason; + } + +-size_t required_alignment(compression_type compression) ++size_t compress_input_alignment_bits(compression_type compression) + { + switch (compression) { +- case compression_type::GZIP: +- case compression_type::DEFLATE: return nvcompDeflateRequiredAlignment; +- case compression_type::SNAPPY: return nvcompSnappyRequiredAlignment; +- case compression_type::ZSTD: return nvcompZstdRequiredAlignment; +- case compression_type::LZ4: return nvcompLZ4RequiredAlignment; ++ case compression_type::DEFLATE: return 0; ++ case compression_type::SNAPPY: return 0; ++ case compression_type::ZSTD: return 2; ++ case compression_type::LZ4: return 2; + default: CUDF_FAIL("Unsupported compression type"); + } + } + +-std::optional compress_max_allowed_chunk_size(compression_type compression) ++size_t compress_output_alignment_bits(compression_type compression) + { + switch (compression) { +- case compression_type::DEFLATE: return nvcompDeflateCompressionMaxAllowedChunkSize; +- case compression_type::SNAPPY: return nvcompSnappyCompressionMaxAllowedChunkSize; +- case compression_type::ZSTD: return nvcompZstdCompressionMaxAllowedChunkSize; +- case compression_type::LZ4: return nvcompLZ4CompressionMaxAllowedChunkSize; ++ case compression_type::DEFLATE: return 3; ++ case compression_type::SNAPPY: return 0; ++ case compression_type::ZSTD: return 0; ++ case compression_type::LZ4: return 2; + default: CUDF_FAIL("Unsupported compression type"); + } + } + ++std::optional 
compress_max_allowed_chunk_size(compression_type compression) ++{ ++ switch (compression) { ++ case compression_type::DEFLATE: return 64 * 1024; ++ case compression_type::SNAPPY: return std::nullopt; ++ case compression_type::ZSTD: ++#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) ++ return nvcompZstdCompressionMaxAllowedChunkSize; ++#else ++ CUDF_FAIL("Compression error: " + ++ nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); ++#endif ++ case compression_type::LZ4: return 16 * 1024 * 1024; ++ default: return std::nullopt; ++ } ++} ++ + } // namespace cudf::io::nvcomp +diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp +index 583bd6a352..43c79e3237 100644 +--- a/cpp/src/io/comp/nvcomp_adapter.hpp ++++ b/cpp/src/io/comp/nvcomp_adapter.hpp +@@ -75,12 +75,20 @@ size_t batched_decompress_temp_size(compression_type compression, + uint32_t max_uncomp_chunk_size); + + /** +- * @brief Gets input and output alignment requirements for the given compression type. ++ * @brief Gets input alignment requirements for the given compression type. + * + * @param compression Compression type +- * @returns required alignment ++ * @returns required alignment, in bits + */ +-[[nodiscard]] size_t required_alignment(compression_type compression); ++[[nodiscard]] size_t compress_input_alignment_bits(compression_type compression); ++ ++/** ++ * @brief Gets output alignment requirements for the given compression type. ++ * ++ * @param compression Compression type ++ * @returns required alignment, in bits ++ */ ++[[nodiscard]] size_t compress_output_alignment_bits(compression_type compression); + + /** + * @brief Maximum size of uncompressed chunks that can be compressed with nvCOMP. 
+diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu +index 60a64fb0ee..40cfbe763b 100644 +--- a/cpp/src/io/orc/writer_impl.cu ++++ b/cpp/src/io/orc/writer_impl.cu +@@ -533,20 +533,20 @@ auto uncomp_block_alignment(CompressionKind compression_kind) + { + if (compression_kind == NONE or + nvcomp::is_compression_disabled(to_nvcomp_compression_type(compression_kind))) { +- return 1ul; ++ return 1u; + } + +- return nvcomp::required_alignment(to_nvcomp_compression_type(compression_kind)); ++ return 1u << nvcomp::compress_input_alignment_bits(to_nvcomp_compression_type(compression_kind)); + } + + auto comp_block_alignment(CompressionKind compression_kind) + { + if (compression_kind == NONE or + nvcomp::is_compression_disabled(to_nvcomp_compression_type(compression_kind))) { +- return 1ul; ++ return 1u; + } + +- return nvcomp::required_alignment(to_nvcomp_compression_type(compression_kind)); ++ return 1u << nvcomp::compress_output_alignment_bits(to_nvcomp_compression_type(compression_kind)); + } + + /** +diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu +index c588fedb85..bab70c126b 100644 +--- a/cpp/src/io/parquet/reader_impl_chunking.cu ++++ b/cpp/src/io/parquet/reader_impl_chunking.cu +@@ -865,18 +865,8 @@ std::vector compute_page_splits_by_row(device_span=2.5.0,<2.6.0a0 + # Align nvcomp version with rapids-cmake +- - nvcomp==4.0.1 ++ - nvcomp==3.0.6 + - spdlog>=1.14.1,<1.15 + rapids_build_skbuild: + common: +diff --git a/docs/cudf/source/user_guide/io/io.md b/docs/cudf/source/user_guide/io/io.md +index 97b961b455..adcdaa51e7 100644 +--- a/docs/cudf/source/user_guide/io/io.md ++++ b/docs/cudf/source/user_guide/io/io.md +@@ -75,6 +75,7 @@ IO format. + + + ++ + **Notes:** + + - \[¹\] - Not all orientations are GPU-accelerated. +@@ -176,9 +177,4 @@ If no value is set, behavior will be the same as the "STABLE" option. 
+ +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ + | DEFLATE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | Experimental | Experimental | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ +- | LZ4 | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | +- +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ +- | GZIP | ❌ | ❌ | Experimental | Experimental | ❌ | ❌ | ❌ | ❌ | ❌ | +- +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ +- + ``` +diff --git a/java/pom.xml b/java/pom.xml +index e4f1cdf64e..9694e741f1 100644 +--- a/java/pom.xml ++++ b/java/pom.xml +@@ -1,6 +1,6 @@ + +