Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge cudf 23.08 with spark-rapids-jni fixes for hash utility functions that moved. #1271

Merged
merged 2 commits into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions src/main/cpp/src/murmur_hash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@
*/

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/hashing.hpp>
#include <cudf/detail/utilities/hash_functions.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/hashing/detail/hash_functions.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>

Expand Down Expand Up @@ -86,10 +85,10 @@ struct SparkMurmurHash3_32 {
// casting byte-to-int, but C++ does not.
uint32_t k1 = static_cast<uint32_t>(std::to_integer<int8_t>(data[i]));
k1 *= c1;
k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
k1 = cudf::hashing::detail::rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
h = cudf::detail::rotate_bits_left(h, rot_c2);
h = cudf::hashing::detail::rotate_bits_left(h, rot_c2);
h = h * 5 + c3;
}
return h;
Expand All @@ -106,10 +105,10 @@ struct SparkMurmurHash3_32 {
for (cudf::size_type i = 0; i < nblocks; i++) {
uint32_t k1 = getblock32(data, i * BLOCK_SIZE);
k1 *= c1;
k1 = cudf::detail::rotate_bits_left(k1, rot_c1);
k1 = cudf::hashing::detail::rotate_bits_left(k1, rot_c1);
k1 *= c2;
h ^= k1;
h = cudf::detail::rotate_bits_left(h, rot_c2);
h = cudf::hashing::detail::rotate_bits_left(h, rot_c2);
h = h * 5 + c3;
}

Expand Down Expand Up @@ -168,14 +167,14 @@ template <>
spark_hash_value_type __device__ inline SparkMurmurHash3_32<float>::operator()(
float const& key) const
{
return compute<float>(cudf::detail::normalize_nans(key));
return compute<float>(cudf::hashing::detail::normalize_nans(key));
}

template <>
spark_hash_value_type __device__ inline SparkMurmurHash3_32<double>::operator()(
double const& key) const
{
return compute<double>(cudf::detail::normalize_nans(key));
return compute<double>(cudf::hashing::detail::normalize_nans(key));
}

template <>
Expand Down
7 changes: 3 additions & 4 deletions src/main/cpp/src/xxhash64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@
#include "hash.cuh"

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/hashing.hpp>
#include <cudf/detail/utilities/algorithm.cuh>
#include <cudf/detail/utilities/hash_functions.cuh>
#include <cudf/hashing/detail/hash_functions.cuh>
#include <cudf/table/table_device_view.cuh>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -228,13 +227,13 @@ hash_value_type __device__ inline XXHash_64<uint16_t>::operator()(uint16_t const
template <>
hash_value_type __device__ inline XXHash_64<float>::operator()(float const& key) const
{
return compute<float>(cudf::detail::normalize_nans_and_zeros(key));
return compute<float>(cudf::hashing::detail::normalize_nans_and_zeros(key));
}

template <>
hash_value_type __device__ inline XXHash_64<double>::operator()(double const& key) const
{
return compute<double>(cudf::detail::normalize_nans_and_zeros(key));
return compute<double>(cudf::hashing::detail::normalize_nans_and_zeros(key));
}

template <>
Expand Down
2 changes: 1 addition & 1 deletion thirdparty/cudf
Submodule cudf updated 42 files
+1 −1 conda/recipes/libcudf/meta.yaml
+2 −2 cpp/CMakeLists.txt
+1 −1 cpp/include/cudf/detail/aggregation/result_cache.hpp
+1 −1 cpp/include/cudf/detail/join.hpp
+0 −381 cpp/include/cudf/detail/utilities/hash_functions.cuh
+3 −3 cpp/include/cudf/hashing.hpp
+35 −0 cpp/include/cudf/hashing/detail/default_hash.cuh
+66 −0 cpp/include/cudf/hashing/detail/hash_functions.cuh
+8 −19 cpp/include/cudf/hashing/detail/hashing.hpp
+194 −0 cpp/include/cudf/hashing/detail/murmurhash3_x86_32.cuh
+5 −4 cpp/include/cudf/join.hpp
+12 −13 cpp/include/cudf/table/experimental/row_operators.cuh
+2 −2 cpp/include/cudf/table/row_operators.cuh
+2 −2 cpp/include/nvtext/minhash.hpp
+1 −1 cpp/src/column/column_view.cpp
+7 −7 cpp/src/groupby/hash/groupby.cu
+2 −2 cpp/src/hash/concurrent_unordered_map.cuh
+3 −41 cpp/src/hash/hashing.cu
+106 −7 cpp/src/hash/md5_hash.cu
+18 −7 cpp/src/hash/murmurhash3_x86_32.cu
+44 −31 cpp/src/hash/spark_murmurhash3_x86_32.cu
+4 −3 cpp/src/hash/unordered_multiset.cuh
+3 −3 cpp/src/io/json/json_gpu.cu
+7 −7 cpp/src/io/json/json_tree.cu
+1 −1 cpp/src/io/orc/dict_enc.cu
+1 −1 cpp/src/io/parquet/chunk_dict.cu
+3 −2 cpp/src/io/parquet/page_data.cu
+3 −2 cpp/src/join/join_common_utils.hpp
+2 −1 cpp/src/join/mixed_join_common_utils.cuh
+29 −3 cpp/src/partitioning/partitioning.cu
+0 −1 cpp/src/search/contains_table.cu
+0 −1 cpp/src/stream_compaction/stream_compaction_common.hpp
+2 −3 cpp/src/text/generate_ngrams.cu
+4 −3 cpp/src/text/minhash.cu
+0 −1 cpp/src/text/subword/bpe_tokenizer.cu
+2 −2 cpp/src/text/subword/bpe_tokenizer.cuh
+0 −1 cpp/src/text/subword/load_merges_file.cu
+2 −1 cpp/tests/CMakeLists.txt
+20 −19 cpp/tests/hashing/murmurhash3_x86_32_test.cpp
+36 −29 cpp/tests/hashing/spark_murmurhash3_x86_32_test.cpp
+1 −1 cpp/tests/io/json_tree.cpp
+20 −0 cpp/tests/sort/sort_nested_types_tests.cpp