Skip to content

Commit 302516c

Browse files
committed
Add sanity checks for predicates on vectors
This commit adds four new sanity checks, `true_for_all`, `false_for_all`, `true_for_any`, and `false_for_any` which basically do what they say on the tin.
1 parent 884457a commit 302516c

File tree

7 files changed

+552
-0
lines changed

7 files changed

+552
-0
lines changed

cmake/traccc-compiler-options-cuda.cmake

+3
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ set( CMAKE_CUDA_ARCHITECTURES "52" CACHE STRING
2727
# not marked with __device__.
2828
traccc_add_flag( CMAKE_CUDA_FLAGS "--expt-relaxed-constexpr" )
2929

30+
# Allow the use of lambdas with __device__ specifiers.
31+
traccc_add_flag( CMAKE_CUDA_FLAGS "--extended-lambda" )
32+
3033
# Make CUDA generate debug symbols for the device code as well in a debug
3134
# build.
3235
traccc_add_flag( CMAKE_CUDA_FLAGS_DEBUG "-G --keep -src-in-ptx" )

device/cuda/src/sanity/predicate.cuh

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
// Project include(s).
12+
#include "../utils/cuda_error_handling.hpp"
13+
#include "traccc/cuda/utils/stream.hpp"
14+
15+
// VecMem include(s).
16+
#include <vecmem/containers/data/vector_view.hpp>
17+
#include <vecmem/containers/device_vector.hpp>
18+
#include <vecmem/memory/memory_resource.hpp>
19+
#include <vecmem/memory/unique_ptr.hpp>
20+
#include <vecmem/utils/copy.hpp>
21+
22+
// CUDA include
23+
#include <cuda_runtime.h>
24+
25+
// System include
26+
#include <concepts>
27+
28+
namespace traccc::cuda {
29+
namespace kernels {
30+
template <typename P, typename T>
31+
requires std::predicate<P, T> __global__ void true_for_all_kernel(
32+
P projection, vecmem::data::vector_view<T> _in, bool* out) {
33+
int tid = threadIdx.x + blockIdx.x * blockDim.x;
34+
35+
vecmem::device_vector<T> in(_in);
36+
37+
if (tid < in.size()) {
38+
if (!projection(in.at(tid))) {
39+
*out = false;
40+
}
41+
}
42+
}
43+
} // namespace kernels
44+
45+
/**
46+
* @brief Sanity check that a predicate is true for all elements of a vector.
47+
*
48+
* @note This function runs in O(n) time.
49+
*
50+
* @tparam P The type of the predicate.
51+
* @tparam T The type of the vector.
52+
* @param predicate A projection object of type `P`.
53+
* @param mr A memory resource used for allocating intermediate memory.
54+
* @param copy A copy object.
55+
* @param stream A wrapped CUDA stream.
56+
* @param vector The vector which to check for contiguity.
57+
* @return true If `predicate` is true for all elements of `vector`.
58+
* @return false Otherwise.
59+
*/
60+
template <typename P, typename T>
61+
requires std::predicate<P, T> bool true_for_all(
62+
P&& predicate, vecmem::memory_resource& mr, vecmem::copy& copy,
63+
stream& stream, vecmem::data::vector_view<T> vector) {
64+
// This should never be a performance-critical step, so we can keep the
65+
// block size fixed.
66+
constexpr int block_size = 512;
67+
68+
cudaStream_t cuda_stream =
69+
reinterpret_cast<cudaStream_t>(stream.cudaStream());
70+
71+
// Grab the number of elements in our vector.
72+
const std::uint32_t n = copy.get_size(vector);
73+
74+
// Allocate memory for outputs, then set them up.
75+
vecmem::unique_alloc_ptr<bool> device_out =
76+
vecmem::make_unique_alloc<bool>(mr);
77+
78+
bool initial_out = true;
79+
80+
TRACCC_CUDA_ERROR_CHECK(
81+
cudaMemcpyAsync(device_out.get(), &initial_out, sizeof(bool),
82+
cudaMemcpyHostToDevice, cuda_stream));
83+
84+
// Launch the main kernel.
85+
kernels::true_for_all_kernel<P, T>
86+
<<<(n + block_size - 1) / block_size, block_size, 0, cuda_stream>>>(
87+
predicate, vector, device_out.get());
88+
89+
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
90+
91+
// Copy the total number of squashed elements, e.g. the size of the
92+
// resulting vector.
93+
bool host_out;
94+
95+
TRACCC_CUDA_ERROR_CHECK(
96+
cudaMemcpyAsync(&host_out, device_out.get(), sizeof(bool),
97+
cudaMemcpyDeviceToHost, cuda_stream));
98+
99+
stream.synchronize();
100+
101+
return host_out;
102+
}
103+
104+
template <typename P, typename T>
105+
requires std::predicate<P, T> bool false_for_all(
106+
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
107+
stream& stream, vecmem::data::vector_view<T> vector) {
108+
return true_for_all(
109+
[projection] __device__<typename... Args>(Args && ... args) {
110+
return !projection(std::forward<Args>(args)...);
111+
},
112+
mr, copy, stream, vector);
113+
}
114+
115+
template <typename P, typename T>
116+
requires std::predicate<P, T> bool true_for_any(
117+
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
118+
stream& stream, vecmem::data::vector_view<T> vector) {
119+
return !false_for_all(std::forward<P>(projection), mr, copy, stream,
120+
vector);
121+
}
122+
123+
template <typename P, typename T>
124+
requires std::predicate<P, T> bool false_for_any(
125+
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
126+
stream& stream, vecmem::data::vector_view<T> vector) {
127+
return !true_for_all(std::forward<P>(projection), mr, copy, stream, vector);
128+
}
129+
} // namespace traccc::cuda

device/sycl/src/sanity/predicate.hpp

+125
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
// Project include(s).
12+
#include <traccc/sycl/utils/queue_wrapper.hpp>
13+
14+
#include "../utils/get_queue.hpp"
15+
16+
// VecMem include(s).
17+
#include <vecmem/containers/data/vector_view.hpp>
18+
#include <vecmem/containers/device_vector.hpp>
19+
#include <vecmem/memory/memory_resource.hpp>
20+
#include <vecmem/memory/unique_ptr.hpp>
21+
#include <vecmem/utils/copy.hpp>
22+
23+
// SYCL include
24+
#include <CL/sycl.hpp>
25+
26+
// System include
27+
#include <concepts>
28+
29+
namespace traccc::sycl {
30+
31+
/**
32+
* @brief Sanity check that a predicate is true for all elements of a vector.
33+
*
34+
* @note This function runs in O(n) time.
35+
*
36+
* @tparam P The type of the predicate.
37+
* @tparam T The type of the vector.
38+
* @param predicate A projection object of type `P`.
39+
* @param mr A memory resource used for allocating intermediate memory.
40+
* @param copy A copy object.
41+
* @param stream A wrapped CUDA stream.
42+
* @param vector The vector which to check for contiguity.
43+
* @return true If `predicate` is true for all elements of `vector`.
44+
* @return false Otherwise.
45+
*/
46+
template <typename P, typename T>
47+
requires std::predicate<P, T> bool true_for_all(
48+
P&& predicate, vecmem::memory_resource& mr, vecmem::copy& copy,
49+
queue_wrapper& queue_wrapper, vecmem::data::vector_view<T> vector) {
50+
// This should never be a performance-critical step, so we can keep the
51+
// block size fixed.
52+
constexpr int block_size = 512;
53+
54+
cl::sycl::queue& queue = details::get_queue(queue_wrapper);
55+
56+
// Grab the number of elements in our vector.
57+
const std::uint32_t n = copy.get_size(vector);
58+
59+
// Allocate memory for outputs, then set them up.
60+
vecmem::unique_alloc_ptr<bool> device_out =
61+
vecmem::make_unique_alloc<bool>(mr);
62+
63+
bool initial_out = true;
64+
65+
cl::sycl::event kernel1_memcpy1 =
66+
queue.memcpy(out.get(), &initial_out, sizeof(bool));
67+
68+
// Launch the main kernel.
69+
cl::sycl::nd_range<1> kernel_range{
70+
cl::sycl::range<1>(((n + block_size - 1) / block_size) * block_size),
71+
cl::sycl::range<1>(block_size)};
72+
73+
cl::sycl::event kernel1 = queue.submit([&](cl::sycl::handler& h) {
74+
h.depends_on(kernel1_memcpy1);
75+
h.parallel_for<kernels::TrueForAllPredicate<P>>(
76+
kernel_range, [=, out = out.get()](cl::sycl::nd_item<1> item) {
77+
std::size_t tid = item.get_global_linear_id();
78+
79+
vecmem::device_vector<T> in(_in);
80+
81+
if (tid < in.size()) {
82+
if (!projection(in.at(tid))) {
83+
*out = false;
84+
}
85+
}
86+
});
87+
});
88+
89+
// Copy the total number of squashed elements, e.g. the size of the
90+
// resulting vector.
91+
bool host_out;
92+
93+
queue.memcpy(&host_out, out.get(), sizeof(bool), {kernel1})
94+
.wait_and_throw();
95+
96+
return host_out;
97+
}
98+
99+
template <typename P, typename T>
100+
requires std::predicate<P, T> bool false_for_all(
101+
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
102+
queue_wrapper& queue_wrapper, vecmem::data::vector_view<T> vector) {
103+
return true_for_all(
104+
[projection]<typename... Args>(Args && ... args) {
105+
return !projection(std::forward<Args>(args)...);
106+
},
107+
mr, copy, queue_wrapper, vector);
108+
}
109+
110+
template <typename P, typename T>
111+
requires std::predicate<P, T> bool true_for_any(
112+
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
113+
queue_wrapper& queue_wrapper, vecmem::data::vector_view<T> vector) {
114+
return !false_for_all(std::forward<P>(projection), mr, copy, queue_wrapper,
115+
vector);
116+
}
117+
118+
template <typename P, typename T>
119+
requires std::predicate<P, T> bool false_for_any(
120+
P&& projection, vecmem::memory_resource& mr, vecmem::copy& copy,
121+
queue_wrapper& queue_wrapper, vecmem::data::vector_view<T> vector) {
122+
return !true_for_all(std::forward<P>(projection), mr, copy, queue_wrapper,
123+
vector);
124+
}
125+
} // namespace traccc::sycl

tests/cuda/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ traccc_add_test(
4444
test_unique_lock.cu
4545
test_sanity_contiguous_on.cu
4646
test_sanity_ordered_on.cu
47+
test_sanity_predicate.cu
4748
test_sort.cu
4849

4950
LINK_LIBRARIES

0 commit comments

Comments
 (0)