Skip to content

Commit

Permalink
Detect more address stability
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Nov 6, 2024
1 parent c97f2e3 commit 7a23e84
Show file tree
Hide file tree
Showing 7 changed files with 329 additions and 10 deletions.
84 changes: 80 additions & 4 deletions libcudacxx/include/cuda/__functional/address_stability.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,74 @@
# pragma system_header
#endif // no system header

#include <cuda/std/__type_traits/conjunction.h>
#include <cuda/std/__type_traits/integral_constant.h>
#include <cuda/std/__type_traits/is_function.h>
#include <cuda/std/__type_traits/is_reference.h>
#include <cuda/std/__type_traits/negation.h>
#include <cuda/std/__type_traits/remove_pointer.h>
#include <cuda/std/__type_traits/void_t.h>
#include <cuda/std/__utility/forward.h>
#include <cuda/std/__utility/move.h>

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

template <typename _FuncPtr>
struct __all_parameters_by_value_fptr : _CUDA_VSTD::false_type
{};

// TODO(bgruber): does the reference detection even work for proxy references? If a callable takes a
// thrust::device_reference<T> or a std::reference_wrapper<T>, allowing a copy would be wrong

template <typename R, typename... Args>
struct __all_parameters_by_value_fptr<R (*)(Args...)>
: _CUDA_VSTD::conjunction<_CUDA_VSTD::_Not<_CUDA_VSTD::is_reference<Args>>...>
{};

template <typename R, typename C, typename... Args>
struct __all_parameters_by_value_fptr<R (C::*)(Args...)>
: _CUDA_VSTD::conjunction<_CUDA_VSTD::_Not<_CUDA_VSTD::is_reference<Args>>...>
{};

template <typename R, typename C, typename... Args>
struct __all_parameters_by_value_fptr<R (C::*)(Args...) const>
: _CUDA_VSTD::conjunction<_CUDA_VSTD::_Not<_CUDA_VSTD::is_reference<Args>>...>
{};

// case for when we cannot address the call target
template <typename _F, typename _SFINAE = void>
struct __all_parameters_by_value : _CUDA_VSTD::false_type
{};

// case for function pointers
template <typename _FP>
struct __all_parameters_by_value<
_FP,
_CUDA_VSTD::__enable_if_t<_CUDA_VSTD::is_pointer<_FP>::value
&& _CUDA_VSTD::is_function<_CUDA_VSTD::__remove_pointer_t<_FP>>::value>>
: __all_parameters_by_value_fptr<_FP>
{};

// case for function objects
template <typename _F>
struct __all_parameters_by_value<_F, _CUDA_VSTD::void_t<decltype(&_F::operator())>>
: __all_parameters_by_value_fptr<decltype(&_F::operator())>
{};

// case for functions
template <typename _F>
struct __all_parameters_by_value<_F, _CUDA_VSTD::__enable_if_t<_CUDA_VSTD::is_function<_F>::value>>
: __all_parameters_by_value_fptr<decltype(&::cuda::std::declval<_F>())>
{};

//! Trait telling whether a function object type F does not rely on the memory addresses of its arguments. The nested
//! value is true when the addresses of the arguments do not matter and arguments can be provided from arbitrary copies
//! of the respective sources. This trait can be specialized for custom function objects types.
//! @see proclaim_copyable_arguments
// TODO(bgruber): if we ever get something like declcall (https://wg21.link/P2825), we should use it here to inspect the
// signature of the function that overload resolution chose
template <typename F, typename SFINAE = void>
struct proclaims_copyable_arguments : _CUDA_VSTD::false_type
struct proclaims_copyable_arguments : _CUDA_VSTD::conjunction<__all_parameters_by_value<F>>
{};

#if !defined(_CCCL_NO_VARIABLE_TEMPLATES)
Expand All @@ -39,10 +96,29 @@ _CCCL_INLINE_VAR constexpr bool proclaims_copyable_arguments_v = proclaims_copya
#endif // !_CCCL_NO_VARIABLE_TEMPLATES

// Wrapper for a callable to mark it as permitting copied arguments
template <typename F>
struct __callable_permitting_copied_arguments : F
template <typename _F, typename _SFINAE = void>
struct __callable_permitting_copied_arguments : _F
{
using F::operator();
using _F::operator();
};

// TODO(bgruber): maybe just provide one implementation that stores the callable as a member
template <typename _FP>
struct __callable_permitting_copied_arguments<
_FP,
_CUDA_VSTD::__enable_if_t<_CUDA_VSTD::is_pointer<_FP>::value
&& _CUDA_VSTD::is_function<_CUDA_VSTD::__remove_pointer_t<_FP>>::value>>
{
_FP __fp;

// TODO(bgruber): we may just use ::cuda::std::invoke() here
template <typename... _Args>
auto operator()(_Args&&... args) const -> decltype(__fp(_CUDA_VSTD::forward<_Args>(args)...))
{
return __fp(_CUDA_VSTD::forward<_Args>(args)...);
}

static constexpr bool allows_copied_arguments = true;
};

template <typename F>
Expand Down
7 changes: 7 additions & 0 deletions libcudacxx/include/cuda/std/__functional/not_fn.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# pragma system_header
#endif // no system header

#include <cuda/__functional/address_stability.h>
#include <cuda/std/__functional/invoke.h>
#include <cuda/std/__functional/perfect_forward.h>
#include <cuda/std/__type_traits/decay.h>
Expand Down Expand Up @@ -72,4 +73,10 @@ _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 auto not_fn(_Fn&& __f)

_LIBCUDACXX_END_NAMESPACE_STD

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA
template <typename _Fn>
struct proclaims_copyable_arguments<_CUDA_VSTD::__not_fn_t<_Fn>> : ::cuda::proclaims_copyable_arguments<_Fn>
{};
_LIBCUDACXX_END_NAMESPACE_CUDA

#endif // _LIBCUDACXX___FUNCTIONAL_NOT_FN_H
45 changes: 45 additions & 0 deletions libcudacxx/include/cuda/std/__functional/operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@
# pragma system_header
#endif // no system header

#include <cuda/__functional/address_stability.h>
#include <cuda/std/__functional/binary_function.h>
#include <cuda/std/__functional/unary_function.h>
#include <cuda/std/__type_traits/conjunction.h>
#include <cuda/std/__type_traits/is_class.h>
#include <cuda/std/__type_traits/is_enum.h>
#include <cuda/std/__type_traits/is_void.h>
#include <cuda/std/__utility/forward.h>

_LIBCUDACXX_BEGIN_NAMESPACE_STD
Expand Down Expand Up @@ -527,4 +532,44 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT logical_or<void>

_LIBCUDACXX_END_NAMESPACE_STD

_LIBCUDACXX_BEGIN_NAMESPACE_CUDA

template <typename _T>
struct __has_builtin_operators
: _CUDA_VSTD::bool_constant<!_CUDA_VSTD::is_class<_T>::value && !_CUDA_VSTD::is_enum<_T>::value
&& !_CUDA_VSTD::is_void<_T>::value>
{};

#define _LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(functor) \
/*we know what plus<T> etc. does if T is not a type that could have a weird operatorX() */ \
template <typename _T> \
struct proclaims_copyable_arguments<functor<_T>> : __has_builtin_operators<_T> \
{}; \
/*we do not know what plus<void> etc. does, which depends on the types it is invoked on */ \
template <> \
struct proclaims_copyable_arguments<functor<void>> : _CUDA_VSTD::false_type \
{};

_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::plus);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::minus);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::multiplies);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::divides);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::modulus);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::negate);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::bit_and);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::bit_not);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::bit_or);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::bit_xor);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::equal_to);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::not_equal_to);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::less);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::less_equal);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::greater_equal);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::greater);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::logical_and);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::logical_not);
_LIBCUDACXX_MARK_CAN_COPY_ARGUMENTS(_CUDA_VSTD::logical_or);

_LIBCUDACXX_END_NAMESPACE_CUDA

#endif // _LIBCUDACXX___FUNCTIONAL_OPERATIONS_H
2 changes: 2 additions & 0 deletions libcudacxx/include/cuda/std/__functional/ranges_operations.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
_LIBCUDACXX_BEGIN_NAMESPACE_RANGES
_LIBCUDACXX_BEGIN_NAMESPACE_RANGES_ABI

// TODO(bgruber): do we need to specialize proclaims_copyable_arguments here as well?

struct equal_to
{
_LIBCUDACXX_TEMPLATE(class _Tp, class _Up)
Expand Down
170 changes: 164 additions & 6 deletions thrust/testing/address_stability.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,181 @@

#include <unittest/unittest.h>

struct addable
{
_CCCL_HOST_DEVICE friend auto operator+(const addable&, const addable&) -> addable
{
return addable{};
}
};

void TestAddressStabilityLibcuxx()
{
using ::cuda::proclaim_copyable_arguments;
using ::cuda::proclaims_copyable_arguments;

// libcu++ function objects with known types
static_assert(proclaims_copyable_arguments<::cuda::std::plus<int>>::value, "");
static_assert(!proclaims_copyable_arguments<::cuda::std::plus<>>::value, "");

// libcu++ function objects with unknown types
static_assert(!proclaims_copyable_arguments<::cuda::std::plus<addable>>::value, "");
static_assert(!proclaims_copyable_arguments<::cuda::std::plus<>>::value, "");

// libcu++ function objects with unknown types and opt-in
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(cuda::std::plus<addable>{}))>::value,
"");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(cuda::std::plus<>{}))>::value, "");
}
DECLARE_UNITTEST(TestAddressStabilityLibcuxx);

void TestAddressStabilityThrust()
{
using ::cuda::proclaim_copyable_arguments;
using ::cuda::proclaims_copyable_arguments;

// thrust function objects with known types
static_assert(proclaims_copyable_arguments<thrust::plus<int>>::value, "");
static_assert(!proclaims_copyable_arguments<thrust::plus<>>::value, "");

// thrust function objects with unknown types
static_assert(!proclaims_copyable_arguments<thrust::plus<addable>>::value, "");
static_assert(!proclaims_copyable_arguments<thrust::plus<>>::value, "");

// thrust function objects with unknown types and opt-in
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(thrust::plus<addable>{}))>::value,
"");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(thrust::plus<>{}))>::value, "");
}
DECLARE_UNITTEST(TestAddressStabilityThrust);

template <typename T>
struct my_plus
{
_CCCL_HOST_DEVICE auto operator()(T a, T b) const -> T
{
return a + b;
}
};

struct pathological_plus
{
// can copy
_CCCL_HOST_DEVICE auto operator()(int a, int b) const -> int
{
return a + b;
}

// cannot copy
_CCCL_HOST_DEVICE auto operator()(const float& a, const float& b) const -> float
{
return a + b;
}
};

void TestAddressStability()
#define MY_GENERIC_PLUS(suffix, param) \
struct my_generic_plus##suffix \
{ \
template <typename T> \
_CCCL_HOST_DEVICE auto operator()(param a, param b) const -> T \
{ \
return a + b; \
} \
}

MY_GENERIC_PLUS(, T);
MY_GENERIC_PLUS(_lref, T&);
MY_GENERIC_PLUS(_rref, T&&);
MY_GENERIC_PLUS(_clref, const T&);
MY_GENERIC_PLUS(_crref, const T&&);

void TestAddressStabilityUserDefinedFunctionObject()
{
using ::cuda::proclaim_copyable_arguments;
using ::cuda::proclaims_copyable_arguments;

// by-value overload
static_assert(proclaims_copyable_arguments<my_plus<int>>::value, "");

// by-reference overload
static_assert(!proclaims_copyable_arguments<my_plus<int&>>::value, "");
static_assert(!proclaims_copyable_arguments<my_plus<const int&>>::value, "");
static_assert(!proclaims_copyable_arguments<my_plus<int&&>>::value, "");
static_assert(!proclaims_copyable_arguments<my_plus<const int&&>>::value, "");

// by-reference overload with opt-in
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(my_plus<int&>{}))>::value, "");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(my_plus<const int&>{}))>::value, "");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(my_plus<int&&>{}))>::value, "");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(my_plus<const int&&>{}))>::value, "");

// pathological overloaded set
static_assert(!proclaims_copyable_arguments<pathological_plus>::value, "");

// call operator is a template
static_assert(!proclaims_copyable_arguments<my_generic_plus>::value, ""); // may be solvable if we know T
static_assert(!proclaims_copyable_arguments<my_generic_plus_lref>::value, "");
static_assert(!proclaims_copyable_arguments<my_generic_plus_rref>::value, "");
static_assert(!proclaims_copyable_arguments<my_generic_plus_clref>::value, "");
static_assert(!proclaims_copyable_arguments<my_generic_plus_crref>::value, "");
}
DECLARE_UNITTEST(TestAddressStabilityUserDefinedFunctionObject);

_CCCL_HOST_DEVICE auto my_plus_func(int a, int b) -> int
{
return a + b;
}

_CCCL_HOST_DEVICE auto my_plus_func_ref(const int& a, const int& b) -> int
{
return a + b;
}

void TestAddressStabilityUserDefinedFunctions()
{
using ::cuda::proclaim_copyable_arguments;
using ::cuda::proclaims_copyable_arguments;

static_assert(!proclaims_copyable_arguments<thrust::plus<int>>::value, "");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(thrust::plus<int>{}))>::value, "");
// user-defined function types
static_assert(proclaims_copyable_arguments<int(int, int)>::value, "");
static_assert(!proclaims_copyable_arguments<int(const int&, const int&)>::value, "");

static_assert(proclaims_copyable_arguments<decltype(my_plus_func)>::value, "");
static_assert(!proclaims_copyable_arguments<decltype(my_plus_func_ref)>::value, "");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(my_plus_func_ref))>::value, "");

// user-defined function pointer types
static_assert(proclaims_copyable_arguments<int (*)(int, int)>::value, "");
static_assert(!proclaims_copyable_arguments<int (*)(const int&, const int&)>::value, "");

static_assert(proclaims_copyable_arguments<decltype(&my_plus_func)>::value, "");
static_assert(!proclaims_copyable_arguments<decltype(&my_plus_func_ref)>::value, "");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(&my_plus_func_ref))>::value, "");

// TODO(bgruber): test user-defined function reference types?
}
DECLARE_UNITTEST(TestAddressStabilityUserDefinedFunctions);

struct my_plus_proxy_ref
{
int* a_ptr;

// has a by-value argument
auto operator()(::cuda::std::reference_wrapper<int> a, ::cuda::std::reference_wrapper<int> b) const -> int
{
int* address = &a.get(); // allows to recover the address of the argument
ASSERT_EQUAL(address, a_ptr);
return a + b;
}
};

void TestAddressStabilityProxyReferences()
{
using ::cuda::proclaims_copyable_arguments;

static_assert(!proclaims_copyable_arguments<my_plus>::value, "");
static_assert(proclaims_copyable_arguments<decltype(proclaim_copyable_arguments(my_plus{}))>::value, "");
int a = 1;
int b = 2;
ASSERT_EQUAL(my_plus_proxy_ref{&a}(a, b), 3);
static_assert(!proclaims_copyable_arguments<my_plus_proxy_ref>::value, ""); // TODO
}
DECLARE_UNITTEST(TestAddressStability);
DECLARE_UNITTEST(TestAddressStabilityProxyReferences);
Loading

0 comments on commit 7a23e84

Please sign in to comment.