// Patch summary (commentary only; text before the first "diff --git" header is not part of any hunk).
//
// Purpose: on CUDA device passes where __CUDA_ARCH__ is defined and >= 350, route pointer
// loads through the `__ldg` intrinsic when the pointee type satisfies `is_texture_type`;
// every other configuration keeps the plain `*ptr` dereference.
//
// Files touched and what each hunk does:
//   * include/gridtools/fn/cartesian.hpp
//       - adds `#include "../common/cuda_type_traits.hpp"` under the same __CUDA_ARCH__ guard;
//       - `deref(iterator const &)`: a guarded `if constexpr` returns `__ldg(it.m_ptr)`;
//         otherwise control falls through to the pre-existing `return *it.m_ptr;`.
//   * include/gridtools/fn/sid_neighbor_table.hpp
//       - adds the same guarded include;
//       - inside the `element_idx` loop, `neighbors[element_idx] = *ptr;` becomes a guarded
//         `if constexpr ... __ldg(ptr); else` followed by the plain assignment. The `else`
//         sits before `#endif`, so the plain assignment is both the else-branch (guard active)
//         and the only statement (guard inactive).
//   * include/gridtools/fn/unstructured.hpp
//       - adds the same guarded include;
//       - `deref(iterator const &)`: the shifted pointer is first stored in a local `ptr`
//         (previously dereferenced inline), then the same guarded `__ldg` early return is
//         applied before `return *ptr;`.
//
// NOTE(review): in this copy the template argument lists look stripped (e.g. `std::is_pointer_v &&`,
//   `is_texture_type>>::value`, `integral_constant;`, `template GT_FUNCTION ...`) and the patch is
//   collapsed onto two physical lines -- presumably an extraction artifact; TODO confirm against the
//   original patch before trying to apply it.
// NOTE(review): the two `deref` hunks use a bare `if constexpr { return ...; }` followed by an
//   unconditional `return`, while the neighbor-table hunk uses `if constexpr ... else`. Both return
//   statements therefore take part in `auto` return-type deduction in the `deref` functions --
//   confirm the deduced types agree, or unify on the `else` form.
// NOTE(review): the `#include` is conditional on __CUDA_ARCH__, so host and device compilation
//   passes see different include sets -- presumably harmless because every use is guarded the
//   same way; verify against the CUDA toolchain's rules for __CUDA_ARCH__.
diff --git a/include/gridtools/fn/cartesian.hpp b/include/gridtools/fn/cartesian.hpp index f844795a0..1d08b999a 100644 --- a/include/gridtools/fn/cartesian.hpp +++ b/include/gridtools/fn/cartesian.hpp @@ -16,6 +16,10 @@ #include "./common_interface.hpp" #include "./executor.hpp" +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +#include "../common/cuda_type_traits.hpp" +#endif + namespace gridtools::fn { namespace cartesian::dim { using i = integral_constant; @@ -44,6 +48,12 @@ namespace gridtools::fn { template GT_FUNCTION auto deref(iterator const &it) { +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 + if constexpr (std::is_pointer_v && + is_texture_type>>::value) { + return __ldg(it.m_ptr); + } +#endif return *it.m_ptr; } diff --git a/include/gridtools/fn/sid_neighbor_table.hpp b/include/gridtools/fn/sid_neighbor_table.hpp index 1cf7cd480..a422a9680 100644 --- a/include/gridtools/fn/sid_neighbor_table.hpp +++ b/include/gridtools/fn/sid_neighbor_table.hpp @@ -16,6 +16,10 @@ #include "../fn/unstructured.hpp" #include "../sid/concept.hpp" +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +#include "../common/cuda_type_traits.hpp" +#endif + namespace gridtools::fn::sid_neighbor_table { namespace sid_neighbor_table_impl_ { template (table.strides), index); for (std::size_t element_idx = 0; element_idx < MaxNumNeighbors; ++element_idx) { - neighbors[element_idx] = *ptr; +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 + if constexpr (std::is_pointer_v && + is_texture_type>>::value) + neighbors[element_idx] = __ldg(ptr); + else +#endif + neighbors[element_idx] = *ptr; sid::shift(ptr, sid::get_stride(table.strides), 1_c); } return neighbors; diff --git a/include/gridtools/fn/unstructured.hpp b/include/gridtools/fn/unstructured.hpp index 3793f95ee..6148719f0 100644 --- a/include/gridtools/fn/unstructured.hpp +++ b/include/gridtools/fn/unstructured.hpp @@ -20,6 +20,10 @@ #include "./executor.hpp" #include "./neighbor_table.hpp" +#if defined(__CUDA_ARCH__) && 
__CUDA_ARCH__ >= 350 +#include "../common/cuda_type_traits.hpp" +#endif + namespace gridtools::fn { namespace unstructured::dim { using horizontal = integral_constant; @@ -80,7 +84,14 @@ namespace gridtools::fn { GT_FUNCTION constexpr auto deref(iterator const &it) { GT_PROMISE(can_deref(it)); decltype(auto) stride = host_device::at_key(sid::get_stride(it.m_strides)); - return *sid::shifted(it.m_ptr, stride, it.m_index); + auto ptr = sid::shifted(it.m_ptr, stride, it.m_index); +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 + if constexpr (std::is_pointer_v && + is_texture_type>>::value) { + return __ldg(ptr); + } +#endif + return *ptr; } template