From 566593d3612c188d20256911a854ceb28259935e Mon Sep 17 00:00:00 2001 From: Georgy Evtushenko Date: Wed, 7 Jun 2023 10:47:39 +0400 Subject: [PATCH] Primitive --- cub/device/dispatch/dispatch_scan.cuh | 33 +++++++++++++++++++-------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/cub/device/dispatch/dispatch_scan.cuh b/cub/device/dispatch/dispatch_scan.cuh index fc2f58126..8974d6715 100644 --- a/cub/device/dispatch/dispatch_scan.cuh +++ b/cub/device/dispatch/dispatch_scan.cuh @@ -206,17 +206,19 @@ namespace detail namespace scan { // TODO Only for sum/max as ScanOp? -template +template ::PRIMITIVE> struct sm90_tuning { static constexpr int threads = 128; static constexpr int items = 15; - using delay_constructor = detail::default_delay_constructor_t; + using delay_constructor = detail::exponential_backoff_constructor_t<32, 1140>; }; template -struct sm90_tuning +struct sm90_tuning { static constexpr int threads = 192; static constexpr int items = 22; @@ -225,7 +227,7 @@ struct sm90_tuning }; template -struct sm90_tuning +struct sm90_tuning { static constexpr int threads = 512; static constexpr int items = 12; @@ -234,7 +236,7 @@ struct sm90_tuning }; template -struct sm90_tuning +struct sm90_tuning { static constexpr int threads = 128; static constexpr int items = 24; @@ -243,7 +245,7 @@ struct sm90_tuning }; template -struct sm90_tuning +struct sm90_tuning { static constexpr int threads = 224; static constexpr int items = 24; @@ -251,17 +253,28 @@ struct sm90_tuning using delay_constructor = detail::fixed_delay_constructor_t<632, 1290>; }; -template -struct sm90_tuning +#if CUB_IS_INT128_ENABLED +template <> +struct sm90_tuning<__int128_t, 16, false> +{ + static constexpr int threads = 576; + static constexpr int items = 21; + + using delay_constructor = detail::fixed_delay_constructor_t<860, 630>; +}; + +template <> +struct sm90_tuning<__uint128_t, 16, false> { static constexpr int threads = 576; static constexpr int items = 21; using delay_constructor = detail::fixed_delay_constructor_t<860, 630>; }; +#endif template <> -struct sm90_tuning +struct sm90_tuning { static constexpr int threads = 128; static constexpr int items = 24; @@ -270,7 +283,7 @@ struct sm90_tuning }; template <> -struct sm90_tuning +struct sm90_tuning { static constexpr int threads = 224; static constexpr int items = 24;