From bc4e182a042cf38e1e18d483e6f3175359e0c8ba Mon Sep 17 00:00:00 2001 From: AuroraPerego Date: Thu, 5 Dec 2024 16:10:49 +0100 Subject: [PATCH] add math functions for complex type for oneAPI backend --- include/alpaka/math/Complex.hpp | 313 ++++++++++++++++- .../alpaka/math/MathUniformCudaHipBuiltIn.hpp | 325 +----------------- 2 files changed, 313 insertions(+), 325 deletions(-) diff --git a/include/alpaka/math/Complex.hpp b/include/alpaka/math/Complex.hpp index f265c7bfa231..624dd565cfbe 100644 --- a/include/alpaka/math/Complex.hpp +++ b/include/alpaka/math/Complex.hpp @@ -1,4 +1,4 @@ -/* Copyright 2022 Sergei Bastrakov +/* Copyright 2024 Sergei Bastrakov, Aurora Perego * SPDX-License-Identifier: MPL-2.0 */ @@ -579,4 +579,315 @@ namespace alpaka } // namespace internal using internal::Complex; + +#if defined(ALPAKA_ACC_SYCL_ENABLED) || defined(ALPAKA_ACC_GPU_CUDA_ENABLED) || defined(ALPAKA_ACC_GPU_HIP_ENABLED) + + namespace math::trait + { + + //! The abs trait specialization for complex types. + template + struct Abs> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + return sqrt(ctx, arg.real() * arg.real() + arg.imag() * arg.imag()); + } + }; + + //! The acos trait specialization for complex types. + template + struct Acos> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // This holds everywhere, including the branch cuts: acos(z) = -i * ln(z + i * sqrt(1 - z^2)) + return Complex{static_cast(0.0), static_cast(-1.0)} + * log( + ctx, + arg + + Complex{static_cast(0.0), static_cast(1.0)} + * sqrt(ctx, static_cast(1.0) - arg * arg)); + } + }; + + //! The acosh trait specialization for complex types. + template + struct Acosh> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // acos(z) = ln(z + sqrt(z-1) * sqrt(z+1)) + return log(ctx, arg + sqrt(ctx, arg - static_cast(1.0)) * sqrt(ctx, arg + static_cast(1.0))); + } + }; + + //! The arg Complex specialization for complex types. + template + struct Arg> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& argument) + { + return atan2(ctx, argument.imag(), argument.real()); + } + }; + + //! The asin trait specialization for complex types. + template + struct Asin> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // This holds everywhere, including the branch cuts: asin(z) = i * ln(sqrt(1 - z^2) - i * z) + return Complex{static_cast(0.0), static_cast(1.0)} + * log( + ctx, + sqrt(ctx, static_cast(1.0) - arg * arg) + - Complex{static_cast(0.0), static_cast(1.0)} * arg); + } + }; + + //! The asinh trait specialization for complex types. + template + struct Asinh> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // asinh(z) = ln(z + sqrt(z^2 + 1)) + return log(ctx, arg + sqrt(ctx, arg * arg + static_cast(1.0))); + } + }; + + //! The atan trait specialization for complex types. + template + struct Atan> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // This holds everywhere, including the branch cuts: atan(z) = -i/2 * ln((i - z) / (i + z)) + return Complex{static_cast(0.0), static_cast(-0.5)} + * log( + ctx, + (Complex{static_cast(0.0), static_cast(1.0)} - arg) + / (Complex{static_cast(0.0), static_cast(1.0)} + arg)); + } + }; + + //! The atanh trait specialization for complex types. + template + struct Atanh> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // atanh(z) = 0.5 * (ln(1 + z) - ln(1 - z)) + return static_cast(0.5) + * (log(ctx, static_cast(1.0) + arg) - log(ctx, static_cast(1.0) - arg)); + } + }; + + //! The conj specialization for complex types. + template + struct Conj> + { + ALPAKA_FN_ACC auto operator()(TAcc const& /* conj_ctx */, Complex const& arg) + { + return Complex{arg.real(), -arg.imag()}; + } + }; + + //! The cos trait specialization for complex types. + template + struct Cos> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // cos(z) = 0.5 * (exp(i * z) + exp(-i * z)) + return T(0.5) + * (exp(ctx, Complex{static_cast(0.0), static_cast(1.0)} * arg) + + exp(ctx, Complex{static_cast(0.0), static_cast(-1.0)} * arg)); + } + }; + + //! The cosh trait specialization for complex types. + template + struct Cosh> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // cosh(z) = 0.5 * (exp(z) + exp(-z)) + return T(0.5) * (exp(ctx, arg) + exp(ctx, static_cast(-1.0) * arg)); + } + }; + + //! The exp trait specialization for complex types. + template + struct Exp> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // exp(z) = exp(x + iy) = exp(x) * (cos(y) + i * sin(y)) + auto re = T{}, im = T{}; + sincos(ctx, arg.imag(), im, re); + return exp(ctx, arg.real()) * Complex{re, im}; + } + }; + + //! The log trait specialization for complex types. + template + struct Log> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& argument) + { + // Branch cut along the negative real axis (same as for std::complex), + // principal value of ln(z) = ln(|z|) + i * arg(z) + return log(ctx, abs(ctx, argument)) + + Complex{static_cast(0.0), static_cast(1.0)} * arg(ctx, argument); + } + }; + + //! The log2 trait specialization for complex types. + template + struct Log2> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& argument) + { + return log(ctx, argument) / log(ctx, static_cast(2)); + } + }; + + //! The log10 trait specialization for complex types. + template + struct Log10> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& argument) + { + return log(ctx, argument) / log(ctx, static_cast(10)); + } + }; + + //! The pow trait specialization for complex types. + template + struct Pow, Complex> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& base, Complex const& exponent) + { + // Type promotion matching rules of complex std::pow but simplified given our math only supports float + // and double, no long double. + using Promoted + = Complex && is_decayed_v, float, double>>; + // pow(z1, z2) = e^(z2 * log(z1)) + return exp(ctx, Promoted{exponent} * log(ctx, Promoted{base})); + } + }; + + //! The pow trait specialization for complex and real types. + template + struct Pow, U> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& base, U const& exponent) + { + return pow(ctx, base, Complex{exponent}); + } + }; + + //! The pow trait specialization for real and complex types. + template + struct Pow> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, T const& base, Complex const& exponent) + { + return pow(ctx, Complex{base}, exponent); + } + }; + + //! The rsqrt trait specialization for complex types. + template + struct Rsqrt> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + return static_cast(1.0) / sqrt(ctx, arg); + } + }; + + //! The sin trait specialization for complex types. + template + struct Sin> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // sin(z) = (exp(i * z) - exp(-i * z)) / 2i + return (exp(ctx, Complex{static_cast(0.0), static_cast(1.0)} * arg) + - exp(ctx, Complex{static_cast(0.0), static_cast(-1.0)} * arg)) + / Complex{static_cast(0.0), static_cast(2.0)}; + } + }; + + //! The sinh trait specialization for complex types. + template + struct Sinh> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // sinh(z) = (exp(z) - exp(-i * z)) / 2 + return (exp(ctx, arg) - exp(ctx, static_cast(-1.0) * arg)) / static_cast(2.0); + } + }; + + //! The sincos trait specialization for complex types. + template + struct SinCos> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg, Complex& result_sin, Complex& result_cos) + -> void + { + result_sin = sin(ctx, arg); + result_cos = cos(ctx, arg); + } + }; + + //! The sqrt trait specialization for complex types. + template + struct Sqrt> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& argument) + { + // Branch cut along the negative real axis (same as for std::complex), + // principal value of sqrt(z) = sqrt(|z|) * e^(i * arg(z) / 2) + auto const halfArg = T(0.5) * arg(ctx, argument); + auto re = T{}, im = T{}; + sincos(ctx, halfArg, im, re); + return sqrt(ctx, abs(ctx, argument)) * Complex(re, im); + } + }; + + //! The tan trait specialization for complex types. + template + struct Tan> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // tan(z) = i * (e^-iz - e^iz) / (e^-iz + e^iz) = i * (1 - e^2iz) / (1 + e^2iz) + // Warning: this straightforward implementation can easily result in NaN as 0/0 or inf/inf. + auto const expValue = exp(ctx, Complex{static_cast(0.0), static_cast(2.0)} * arg); + return Complex{static_cast(0.0), static_cast(1.0)} * (static_cast(1.0) - expValue) + / (static_cast(1.0) + expValue); + } + }; + + //! The tanh trait specialization for complex types. + template + struct Tanh> + { + ALPAKA_FN_ACC auto operator()(TAcc const& ctx, Complex const& arg) + { + // tanh(z) = (e^z - e^-z)/(e^z+e^-z) + return (exp(ctx, arg) - exp(ctx, static_cast(-1.0) * arg)) + / (exp(ctx, arg) + exp(ctx, static_cast(-1.0) * arg)); + } + }; + } // namespace math::trait + +#endif + } // namespace alpaka diff --git a/include/alpaka/math/MathUniformCudaHipBuiltIn.hpp b/include/alpaka/math/MathUniformCudaHipBuiltIn.hpp index 292795ff22d7..c2b96e5c411a 100644 --- a/include/alpaka/math/MathUniformCudaHipBuiltIn.hpp +++ b/include/alpaka/math/MathUniformCudaHipBuiltIn.hpp @@ -1,4 +1,4 @@ -/* Copyright 2023 Axel Huebl, Benjamin Worpitz, Matthias Werner, Bert Wesarg, Valentin Gehrke, René Widera, +/* Copyright 2024 Axel Huebl, Benjamin Worpitz, Matthias Werner, Bert Wesarg, Valentin Gehrke, René Widera, * Jan Stephan, Andrea Bocci, Bernhard Manfred Gruber, Jeffrey Kelling, Sergei Bastrakov * SPDX-License-Identifier: MPL-2.0 */ @@ -305,18 +305,6 @@ namespace alpaka::math } }; - //! The CUDA abs trait specialization for complex types. - template - struct Abs> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - return sqrt(ctx, arg.real() * arg.real() + arg.imag() * arg.imag()); - } - }; - //! The CUDA acos trait specialization for real types. template struct Acos>> @@ -334,19 +322,6 @@ namespace alpaka::math } }; - //! The CUDA acos trait specialization for complex types. - template - struct Acos> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // This holds everywhere, including the branch cuts: acos(z) = -i * ln(z + i * sqrt(1 - z^2)) - return Complex{0.0, -1.0} * log(ctx, arg + Complex{0.0, 1.0} * sqrt(ctx, T(1.0) - arg * arg)); - } - }; - //! The CUDA acosh trait specialization for real types. template struct Acosh>> @@ -364,19 +339,6 @@ namespace alpaka::math } }; - //! The CUDA acosh trait specialization for complex types. - template - struct Acosh> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // acos(z) = ln(z + sqrt(z-1) * sqrt(z+1)) - return log(ctx, arg + sqrt(ctx, arg - static_cast(1.0)) * sqrt(ctx, arg + static_cast(1.0))); - } - }; - //! The CUDA arg trait specialization for real types. template struct Arg>> @@ -390,18 +352,6 @@ namespace alpaka::math } }; - //! The CUDA arg Complex specialization for complex types. - template - struct Arg> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& argument) - { - return atan2(ctx, argument.imag(), argument.real()); - } - }; - //! The CUDA asin trait specialization for real types. template struct Asin>> @@ -419,19 +369,6 @@ namespace alpaka::math } }; - //! The CUDA asin trait specialization for complex types. - template - struct Asin> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // This holds everywhere, including the branch cuts: asin(z) = i * ln(sqrt(1 - z^2) - i * z) - return Complex{0.0, 1.0} * log(ctx, sqrt(ctx, T(1.0) - arg * arg) - Complex{0.0, 1.0} * arg); - } - }; - //! The CUDA asinh trait specialization for real types. template struct Asinh>> @@ -449,19 +386,6 @@ namespace alpaka::math } }; - //! The CUDA asinh trait specialization for complex types. - template - struct Asinh> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // asinh(z) = ln(z + sqrt(z^2 + 1)) - return log(ctx, arg + sqrt(ctx, arg * arg + static_cast(1.0))); - } - }; - //! The CUDA atan trait specialization for real types. template struct Atan>> @@ -479,19 +403,6 @@ namespace alpaka::math } }; - //! The CUDA atan trait specialization for complex types. - template - struct Atan> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // This holds everywhere, including the branch cuts: atan(z) = -i/2 * ln((i - z) / (i + z)) - return Complex{0.0, -0.5} * log(ctx, (Complex{0.0, 1.0} - arg) / (Complex{0.0, 1.0} + arg)); - } - }; - //! The CUDA atanh trait specialization for real types. template struct Atanh>> @@ -509,20 +420,6 @@ namespace alpaka::math } }; - //! The CUDA atanh trait specialization for complex types. - template - struct Atanh> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // atanh(z) = 0.5 * (ln(1 + z) - ln(1 - z)) - return static_cast(0.5) - * (log(ctx, static_cast(1.0) + arg) - log(ctx, static_cast(1.0) - arg)); - } - }; - //! The CUDA atan2 trait specialization. template struct Atan2< @@ -591,16 +488,6 @@ namespace alpaka::math } }; - //! The CUDA conj specialization for complex types. - template - struct Conj> - { - __host__ __device__ auto operator()(ConjUniformCudaHipBuiltIn const& /* conj_ctx */, Complex const& arg) - { - return Complex{arg.real(), -arg.imag()}; - } - }; - //! The CUDA copysign trait specialization for real types. template struct Copysign< @@ -642,19 +529,6 @@ namespace alpaka::math } }; - //! The CUDA cos trait specialization for complex types. - template - struct Cos> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // cos(z) = 0.5 * (exp(i * z) + exp(-i * z)) - return T(0.5) * (exp(ctx, Complex{0.0, 1.0} * arg) + exp(ctx, Complex{0.0, -1.0} * arg)); - } - }; - //! The CUDA cosh trait specialization for real types. template struct Cosh>> @@ -672,19 +546,6 @@ namespace alpaka::math } }; - //! The CUDA cosh trait specialization for complex types. - template - struct Cosh> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // cosh(z) = 0.5 * (exp(z) + exp(-z)) - return T(0.5) * (exp(ctx, arg) + exp(ctx, static_cast(-1.0) * arg)); - } - }; - //! The CUDA erf trait specialization. template struct Erf>> @@ -719,21 +580,6 @@ namespace alpaka::math } }; - //! The CUDA exp trait specialization for complex types. - template - struct Exp> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // exp(z) = exp(x + iy) = exp(x) * (cos(y) + i * sin(y)) - auto re = T{}, im = T{}; - sincos(ctx, arg.imag(), im, re); - return exp(ctx, arg.real()) * Complex{re, im}; - } - }; - //! The CUDA floor trait specialization. template struct Floor>> @@ -855,20 +701,6 @@ namespace alpaka::math } }; - //! The CUDA log trait specialization for complex types. - template - struct Log> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& argument) - { - // Branch cut along the negative real axis (same as for std::complex), - // principal value of ln(z) = ln(|z|) + i * arg(z) - return log(ctx, abs(ctx, argument)) + Complex{0.0, 1.0} * arg(ctx, argument); - } - }; - //! The CUDA log2 trait specialization for real types. template struct Log2>> @@ -903,18 +735,6 @@ namespace alpaka::math } }; - //! The CUDA log10 trait specialization for complex types. - template - struct Log10> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& argument) - { - return log(ctx, argument) / log(ctx, static_cast(10)); - } - }; - //! The CUDA max trait specialization. template struct Max< @@ -1007,47 +827,6 @@ namespace alpaka::math } }; - //! The CUDA pow trait specialization for complex types. - template - struct Pow, Complex> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& base, Complex const& exponent) - { - // Type promotion matching rules of complex std::pow but simplified given our math only supports float - // and double, no long double. - using Promoted - = Complex && is_decayed_v, float, double>>; - // pow(z1, z2) = e^(z2 * log(z1)) - return exp(ctx, Promoted{exponent} * log(ctx, Promoted{base})); - } - }; - - //! The CUDA pow trait specialization for complex and real types. - template - struct Pow, U> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& base, U const& exponent) - { - return pow(ctx, base, Complex{exponent}); - } - }; - - //! The CUDA pow trait specialization for real and complex types. - template - struct Pow> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, T const& base, Complex const& exponent) - { - return pow(ctx, Complex{base}, exponent); - } - }; - //! The CUDA remainder trait specialization. template struct Remainder< @@ -1144,18 +923,6 @@ namespace alpaka::math } }; - //! The CUDA rsqrt trait specialization for complex types. - template - struct Rsqrt> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - return T{1.0} / sqrt(ctx, arg); - } - }; - //! The CUDA sin trait specialization for real types. template struct Sin>> @@ -1173,20 +940,6 @@ namespace alpaka::math } }; - //! The CUDA sin trait specialization for complex types. - template - struct Sin> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // sin(z) = (exp(i * z) - exp(-i * z)) / 2i - return (exp(ctx, Complex{0.0, 1.0} * arg) - exp(ctx, Complex{0.0, -1.0} * arg)) - / Complex{0.0, 2.0}; - } - }; - //! The CUDA sinh trait specialization for real types. template struct Sinh>> @@ -1204,19 +957,6 @@ namespace alpaka::math } }; - //! The CUDA sinh trait specialization for complex types. - template - struct Sinh> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // sinh(z) = (exp(z) - exp(-i * z)) / 2 - return (exp(ctx, arg) - exp(ctx, static_cast(-1.0) * arg)) / static_cast(2.0); - } - }; - //! The CUDA sincos trait specialization for real types. template struct SinCos>> @@ -1236,23 +976,6 @@ namespace alpaka::math } }; - //! The CUDA sincos trait specialization for complex types. - template - struct SinCos> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()( - TCtx const& ctx, - Complex const& arg, - Complex& result_sin, - Complex& result_cos) -> void - { - result_sin = sin(ctx, arg); - result_cos = cos(ctx, arg); - } - }; - //! The CUDA sqrt trait specialization for real types. template struct Sqrt>> @@ -1270,23 +993,6 @@ namespace alpaka::math } }; - //! The CUDA sqrt trait specialization for complex types. - template - struct Sqrt> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& argument) - { - // Branch cut along the negative real axis (same as for std::complex), - // principal value of sqrt(z) = sqrt(|z|) * e^(i * arg(z) / 2) - auto const halfArg = T(0.5) * arg(ctx, argument); - auto re = T{}, im = T{}; - sincos(ctx, halfArg, im, re); - return sqrt(ctx, abs(ctx, argument)) * Complex(re, im); - } - }; - //! The CUDA tan trait specialization for real types. template struct Tan>> @@ -1304,21 +1010,6 @@ namespace alpaka::math } }; - //! The CUDA tan trait specialization for complex types. - template - struct Tan> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // tan(z) = i * (e^-iz - e^iz) / (e^-iz + e^iz) = i * (1 - e^2iz) / (1 + e^2iz) - // Warning: this straightforward implementation can easily result in NaN as 0/0 or inf/inf. - auto const expValue = exp(ctx, Complex{0.0, 2.0} * arg); - return Complex{0.0, 1.0} * (T{1.0} - expValue) / (T{1.0} + expValue); - } - }; - //! The CUDA tanh trait specialization for real types. template struct Tanh>> @@ -1336,20 +1027,6 @@ namespace alpaka::math } }; - //! The CUDA tanh trait specialization for complex types. - template - struct Tanh> - { - //! Take context as original (accelerator) type, since we call other math functions - template - __host__ __device__ auto operator()(TCtx const& ctx, Complex const& arg) - { - // tanh(z) = (e^z - e^-z)/(e^z+e^-z) - return (exp(ctx, arg) - exp(ctx, static_cast(-1.0) * arg)) - / (exp(ctx, arg) + exp(ctx, static_cast(-1.0) * arg)); - } - }; - //! The CUDA trunc trait specialization. template struct Trunc>>