1616
1717// ================================================================================
1818// this file has been auto-generated, do not modify its contents!
19- // date: 2025-08-12 13:55:51.042675
20- // git hash: 714ca6b5fd63ef3497d80ef018cb9a9460c91391
19+ // date: 2025-08-21 10:13:04.148230
20+ // git hash: 4d0d49cad7962d3f9ba4f2a0abfa2faea3ec7efa
2121// ================================================================================
2222
2323#ifndef KERNEL_FLOAT_MACROS_H
@@ -1823,12 +1823,14 @@ KERNEL_FLOAT_INLINE zip_common_type<F, L, R> zip_common(F fun, const L& left, co
18231823 template <typename L, typename R, typename C = promote_t <L, vector_value_type<R>>, typename E> \
18241824 KERNEL_FLOAT_INLINE zip_common_type<ops::NAME<C>, vector<L, E>, R> operator OP ( \
18251825 const vector<L, E>& left, \
1826- const R& right) { \
1826+ const R \
1827+ & right) { \
18271828 return zip_common (ops::NAME<C> {}, left, right); \
18281829 } \
18291830 template <typename L, typename R, typename C = promote_t <vector_value_type<L>, R>, typename E> \
18301831 KERNEL_FLOAT_INLINE zip_common_type<ops::NAME<C>, L, vector<R, E>> operator OP ( \
1831- const L& left, \
1832+ const L \
1833+ & left, \
18321834 const vector<R, E>& right) { \
18331835 return zip_common (ops::NAME<C> {}, left, right); \
18341836 }
@@ -1869,16 +1871,16 @@ static constexpr bool is_vector_assign_allowed =
18691871 >;
18701872// clang-format on
18711873
1872- #define KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP (NAME, OP ) \
1873- template < \
1874- typename T, \
1875- typename E, \
1876- typename R, \
1877- typename = enable_if_t <is_vector_assign_allowed<ops::NAME, T, E, R>>> \
1878- KERNEL_FLOAT_INLINE vector<T, E>& operator OP (vector<T, E>& lhs, const R& rhs) { \
1879- using F = ops::NAME<T>; \
1880- lhs = zip_common (F {}, lhs, rhs); \
1881- return lhs; \
1874+ #define KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP (NAME, OP ) \
1875+ template < \
1876+ typename T, \
1877+ typename E, \
1878+ typename R, \
1879+ typename = enable_if_t <is_vector_assign_allowed<ops::NAME, T, E, R>>> \
1880+ KERNEL_FLOAT_INLINE vector<T, E>& operator OP (vector<T, E>& lhs, const R & rhs) { \
1881+ using F = ops::NAME<T>; \
1882+ lhs = zip_common (F {}, lhs, rhs); \
1883+ return lhs; \
18821884 }
18831885
18841886KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP (add, +=)
@@ -2975,7 +2977,8 @@ struct vector_ref<T, N, const U, Alignment> {
29752977 template <typename T, size_t N, typename U, size_t Alignment, typename V> \
29762978 KERNEL_FLOAT_INLINE vector_ref<T, N, U, Alignment> operator OP_ASSIGN ( \
29772979 vector_ref<T, N, U, Alignment> ptr, \
2978- const V& value) { \
2980+ const V \
2981+ & value) { \
29792982 ptr.write (ptr.read () OP value); \
29802983 return ptr; \
29812984 }
@@ -3023,19 +3026,27 @@ struct vector_ptr {
30233026 vector_ptr () = default;
30243027
30253028 /**
3026- * Constructor from a given pointer. It is up to the user to assert that the pointer is aligned to `Align` elements.
3029+ * Constructor from a given pointer. It is up to the user to assert that the pointer is aligned to `Alignment`.
30273030 */
3031+ template <typename V = U, enable_if_t <Alignment != alignof (V), int > = 0 >
30283032 KERNEL_FLOAT_INLINE explicit vector_ptr (pointer_type p) : data_(p) {}
30293033
3034+ /**
3035+ * Constructor from a given pointer. This assumes that the alignment of the pointer equals `Alignment`.
3036+ */
3037+ template <typename V = U, enable_if_t <Alignment == alignof (V), int > = 0 >
3038+ KERNEL_FLOAT_INLINE vector_ptr (pointer_type p) : data_(p) {}
3039+
30303040 /**
30313041 * Constructs a vector_ptr from another vector_ptr with potentially different alignment and type. This constructor
30323042 * only allows conversion if the alignment of the source is greater than or equal to the alignment of the target.
30333043 */
3034- template <typename T2, size_t N2, size_t A2>
3035- KERNEL_FLOAT_INLINE vector_ptr (
3036- vector_ptr<T2, N2, U, A2> p,
3037- enable_if_t <detail::alignment_divisible(A2, Alignment), int> = {}) :
3038- data_ (p.get()) {}
3044+ template <
3045+ typename T2,
3046+ size_t N2,
3047+ size_t A2,
3048+ enable_if_t <detail::alignment_divisible(A2, Alignment), int > = 0 >
3049+ KERNEL_FLOAT_INLINE vector_ptr (vector_ptr<T2, N2, U, A2> p) : data_(p.get()) {}
30393050
30403051 /**
30413052 * Shorthand for `at(0)`.
@@ -3109,19 +3120,25 @@ struct vector_ptr<T, N, const U, Alignment> {
31093120
31103121 vector_ptr () = default;
31113122
3123+ template <typename V = U, enable_if_t <Alignment != alignof (V), int > = 0 >
31123124 KERNEL_FLOAT_INLINE explicit vector_ptr (pointer_type p) : data_(p) {}
31133125
3114- template <typename T2, size_t N2, size_t A2>
3115- KERNEL_FLOAT_INLINE vector_ptr (
3116- vector_ptr<T2, N2, const U, A2> p,
3117- enable_if_t <detail::alignment_divisible(A2, Alignment), int> = {}) :
3118- data_ (p.get()) {}
3126+ template <typename V = U, enable_if_t <Alignment == alignof (V), int > = 0 >
3127+ KERNEL_FLOAT_INLINE vector_ptr (pointer_type p) : data_(p) {}
31193128
3120- template <typename T2, size_t N2, size_t A2>
3121- KERNEL_FLOAT_INLINE vector_ptr (
3122- vector_ptr<T2, N2, U, A2> p,
3123- enable_if_t <detail::alignment_divisible(A2, Alignment), int> = {}) :
3124- data_ (p.get()) {}
3129+ template <
3130+ typename T2,
3131+ size_t N2,
3132+ size_t A2,
3133+ enable_if_t <detail::alignment_divisible(A2, Alignment), int > = 0 >
3134+ KERNEL_FLOAT_INLINE vector_ptr (vector_ptr<T2, N2, const U, A2> p) : data_(p.get()) {}
3135+
3136+ template <
3137+ typename T2,
3138+ size_t N2,
3139+ size_t A2,
3140+ enable_if_t <detail::alignment_divisible(A2, Alignment), int > = 0 >
3141+ KERNEL_FLOAT_INLINE vector_ptr (vector_ptr<T2, N2, U, A2> p) : data_(p.get()) {}
31253142
31263143 KERNEL_FLOAT_INLINE vector_ref<value_type, N, const U, Alignment> operator *() const {
31273144 return vector_ref<value_type, N, const U, Alignment> {data_};
@@ -3175,7 +3192,7 @@ KERNEL_FLOAT_INLINE vector_ptr<T, N, U, A>& operator+=(vector_ptr<T, N, U, A>& p
31753192/**
31763193 * Creates a `vector_ptr<T, N>` from a raw pointer `T*` by asserting a specific alignment `N`.
31773194 *
3178- * @tparam N The alignment constraint for the vector_ptr. Defaults to KERNEL_FLOAT_MAX_ALIGNMENT.
3195+ * @tparam N The alignment constraint for the vector_ptr.
31793196 * @tparam T The type of the elements pointed to by the raw pointer.
31803197 */
31813198template <size_t N, typename T>
@@ -3199,6 +3216,9 @@ KERNEL_FLOAT_INLINE vector_ptr<T, 1, T, KERNEL_FLOAT_MAX_ALIGNMENT> assert_align
31993216template <typename T, size_t N = 1 , typename U = T, size_t Align = N>
32003217using vec_ptr = vector_ptr<T, N, U, Align * sizeof (U)>;
32013218
3219+ template <typename T, typename U = T>
3220+ using view_ptr = vector_ptr<T, 1 , U, alignof (U)>;
3221+
32023222#if defined(__cpp_deduction_guides)
32033223template <typename T>
32043224vector_ptr (T*) -> vector_ptr<T, 1, T>;
@@ -4749,6 +4769,7 @@ KERNEL_FLOAT_DEVICE half2_t normalize_trig_input(half2_t x) {
47494769 static constexpr double ONE_OVER_TWOPI = 0.15915494309189535 ;
47504770 static constexpr double OFFSET = -2042.0 ;
47514771
4772+ // ws = -((x / 2pi + OFFSET) - OFFSET), so the value returned below is (x / 2pi) - ((x / 2pi + OFFSET) - OFFSET)
47524773 half2_t ws = __hfma2 (x, make_half2 (-ONE_OVER_TWOPI), make_half2 (-OFFSET)) + make_half2 (OFFSET);
47534774 return __hfma2 (x, make_half2 (ONE_OVER_TWOPI), ws);
47544775}
0 commit comments