Add view_ptr that allows viewing a pointer U* through a different data type T

stijnh · stijnh · commit f0f74b83e7ab · 2025-08-21T10:13:50.000+02:00
diff --git a/include/kernel_float/approx.h b/include/kernel_float/approx.h
@@ -127,6 +127,7 @@ KERNEL_FLOAT_DEVICE half2_t normalize_trig_input(half2_t x) {
     static constexpr double ONE_OVER_TWOPI = 0.15915494309189535;
     static constexpr double OFFSET = -2042.0;
 
+    // ws = -((x / 2pi + OFFSET) - OFFSET); the returned value is (x / 2pi) + ws
     half2_t ws = __hfma2(x, make_half2(-ONE_OVER_TWOPI), make_half2(-OFFSET)) + make_half2(OFFSET);
     return __hfma2(x, make_half2(ONE_OVER_TWOPI), ws);
 }
diff --git a/include/kernel_float/binops.h b/include/kernel_float/binops.h
@@ -118,12 +118,14 @@ KERNEL_FLOAT_INLINE zip_common_type<F, L, R> zip_common(F fun, const L& left, co
     template<typename L, typename R, typename C = promote_t<L, vector_value_type<R>>, typename E> \
     KERNEL_FLOAT_INLINE zip_common_type<ops::NAME<C>, vector<L, E>, R> operator OP(               \
         const vector<L, E>& left,                                                                 \
-        const R& right) {                                                                         \
+        const R                                                                                   \
+        & right) {                                                                                \
         return zip_common(ops::NAME<C> {}, left, right);                                          \
     }                                                                                             \
     template<typename L, typename R, typename C = promote_t<vector_value_type<L>, R>, typename E> \
     KERNEL_FLOAT_INLINE zip_common_type<ops::NAME<C>, L, vector<R, E>> operator OP(               \
-        const L& left,                                                                            \
+        const L                                                                                   \
+        & left,                                                                                   \
         const vector<R, E>& right) {                                                              \
         return zip_common(ops::NAME<C> {}, left, right);                                          \
     }
@@ -164,16 +166,16 @@ static constexpr bool is_vector_assign_allowed =
         >;
 // clang-format on
 
-#define KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP(NAME, OP)                               \
-    template<                                                                        \
-        typename T,                                                                  \
-        typename E,                                                                  \
-        typename R,                                                                  \
-        typename = enable_if_t<is_vector_assign_allowed<ops::NAME, T, E, R>>>        \
-    KERNEL_FLOAT_INLINE vector<T, E>& operator OP(vector<T, E>& lhs, const R& rhs) { \
-        using F = ops::NAME<T>;                                                      \
-        lhs = zip_common(F {}, lhs, rhs);                                            \
-        return lhs;                                                                  \
+#define KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP(NAME, OP)                                \
+    template<                                                                         \
+        typename T,                                                                   \
+        typename E,                                                                   \
+        typename R,                                                                   \
+        typename = enable_if_t<is_vector_assign_allowed<ops::NAME, T, E, R>>>         \
+    KERNEL_FLOAT_INLINE vector<T, E>& operator OP(vector<T, E>& lhs, const R & rhs) { \
+        using F = ops::NAME<T>;                                                       \
+        lhs = zip_common(F {}, lhs, rhs);                                             \
+        return lhs;                                                                   \
     }
 
 KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP(add, +=)
diff --git a/include/kernel_float/memory.h b/include/kernel_float/memory.h
@@ -414,7 +414,8 @@ struct vector_ref<T, N, const U, Alignment> {
     template<typename T, size_t N, typename U, size_t Alignment, typename V> \
     KERNEL_FLOAT_INLINE vector_ref<T, N, U, Alignment> operator OP_ASSIGN(   \
         vector_ref<T, N, U, Alignment> ptr,                                  \
-        const V& value) {                                                    \
+        const V                                                              \
+        & value) {                                                           \
         ptr.write(ptr.read() OP value);                                      \
         return ptr;                                                          \
     }
@@ -462,19 +463,27 @@ struct vector_ptr {
     vector_ptr() = default;
 
     /**
-     * Constructor from a given pointer. It is up to the user to assert that the pointer is aligned to `Align` elements.
+     * Constructor from a given pointer. It is up to the user to assert that the pointer is aligned to `Alignment`.
      */
+    template<typename V = U, enable_if_t<Alignment != alignof(V), int> = 0>
     KERNEL_FLOAT_INLINE explicit vector_ptr(pointer_type p) : data_(p) {}
 
+    /**
+     * Implicit constructor from a given pointer. Only enabled when `Alignment` equals `alignof(U)`, so the pointer is assumed to carry just that alignment.
+     */
+    template<typename V = U, enable_if_t<Alignment == alignof(V), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(pointer_type p) : data_(p) {}
+
     /**
      * Constructs a vector_ptr from another vector_ptr with potentially different alignment and type. This constructor
      * only allows conversion if the alignment of the source is greater than or equal to the alignment of the target.
      */
-    template<typename T2, size_t N2, size_t A2>
-    KERNEL_FLOAT_INLINE vector_ptr(
-        vector_ptr<T2, N2, U, A2> p,
-        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = {}) :
-        data_(p.get()) {}
+    template<
+        typename T2,
+        size_t N2,
+        size_t A2,
+        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(vector_ptr<T2, N2, U, A2> p) : data_(p.get()) {}
 
     /**
      * Shorthand for `at(0)`.
@@ -548,19 +557,25 @@ struct vector_ptr<T, N, const U, Alignment> {
 
     vector_ptr() = default;
 
+    template<typename V = U, enable_if_t<Alignment != alignof(V), int> = 0>
     KERNEL_FLOAT_INLINE explicit vector_ptr(pointer_type p) : data_(p) {}
 
-    template<typename T2, size_t N2, size_t A2>
-    KERNEL_FLOAT_INLINE vector_ptr(
-        vector_ptr<T2, N2, const U, A2> p,
-        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = {}) :
-        data_(p.get()) {}
+    template<typename V = U, enable_if_t<Alignment == alignof(V), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(pointer_type p) : data_(p) {}
 
-    template<typename T2, size_t N2, size_t A2>
-    KERNEL_FLOAT_INLINE vector_ptr(
-        vector_ptr<T2, N2, U, A2> p,
-        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = {}) :
-        data_(p.get()) {}
+    template<
+        typename T2,
+        size_t N2,
+        size_t A2,
+        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(vector_ptr<T2, N2, const U, A2> p) : data_(p.get()) {}
+
+    template<
+        typename T2,
+        size_t N2,
+        size_t A2,
+        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(vector_ptr<T2, N2, U, A2> p) : data_(p.get()) {}
 
     KERNEL_FLOAT_INLINE vector_ref<value_type, N, const U, Alignment> operator*() const {
         return vector_ref<value_type, N, const U, Alignment> {data_};
@@ -614,7 +629,7 @@ KERNEL_FLOAT_INLINE vector_ptr<T, N, U, A>& operator+=(vector_ptr<T, N, U, A>& p
 /**
  * Creates a `vector_ptr<T, N>` from a raw pointer `T*` by asserting a specific alignment `N`.
  *
- * @tparam N The alignment constraint for the vector_ptr. Defaults to KERNEL_FLOAT_MAX_ALIGNMENT.
+ * @tparam N The alignment constraint for the vector_ptr.
  * @tparam T The type of the elements pointed to by the raw pointer.
  */
 template<size_t N, typename T>
@@ -638,6 +653,9 @@ KERNEL_FLOAT_INLINE vector_ptr<T, 1, T, KERNEL_FLOAT_MAX_ALIGNMENT> assert_align
 template<typename T, size_t N = 1, typename U = T, size_t Align = N>
 using vec_ptr = vector_ptr<T, N, U, Align * sizeof(U)>;
 
+template<typename T, typename U = T>
+using view_ptr = vector_ptr<T, 1, U, alignof(U)>;
+
 #if defined(__cpp_deduction_guides)
 template<typename T>
 vector_ptr(T*) -> vector_ptr<T, 1, T>;
diff --git a/single_include/kernel_float.h b/single_include/kernel_float.h
@@ -16,8 +16,8 @@
 
 //================================================================================
 // this file has been auto-generated, do not modify its contents!
-// date: 2025-08-12 13:55:51.042675
-// git hash: 714ca6b5fd63ef3497d80ef018cb9a9460c91391
+// date: 2025-08-21 10:13:04.148230
+// git hash: 4d0d49cad7962d3f9ba4f2a0abfa2faea3ec7efa
 //================================================================================
 
 #ifndef KERNEL_FLOAT_MACROS_H
@@ -1823,12 +1823,14 @@ KERNEL_FLOAT_INLINE zip_common_type<F, L, R> zip_common(F fun, const L& left, co
     template<typename L, typename R, typename C = promote_t<L, vector_value_type<R>>, typename E> \
     KERNEL_FLOAT_INLINE zip_common_type<ops::NAME<C>, vector<L, E>, R> operator OP(               \
         const vector<L, E>& left,                                                                 \
-        const R& right) {                                                                         \
+        const R                                                                                   \
+        & right) {                                                                                \
         return zip_common(ops::NAME<C> {}, left, right);                                          \
     }                                                                                             \
     template<typename L, typename R, typename C = promote_t<vector_value_type<L>, R>, typename E> \
     KERNEL_FLOAT_INLINE zip_common_type<ops::NAME<C>, L, vector<R, E>> operator OP(               \
-        const L& left,                                                                            \
+        const L                                                                                   \
+        & left,                                                                                   \
         const vector<R, E>& right) {                                                              \
         return zip_common(ops::NAME<C> {}, left, right);                                          \
     }
@@ -1869,16 +1871,16 @@ static constexpr bool is_vector_assign_allowed =
         >;
 // clang-format on
 
-#define KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP(NAME, OP)                               \
-    template<                                                                        \
-        typename T,                                                                  \
-        typename E,                                                                  \
-        typename R,                                                                  \
-        typename = enable_if_t<is_vector_assign_allowed<ops::NAME, T, E, R>>>        \
-    KERNEL_FLOAT_INLINE vector<T, E>& operator OP(vector<T, E>& lhs, const R& rhs) { \
-        using F = ops::NAME<T>;                                                      \
-        lhs = zip_common(F {}, lhs, rhs);                                            \
-        return lhs;                                                                  \
+#define KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP(NAME, OP)                                \
+    template<                                                                         \
+        typename T,                                                                   \
+        typename E,                                                                   \
+        typename R,                                                                   \
+        typename = enable_if_t<is_vector_assign_allowed<ops::NAME, T, E, R>>>         \
+    KERNEL_FLOAT_INLINE vector<T, E>& operator OP(vector<T, E>& lhs, const R & rhs) { \
+        using F = ops::NAME<T>;                                                       \
+        lhs = zip_common(F {}, lhs, rhs);                                             \
+        return lhs;                                                                   \
     }
 
 KERNEL_FLOAT_DEFINE_BINARY_ASSIGN_OP(add, +=)
@@ -2975,7 +2977,8 @@ struct vector_ref<T, N, const U, Alignment> {
     template<typename T, size_t N, typename U, size_t Alignment, typename V> \
     KERNEL_FLOAT_INLINE vector_ref<T, N, U, Alignment> operator OP_ASSIGN(   \
         vector_ref<T, N, U, Alignment> ptr,                                  \
-        const V& value) {                                                    \
+        const V                                                              \
+        & value) {                                                           \
         ptr.write(ptr.read() OP value);                                      \
         return ptr;                                                          \
     }
@@ -3023,19 +3026,27 @@ struct vector_ptr {
     vector_ptr() = default;
 
     /**
-     * Constructor from a given pointer. It is up to the user to assert that the pointer is aligned to `Align` elements.
+     * Constructor from a given pointer. It is up to the user to assert that the pointer is aligned to `Alignment`.
      */
+    template<typename V = U, enable_if_t<Alignment != alignof(V), int> = 0>
     KERNEL_FLOAT_INLINE explicit vector_ptr(pointer_type p) : data_(p) {}
 
+    /**
+     * Implicit constructor from a given pointer. Only enabled when `Alignment` equals `alignof(U)`, so the pointer is assumed to carry just that alignment.
+     */
+    template<typename V = U, enable_if_t<Alignment == alignof(V), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(pointer_type p) : data_(p) {}
+
     /**
      * Constructs a vector_ptr from another vector_ptr with potentially different alignment and type. This constructor
      * only allows conversion if the alignment of the source is greater than or equal to the alignment of the target.
      */
-    template<typename T2, size_t N2, size_t A2>
-    KERNEL_FLOAT_INLINE vector_ptr(
-        vector_ptr<T2, N2, U, A2> p,
-        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = {}) :
-        data_(p.get()) {}
+    template<
+        typename T2,
+        size_t N2,
+        size_t A2,
+        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(vector_ptr<T2, N2, U, A2> p) : data_(p.get()) {}
 
     /**
      * Shorthand for `at(0)`.
@@ -3109,19 +3120,25 @@ struct vector_ptr<T, N, const U, Alignment> {
 
     vector_ptr() = default;
 
+    template<typename V = U, enable_if_t<Alignment != alignof(V), int> = 0>
     KERNEL_FLOAT_INLINE explicit vector_ptr(pointer_type p) : data_(p) {}
 
-    template<typename T2, size_t N2, size_t A2>
-    KERNEL_FLOAT_INLINE vector_ptr(
-        vector_ptr<T2, N2, const U, A2> p,
-        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = {}) :
-        data_(p.get()) {}
+    template<typename V = U, enable_if_t<Alignment == alignof(V), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(pointer_type p) : data_(p) {}
 
-    template<typename T2, size_t N2, size_t A2>
-    KERNEL_FLOAT_INLINE vector_ptr(
-        vector_ptr<T2, N2, U, A2> p,
-        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = {}) :
-        data_(p.get()) {}
+    template<
+        typename T2,
+        size_t N2,
+        size_t A2,
+        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(vector_ptr<T2, N2, const U, A2> p) : data_(p.get()) {}
+
+    template<
+        typename T2,
+        size_t N2,
+        size_t A2,
+        enable_if_t<detail::alignment_divisible(A2, Alignment), int> = 0>
+    KERNEL_FLOAT_INLINE vector_ptr(vector_ptr<T2, N2, U, A2> p) : data_(p.get()) {}
 
     KERNEL_FLOAT_INLINE vector_ref<value_type, N, const U, Alignment> operator*() const {
         return vector_ref<value_type, N, const U, Alignment> {data_};
@@ -3175,7 +3192,7 @@ KERNEL_FLOAT_INLINE vector_ptr<T, N, U, A>& operator+=(vector_ptr<T, N, U, A>& p
 /**
  * Creates a `vector_ptr<T, N>` from a raw pointer `T*` by asserting a specific alignment `N`.
  *
- * @tparam N The alignment constraint for the vector_ptr. Defaults to KERNEL_FLOAT_MAX_ALIGNMENT.
+ * @tparam N The alignment constraint for the vector_ptr.
  * @tparam T The type of the elements pointed to by the raw pointer.
  */
 template<size_t N, typename T>
@@ -3199,6 +3216,9 @@ KERNEL_FLOAT_INLINE vector_ptr<T, 1, T, KERNEL_FLOAT_MAX_ALIGNMENT> assert_align
 template<typename T, size_t N = 1, typename U = T, size_t Align = N>
 using vec_ptr = vector_ptr<T, N, U, Align * sizeof(U)>;
 
+template<typename T, typename U = T>
+using view_ptr = vector_ptr<T, 1, U, alignof(U)>;
+
 #if defined(__cpp_deduction_guides)
 template<typename T>
 vector_ptr(T*) -> vector_ptr<T, 1, T>;
@@ -4749,6 +4769,7 @@ KERNEL_FLOAT_DEVICE half2_t normalize_trig_input(half2_t x) {
     static constexpr double ONE_OVER_TWOPI = 0.15915494309189535;
     static constexpr double OFFSET = -2042.0;
 
+    // ws = -((x / 2pi + OFFSET) - OFFSET); the returned value is (x / 2pi) + ws
     half2_t ws = __hfma2(x, make_half2(-ONE_OVER_TWOPI), make_half2(-OFFSET)) + make_half2(OFFSET);
     return __hfma2(x, make_half2(ONE_OVER_TWOPI), ws);
 }
diff --git a/tests/memory.cu b/tests/memory.cu
@@ -208,6 +208,31 @@ struct vector_ptr_test {
             kf::vec<T, N> h = ptr[1];
             ASSERT_EQ_ALL(h[I], T(3.14));
         }
+
+        {
+            // This does *not* require an explicit constructor (N == 1)
+            kf::vector_ptr<T, 1, U> a1_ptr = storage.data;
+            kf::vector_ptr<const T, 1, U> a2_ptr = storage.data;
+            kf::vector_ptr<T, 1, const U> a3_ptr = storage.data;
+            kf::vector_ptr<const T, 1, const U> a4_ptr = storage.data;
+
+            ASSERT_EQ(a1_ptr.get(), static_cast<U*>(storage.data));
+            ASSERT_EQ(a2_ptr.get(), static_cast<U*>(storage.data));
+            ASSERT_EQ(a3_ptr.get(), static_cast<const U*>(storage.data));
+            ASSERT_EQ(a4_ptr.get(), static_cast<const U*>(storage.data));
+
+            // This *does* require an explicit constructor (N > 1)
+            kf::vector_ptr<T, 2, U> b1_ptr = kf::vector_ptr<T, 2, U>(storage.data);
+            kf::vector_ptr<const T, 2, U> b2_ptr = kf::vector_ptr<const T, 2, U>(storage.data);
+            kf::vector_ptr<T, 2, const U> b3_ptr = kf::vector_ptr<T, 2, const U>(storage.data);
+            kf::vector_ptr<const T, 2, const U> b4_ptr =
+                kf::vector_ptr<const T, 2, const U>(storage.data);
+
+            ASSERT_EQ(b1_ptr.get(), static_cast<U*>(storage.data));
+            ASSERT_EQ(b2_ptr.get(), static_cast<U*>(storage.data));
+            ASSERT_EQ(b3_ptr.get(), static_cast<const U*>(storage.data));
+            ASSERT_EQ(b4_ptr.get(), static_cast<const U*>(storage.data));
+        }
     }
 };
 

Original file line number	Diff line number	Diff line change
`@@ -127,6 +127,7 @@ KERNEL_FLOAT_DEVICE half2_t normalize_trig_input(half2_t x) {`
`127`	`127`	`static constexpr double ONE_OVER_TWOPI = 0.15915494309189535;`
`128`	`128`	`static constexpr double OFFSET = -2042.0;`
`129`	`129`
	`130`	`+ // ws = -((x / 2pi + OFFSET) - OFFSET); the returned value is (x / 2pi) + ws`
`130`	`131`	`half2_t ws = __hfma2(x, make_half2(-ONE_OVER_TWOPI), make_half2(-OFFSET)) + make_half2(OFFSET);`
`131`	`132`	`return __hfma2(x, make_half2(ONE_OVER_TWOPI), ws);`
`132`	`133`	`}`