@@ -3228,67 +3228,101 @@ Get(D d, VFromD<D> v) {
3228
3228
}
3229
3229
}
3230
3230
3231
- #define HWY_RVV_SET (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
3232
- MLEN, NAME, OP) \
3233
- template <size_t kIndex > \
3234
- HWY_API HWY_RVV_V (BASE, SEW, LMUL) \
3235
- NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMULH) v) { \
3236
- return __riscv_v##OP##_v_##CHAR##SEW##LMULH##_##CHAR##SEW##LMUL ( \
3237
- dest, kIndex , v); /* no AVL */ \
3231
+ #define HWY_RVV_PARTIAL_VEC_SET_HALF (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
3232
+ LMULH, SHIFT, MLEN, NAME, OP) \
3233
+ template <size_t kIndex > \
3234
+ HWY_API HWY_RVV_V (BASE, SEW, LMUL) \
3235
+ NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMULH) v, \
3236
+ size_t half_N) { \
3237
+ static_assert (kIndex == 0 || kIndex == 1 , " kIndex must be 0 or 1" ); \
3238
+ const DFromV<decltype (dest)> d; \
3239
+ HWY_IF_CONSTEXPR (kIndex == 0 ) { \
3240
+ return __riscv_v##OP##_v_v_##CHAR##SEW##LMUL##_tu (dest, Ext (d, v), \
3241
+ half_N); \
3242
+ } \
3243
+ else { \
3244
+ return SlideUp (dest, Ext (d, v), half_N); \
3245
+ } \
3238
3246
}
3239
- #define HWY_RVV_SET_VIRT (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3240
- SHIFT, MLEN, NAME, OP) \
3241
- template <size_t kIndex > \
3242
- HWY_API HWY_RVV_V (BASE, SEW, LMUL) \
3243
- NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMULH) v) { \
3244
- static_assert (kIndex == 0 || kIndex == 1 , " kIndex must be 0 or 1" ); \
3245
- auto d = HWY_RVV_D (BASE, SEW, HWY_LANES (HWY_RVV_T (BASE, SEW)), SHIFT){}; \
3246
- auto df2 = \
3247
- HWY_RVV_D (BASE, SEW, HWY_LANES (HWY_RVV_T (BASE, SEW)), SHIFT - 1 ){}; \
3248
- HWY_IF_CONSTEXPR (kIndex == 0 ) { \
3249
- return __riscv_vmv_v_v_##CHAR##SEW##LMUL##_tu (dest, Ext (d, v), \
3250
- Lanes (df2)); \
3251
- } \
3252
- else { \
3253
- return SlideUp (dest, Ext (d, v), Lanes (df2)); \
3254
- } \
3247
+ #define HWY_RVV_PARTIAL_VEC_SET_HALF_SMALLEST ( \
3248
+ BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP) \
3249
+ template <size_t kIndex > \
3250
+ HWY_API HWY_RVV_V (BASE, SEW, LMUL) \
3251
+ NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMUL) v, \
3252
+ size_t half_N) { \
3253
+ static_assert (kIndex == 0 || kIndex == 1 , " kIndex must be 0 or 1" ); \
3254
+ HWY_IF_CONSTEXPR (kIndex == 0 ) { \
3255
+ return __riscv_v##OP##_v_v_##CHAR##SEW##LMUL##_tu (dest, v, half_N); \
3256
+ } \
3257
+ else { \
3258
+ return SlideUp (dest, v, half_N); \
3259
+ } \
3255
3260
}
3256
- #define HWY_RVV_SET_SMALLEST (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3257
- SHIFT, MLEN, NAME, OP) \
3258
- template <size_t kIndex > \
3261
+ HWY_RVV_FOREACH (HWY_RVV_PARTIAL_VEC_SET_HALF, PartialVecSetHalf, mv, _GET_SET)
3262
+ HWY_RVV_FOREACH (HWY_RVV_PARTIAL_VEC_SET_HALF, PartialVecSetHalf, mv,
3263
+ _GET_SET_VIRT)
3264
+ HWY_RVV_FOREACH (HWY_RVV_PARTIAL_VEC_SET_HALF_SMALLEST, PartialVecSetHalf, mv,
3265
+ _GET_SET_SMALLEST)
3266
+ #undef HWY_RVV_PARTIAL_VEC_SET_HALF
3267
+ #undef HWY_RVV_PARTIAL_VEC_SET_HALF_SMALLEST
3268
+
3269
+ #define HWY_RVV_SET (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
3270
+ MLEN, NAME, OP) \
3271
+ template <size_t kIndex , size_t N> \
3259
3272
HWY_API HWY_RVV_V (BASE, SEW, LMUL) \
3260
- NAME(HWY_RVV_V (BASE, SEW, LMUL) dest , HWY_RVV_V(BASE, SEW, LMUL) v) { \
3261
- static_assert ( kIndex == 0 || kIndex == 1 , " kIndex must be 0 or 1 " ); \
3262
- auto d = HWY_RVV_D (BASE, SEW, HWY_LANES ( HWY_RVV_T (BASE, SEW)), SHIFT){}; \
3263
- HWY_IF_CONSTEXPR ( kIndex == 0 ) { \
3264
- return __riscv_vmv_v_v_##CHAR##SEW##LMUL## _tu ( dest, v, Lanes (d) / 2 ); \
3273
+ NAME(HWY_RVV_D (BASE, SEW, N, SHIFT) d , HWY_RVV_V(BASE, SEW, LMUL) dest, \
3274
+ HWY_RVV_V(BASE, SEW, LMULH) v) { \
3275
+ HWY_IF_CONSTEXPR ( detail::IsFull (d)) { \
3276
+ return __riscv_v##OP##_v_##CHAR##SEW##LMULH##_##CHAR##SEW## LMUL ( \
3277
+ dest, kIndex , v); /* no AVL */ \
3265
3278
} \
3266
3279
else { \
3267
- return SlideUp (dest, v, Lanes (d) / 2 ); \
3280
+ const Half<decltype (d)> dh; \
3281
+ return PartialVecSetHalf<kIndex >(dest, v, Lanes (dh)); \
3268
3282
} \
3269
3283
}
3284
+ #define HWY_RVV_SET_VIRT (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3285
+ SHIFT, MLEN, NAME, OP) \
3286
+ template <size_t kIndex , size_t N> \
3287
+ HWY_API HWY_RVV_V (BASE, SEW, LMUL) \
3288
+ NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(BASE, SEW, LMUL) dest, \
3289
+ HWY_RVV_V(BASE, SEW, LMULH) v) { \
3290
+ const Half<decltype (d)> dh; \
3291
+ return PartialVecSetHalf<kIndex >(dest, v, Lanes (dh)); \
3292
+ }
3293
+ #define HWY_RVV_SET_SMALLEST (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3294
+ SHIFT, MLEN, NAME, OP) \
3295
+ template <size_t kIndex , size_t N> \
3296
+ HWY_API HWY_RVV_V (BASE, SEW, LMUL) \
3297
+ NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(BASE, SEW, LMUL) dest, \
3298
+ HWY_RVV_V(BASE, SEW, LMUL) v) { \
3299
+ return PartialVecSetHalf<kIndex >(dest, v, Lanes (d) / 2 ); \
3300
+ }
3301
+ #define HWY_RVV_SET_SMALLEST_VIRT (BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
3302
+ LMULH, SHIFT, MLEN, NAME, OP) \
3303
+ template <size_t kIndex , size_t N> \
3304
+ HWY_API HWY_RVV_V (BASE, SEW, LMUL) \
3305
+ NAME(HWY_RVV_D(BASE, SEW, N, SHIFT - 1 ) d, \
3306
+ HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMUL) v) { \
3307
+ return PartialVecSetHalf<kIndex >(dest, v, Lanes (d) / 2 ); \
3308
+ }
3270
3309
HWY_RVV_FOREACH (HWY_RVV_SET, Set, set, _GET_SET)
3271
3310
HWY_RVV_FOREACH (HWY_RVV_SET_VIRT, Set, set, _GET_SET_VIRT)
3272
3311
HWY_RVV_FOREACH (HWY_RVV_SET_SMALLEST, Set, set, _GET_SET_SMALLEST)
3312
+ HWY_RVV_FOREACH_UI163264 (HWY_RVV_SET_SMALLEST_VIRT, Set, set, _GET_SET_SMALLEST)
3313
+ HWY_RVV_FOREACH_F (HWY_RVV_SET_SMALLEST_VIRT, Set, set, _GET_SET_SMALLEST)
3273
3314
#undef HWY_RVV_SET
3274
3315
#undef HWY_RVV_SET_VIRT
3275
3316
#undef HWY_RVV_SET_SMALLEST
3317
+ #undef HWY_RVV_SET_SMALLEST_VIRT
3276
3318
3277
- template <size_t kIndex , class D >
3319
+ template <size_t kIndex , class D , HWY_RVV_IF_EMULATED_D(D) >
3278
3320
static HWY_INLINE HWY_MAYBE_UNUSED VFromD<D> Set (
3279
3321
D d, VFromD<D> dest, VFromD<AdjustSimdTagToMinVecPow2<Half<D>>> v) {
3280
- static_assert (kIndex == 0 || kIndex == 1 , " kIndex must be 0 or 1" );
3281
-
3282
- const AdjustSimdTagToMinVecPow2<Half<decltype (d)>> dh;
3283
- HWY_IF_CONSTEXPR (kIndex == 0 || detail::IsFull (d)) {
3284
- (void )dh;
3285
- return Set<kIndex >(dest, v);
3286
- }
3287
- else {
3288
- const size_t slide_up_amt =
3289
- (dh.Pow2 () < DFromV<decltype (v)>().Pow2 ()) ? Lanes (dh) : (Lanes (d) / 2 );
3290
- return SlideUp (dest, ResizeBitCast (d, v), slide_up_amt);
3291
- }
3322
+ const RebindToUnsigned<decltype (d)> du;
3323
+ return BitCast (
3324
+ d, Set<kIndex >(du, BitCast (du, dest),
3325
+ BitCast (RebindToUnsigned<DFromV<decltype (v)>>(), v)));
3292
3326
}
3293
3327
3294
3328
} // namespace detail
0 commit comments