Skip to content

Commit 2056a41

Browse files
Merge pull request google#2420 from johnplatts:hwy_rvv_concat_fix_123124
PiperOrigin-RevId: 712516345
2 parents a219783 + e892ab4 commit 2056a41

File tree

1 file changed

+79
-45
lines changed

1 file changed

+79
-45
lines changed

Diff for: hwy/ops/rvv-inl.h

+79-45
Original file line numberDiff line numberDiff line change
@@ -3228,67 +3228,101 @@ Get(D d, VFromD<D> v) {
32283228
}
32293229
}
32303230

3231-
#define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
3232-
MLEN, NAME, OP) \
3233-
template <size_t kIndex> \
3234-
HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
3235-
NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMULH) v) { \
3236-
return __riscv_v##OP##_v_##CHAR##SEW##LMULH##_##CHAR##SEW##LMUL( \
3237-
dest, kIndex, v); /* no AVL */ \
3231+
#define HWY_RVV_PARTIAL_VEC_SET_HALF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
3232+
LMULH, SHIFT, MLEN, NAME, OP) \
3233+
template <size_t kIndex> \
3234+
HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
3235+
NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMULH) v, \
3236+
size_t half_N) { \
3237+
static_assert(kIndex == 0 || kIndex == 1, "kIndex must be 0 or 1"); \
3238+
const DFromV<decltype(dest)> d; \
3239+
HWY_IF_CONSTEXPR(kIndex == 0) { \
3240+
return __riscv_v##OP##_v_v_##CHAR##SEW##LMUL##_tu(dest, Ext(d, v), \
3241+
half_N); \
3242+
} \
3243+
else { \
3244+
return SlideUp(dest, Ext(d, v), half_N); \
3245+
} \
32383246
}
3239-
#define HWY_RVV_SET_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3240-
SHIFT, MLEN, NAME, OP) \
3241-
template <size_t kIndex> \
3242-
HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
3243-
NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMULH) v) { \
3244-
static_assert(kIndex == 0 || kIndex == 1, "kIndex must be 0 or 1"); \
3245-
auto d = HWY_RVV_D(BASE, SEW, HWY_LANES(HWY_RVV_T(BASE, SEW)), SHIFT){}; \
3246-
auto df2 = \
3247-
HWY_RVV_D(BASE, SEW, HWY_LANES(HWY_RVV_T(BASE, SEW)), SHIFT - 1){}; \
3248-
HWY_IF_CONSTEXPR(kIndex == 0) { \
3249-
return __riscv_vmv_v_v_##CHAR##SEW##LMUL##_tu(dest, Ext(d, v), \
3250-
Lanes(df2)); \
3251-
} \
3252-
else { \
3253-
return SlideUp(dest, Ext(d, v), Lanes(df2)); \
3254-
} \
3247+
#define HWY_RVV_PARTIAL_VEC_SET_HALF_SMALLEST( \
3248+
BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP) \
3249+
template <size_t kIndex> \
3250+
HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
3251+
NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMUL) v, \
3252+
size_t half_N) { \
3253+
static_assert(kIndex == 0 || kIndex == 1, "kIndex must be 0 or 1"); \
3254+
HWY_IF_CONSTEXPR(kIndex == 0) { \
3255+
return __riscv_v##OP##_v_v_##CHAR##SEW##LMUL##_tu(dest, v, half_N); \
3256+
} \
3257+
else { \
3258+
return SlideUp(dest, v, half_N); \
3259+
} \
32553260
}
3256-
#define HWY_RVV_SET_SMALLEST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3257-
SHIFT, MLEN, NAME, OP) \
3258-
template <size_t kIndex> \
3261+
HWY_RVV_FOREACH(HWY_RVV_PARTIAL_VEC_SET_HALF, PartialVecSetHalf, mv, _GET_SET)
3262+
HWY_RVV_FOREACH(HWY_RVV_PARTIAL_VEC_SET_HALF, PartialVecSetHalf, mv,
3263+
_GET_SET_VIRT)
3264+
HWY_RVV_FOREACH(HWY_RVV_PARTIAL_VEC_SET_HALF_SMALLEST, PartialVecSetHalf, mv,
3265+
_GET_SET_SMALLEST)
3266+
#undef HWY_RVV_PARTIAL_VEC_SET_HALF
3267+
#undef HWY_RVV_PARTIAL_VEC_SET_HALF_SMALLEST
3268+
3269+
#define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
3270+
MLEN, NAME, OP) \
3271+
template <size_t kIndex, size_t N> \
32593272
HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
3260-
NAME(HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMUL) v) { \
3261-
static_assert(kIndex == 0 || kIndex == 1, "kIndex must be 0 or 1"); \
3262-
auto d = HWY_RVV_D(BASE, SEW, HWY_LANES(HWY_RVV_T(BASE, SEW)), SHIFT){}; \
3263-
HWY_IF_CONSTEXPR(kIndex == 0) { \
3264-
return __riscv_vmv_v_v_##CHAR##SEW##LMUL##_tu(dest, v, Lanes(d) / 2); \
3273+
NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(BASE, SEW, LMUL) dest, \
3274+
HWY_RVV_V(BASE, SEW, LMULH) v) { \
3275+
HWY_IF_CONSTEXPR(detail::IsFull(d)) { \
3276+
return __riscv_v##OP##_v_##CHAR##SEW##LMULH##_##CHAR##SEW##LMUL( \
3277+
dest, kIndex, v); /* no AVL */ \
32653278
} \
32663279
else { \
3267-
return SlideUp(dest, v, Lanes(d) / 2); \
3280+
const Half<decltype(d)> dh; \
3281+
return PartialVecSetHalf<kIndex>(dest, v, Lanes(dh)); \
32683282
} \
32693283
}
3284+
#define HWY_RVV_SET_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3285+
SHIFT, MLEN, NAME, OP) \
3286+
template <size_t kIndex, size_t N> \
3287+
HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
3288+
NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(BASE, SEW, LMUL) dest, \
3289+
HWY_RVV_V(BASE, SEW, LMULH) v) { \
3290+
const Half<decltype(d)> dh; \
3291+
return PartialVecSetHalf<kIndex>(dest, v, Lanes(dh)); \
3292+
}
3293+
#define HWY_RVV_SET_SMALLEST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
3294+
SHIFT, MLEN, NAME, OP) \
3295+
template <size_t kIndex, size_t N> \
3296+
HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
3297+
NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(BASE, SEW, LMUL) dest, \
3298+
HWY_RVV_V(BASE, SEW, LMUL) v) { \
3299+
return PartialVecSetHalf<kIndex>(dest, v, Lanes(d) / 2); \
3300+
}
3301+
#define HWY_RVV_SET_SMALLEST_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
3302+
LMULH, SHIFT, MLEN, NAME, OP) \
3303+
template <size_t kIndex, size_t N> \
3304+
HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
3305+
NAME(HWY_RVV_D(BASE, SEW, N, SHIFT - 1) d, \
3306+
HWY_RVV_V(BASE, SEW, LMUL) dest, HWY_RVV_V(BASE, SEW, LMUL) v) { \
3307+
return PartialVecSetHalf<kIndex>(dest, v, Lanes(d) / 2); \
3308+
}
32703309
HWY_RVV_FOREACH(HWY_RVV_SET, Set, set, _GET_SET)
32713310
HWY_RVV_FOREACH(HWY_RVV_SET_VIRT, Set, set, _GET_SET_VIRT)
32723311
HWY_RVV_FOREACH(HWY_RVV_SET_SMALLEST, Set, set, _GET_SET_SMALLEST)
3312+
HWY_RVV_FOREACH_UI163264(HWY_RVV_SET_SMALLEST_VIRT, Set, set, _GET_SET_SMALLEST)
3313+
HWY_RVV_FOREACH_F(HWY_RVV_SET_SMALLEST_VIRT, Set, set, _GET_SET_SMALLEST)
32733314
#undef HWY_RVV_SET
32743315
#undef HWY_RVV_SET_VIRT
32753316
#undef HWY_RVV_SET_SMALLEST
3317+
#undef HWY_RVV_SET_SMALLEST_VIRT
32763318

3277-
template <size_t kIndex, class D>
3319+
template <size_t kIndex, class D, HWY_RVV_IF_EMULATED_D(D)>
32783320
static HWY_INLINE HWY_MAYBE_UNUSED VFromD<D> Set(
32793321
D d, VFromD<D> dest, VFromD<AdjustSimdTagToMinVecPow2<Half<D>>> v) {
3280-
static_assert(kIndex == 0 || kIndex == 1, "kIndex must be 0 or 1");
3281-
3282-
const AdjustSimdTagToMinVecPow2<Half<decltype(d)>> dh;
3283-
HWY_IF_CONSTEXPR(kIndex == 0 || detail::IsFull(d)) {
3284-
(void)dh;
3285-
return Set<kIndex>(dest, v);
3286-
}
3287-
else {
3288-
const size_t slide_up_amt =
3289-
(dh.Pow2() < DFromV<decltype(v)>().Pow2()) ? Lanes(dh) : (Lanes(d) / 2);
3290-
return SlideUp(dest, ResizeBitCast(d, v), slide_up_amt);
3291-
}
3322+
const RebindToUnsigned<decltype(d)> du;
3323+
return BitCast(
3324+
d, Set<kIndex>(du, BitCast(du, dest),
3325+
BitCast(RebindToUnsigned<DFromV<decltype(v)>>(), v)));
32923326
}
32933327

32943328
} // namespace detail

0 commit comments

Comments
 (0)