Skip to content

Commit 140f307

Browse files
Merge pull request google#2453 from cambridgeconsultants:cc_up_LoadMaskBits_optimisation
PiperOrigin-RevId: 721750804
2 parents a74db0c + 4d81fed commit 140f307

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

Diff for: hwy/ops/arm_sve-inl.h

+5
Original file line number | Diff line number | Diff line change
@@ -5086,6 +5086,10 @@ HWY_API V AverageRound(const V a, const V b) {
50865086
// `bits` points to at least 8 readable bytes, not all of which need be valid.
50875087
template <class D, HWY_IF_T_SIZE_D(D, 1)>
50885088
HWY_INLINE svbool_t LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
5089+
#if HWY_COMPILER_CLANG >= 1901 || HWY_COMPILER_GCC_ACTUAL >= 1200
5090+
(void)d;
5091+
return *(const svbool_t*)bits;
5092+
#else
50895093
// TODO(janwas): with SVE2.1, load to vector, then PMOV
50905094
const RebindToUnsigned<D> du;
50915095
const svuint8_t iota = Iota(du, 0);
@@ -5098,6 +5102,7 @@ HWY_INLINE svbool_t LoadMaskBits(D d, const uint8_t* HWY_RESTRICT bits) {
50985102
const svuint8_t bit =
50995103
svdupq_n_u8(1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128);
51005104
return TestBit(rep8, bit);
5105+
#endif
51015106
}
51025107

51035108
template <class D, HWY_IF_T_SIZE_D(D, 2)>

0 commit comments

Comments
 (0)