Skip to content

Commit c31d350

Browse files
lsalzman authored and web-flow committed
Bug 1917964 - Use __builtin_convertvector and __builtin_shufflevector on GCC when available. r=aosmond
GCC upstream recommends that we use __builtin_convertvector and __builtin_shufflevector instead of __builtin_shuffle for better code generation.
See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116666#c7
Differential Revision: https://phabricator.services.mozilla.com/D222067
1 parent c2e7f6c commit c31d350

File tree

1 file changed

+97
-90
lines changed

1 file changed

+97
-90
lines changed

swgl/src/vector_type.h

+97-90
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,15 @@ struct VectorMask<float> {
9494
typedef int type;
9595
};
9696

97+
// Compiler feature detection for the vector builtins used below.
// HAS_BUILTIN_CONVERTVECTOR / HAS_BUILTIN_SHUFFLEVECTOR are defined only when
// the compiler provides `__has_builtin` AND reports the respective builtin as
// available. If `__has_builtin` itself is not defined, neither macro is
// defined and the code that tests these macros takes its fallback branch.
# ifdef __has_builtin
98+
# if __has_builtin(__builtin_convertvector)
99+
# define HAS_BUILTIN_CONVERTVECTOR
100+
# endif
101+
# if __has_builtin(__builtin_shufflevector)
102+
# define HAS_BUILTIN_SHUFFLEVECTOR
103+
# endif
104+
# endif
105+
97106
template <typename T, int N>
98107
struct VectorType {
99108
enum { SIZE = N };
@@ -136,6 +145,13 @@ struct VectorType {
136145
T& operator[](size_t i) { return elements[i]; }
137146
T operator[](size_t i) const { return elements[i]; }
138147

148+
# ifdef HAS_BUILTIN_CONVERTVECTOR
149+
// Conversion to a VectorType with element type U and M lanes, implemented with
// a single __builtin_convertvector call on the underlying `data` vector; the
// result is wrapped back into a VectorType<U, M>.
// NOTE(review): the GCC/Clang documentation for __builtin_convertvector
// requires the source and destination vector types to have the same number of
// elements, so this presumably only compiles when M equals this vector's lane
// count -- confirm against callers.
template <typename U, int M>
150+
operator VectorType<U, M>() const {
151+
return VectorType<U, M>::wrap(
152+
__builtin_convertvector(data, typename VectorType<U, M>::data_type));
153+
}
154+
# else
139155
template <typename U>
140156
operator VectorType<U, 2>() const {
141157
return VectorType<U, 2>::wrap(
@@ -173,6 +189,7 @@ struct VectorType {
173189
U(elements[15]),
174190
});
175191
}
192+
# endif
176193

177194
VectorType operator-() const { return wrap(-data); }
178195
VectorType operator~() const { return wrap(~data); }
@@ -268,42 +285,6 @@ struct VectorType {
268285
return *this;
269286
}
270287

271-
VectorType<T, 4> shuffle(VectorType b, mask_index x, mask_index y,
272-
mask_index z, mask_index w) const {
273-
return VectorType<T, 4>::wrap(__builtin_shuffle(
274-
data, b.data, (typename VectorType<T, 4>::mask_type){x, y, z, w}));
275-
}
276-
VectorType<T, 8> shuffle(VectorType b, mask_index x, mask_index y,
277-
mask_index z, mask_index w, mask_index s,
278-
mask_index t, mask_index u, mask_index v) const {
279-
return VectorType<T, 8>::wrap(__builtin_shuffle(
280-
data, b.data,
281-
(typename VectorType<T, 8>::mask_type){x, y, z, w, s, t, u, v}));
282-
}
283-
VectorType<T, 16> shuffle(VectorType b, mask_index x, mask_index y,
284-
mask_index z, mask_index w, mask_index s,
285-
mask_index t, mask_index u, mask_index v,
286-
mask_index i, mask_index j, mask_index k,
287-
mask_index l, mask_index m, mask_index n,
288-
mask_index o, mask_index p) const {
289-
return VectorType<T, 16>::wrap(
290-
__builtin_shuffle(data, b.data,
291-
(typename VectorType<T, 16>::mask_type){
292-
x, y, z, w, s, t, u, v, i, j, k, l, m, n, o, p}));
293-
}
294-
295-
VectorType<T, 4> swizzle(mask_index x, mask_index y, mask_index z,
296-
mask_index w) const {
297-
return VectorType<T, 4>::wrap(__builtin_shuffle(
298-
data, (typename VectorType<T, 4>::mask_type){x, y, z, w}));
299-
}
300-
VectorType<T, 8> swizzle(mask_index x, mask_index y, mask_index z,
301-
mask_index w, mask_index s, mask_index t,
302-
mask_index u, mask_index v) const {
303-
return VectorType<T, 8>::wrap(__builtin_shuffle(
304-
data, (typename VectorType<T, 8>::mask_type){x, y, z, w, s, t, u, v}));
305-
}
306-
307288
SI VectorType wrap(half_type low, half_type high) {
308289
VectorType v;
309290
v.low_half = low;
@@ -315,53 +296,86 @@ struct VectorType {
315296
return VectorType<T, N * 2>::wrap(data, high.data);
316297
}
317298

318-
# define xxxx swizzle(0, 0, 0, 0)
319-
# define yyyy swizzle(1, 1, 1, 1)
320-
# define zzzz swizzle(2, 2, 2, 2)
321-
# define wwww swizzle(3, 3, 3, 3)
322-
# define xxyy swizzle(0, 0, 1, 1)
323-
# define xxzz swizzle(0, 0, 2, 2)
324-
# define yyww swizzle(1, 1, 3, 3)
325-
# define zzww swizzle(2, 2, 3, 3)
326-
# define xyxy swizzle(0, 1, 0, 1)
327-
# define xzxz swizzle(0, 2, 0, 2)
328-
# define ywyw swizzle(1, 3, 1, 3)
329-
# define zwzw swizzle(2, 3, 2, 3)
330-
# define zwxy swizzle(2, 3, 0, 1)
331-
# define zyxw swizzle(2, 1, 0, 3)
332-
# define xxyz swizzle(0, 0, 1, 2)
333-
# define xyyz swizzle(0, 1, 1, 2)
334-
# define xyzz swizzle(0, 1, 2, 2)
335-
# define xzyw swizzle(0, 2, 1, 3)
336-
# define yzwx swizzle(1, 2, 3, 0)
337-
# define wxyz swizzle(3, 0, 1, 2)
338-
# define wzyx swizzle(3, 2, 1, 0)
339-
# define xxxxyyyy XXXXYYYY()
340-
VectorType<T, 8> XXXXYYYY() const {
341-
return swizzle(0, 0, 0, 0).combine(swizzle(1, 1, 1, 1));
342-
}
343-
# define zzzzwwww ZZZZWWWW()
344-
VectorType<T, 8> ZZZZWWWW() const {
345-
return swizzle(2, 2, 2, 2).combine(swizzle(3, 3, 3, 3));
346-
}
347-
# define xyzwxyzw XYZWXYZW()
348-
VectorType<T, 8> XYZWXYZW() const { return combine(*this); }
349-
# define xyxyxyxy XYXYXYXY()
350-
VectorType<T, 8> XYXYXYXY() const {
351-
return swizzle(0, 1, 0, 1).combine(swizzle(0, 1, 0, 1));
352-
}
353-
# define zwzwzwzw ZWZWZWZW()
354-
VectorType<T, 8> ZWZWZWZW() const {
355-
return swizzle(2, 3, 2, 3).combine(swizzle(2, 3, 2, 3));
356-
}
357-
# define xxyyzzww XXYYZZWW()
358-
VectorType<T, 8> XXYYZZWW() const {
359-
return swizzle(0, 0, 1, 1).combine(swizzle(2, 2, 3, 3));
360-
}
361-
# define xxxxyyyyzzzzwwww XXXXYYYYZZZZWWWW()
362-
VectorType<T, 16> XXXXYYYYZZZZWWWW() {
363-
return XXXXYYYY().combine(ZZZZWWWW());
299+
// shuffle<INDEXES...>(b) / swizzle<INDEXES...>() take their lane indexes as
// template arguments. Two implementations are provided: one built on
// __builtin_shufflevector when HAS_BUILTIN_SHUFFLEVECTOR is defined, and a
// fallback built on __builtin_shuffle otherwise. In both, the result lane
// count M defaults to the number of indexes supplied.
# ifdef HAS_BUILTIN_SHUFFLEVECTOR
300+
// Builds an M-lane vector by passing this vector's data, b.data and the
// compile-time INDEXES to __builtin_shufflevector. `b` has the same type as
// this vector.
template <mask_index... INDEXES, int M = sizeof...(INDEXES)>
301+
VectorType<T, M> shuffle(VectorType b) const {
302+
return VectorType<T, M>::wrap(
303+
__builtin_shufflevector(data, b.data, INDEXES...));
364304
}
305+
306+
// Same as shuffle(), but this vector's data is supplied as both operands.
template <mask_index... INDEXES, int M = sizeof...(INDEXES)>
307+
VectorType<T, M> swizzle() const {
308+
return VectorType<T, M>::wrap(
309+
__builtin_shufflevector(data, data, INDEXES...));
310+
}
311+
# else
312+
// Fallback: shuffles this vector's data with b.data via __builtin_shuffle,
// using a mask of VectorType<T, M>::mask_type initialized from INDEXES.
// NOTE(review): GCC documents __builtin_shuffle as requiring both operands and
// the mask to have the same number of elements, so this overload presumably is
// only usable when M matches this vector's lane count -- confirm.
template <mask_index... INDEXES, int M = sizeof...(INDEXES)>
313+
VectorType<T, M> shuffle(VectorType<T, M> b) const {
314+
return VectorType<T, M>::wrap(__builtin_shuffle(
315+
data, b.data, (typename VectorType<T, M>::mask_type){INDEXES...}));
316+
}
317+
318+
// 8 indexes with a 4-lane `b`: performs two 4-index shuffles (A..D and E..H)
// and joins the two results with combine().
template <mask_index A, mask_index B, mask_index C, mask_index D,
319+
mask_index E, mask_index F, mask_index G, mask_index H>
320+
VectorType<T, 8> shuffle(VectorType<T, 4> b) const {
321+
return shuffle<A, B, C, D>(b).combine(shuffle<E, F, G, H>(b));
322+
}
323+
324+
// 16 indexes with a 4-lane `b`: performs two 8-index shuffles (A..H and I..Z)
// and joins the two results with combine().
template <mask_index A, mask_index B, mask_index C, mask_index D,
325+
mask_index E, mask_index F, mask_index G, mask_index H,
326+
mask_index I, mask_index J, mask_index K, mask_index L,
327+
mask_index W, mask_index X, mask_index Y, mask_index Z>
328+
VectorType<T, 16> shuffle(VectorType<T, 4> b) const {
329+
return shuffle<A, B, C, D, E, F, G, H>(b).combine(
330+
shuffle<I, J, K, L, W, X, Y, Z>(b));
331+
}
332+
333+
// 16 indexes with an 8-lane `b`: performs two 8-index shuffles (A..H and I..Z)
// and joins the two results with combine().
template <mask_index A, mask_index B, mask_index C, mask_index D,
334+
mask_index E, mask_index F, mask_index G, mask_index H,
335+
mask_index I, mask_index J, mask_index K, mask_index L,
336+
mask_index W, mask_index X, mask_index Y, mask_index Z>
337+
VectorType<T, 16> shuffle(VectorType<T, 8> b) const {
338+
return shuffle<A, B, C, D, E, F, G, H>(b).combine(
339+
shuffle<I, J, K, L, W, X, Y, Z>(b));
340+
}
341+
342+
// Fallback swizzle: forwards to shuffle() with this vector as the second
// operand.
template <mask_index... INDEXES, int M = sizeof...(INDEXES)>
343+
VectorType<T, M> swizzle() const {
344+
return shuffle<INDEXES...>(*this);
345+
}
346+
# endif
347+
348+
// SWIZZLE(...) expands to a call of the swizzle member template with the given
// lane indexes as template arguments. The expansion begins with the `template`
// keyword, so it is meant to be written directly after a member-access
// operator on a vector value.
# define SWIZZLE(...) template swizzle<__VA_ARGS__>()
349+
350+
// Named swizzles: each letter selects a source lane, with x = 0, y = 1, z = 2
// and w = 3. Four-letter names produce 4 lanes, eight-letter names 8 lanes,
// and the sixteen-letter name 16 lanes.
# define xxxx SWIZZLE(0, 0, 0, 0)
351+
# define yyyy SWIZZLE(1, 1, 1, 1)
352+
# define zzzz SWIZZLE(2, 2, 2, 2)
353+
# define wwww SWIZZLE(3, 3, 3, 3)
354+
# define xxyy SWIZZLE(0, 0, 1, 1)
355+
# define xxzz SWIZZLE(0, 0, 2, 2)
356+
# define yyww SWIZZLE(1, 1, 3, 3)
357+
# define zzww SWIZZLE(2, 2, 3, 3)
358+
# define xyxy SWIZZLE(0, 1, 0, 1)
359+
# define xzxz SWIZZLE(0, 2, 0, 2)
360+
# define ywyw SWIZZLE(1, 3, 1, 3)
361+
# define zwzw SWIZZLE(2, 3, 2, 3)
362+
# define zwxy SWIZZLE(2, 3, 0, 1)
363+
# define zyxw SWIZZLE(2, 1, 0, 3)
364+
# define xxyz SWIZZLE(0, 0, 1, 2)
365+
# define xyyz SWIZZLE(0, 1, 1, 2)
366+
# define xyzz SWIZZLE(0, 1, 2, 2)
367+
# define xzyw SWIZZLE(0, 2, 1, 3)
368+
# define yzwx SWIZZLE(1, 2, 3, 0)
369+
# define wxyz SWIZZLE(3, 0, 1, 2)
370+
# define wzyx SWIZZLE(3, 2, 1, 0)
371+
# define xxxxyyyy SWIZZLE(0, 0, 0, 0, 1, 1, 1, 1)
372+
# define zzzzwwww SWIZZLE(2, 2, 2, 2, 3, 3, 3, 3)
373+
# define xyzwxyzw SWIZZLE(0, 1, 2, 3, 0, 1, 2, 3)
374+
# define xyxyxyxy SWIZZLE(0, 1, 0, 1, 0, 1, 0, 1)
375+
# define zwzwzwzw SWIZZLE(2, 3, 2, 3, 2, 3, 2, 3)
376+
# define xxyyzzww SWIZZLE(0, 0, 1, 1, 2, 2, 3, 3)
377+
# define xxxxyyyyzzzzwwww \
378+
SWIZZLE(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3)
365379
};
366380

367381
template <typename T>
@@ -388,7 +402,7 @@ struct VectorType<T, 2> {
388402
};
389403

390404
// CONVERT(vector, type): C-style cast of `vector` to `type`; both the operand
// and the whole expression are parenthesized.
# define CONVERT(vector, type) ((type)(vector))
391-
# define SHUFFLE(a, b, ...) a.shuffle(b, __VA_ARGS__)
405+
// SHUFFLE(a, b, indexes...): calls the shuffle member template of `a` with the
// index list as template arguments and `b` as the call argument. `a` and the
// whole expression are parenthesized, and the `template` keyword lets the
// macro be used where the type of `a` is dependent.
# define SHUFFLE(a, b, ...) ((a).template shuffle<__VA_ARGS__>(b))
392406

393407
template <typename T, int N>
394408
SI VectorType<T, N * 2> combine(VectorType<T, N> a, VectorType<T, N> b) {
@@ -478,7 +492,6 @@ SI VectorType<T, 8> zip2High(VectorType<T, 8> a, VectorType<T, 8> b) {
478492
return SHUFFLE(a, b, 4, 5, 12, 13, 6, 7, 14, 15);
479493
}
480494

481-
#ifdef __clang__
482495
template <typename T>
483496
SI VectorType<T, 8> zip(VectorType<T, 4> a, VectorType<T, 4> b) {
484497
return SHUFFLE(a, b, 0, 4, 1, 5, 2, 6, 3, 7);
@@ -488,12 +501,6 @@ template <typename T>
488501
SI VectorType<T, 16> zip(VectorType<T, 8> a, VectorType<T, 8> b) {
489502
return SHUFFLE(a, b, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15);
490503
}
491-
#else
492-
template <typename T, int N>
493-
SI VectorType<T, N * 2> zip(VectorType<T, N> a, VectorType<T, N> b) {
494-
return combine(zipLow(a, b), zipHigh(a, b));
495-
}
496-
#endif
497504

498505
template <typename T>
499506
struct Unaligned {

0 commit comments

Comments
 (0)