@@ -94,6 +94,15 @@ struct VectorMask<float> {
94
94
typedef int type;
95
95
};
96
96
97
// Feature probes for the vector builtins used further down.
// __has_builtin is itself optional, so its presence is checked first; when it
// is unavailable neither HAS_BUILTIN_* macro gets defined and the code below
// takes its #else paths.
#  if defined(__has_builtin)
#    if __has_builtin(__builtin_shufflevector)
#      define HAS_BUILTIN_SHUFFLEVECTOR
#    endif
#    if __has_builtin(__builtin_convertvector)
#      define HAS_BUILTIN_CONVERTVECTOR
#    endif
#  endif
105
+
97
106
template <typename T, int N>
98
107
struct VectorType {
99
108
enum { SIZE = N };
@@ -136,6 +145,13 @@ struct VectorType {
136
145
// Element access into `elements`. The mutable overload yields a reference to
// the stored element; the const overload yields a copy of it.
T& operator[](size_t index) {
  return elements[index];
}
T operator[](size_t index) const {
  return elements[index];
}
138
147
148
+ # ifdef HAS_BUILTIN_CONVERTVECTOR
149
+ template <typename U, int M>
150
+ operator VectorType<U, M>() const {
151
+ return VectorType<U, M>::wrap (
152
+ __builtin_convertvector (data, typename VectorType<U, M>::data_type));
153
+ }
154
+ # else
139
155
template <typename U>
140
156
operator VectorType<U, 2 >() const {
141
157
return VectorType<U, 2 >::wrap (
@@ -173,6 +189,7 @@ struct VectorType {
173
189
U (elements[15 ]),
174
190
});
175
191
}
192
+ # endif
176
193
177
194
// Unary minus: applies `-` to `data` and wraps the result.
VectorType operator-() const {
  return wrap(-data);
}
178
195
// Bitwise complement: applies `~` to `data` and wraps the result.
VectorType operator~() const {
  return wrap(~data);
}
@@ -268,42 +285,6 @@ struct VectorType {
268
285
return *this ;
269
286
}
270
287
271
- VectorType<T, 4 > shuffle (VectorType b, mask_index x, mask_index y,
272
- mask_index z, mask_index w) const {
273
- return VectorType<T, 4 >::wrap (__builtin_shuffle (
274
- data, b.data , (typename VectorType<T, 4 >::mask_type){x, y, z, w}));
275
- }
276
- VectorType<T, 8 > shuffle (VectorType b, mask_index x, mask_index y,
277
- mask_index z, mask_index w, mask_index s,
278
- mask_index t, mask_index u, mask_index v) const {
279
- return VectorType<T, 8 >::wrap (__builtin_shuffle (
280
- data, b.data ,
281
- (typename VectorType<T, 8 >::mask_type){x, y, z, w, s, t, u, v}));
282
- }
283
- VectorType<T, 16 > shuffle (VectorType b, mask_index x, mask_index y,
284
- mask_index z, mask_index w, mask_index s,
285
- mask_index t, mask_index u, mask_index v,
286
- mask_index i, mask_index j, mask_index k,
287
- mask_index l, mask_index m, mask_index n,
288
- mask_index o, mask_index p) const {
289
- return VectorType<T, 16 >::wrap (
290
- __builtin_shuffle (data, b.data ,
291
- (typename VectorType<T, 16 >::mask_type){
292
- x, y, z, w, s, t, u, v, i, j, k, l, m, n, o, p}));
293
- }
294
-
295
- VectorType<T, 4 > swizzle (mask_index x, mask_index y, mask_index z,
296
- mask_index w) const {
297
- return VectorType<T, 4 >::wrap (__builtin_shuffle (
298
- data, (typename VectorType<T, 4 >::mask_type){x, y, z, w}));
299
- }
300
- VectorType<T, 8 > swizzle (mask_index x, mask_index y, mask_index z,
301
- mask_index w, mask_index s, mask_index t,
302
- mask_index u, mask_index v) const {
303
- return VectorType<T, 8 >::wrap (__builtin_shuffle (
304
- data, (typename VectorType<T, 8 >::mask_type){x, y, z, w, s, t, u, v}));
305
- }
306
-
307
288
SI VectorType wrap (half_type low, half_type high) {
308
289
VectorType v;
309
290
v.low_half = low;
@@ -315,53 +296,86 @@ struct VectorType {
315
296
return VectorType<T, N * 2 >::wrap (data, high.data );
316
297
}
317
298
318
- # define xxxx swizzle (0 , 0 , 0 , 0 )
319
- # define yyyy swizzle (1 , 1 , 1 , 1 )
320
- # define zzzz swizzle (2 , 2 , 2 , 2 )
321
- # define wwww swizzle (3 , 3 , 3 , 3 )
322
- # define xxyy swizzle (0 , 0 , 1 , 1 )
323
- # define xxzz swizzle (0 , 0 , 2 , 2 )
324
- # define yyww swizzle (1 , 1 , 3 , 3 )
325
- # define zzww swizzle (2 , 2 , 3 , 3 )
326
- # define xyxy swizzle (0 , 1 , 0 , 1 )
327
- # define xzxz swizzle (0 , 2 , 0 , 2 )
328
- # define ywyw swizzle (1 , 3 , 1 , 3 )
329
- # define zwzw swizzle (2 , 3 , 2 , 3 )
330
- # define zwxy swizzle (2 , 3 , 0 , 1 )
331
- # define zyxw swizzle (2 , 1 , 0 , 3 )
332
- # define xxyz swizzle (0 , 0 , 1 , 2 )
333
- # define xyyz swizzle (0 , 1 , 1 , 2 )
334
- # define xyzz swizzle (0 , 1 , 2 , 2 )
335
- # define xzyw swizzle (0 , 2 , 1 , 3 )
336
- # define yzwx swizzle (1 , 2 , 3 , 0 )
337
- # define wxyz swizzle (3 , 0 , 1 , 2 )
338
- # define wzyx swizzle (3 , 2 , 1 , 0 )
339
- # define xxxxyyyy XXXXYYYY ()
340
- VectorType<T, 8> XXXXYYYY() const {
341
- return swizzle (0 , 0 , 0 , 0 ).combine (swizzle (1 , 1 , 1 , 1 ));
342
- }
343
- # define zzzzwwww ZZZZWWWW ()
344
- VectorType<T, 8> ZZZZWWWW() const {
345
- return swizzle (2 , 2 , 2 , 2 ).combine (swizzle (3 , 3 , 3 , 3 ));
346
- }
347
- # define xyzwxyzw XYZWXYZW ()
348
- VectorType<T, 8> XYZWXYZW() const { return combine (*this ); }
349
- # define xyxyxyxy XYXYXYXY ()
350
- VectorType<T, 8> XYXYXYXY() const {
351
- return swizzle (0 , 1 , 0 , 1 ).combine (swizzle (0 , 1 , 0 , 1 ));
352
- }
353
- # define zwzwzwzw ZWZWZWZW ()
354
- VectorType<T, 8> ZWZWZWZW() const {
355
- return swizzle (2 , 3 , 2 , 3 ).combine (swizzle (2 , 3 , 2 , 3 ));
356
- }
357
- # define xxyyzzww XXYYZZWW ()
358
- VectorType<T, 8> XXYYZZWW() const {
359
- return swizzle (0 , 0 , 1 , 1 ).combine (swizzle (2 , 2 , 3 , 3 ));
360
- }
361
- # define xxxxyyyyzzzzwwww XXXXYYYYZZZZWWWW ()
362
- VectorType<T, 16> XXXXYYYYZZZZWWWW() {
363
- return XXXXYYYY ().combine (ZZZZWWWW ());
299
+ # ifdef HAS_BUILTIN_SHUFFLEVECTOR
300
+ template <mask_index... INDEXES, int M = sizeof ...(INDEXES)>
301
+ VectorType<T, M> shuffle (VectorType b) const {
302
+ return VectorType<T, M>::wrap (
303
+ __builtin_shufflevector (data, b.data , INDEXES...));
364
304
}
305
+
306
+ template <mask_index... INDEXES, int M = sizeof ...(INDEXES)>
307
+ VectorType<T, M> swizzle () const {
308
+ return VectorType<T, M>::wrap (
309
+ __builtin_shufflevector (data, data, INDEXES...));
310
+ }
311
+ # else
312
+ template <mask_index... INDEXES, int M = sizeof ...(INDEXES)>
313
+ VectorType<T, M> shuffle (VectorType<T, M> b) const {
314
+ return VectorType<T, M>::wrap (__builtin_shuffle (
315
+ data, b.data , (typename VectorType<T, M>::mask_type){INDEXES...}));
316
+ }
317
+
318
+ template <mask_index A, mask_index B, mask_index C, mask_index D,
319
+ mask_index E, mask_index F, mask_index G, mask_index H>
320
+ VectorType<T, 8 > shuffle (VectorType<T, 4 > b) const {
321
+ return shuffle<A, B, C, D>(b).combine (shuffle<E, F, G, H>(b));
322
+ }
323
+
324
+ template <mask_index A, mask_index B, mask_index C, mask_index D,
325
+ mask_index E, mask_index F, mask_index G, mask_index H,
326
+ mask_index I, mask_index J, mask_index K, mask_index L,
327
+ mask_index W, mask_index X, mask_index Y, mask_index Z>
328
+ VectorType<T, 16 > shuffle (VectorType<T, 4 > b) const {
329
+ return shuffle<A, B, C, D, E, F, G, H>(b).combine (
330
+ shuffle<I, J, K, L, W, X, Y, Z>(b));
331
+ }
332
+
333
+ template <mask_index A, mask_index B, mask_index C, mask_index D,
334
+ mask_index E, mask_index F, mask_index G, mask_index H,
335
+ mask_index I, mask_index J, mask_index K, mask_index L,
336
+ mask_index W, mask_index X, mask_index Y, mask_index Z>
337
+ VectorType<T, 16 > shuffle (VectorType<T, 8 > b) const {
338
+ return shuffle<A, B, C, D, E, F, G, H>(b).combine (
339
+ shuffle<I, J, K, L, W, X, Y, Z>(b));
340
+ }
341
+
342
+ template <mask_index... INDEXES, int M = sizeof ...(INDEXES)>
343
+ VectorType<T, M> swizzle () const {
344
+ return shuffle<INDEXES...>(*this );
345
+ }
346
+ # endif
347
+
348
+ # define SWIZZLE (...) template swizzle<__VA_ARGS__>()
349
+
350
+ # define xxxx SWIZZLE (0 , 0 , 0 , 0 )
351
+ # define yyyy SWIZZLE (1 , 1 , 1 , 1 )
352
+ # define zzzz SWIZZLE (2 , 2 , 2 , 2 )
353
+ # define wwww SWIZZLE (3 , 3 , 3 , 3 )
354
+ # define xxyy SWIZZLE (0 , 0 , 1 , 1 )
355
+ # define xxzz SWIZZLE (0 , 0 , 2 , 2 )
356
+ # define yyww SWIZZLE (1 , 1 , 3 , 3 )
357
+ # define zzww SWIZZLE (2 , 2 , 3 , 3 )
358
+ # define xyxy SWIZZLE (0 , 1 , 0 , 1 )
359
+ # define xzxz SWIZZLE (0 , 2 , 0 , 2 )
360
+ # define ywyw SWIZZLE (1 , 3 , 1 , 3 )
361
+ # define zwzw SWIZZLE (2 , 3 , 2 , 3 )
362
+ # define zwxy SWIZZLE (2 , 3 , 0 , 1 )
363
+ # define zyxw SWIZZLE (2 , 1 , 0 , 3 )
364
+ # define xxyz SWIZZLE (0 , 0 , 1 , 2 )
365
+ # define xyyz SWIZZLE (0 , 1 , 1 , 2 )
366
+ # define xyzz SWIZZLE (0 , 1 , 2 , 2 )
367
+ # define xzyw SWIZZLE (0 , 2 , 1 , 3 )
368
+ # define yzwx SWIZZLE (1 , 2 , 3 , 0 )
369
+ # define wxyz SWIZZLE (3 , 0 , 1 , 2 )
370
+ # define wzyx SWIZZLE (3 , 2 , 1 , 0 )
371
+ # define xxxxyyyy SWIZZLE (0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 )
372
+ # define zzzzwwww SWIZZLE (2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 )
373
+ # define xyzwxyzw SWIZZLE (0 , 1 , 2 , 3 , 0 , 1 , 2 , 3 )
374
+ # define xyxyxyxy SWIZZLE (0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 )
375
+ # define zwzwzwzw SWIZZLE (2 , 3 , 2 , 3 , 2 , 3 , 2 , 3 )
376
+ # define xxyyzzww SWIZZLE (0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 )
377
+ # define xxxxyyyyzzzzwwww \
378
+ SWIZZLE (0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 )
365
379
};
366
380
367
381
template <typename T>
@@ -388,7 +402,7 @@ struct VectorType<T, 2> {
388
402
};
389
403
390
404
// CONVERT(vector, type): casts `vector` to `type` with a C-style cast.
// Function-like macro, so no whitespace may separate the name from `(`.
// NOTE(review): the C-style cast is kept as-is; presumably it resolves to the
// VectorType conversion operators - confirm before replacing it with a named
// cast.
#  define CONVERT(vector, type) ((type)(vector))
391
- # define SHUFFLE (a, b, ...) a. shuffle(b, __VA_ARGS__ )
405
// SHUFFLE(a, b, indices...): expands to `((a).template shuffle<indices...>(b))`,
// i.e. the index list becomes the template argument list of a's shuffle member.
// `a` is parenthesized so an arbitrary expression can be passed, and the
// `template` keyword keeps the call well-formed when `a` has a dependent type.
// Function-like macro, so no whitespace may separate the name from `(`.
#  define SHUFFLE(a, b, ...) ((a).template shuffle<__VA_ARGS__>(b))
392
406
393
407
template <typename T, int N>
394
408
SI VectorType<T, N * 2 > combine (VectorType<T, N> a, VectorType<T, N> b) {
@@ -478,7 +492,6 @@ SI VectorType<T, 8> zip2High(VectorType<T, 8> a, VectorType<T, 8> b) {
478
492
return SHUFFLE (a, b, 4 , 5 , 12 , 13 , 6 , 7 , 14 , 15 );
479
493
}
480
494
481
- #ifdef __clang__
482
495
template <typename T>
483
496
SI VectorType<T, 8 > zip (VectorType<T, 4 > a, VectorType<T, 4 > b) {
484
497
return SHUFFLE (a, b, 0 , 4 , 1 , 5 , 2 , 6 , 3 , 7 );
@@ -488,12 +501,6 @@ template <typename T>
488
501
SI VectorType<T, 16 > zip (VectorType<T, 8 > a, VectorType<T, 8 > b) {
489
502
return SHUFFLE (a, b, 0 , 8 , 1 , 9 , 2 , 10 , 3 , 11 , 4 , 12 , 5 , 13 , 6 , 14 , 7 , 15 );
490
503
}
491
- #else
492
- template <typename T, int N>
493
- SI VectorType<T, N * 2 > zip (VectorType<T, N> a, VectorType<T, N> b) {
494
- return combine (zipLow (a, b), zipHigh (a, b));
495
- }
496
- #endif
497
504
498
505
template <typename T>
499
506
struct Unaligned {
0 commit comments