[X86][AVX10.2] Remove YMM rounding from VCVTTP.*QS #132414
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-ir
Author: Phoebe Wang (phoebewang)
Changes
Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343
Patch is 100.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132414.diff
16 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index ea0d6df4a33c2..583f4534dfab2 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4615,7 +4615,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vcvttpd2dqs256_round_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char, _Constant int)">;
+ def vcvttpd2dqs256_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4627,7 +4627,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vcvttpd2udqs256_round_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char, _Constant int)">;
+ def vcvttpd2udqs256_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4639,7 +4639,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vcvttpd2qqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+ def vcvttpd2qqs256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4651,7 +4651,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vcvttpd2uqqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+ def vcvttpd2uqqs256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4663,7 +4663,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vcvttps2dqs256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char, _Constant int)">;
+ def vcvttps2dqs256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4675,7 +4675,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vcvttps2udqs256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char, _Constant int)">;
+ def vcvttps2udqs256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4687,7 +4687,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vcvttps2qqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+ def vcvttps2qqs256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4699,7 +4699,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
}
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
- def vcvttps2uqqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char, _Constant int)">;
+ def vcvttps2uqqs256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char)">;
}
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
diff --git a/clang/lib/Headers/avx10_2satcvtdsintrin.h b/clang/lib/Headers/avx10_2satcvtdsintrin.h
index 9dbfed42667ef..6509a4ebf9c77 100644
--- a/clang/lib/Headers/avx10_2satcvtdsintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtdsintrin.h
@@ -92,37 +92,22 @@ _mm_maskz_cvtts_pd_epi32(__mmask16 __U, __m128d __A) {
// 256 Bit : Double -> int
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtts_pd_epi32(__m256d __A) {
- return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_mask(
+ (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtts_pd_epi32(__m128i __W, __mmask8 __U, __m256d __A) {
- return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_mask(
+ (__v4df)__A, (__v4si)__W, __U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtts_pd_epi32(__mmask8 __U, __m256d __A) {
- return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
- (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m128i)__builtin_ia32_vcvttpd2dqs256_mask(
+ (__v4df)__A, (__v4si)_mm_setzero_si128(), __U));
}
-#define _mm256_cvtts_roundpd_epi32(__A, __R) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8) - 1, (int)(__R)))
-
-#define _mm256_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R)))
-
-#define _mm256_maskz_cvtts_roundpd_epi32(__U, __A, __R) \
- ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \
- (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(), \
- (__mmask8)__U, (int)(__R)))
-
// 128 Bit : Double -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtts_pd_epu32(__m128d __A) {
@@ -145,37 +130,22 @@ _mm_maskz_cvtts_pd_epu32(__mmask8 __U, __m128d __A) {
// 256 Bit : Double -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtts_pd_epu32(__m256d __A) {
- return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_mask(
+ (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtts_pd_epu32(__m128i __W, __mmask8 __U, __m256d __A) {
- return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_mask(
+ (__v4df)__A, (__v4si)__W, __U));
}
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtts_pd_epu32(__mmask8 __U, __m256d __A) {
- return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
- (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m128i)__builtin_ia32_vcvttpd2udqs256_mask(
+ (__v4df)__A, (__v4si)_mm_setzero_si128(), __U));
}
-#define _mm256_cvtts_roundpd_epu32(__A, __R) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(), \
- (__mmask8) - 1, (int)(__R)))
-
-#define _mm256_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R)))
-
-#define _mm256_maskz_cvtts_roundpd_epu32(__U, __A, __R) \
- ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \
- (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(), \
- (__mmask8)__U, (int)(__R)))
-
// 128 Bit : Double -> long
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtts_pd_epi64(__m128d __A) {
@@ -198,37 +168,22 @@ _mm_maskz_cvtts_pd_epi64(__mmask8 __U, __m128d __A) {
// 256 Bit : Double -> long
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtts_pd_epi64(__m256d __A) {
- return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_mask(
+ (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtts_pd_epi64(__m256i __W, __mmask8 __U, __m256d __A) {
- return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_mask(
+ (__v4df)__A, (__v4di)__W, __U));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtts_pd_epi64(__mmask8 __U, __m256d __A) {
- return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
- (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttpd2qqs256_mask(
+ (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U));
}
-#define _mm256_cvtts_roundpd_epi64(__A, __R) \
- ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
- (int)__R))
-
-#define _mm256_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)__A, (__v4di)__W, \
- (__mmask8)__U, (int)__R))
-
-#define _mm256_maskz_cvtts_roundpd_epi64(__U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \
- (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R))
-
// 128 Bit : Double -> ulong
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtts_pd_epu64(__m128d __A) {
@@ -252,37 +207,22 @@ _mm_maskz_cvtts_pd_epu64(__mmask8 __U, __m128d __A) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtts_pd_epu64(__m256d __A) {
- return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_mask(
+ (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtts_pd_epu64(__m256i __W, __mmask8 __U, __m256d __A) {
- return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_mask(
+ (__v4df)__A, (__v4di)__W, __U));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtts_pd_epu64(__mmask8 __U, __m256d __A) {
- return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
- (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_mask(
+ (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U));
}
-#define _mm256_cvtts_roundpd_epu64(__A, __R) \
- ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
- (int)__R))
-
-#define _mm256_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)__A, (__v4di)__W, (__mmask8)__U, (int)__R))
-
-#define _mm256_maskz_cvtts_roundpd_epu64(__U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \
- (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R))
-
// 128 Bit : float -> int
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epi32(__m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
@@ -304,38 +244,22 @@ _mm_maskz_cvtts_ps_epi32(__mmask8 __U, __m128 __A) {
// 256 Bit : float -> int
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtts_ps_epi32(__m256 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_mask(
+ (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtts_ps_epi32(__m256i __W, __mmask8 __U, __m256 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_mask(
+ (__v8sf)__A, (__v8si)__W, __U));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtts_ps_epi32(__mmask8 __U, __m256 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
- (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2dqs256_mask(
+ (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U));
}
-#define _mm256_cvtts_roundps_epi32(__A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(), \
- (__mmask8) - 1, (int)(__R)))
-
-#define _mm256_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R)))
-
-#define _mm256_maskz_cvtts_roundps_epi32(__U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \
- (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(), \
- (__mmask8)__U, (int)(__R)))
-
// 128 Bit : float -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epu32(__m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
@@ -358,38 +282,22 @@ _mm_maskz_cvtts_ps_epu32(__mmask8 __U, __m128 __A) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtts_ps_epu32(__m256 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_mask(
+ (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtts_ps_epu32(__m256i __W, __mmask8 __U, __m256 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_mask(
+ (__v8sf)__A, (__v8si)__W, __U));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtts_ps_epu32(__mmask8 __U, __m256 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
- (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2udqs256_mask(
+ (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U));
}
-#define _mm256_cvtts_roundps_epu32(__A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(), \
- (__mmask8) - 1, (int)(__R)))
-
-#define _mm256_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R)))
-
-#define _mm256_maskz_cvtts_roundps_epu32(__U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \
- (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(), \
- (__mmask8)__U, (int)(__R)))
-
// 128 bit : float -> long
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epi64(__m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
@@ -411,37 +319,21 @@ _mm_maskz_cvtts_ps_epi64(__mmask8 __U, __m128 __A) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtts_ps_epi64(__m128 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_mask(
+ (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtts_ps_epi64(__m256i __W, __mmask8 __U, __m128 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_mask(
+ (__v4sf)__A, (__v4di)__W, __U));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtts_ps_epi64(__mmask8 __U, __m128 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
- (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2qqs256_mask(
+ (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U));
}
-#define _mm256_cvtts_roundps_epi64(__A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \
- (int)__R))
-
-#define _mm256_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R))
-
-#define _mm256_maskz_cvtts_roundps_epi64(__U, __A, __R) \
- ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \
- (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, \
- (int)__R))
-
// 128 bit : float -> ulong
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtts_ps_epu64(__m128 __A) {
return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
@@ -463,38 +355,22 @@ _mm_maskz_cvtts_ps_epu64(__mmask8 __U, __m128 __A) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtts_ps_epu64(__m128 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
- _MM_FROUND_CUR_DIRECTION));
+ return ((__m256i)__builtin_ia32_vcvttps2uqqs256_mask(
+ (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1));
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtts_ps_epu64(__m256i __W, __mmask8 __U, __m128 __A) {
- return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
- (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
+ return ...
[truncated]
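For readers skimming the diff, here is a minimal usage sketch (not part of the patch) of the user-facing effect, assuming a compiler and target with the AVX10.2 256-bit feature enabled (e.g. -mavx10.2-256 at the time of this patch): the 256-bit _mm256_cvtts_round* macros that took an explicit rounding argument are removed, while the plain _mm256_cvtts_* intrinsics are unchanged. The wrapper function name below is purely illustrative.

#include <immintrin.h>

// Hypothetical example: truncating, saturating conversion of 4 doubles
// to 4 signed 32-bit ints. Before this patch the same operation was also
// reachable through
//   _mm256_cvtts_roundpd_epi32(v, _MM_FROUND_CUR_DIRECTION);
// that macro (and its _mask/_maskz variants) is removed here; the
// rounding-less intrinsic below keeps working.
__m128i cvt_pd_to_epi32(__m256d v) {
  return _mm256_cvtts_pd_epi32(v);
}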
✅ With the latest revision this PR passed the C/C++ code formatter.
LGTM with the clang-format warning fix on the header
Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343