Skip to content

Commit 5b12946

Browse files
committed
[X86][SSE] Add support for extracting constant bit data from broadcasted constants
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288499 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a985f0f commit 5b12946

File tree

4 files changed

+57
-46
lines changed

4 files changed

+57
-46
lines changed

lib/Target/X86/X86ISelLowering.cpp

+44-24
Original file line number | Diff line number | Diff line change
@@ -5138,6 +5138,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
51385138
assert(UndefElts.empty() && "Expected an empty UndefElts vector");
51395139
assert(EltBits.empty() && "Expected an empty EltBits vector");
51405140

5141+
Op = peekThroughBitcasts(Op);
5142+
51415143
EVT VT = Op.getValueType();
51425144
unsigned SizeInBits = VT.getSizeInBits();
51435145
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
@@ -5170,35 +5172,35 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
51705172
return true;
51715173
};
51725174

5173-
// Extract constant bits from constant pool scalar/vector.
5175+
auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask,
5176+
APInt &Undefs) {
5177+
if (!Cst)
5178+
return false;
5179+
unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
5180+
if (isa<UndefValue>(Cst)) {
5181+
Mask = APInt::getNullValue(SizeInBits);
5182+
Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits);
5183+
return true;
5184+
}
5185+
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5186+
Mask = CInt->getValue().zextOrTrunc(SizeInBits);
5187+
Undefs = APInt::getNullValue(SizeInBits);
5188+
return true;
5189+
}
5190+
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5191+
Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
5192+
Undefs = APInt::getNullValue(SizeInBits);
5193+
return true;
5194+
}
5195+
return false;
5196+
};
5197+
5198+
// Extract constant bits from constant pool vector.
51745199
if (auto *Cst = getTargetConstantFromNode(Op)) {
51755200
Type *CstTy = Cst->getType();
51765201
if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
51775202
return false;
51785203

5179-
auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask,
5180-
APInt &Undefs) {
5181-
if (!Cst)
5182-
return false;
5183-
unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
5184-
if (isa<UndefValue>(Cst)) {
5185-
Mask = APInt::getNullValue(SizeInBits);
5186-
Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits);
5187-
return true;
5188-
}
5189-
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5190-
Mask = CInt->getValue().zextOrTrunc(SizeInBits);
5191-
Undefs = APInt::getNullValue(SizeInBits);
5192-
return true;
5193-
}
5194-
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5195-
Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
5196-
Undefs = APInt::getNullValue(SizeInBits);
5197-
return true;
5198-
}
5199-
return false;
5200-
};
5201-
52025204
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
52035205
for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) {
52045206
APInt Bits, Undefs;
@@ -5211,9 +5213,27 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
52115213
return SplitBitData();
52125214
}
52135215

5216+
// Extract constant bits from a broadcasted constant pool scalar.
5217+
if (Op.getOpcode() == X86ISD::VBROADCAST &&
5218+
EltSizeInBits <= Op.getScalarValueSizeInBits()) {
5219+
if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5220+
APInt Bits, Undefs;
5221+
if (ExtractConstantBits(Broadcast, Bits, Undefs)) {
5222+
unsigned NumBroadcastBits = Op.getScalarValueSizeInBits();
5223+
unsigned NumBroadcastElts = SizeInBits / NumBroadcastBits;
5224+
for (unsigned i = 0; i != NumBroadcastElts; ++i) {
5225+
MaskBits |= Bits.shl(i * NumBroadcastBits);
5226+
UndefBits |= Undefs.shl(i * NumBroadcastBits);
5227+
}
5228+
return SplitBitData();
5229+
}
5230+
}
5231+
}
5232+
52145233
return false;
52155234
}
52165235

5236+
// TODO: Merge more of this with getTargetConstantBitsFromNode.
52175237
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
52185238
unsigned MaskEltSizeInBits,
52195239
SmallVectorImpl<uint64_t> &RawMask) {

test/CodeGen/X86/clear_upper_vector_element_bits.ll

+5-11
Original file line number | Diff line number | Diff line change
@@ -41,17 +41,11 @@ define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
4141
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
4242
; SSE-NEXT: retq
4343
;
44-
; AVX1-LABEL: _clearupper4xi32a:
45-
; AVX1: # BB#0:
46-
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
47-
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
48-
; AVX1-NEXT: retq
49-
;
50-
; AVX2-LABEL: _clearupper4xi32a:
51-
; AVX2: # BB#0:
52-
; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
53-
; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
54-
; AVX2-NEXT: retq
44+
; AVX-LABEL: _clearupper4xi32a:
45+
; AVX: # BB#0:
46+
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
47+
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
48+
; AVX-NEXT: retq
5549
%x0 = extractelement <4 x i32> %0, i32 0
5650
%x1 = extractelement <4 x i32> %0, i32 1
5751
%x2 = extractelement <4 x i32> %0, i32 2

test/CodeGen/X86/vec_int_to_fp.ll

+6-6
Original file line number | Diff line number | Diff line change
@@ -590,8 +590,8 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
590590
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
591591
; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
592592
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
593-
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
594-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
593+
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
594+
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
595595
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
596596
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
597597
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
@@ -889,8 +889,8 @@ define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) {
889889
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
890890
; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
891891
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
892-
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
893-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
892+
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
893+
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
894894
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
895895
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
896896
; AVX2-NEXT: retq
@@ -3274,8 +3274,8 @@ define <4 x double> @uitofp_load_4i32_to_4f64(<4 x i32> *%a) {
32743274
; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1
32753275
; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2
32763276
; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1
3277-
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
3278-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
3277+
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
3278+
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
32793279
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
32803280
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
32813281
; AVX2-NEXT: retq

test/CodeGen/X86/vec_uint_to_fp-fastmath.ll

+2-5
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,6 @@
2626
; AVX2: [[FPMASKCSTADDR:.LCPI[0-9_]+]]:
2727
; AVX2-NEXT: .long 1199570944 # float 65536
2828

29-
; AVX2: [[MASKCSTADDR:.LCPI[0-9_]+]]:
30-
; AVX2-NEXT: .long 65535 # 0xffff
31-
3229
define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
3330
; SSE2-LABEL: test_uitofp_v4i32_to_v4f32:
3431
; SSE2: # BB#0:
@@ -69,8 +66,8 @@ define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
6966
; AVX2-NEXT: vcvtdq2ps %xmm1, %xmm1
7067
; AVX2-NEXT: vbroadcastss [[FPMASKCSTADDR]](%rip), %xmm2
7168
; AVX2-NEXT: vmulps %xmm2, %xmm1, %xmm1
72-
; AVX2-NEXT: vpbroadcastd [[MASKCSTADDR]](%rip), %xmm2
73-
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
69+
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
70+
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
7471
; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
7572
; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
7673
; AVX2-NEXT: retq

0 commit comments

Comments
 (0)