-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
[X86] SimplifyDemandedBitsForTargetNode - add X86ISD::BLENDI handling #133102
Conversation
@llvm/pr-subscribers-backend-x86 — Author: Simon Pilgrim (RKSimon). Changes: full diff at https://github.com/llvm/llvm-project/pull/133102.diff — 4 files affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b128a6dadbbb6..72977923bac2b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44227,6 +44227,35 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
return false;
}
+ case X86ISD::BLENDI: {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ APInt Mask = getBLENDIBlendMask(Op);
+
+ APInt DemandedEltsLHS = OriginalDemandedElts & ~Mask;
+ if (SimplifyDemandedBits(LHS, OriginalDemandedBits, DemandedEltsLHS, Known,
+ TLO, Depth + 1))
+ return true;
+
+ APInt DemandedEltsRHS = OriginalDemandedElts & Mask;
+ if (SimplifyDemandedBits(RHS, OriginalDemandedBits, DemandedEltsRHS, Known,
+ TLO, Depth + 1))
+ return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue NewLHS = SimplifyMultipleUseDemandedBits(
+ LHS, OriginalDemandedBits, DemandedEltsLHS, TLO.DAG, Depth + 1);
+ SDValue NewRHS = SimplifyMultipleUseDemandedBits(
+ RHS, OriginalDemandedBits, DemandedEltsRHS, TLO.DAG, Depth + 1);
+ if (NewLHS || NewRHS) {
+ NewLHS = NewLHS ? NewLHS : LHS;
+ NewRHS = NewRHS ? NewRHS : RHS;
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT,
+ NewLHS, NewRHS, Op.getOperand(2)));
+ }
+ break;
+ }
case X86ISD::BLENDV: {
SDValue Sel = Op.getOperand(0);
SDValue LHS = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index 6da4102bc2ecd..303873599ca52 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -75,39 +75,15 @@ define i1 @movmskps_allof_bitcast_v4f64(<4 x double> %a0) {
}
;
-; TODO - Avoid sign extension ops when just extracting the sign bits.
+; Avoid sign extension ops when just extracting the sign bits.
;
define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
-; VTEST-AVX1-LABEL: movmskpd_cmpgt_v4i64:
-; VTEST-AVX1: # %bb.0:
-; VTEST-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VTEST-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1
-; VTEST-AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; VTEST-AVX1-NEXT: vmovmskpd %ymm0, %eax
-; VTEST-AVX1-NEXT: vzeroupper
-; VTEST-AVX1-NEXT: retq
-;
-; VTEST-AVX2-LABEL: movmskpd_cmpgt_v4i64:
-; VTEST-AVX2: # %bb.0:
-; VTEST-AVX2-NEXT: vmovmskpd %ymm0, %eax
-; VTEST-AVX2-NEXT: vzeroupper
-; VTEST-AVX2-NEXT: retq
-;
-; MOVMSK-AVX1-LABEL: movmskpd_cmpgt_v4i64:
-; MOVMSK-AVX1: # %bb.0:
-; MOVMSK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; MOVMSK-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm1
-; MOVMSK-AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; MOVMSK-AVX1-NEXT: vmovmskpd %ymm0, %eax
-; MOVMSK-AVX1-NEXT: vzeroupper
-; MOVMSK-AVX1-NEXT: retq
-;
-; MOVMSK-AVX2-LABEL: movmskpd_cmpgt_v4i64:
-; MOVMSK-AVX2: # %bb.0:
-; MOVMSK-AVX2-NEXT: vmovmskpd %ymm0, %eax
-; MOVMSK-AVX2-NEXT: vzeroupper
-; MOVMSK-AVX2-NEXT: retq
+; CHECK-LABEL: movmskpd_cmpgt_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovmskpd %ymm0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%1 = icmp sgt <4 x i64> zeroinitializer, %a0
%2 = sext <4 x i1> %1 to <4 x i64>
%3 = bitcast <4 x i64> %2 to <4 x double>
@@ -116,33 +92,11 @@ define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
}
define i32 @movmskps_ashr_v8i32(<8 x i32> %a0) {
-; VTEST-AVX1-LABEL: movmskps_ashr_v8i32:
-; VTEST-AVX1: # %bb.0:
-; VTEST-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
-; VTEST-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; VTEST-AVX1-NEXT: vmovmskps %ymm0, %eax
-; VTEST-AVX1-NEXT: vzeroupper
-; VTEST-AVX1-NEXT: retq
-;
-; VTEST-AVX2-LABEL: movmskps_ashr_v8i32:
-; VTEST-AVX2: # %bb.0:
-; VTEST-AVX2-NEXT: vmovmskps %ymm0, %eax
-; VTEST-AVX2-NEXT: vzeroupper
-; VTEST-AVX2-NEXT: retq
-;
-; MOVMSK-AVX1-LABEL: movmskps_ashr_v8i32:
-; MOVMSK-AVX1: # %bb.0:
-; MOVMSK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
-; MOVMSK-AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; MOVMSK-AVX1-NEXT: vmovmskps %ymm0, %eax
-; MOVMSK-AVX1-NEXT: vzeroupper
-; MOVMSK-AVX1-NEXT: retq
-;
-; MOVMSK-AVX2-LABEL: movmskps_ashr_v8i32:
-; MOVMSK-AVX2: # %bb.0:
-; MOVMSK-AVX2-NEXT: vmovmskps %ymm0, %eax
-; MOVMSK-AVX2-NEXT: vzeroupper
-; MOVMSK-AVX2-NEXT: retq
+; CHECK-LABEL: movmskps_ashr_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovmskps %ymm0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
%2 = bitcast <8 x i32> %1 to <8 x float>
%3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 42f09d04da26e..1ae1d61091362 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -2187,15 +2187,14 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pcmpgtb %xmm1, %xmm3
-; SSE41-NEXT: pxor %xmm4, %xmm4
-; SSE41-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT: paddw %xmm2, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5],xmm2[6],xmm4[7]
-; SSE41-NEXT: psrlw $8, %xmm2
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,2,2,2,2,128,2,128]
; SSE41-NEXT: psrlw $8, %xmm3
+; SSE41-NEXT: paddw %xmm4, %xmm4
+; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2],xmm2[3,4,5],xmm4[6],xmm2[7]
+; SSE41-NEXT: psrlw $8, %xmm2
; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: paddb %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
@@ -2223,15 +2222,15 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT: vpaddw %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3,4,5],xmm4[6],xmm3[7]
-; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,2,2,2,2,128,2,128]
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpmovsxbw %xmm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpsraw $8, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
index c0c93646b5aaf..1a5c3730c1839 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -273,8 +273,7 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm4
-; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm4
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,16,241,57,27,205,135,187]
@@ -711,8 +710,7 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
; AVX1-NEXT: vpor %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm5
-; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5
+; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm5
; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,16,241,57,27,205,135,187]
; CHECK: # %bb.0:
; CHECK-NEXT: vmovmskpd %ymm0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = icmp sgt <4 x i64> zeroinitializer, %a0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sgt or sge?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sgt - we're trying to splat the sign mask
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/18975 — here is the relevant piece of the build log for reference.
No description provided.