Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86] SimplifyDemandedBitsForTargetNode - add X86ISD::BLENDI handling #133102

Merged
RKSimon merged 1 commit into llvm:main from x86-demandedbits-blendi
Mar 27, 2025

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Mar 26, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Mar 26, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/133102.diff

4 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+29)
  • (modified) llvm/test/CodeGen/X86/combine-movmsk-avx.ll (+11-57)
  • (modified) llvm/test/CodeGen/X86/combine-sdiv.ll (+11-12)
  • (modified) llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll (+2-4)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b128a6dadbbb6..72977923bac2b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44227,6 +44227,35 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     }
     return false;
   }
+  case X86ISD::BLENDI: {
+    SDValue LHS = Op.getOperand(0);
+    SDValue RHS = Op.getOperand(1);
+    APInt Mask = getBLENDIBlendMask(Op);
+
+    APInt DemandedEltsLHS = OriginalDemandedElts & ~Mask;
+    if (SimplifyDemandedBits(LHS, OriginalDemandedBits, DemandedEltsLHS, Known,
+                             TLO, Depth + 1))
+      return true;
+
+    APInt DemandedEltsRHS = OriginalDemandedElts & Mask;
+    if (SimplifyDemandedBits(RHS, OriginalDemandedBits, DemandedEltsRHS, Known,
+                             TLO, Depth + 1))
+      return true;
+
+    // Attempt to avoid multi-use ops if we don't need anything from them.
+    SDValue NewLHS = SimplifyMultipleUseDemandedBits(
+        LHS, OriginalDemandedBits, DemandedEltsLHS, TLO.DAG, Depth + 1);
+    SDValue NewRHS = SimplifyMultipleUseDemandedBits(
+        RHS, OriginalDemandedBits, DemandedEltsRHS, TLO.DAG, Depth + 1);
+    if (NewLHS || NewRHS) {
+      NewLHS = NewLHS ? NewLHS : LHS;
+      NewRHS = NewRHS ? NewRHS : RHS;
+      return TLO.CombineTo(Op,
+                           TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT,
+                                           NewLHS, NewRHS, Op.getOperand(2)));
+    }
+    break;
+  }
   case X86ISD::BLENDV: {
     SDValue Sel = Op.getOperand(0);
     SDValue LHS = Op.getOperand(1);
diff --git a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
index 6da4102bc2ecd..303873599ca52 100644
--- a/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
+++ b/llvm/test/CodeGen/X86/combine-movmsk-avx.ll
@@ -75,39 +75,15 @@ define i1 @movmskps_allof_bitcast_v4f64(<4 x double> %a0) {
 }
 
 ;
-; TODO - Avoid sign extension ops when just extracting the sign bits.
+; Avoid sign extension ops when just extracting the sign bits.
 ;
 
 define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
-; VTEST-AVX1-LABEL: movmskpd_cmpgt_v4i64:
-; VTEST-AVX1:       # %bb.0:
-; VTEST-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VTEST-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
-; VTEST-AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; VTEST-AVX1-NEXT:    vmovmskpd %ymm0, %eax
-; VTEST-AVX1-NEXT:    vzeroupper
-; VTEST-AVX1-NEXT:    retq
-;
-; VTEST-AVX2-LABEL: movmskpd_cmpgt_v4i64:
-; VTEST-AVX2:       # %bb.0:
-; VTEST-AVX2-NEXT:    vmovmskpd %ymm0, %eax
-; VTEST-AVX2-NEXT:    vzeroupper
-; VTEST-AVX2-NEXT:    retq
-;
-; MOVMSK-AVX1-LABEL: movmskpd_cmpgt_v4i64:
-; MOVMSK-AVX1:       # %bb.0:
-; MOVMSK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; MOVMSK-AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
-; MOVMSK-AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
-; MOVMSK-AVX1-NEXT:    vmovmskpd %ymm0, %eax
-; MOVMSK-AVX1-NEXT:    vzeroupper
-; MOVMSK-AVX1-NEXT:    retq
-;
-; MOVMSK-AVX2-LABEL: movmskpd_cmpgt_v4i64:
-; MOVMSK-AVX2:       # %bb.0:
-; MOVMSK-AVX2-NEXT:    vmovmskpd %ymm0, %eax
-; MOVMSK-AVX2-NEXT:    vzeroupper
-; MOVMSK-AVX2-NEXT:    retq
+; CHECK-LABEL: movmskpd_cmpgt_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovmskpd %ymm0, %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %1 = icmp sgt <4 x i64> zeroinitializer, %a0
   %2 = sext <4 x i1> %1 to <4 x i64>
   %3 = bitcast <4 x i64> %2 to <4 x double>
@@ -116,33 +92,11 @@ define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
 }
 
 define i32 @movmskps_ashr_v8i32(<8 x i32> %a0)  {
-; VTEST-AVX1-LABEL: movmskps_ashr_v8i32:
-; VTEST-AVX1:       # %bb.0:
-; VTEST-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
-; VTEST-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; VTEST-AVX1-NEXT:    vmovmskps %ymm0, %eax
-; VTEST-AVX1-NEXT:    vzeroupper
-; VTEST-AVX1-NEXT:    retq
-;
-; VTEST-AVX2-LABEL: movmskps_ashr_v8i32:
-; VTEST-AVX2:       # %bb.0:
-; VTEST-AVX2-NEXT:    vmovmskps %ymm0, %eax
-; VTEST-AVX2-NEXT:    vzeroupper
-; VTEST-AVX2-NEXT:    retq
-;
-; MOVMSK-AVX1-LABEL: movmskps_ashr_v8i32:
-; MOVMSK-AVX1:       # %bb.0:
-; MOVMSK-AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
-; MOVMSK-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
-; MOVMSK-AVX1-NEXT:    vmovmskps %ymm0, %eax
-; MOVMSK-AVX1-NEXT:    vzeroupper
-; MOVMSK-AVX1-NEXT:    retq
-;
-; MOVMSK-AVX2-LABEL: movmskps_ashr_v8i32:
-; MOVMSK-AVX2:       # %bb.0:
-; MOVMSK-AVX2-NEXT:    vmovmskps %ymm0, %eax
-; MOVMSK-AVX2-NEXT:    vzeroupper
-; MOVMSK-AVX2-NEXT:    retq
+; CHECK-LABEL: movmskps_ashr_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovmskps %ymm0, %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
   %2 = bitcast <8 x i32> %1 to <8 x float>
   %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll
index 42f09d04da26e..1ae1d61091362 100644
--- a/llvm/test/CodeGen/X86/combine-sdiv.ll
+++ b/llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -2187,15 +2187,14 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
 ; SSE41-NEXT:    pxor %xmm0, %xmm0
 ; SSE41-NEXT:    pxor %xmm3, %xmm3
 ; SSE41-NEXT:    pcmpgtb %xmm1, %xmm3
-; SSE41-NEXT:    pxor %xmm4, %xmm4
-; SSE41-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT:    paddw %xmm2, %xmm2
-; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5],xmm2[6],xmm4[7]
-; SSE41-NEXT:    psrlw $8, %xmm2
+; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
 ; SSE41-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
 ; SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,2,2,2,2,128,2,128]
 ; SSE41-NEXT:    psrlw $8, %xmm3
+; SSE41-NEXT:    paddw %xmm4, %xmm4
+; SSE41-NEXT:    pmovsxbw %xmm1, %xmm2
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2],xmm2[3,4,5],xmm4[6],xmm2[7]
+; SSE41-NEXT:    psrlw $8, %xmm2
 ; SSE41-NEXT:    packuswb %xmm3, %xmm2
 ; SSE41-NEXT:    paddb %xmm1, %xmm2
 ; SSE41-NEXT:    movdqa %xmm2, %xmm0
@@ -2223,15 +2222,15 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm2
-; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3,4,5],xmm4[6],xmm3[7]
-; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
 ; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,2,2,2,2,128,2,128]
 ; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm3
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2],xmm3[3,4,5],xmm2[6],xmm3[7]
+; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; AVX1-NEXT:    vpsraw $8, %xmm2, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
index c0c93646b5aaf..1a5c3730c1839 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
@@ -273,8 +273,7 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
 ; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm4
-; AVX1-NEXT:    vpsllw $7, %xmm4, %xmm4
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm4
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7]
 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
 ; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,16,241,57,27,205,135,187]
@@ -711,8 +710,7 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind {
 ; AVX1-NEXT:    vpor %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
 ; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm5
-; AVX1-NEXT:    vpsllw $7, %xmm5, %xmm5
+; AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm5
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
 ; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
 ; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,16,241,57,27,205,135,187]

; CHECK: # %bb.0:
; CHECK-NEXT: vmovmskpd %ymm0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = icmp sgt <4 x i64> zeroinitializer, %a0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sgt or sge?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sgt - we're trying to splat the sign mask

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@RKSimon RKSimon merged commit f7a3334 into llvm:main Mar 27, 2025
13 checks passed
@RKSimon RKSimon deleted the x86-demandedbits-blendi branch March 27, 2025 09:32
@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 27, 2025

LLVM Buildbot has detected a new failure on builder lldb-x86_64-debian running on lldb-x86_64-debian while building llvm at step 6 "test".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/18975

Here is the relevant piece of the build log for the reference
Step 6 (test) failure: build (failure)
...
PASS: lldb-api :: commands/gui/spawn-threads/TestGuiSpawnThreads.py (35 of 2802)
PASS: lldb-api :: functionalities/process_save_core_minidump/TestProcessSaveCoreMinidump.py (36 of 2802)
PASS: lldb-api :: functionalities/recursion/TestValueObjectRecursion.py (37 of 2802)
PASS: lldb-api :: tools/lldb-dap/variables/TestDAP_variables.py (38 of 2802)
PASS: lldb-api :: lang/cpp/namespace/TestNamespace.py (39 of 2802)
PASS: lldb-api :: commands/target/create-deps/TestTargetCreateDeps.py (40 of 2802)
PASS: lldb-api :: functionalities/gdb_remote_client/TestXMLRegisterFlags.py (41 of 2802)
PASS: lldb-api :: functionalities/breakpoint/thread_plan_user_breakpoint/TestThreadPlanUserBreakpoint.py (42 of 2802)
PASS: lldb-api :: functionalities/gdb_remote_client/TestRegDefinitionInParts.py (43 of 2802)
PASS: lldb-api :: types/TestLongTypes.py (44 of 2802)
FAIL: lldb-api :: tools/lldb-dap/launch/TestDAP_launch.py (45 of 2802)
******************** TEST 'lldb-api :: tools/lldb-dap/launch/TestDAP_launch.py' FAILED ********************
Script:
--
/usr/bin/python3 /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/dotest.py -u CXXFLAGS -u CFLAGS --env LLVM_LIBS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./lib --env LLVM_INCLUDE_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/include --env LLVM_TOOLS_DIR=/home/worker/2.0.1/lldb-x86_64-debian/build/./bin --arch x86_64 --build-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex --lldb-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-lldb/lldb-api --clang-module-cache-dir /home/worker/2.0.1/lldb-x86_64-debian/build/lldb-test-build.noindex/module-cache-clang/lldb-api --executable /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/lldb --compiler /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/clang --dsymutil /home/worker/2.0.1/lldb-x86_64-debian/build/./bin/dsymutil --make /usr/bin/gmake --llvm-tools-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./bin --lldb-obj-root /home/worker/2.0.1/lldb-x86_64-debian/build/tools/lldb --lldb-libs-dir /home/worker/2.0.1/lldb-x86_64-debian/build/./lib -t /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/tools/lldb-dap/launch -p TestDAP_launch.py
--
Exit Code: 1

Command Output (stdout):
--
lldb version 21.0.0git (https://github.com/llvm/llvm-project.git revision f7a3334016580d4e69134b574b8b4081d348ff83)
  clang revision f7a3334016580d4e69134b574b8b4081d348ff83
  llvm revision f7a3334016580d4e69134b574b8b4081d348ff83
Skipping the following test categories: ['libc++', 'dsym', 'gmodules', 'debugserver', 'objc']

--
Command Output (stderr):
--
Change dir to: /home/worker/2.0.1/lldb-x86_64-debian/llvm-project/lldb/test/API/tools/lldb-dap/launch
runCmd: settings clear -all

output: 

runCmd: settings set symbols.enable-external-lookup false

output: 

runCmd: settings set target.inherit-tcc true

output: 

runCmd: settings set target.disable-aslr false

output: 

runCmd: settings set target.detach-on-error false

output: 


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants