From 6c1c97c5b6744397063d9976bead154be38b8388 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 5 Jul 2024 16:53:06 +0100 Subject: [PATCH] [InstCombine][X86] Peek through bitcast+shuffle+bitcast sequence when folding BLENDV to SELECT Mentioned on #96882 --- llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp | 5 +++-- llvm/test/Transforms/InstCombine/X86/blend_x86.ll | 10 +++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index 8eea368b5f86f9..322cb6f6f5819b 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -2882,6 +2882,8 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { return SelectInst::Create(NewSelector, Op1, Op0, "blendv"); } + Mask = InstCombiner::peekThroughBitcast(Mask); + // Peek through a one-use shuffle - VectorCombine should have simplified // this for cases where we're splitting wider vectors to use blendv // intrinsics. @@ -2895,13 +2897,12 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { any_of(ShuffleMask, [NumElts](int M) { return M < 0 || M >= NumElts; })) break; - Mask = MaskSrc; + Mask = InstCombiner::peekThroughBitcast(MaskSrc); } // Convert to a vector select if we can bypass casts and find a boolean // vector condition value. Value *BoolVec; - Mask = InstCombiner::peekThroughBitcast(Mask); if (match(Mask, m_SExt(m_Value(BoolVec))) && BoolVec->getType()->isVectorTy() && BoolVec->getType()->getScalarSizeInBits() == 1) { diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll index 6ed9acd718ccc0..aa49f493c9fa1a 100644 --- a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll +++ b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll @@ -285,17 +285,13 @@ define <2 x i64> @sel_v16i8_sse_reality(ptr nocapture readonly %x, <2 x i64> %y, define <4 x float> @sel_v16i8_bitcast_shuffle_bitcast_cmp(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) { ; CHECK-LABEL: @sel_v16i8_bitcast_shuffle_bitcast_cmp( ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> ; CHECK-NEXT: [[A_BC:%.*]] = bitcast <8 x float> [[A]] to <8 x i32> ; CHECK-NEXT: [[B_BC:%.*]] = bitcast <8 x float> [[B]] to <8 x i32> -; CHECK-NEXT: [[SEXT_LO:%.*]] = shufflevector <8 x i32> [[SEXT]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[A_LO:%.*]] = shufflevector <8 x i32> [[A_BC]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[B_LO:%.*]] = shufflevector <8 x i32> [[B_BC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[A_LO_BC:%.*]] = bitcast <4 x i32> [[A_LO]] to <16 x i8> -; CHECK-NEXT: [[B_LO_BC:%.*]] = bitcast <4 x i32> [[B_LO]] to <16 x i8> -; CHECK-NEXT: [[SEXT_LO_BC:%.*]] = bitcast <4 x i32> [[SEXT_LO]] to <16 x i8> -; CHECK-NEXT: [[BLENDV:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A_LO_BC]], <16 x i8> [[B_LO_BC]], <16 x i8> [[SEXT_LO_BC]]) -; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i8> [[BLENDV]] to <4 x float> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[B_LO]], <4 x i32> [[A_LO]] +; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> ; CHECK-NEXT: ret <4 x float> [[RES]] ; %cmp = fcmp olt <8 x float> %a, %b